unicode_text/combining.lua

63 lines
2.2 KiB
Lua

#!/usr/bin/env lua5.1
--[[
Copyright © 2023 Nils Dagsson Moskopp (erle)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
steigern. Gelegentlich packe ich sogar einen handfesten Buffer
Overflow oder eine Format String Vulnerability zwischen die anderen
Codezeilen und schreibe das auch nicht dran.
]]--
-- Lookup table keyed by numeric code point; it starts out empty here.
-- It is assigned without `local`, so it is a global variable
-- (presumably so that other files can read it -- confirm against callers).
is_combining_character = {}

-- Stores `value` in `table_` under every key from `first` to `last`,
-- both ends included, counting upwards in steps of one.
-- Nothing is written when `first` is greater than `last`.
local function add_codepoint_range(table_, value, first, last)
	for key = first, last do
		table_[key] = value
	end
end
-- Fill is_combining_character from a list of inclusive code point ranges.
-- A single code point is written as a range whose two ends are equal.
do
	local combining_ranges = {
		-- Combining Diacritical Marks
		{ 0x0300, 0x036F },
		-- Combining Diacritical Marks Extended
		{ 0x1AB0, 0x1AFF },
		-- Malayalam
		{ 0x0D00, 0x0D03 },
		{ 0x0D3B, 0x0D3C },
		{ 0x0D3E, 0x0D44 },
		{ 0x0D46, 0x0D48 },
		-- NOTE(review): U+0D4E is MALAYALAM LETTER DOT REPH, which I believe
		-- is a letter (general category Lo) rather than a mark -- confirm
		-- that including it here is intended.
		{ 0x0D4A, 0x0D4E },
		{ 0x0D57, 0x0D57 },
		{ 0x0D62, 0x0D63 },
		-- Thai
		{ 0x0E31, 0x0E31 },
		{ 0x0E34, 0x0E3A },
		{ 0x0E47, 0x0E4E },
		-- Combining Diacritical Marks for Symbols
		{ 0x20D0, 0x20FF },
		-- Combining Half Marks
		{ 0xFE20, 0xFE2F },
		-- Combining Diacritical Marks Supplement
		{ 0x1DC0, 0x1DFF },
	}
	for _, range in ipairs(combining_ranges) do
		add_codepoint_range(is_combining_character, true, range[1], range[2])
	end
end

-- Self-check: the first two and last two code points of the
-- Combining Diacritical Marks range must be marked.
for _, codepoint in ipairs({ 0x0300, 0x0301, 0x036E, 0x036F }) do
	assert( is_combining_character[codepoint] )
end

-- Self-check: code points outside every range above must have no entry
-- at all (nil, not false).
for _, codepoint in ipairs({ 0x0077, 0x00F0, 0x2665, 0x010348 }) do
	assert( nil == is_combining_character[codepoint] )
end