* Rewrite everything

This commit is contained in:
Nils Dagsson Moskopp 2023-03-17 03:39:41 +01:00
parent fbc7ddaa5f
commit 7376c3e7b7
7 changed files with 725 additions and 0 deletions

1
U+FFFD.hex Normal file
View File

@ -0,0 +1 @@
FFFD:0000018003C006600C301998399C7F3E7E7E3E7C1FF80E70066003C001800000

62
combining.lua Normal file
View File

@ -0,0 +1,62 @@
#!/usr/bin/env lua5.1
--[[
Copyright © 2023 Nils Dagsson Moskopp (erle)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
steigern. Gelegentlich packe ich sogar einen handfesten Buffer
Overflow oder eine Format String Vulnerability zwischen die anderen
Codezeilen und schreibe das auch nicht dran.
]]--
-- Lookup table: is_combining_character[codepoint] is true for every
-- codepoint in the ranges listed below and nil for any other codepoint.
-- The table is a global, so that a file that loads this one with
-- dofile() can use it.
is_combining_character = {}

-- Every entry is { first, last }, an inclusive range of codepoints.
local combining_ranges = {
	-- Combining Diacritical Marks
	{ 0x0300, 0x036F },
	-- Combining Diacritical Marks Extended
	{ 0x1AB0, 0x1AFF },
	-- Malayalam
	{ 0x0D00, 0x0D03 },
	{ 0x0D3B, 0x0D3C },
	{ 0x0D3E, 0x0D44 },
	{ 0x0D46, 0x0D48 },
	{ 0x0D4A, 0x0D4E },
	{ 0x0D57, 0x0D57 },
	{ 0x0D62, 0x0D63 },
	-- Thai
	{ 0x0E31, 0x0E31 },
	{ 0x0E34, 0x0E3A },
	{ 0x0E47, 0x0E4E },
	-- Combining Diacritical Marks for Symbols
	{ 0x20D0, 0x20FF },
	-- Combining Half Marks
	{ 0xFE20, 0xFE2F },
	-- Combining Diacritical Marks Supplement
	{ 0x1DC0, 0x1DFF },
}

for _, range in ipairs(combining_ranges) do
	for codepoint = range[1], range[2] do
		is_combining_character[codepoint] = true
	end
end

-- Test: both ends of the first range are marked as combining
assert( is_combining_character[0x0300] )
assert( is_combining_character[0x0301] )
assert( is_combining_character[0x036E] )
assert( is_combining_character[0x036F] )

-- Test: codepoints outside of all ranges are not in the table
assert( nil == is_combining_character[0x0077] )
assert( nil == is_combining_character[0x00F0] )
assert( nil == is_combining_character[0x2665] )
assert( nil == is_combining_character[0x010348] )

360
hexfont.lua Executable file
View File

@ -0,0 +1,360 @@
#!/usr/bin/env lua5.1
--[[
Copyright © 2023 Nils Dagsson Moskopp (erle)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
steigern. Gelegentlich packe ich sogar einen handfesten Buffer
Overflow oder eine Format String Vulnerability zwischen die anderen
Codezeilen und schreibe das auch nicht dran.
]]--
dofile("combining.lua")
dofile("pixelops.lua")
dofile("utf8.lua")
-- Lookup table from one uppercase hexadecimal digit to its four bits.
-- A lookup table was chosen for readability; please keep it a table
-- and do not rewrite it as a function.
local hex_to_bin = {
	["0"] = "0000",
	["1"] = "0001",
	["2"] = "0010",
	["3"] = "0011",
	["4"] = "0100",
	["5"] = "0101",
	["6"] = "0110",
	["7"] = "0111",
	["8"] = "1000",
	["9"] = "1001",
	["A"] = "1010",
	["B"] = "1011",
	["C"] = "1100",
	["D"] = "1101",
	["E"] = "1110",
	["F"] = "1111",
}

-- Convert one hexadecimal glyph bitmap to pixels accepted by tga_encoder.
--
-- Parameters:
--   bitmap_hex  string of uppercase hexadecimal digits, one bit per
--               pixel, starting with the top left pixel. The glyph is
--               16 pixels high and either 8 pixels wide (32 digits) or
--               16 pixels wide (64 digits).
--   properties  optional table with the following optional fields:
--     background_color  pixel used for 0 bits (default: { 0 })
--     foreground_color  pixel used for 1 bits (default: { 255 })
--     kerning           boolean (default: false), see below
--     scanline_order    "bottom-top" (default) or "top-bottom"
--
-- properties.background_color and properties.foreground_color must
-- have the same amount of entries. Use one entry for grayscale or
-- colormapped (palette) output, use three entries for RGB and four
-- entries for RGBA.
--
-- Returns a table with 16 scanlines, every scanline is a table of
-- pixels. Pixels are not copied: every pixel is the very same table
-- as the background color or the foreground color that was used, so
-- the two can be told apart by comparing references.
--
-- Raises an assertion error if an argument does not meet the
-- requirements stated above.
local bitmap_to_pixels = function(bitmap_hex, properties)
	-- bitmap_hex must be a string of uppercase hexadecimal digits
	assert(
		"string" == type(bitmap_hex) and
		bitmap_hex:match("[0123456789ABCDEF]+") == bitmap_hex
	)
	properties = properties or {}
	assert(
		"table" == type(properties)
	)
	local background_color = properties.background_color or { 0 }
	local foreground_color = properties.foreground_color or { 255 }
	-- background and foreground color must have equal color depth
	assert(
		#background_color == #foreground_color
	)
	-- index 1 is used for a 0 bit, index 2 is used for a 1 bit
	local colormap = {
		background_color,
		foreground_color,
	}
	local kerning = properties.kerning or false
	assert(
		"boolean" == type(kerning)
	)
	-- scanline order “bottom-top” was chosen as the default to match
	-- the default scanline order of tga_encoder and to require users
	-- using another file format encoder to care about scanline order
	-- (users who “do not care about scanline order” might find their
	-- glyphs upside down)
	local scanline_order = properties.scanline_order or "bottom-top"
	assert(
		"bottom-top" == scanline_order or
		"top-bottom" == scanline_order
	)
	local height = 16
	local width = bitmap_hex:len() * 4 / height
	assert(
		16 == width or -- full-width character
		8 == width -- half-width character
	)
	-- convert hexadecimal bitmap to binary bitmap
	local bitmap_bin_table = {}
	for i = 1, #bitmap_hex do
		local character = bitmap_hex:sub(i, i)
		bitmap_bin_table[i] = hex_to_bin[character]
	end
	-- this must be a local variable, otherwise every call would
	-- overwrite a global variable named bitmap_bin
	local bitmap_bin = table.concat(bitmap_bin_table)
	-- decode binary bitmap with “top-bottom” scanline order
	-- (i.e. the first encoded pixel is the top left pixel)
	local pixels = {}
	for scanline = 1, height do
		pixels[scanline] = {}
		for w = 1, width do
			local i = ( ( scanline - 1 ) * width ) + w
			pixels[scanline][w] = colormap[tonumber(bitmap_bin:sub(i, i)) + 1]
		end
	end
	-- flip image upside down for ”bottom-top” scanline order
	-- (i.e. the first encoded pixel is the bottom left pixel)
	if "bottom-top" == scanline_order then
		pixels = pixelops.flip_vertical(pixels)
	end
	if kerning then
		-- remove rightmost column if it is empty
		-- (pixels are compared by reference, see above)
		local remove_rightmost_column = true
		for h = 1, height do
			if foreground_color == pixels[h][width] then
				remove_rightmost_column = false
			end
		end
		if remove_rightmost_column then
			for h = 1, height do
				pixels[h][width] = nil
			end
		end
		-- remove leftmost column if it and the column to its right are
		-- both empty, glyphs touch too often without the extra check
		local remove_leftmost_column = true
		for h = 1, height do
			if (
				foreground_color == pixels[h][1] or
				foreground_color == pixels[h][2]
			) then
				remove_leftmost_column = false
			end
		end
		if remove_leftmost_column then
			-- shift every scanline one pixel to the left; the last
			-- iteration reads beyond the scanline and therefore
			-- removes the rightmost pixel
			for h = 1, height do
				for w = 1, width do
					pixels[h][w] = pixels[h][w+1]
				end
			end
		end
	end
	return pixels
end
-- hexfont is the prototype of all font objects. Calling it, i.e.
-- hexfont(...), creates a new font object that looks up every
-- missing field in the prototype, runs the constructor of the new
-- object with the given arguments and returns the new object.
hexfont = {}
setmetatable(hexfont, {
	__call = function(prototype, ...)
		local instance = {}
		setmetatable(instance, { __index = prototype })
		instance:constructor(...)
		return instance
	end,
})
-- Return an iterator function for use in a generic for loop. Every
-- call returns the next element of the sequence table_, starting at
-- index 1. After the last element it returns nothing. The length of
-- table_ is determined only once, when the iterator is created.
local iter = function(table_)
	local length = #table_
	local position = 0
	local next_element = function()
		position = position + 1
		if position > length then
			return
		end
		return table_[position]
	end
	return next_element
end
-- Constructor, called on every new font object created by hexfont().
-- It loads a minimal font that contains a single glyph, which is
-- the glyph that render_line() uses for codepoints without a glyph.
hexfont.constructor = function(self)
	-- U+FFFD REPLACEMENT CHARACTER
	local replacement_character =
		"FFFD:0000018003C006600C301998399C7F3E7E7E3E7C1FF80E70066003C001800000"
	local minimal_hexfont = { replacement_character }
	self:load_glyphs(iter(minimal_hexfont))
end
-- Load glyphs into a font object.
--
-- iterator must be a function that returns one line per call and nil
-- after the last line. Every line must contain a codepoint and a glyph
-- bitmap, both written with uppercase hexadecimal digits and separated
-- by a colon. The bitmap is stored as a string in self[codepoint],
-- with the codepoint converted to a number; a glyph that was loaded
-- before for the same codepoint is replaced.
--
-- Raises an error if a line does not contain a codepoint and a bitmap.
--
-- Usage (load_glyphs is a method, so call it with a colon):
--   local font = hexfont()
--   font:load_glyphs(io.lines("unifont.hex"))
--   font:load_glyphs(io.lines("unifont_upper.hex"))
hexfont.load_glyphs = function(self, iterator)
	assert( "function" == type(iterator) )
	for line in iterator do
		assert("string" == type(line))
		local codepoint_hex, bitmap_hex = line:match(
			"([0123456789ABCDEF]+):([01234567890ABCDEF]+)"
		)
		-- fail with a helpful message instead of passing nil to tonumber()
		if nil == codepoint_hex then
			error("malformed glyph line: " .. line)
		end
		-- this must be a local variable, otherwise loading a font
		-- would overwrite a global variable named codepoint
		local codepoint = tonumber(codepoint_hex, 16)
		self[codepoint] = bitmap_hex
	end
end
-- Test: the constructor loads the glyph for U+FFFD into a new font
do
	local replacement_glyph = hexfont()[0xFFFD]
	assert(
		"0000018003C006600C301998399C7F3E7E7E3E7C1FF80E70066003C001800000" == replacement_glyph
	)
end
-- Render one line of text to pixels.
--
-- Parameters:
--   text        UTF-8 encoded string; line breaks get no special
--               treatment here
--   properties  optional table with the following optional fields:
--     background_color  pixel for the background (default: { 0 })
--     foreground_color  pixel for the glyphs (default: { 255 })
--     minimal_width     number; validated, but not used for rendering
--     tabulator_size    distance of tab stops in pixels (default: 64)
--     kerning and scanline_order are passed on to bitmap_to_pixels()
--
-- Returns a table with 16 scanlines, every scanline is a table of
-- pixels. A codepoint without a glyph is rendered as U+FFFD.
hexfont.render_line = function(self, text, properties)
	assert(
		"string" == type(text)
	)
	properties = properties or {}
	assert(
		"table" == type(properties)
	)
	-- default colors are black (0) & white (255) with one channel
	local background_color = properties.background_color or { 0 }
	local foreground_color = properties.foreground_color or { 255 }
	-- background and foreground color must have equal color depth
	assert(
		#background_color == #foreground_color
	)
	assert(
		"number" == type(properties.minimal_width or 0)
	)
	local tabulator_size = properties.tabulator_size or 8 * 8
	assert(
		"number" == type(tabulator_size)
	)
	-- The pixels of a glyph are references to the color tables given
	-- to bitmap_to_pixels() and the code for combining characters
	-- below finds foreground pixels by comparing references. Because
	-- of this, bitmap_to_pixels() must get the very same color tables
	-- that are used here, even if they are the defaults made above.
	-- A copy is used to leave the table of the caller unchanged.
	local glyph_properties = {}
	for key, value in pairs(properties) do
		glyph_properties[key] = value
	end
	glyph_properties.background_color = background_color
	glyph_properties.foreground_color = foreground_color
	local result = {}
	for i = 1, 16 do
		result[i] = {}
	end
	local codepoints = utf8.text_to_codepoints(text)
	-- FIXME: only works for LTR, should use UAX #9
	for i = 1, #codepoints do
		local codepoint = codepoints[i]
		local result_width = #result[1]
		if 0x0009 == codepoint then -- HT (horizontal tab)
			-- pad with background up to the next tab stop
			local tab_stop = math.floor(
				result_width / tabulator_size + 1
			) * tabulator_size
			result = pixelops.right_pad(
				result,
				tab_stop - result_width,
				background_color
			)
		else
			-- use U+FFFD as fallback character
			local bitmap = bitmap_to_pixels(
				self[codepoint] or self[0xFFFD],
				glyph_properties
			)
			local bitmap_width = #bitmap[1]
			-- A combining glyph can only be rendered over previous
			-- glyphs if they are at least as wide as the combining
			-- glyph. Otherwise (e.g. at the start of a line) it is
			-- appended, which avoids writing to columns left of 1.
			if (
				is_combining_character[codepoint] and
				result_width >= bitmap_width
			) then
				-- render combining glyph over previous glyph,
				-- aligned to the right edge of the result
				-- FIXME: this is horrible, but seems to work
				for j = 1, 16 do
					for k = 1, bitmap_width do
						if foreground_color == bitmap[j][k] then
							result[j][result_width - bitmap_width + k] = bitmap[j][k]
						end
					end
				end
			else
				-- append current glyph at right edge of result
				for j = 1, 16 do
					for k = 1, bitmap_width do
						result[j][result_width + k] = bitmap[j][k]
					end
				end
			end
		end
	end
	return result
end
-- Render text that may contain several lines to pixels.
--
-- The text is split at every "\n" and every line is rendered with
-- render_line(), which gets the same properties. Narrower lines are
-- padded on the right side with properties.background_color (default:
-- { 0 }), so that all scanlines have the width of the widest line.
--
-- properties.scanline_order decides how the lines are stacked: with
-- "bottom-top" (the default) the scanlines of a later line are put
-- before the scanlines of earlier lines, with "top-bottom" they are
-- put after them.
--
-- Returns a table of scanlines, every scanline is a table of pixels.
hexfont.render_text = function(self, text, properties)
	properties = properties or {}
	assert(
		"table" == type(properties)
	)
	local background_color = properties.background_color or { 0 }
	local foreground_color = properties.foreground_color or { 255 }
	-- background and foreground color must have equal color depth
	assert(
		#background_color == #foreground_color
	)
	local scanline_order = properties.scanline_order or "bottom-top"
	assert(
		"bottom-top" == scanline_order or
		"top-bottom" == scanline_order
	)
	local image
	-- TODO: implement UAX #14
	for utf8_line in string.gmatch(text .. "\n", "([^\n]*)\n") do
		local line_pixels = self:render_line(utf8_line, properties)
		assert( nil ~= line_pixels )
		if nil == image then
			-- the first line is the start of the image
			image = line_pixels
		else
			-- pad whichever is narrower, image or current line
			local missing = #image[1] - #line_pixels[1]
			if missing > 0 then
				line_pixels = pixelops.right_pad(
					line_pixels,
					missing,
					background_color
				)
			elseif missing < 0 then
				image = pixelops.right_pad(
					image,
					-missing,
					background_color
				)
			end
			assert(
				#image[1] == #line_pixels[1]
			)
			if "top-bottom" == scanline_order then
				-- scanlines of the current line go after the image
				for row = 1, #line_pixels do
					image[#image + 1] = line_pixels[row]
				end
			else
				-- “bottom-top”: scanlines of the current line go
				-- before the image
				local line_height = #line_pixels
				local stacked = {}
				for row = 1, line_height do
					stacked[row] = line_pixels[row]
				end
				for row = 1, #image do
					stacked[line_height + row] = image[row]
				end
				image = stacked
			end
		end
	end
	return image
end

20
init.lua Normal file
View File

@ -0,0 +1,20 @@
#!/usr/bin/env lua5.1
--[[
Copyright © 2023 Nils Dagsson Moskopp (erle)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
steigern. Gelegentlich packe ich sogar einen handfesten Buffer
Overflow oder eine Format String Vulnerability zwischen die anderen
Codezeilen und schreibe das auch nicht dran.
]]--
-- Namespace table of this library. It is a global, so that a script
-- that loads this file with dofile() can use it afterwards.
unicode_text = {}
-- hexfont.lua is loaded relative to the current working directory
-- and is expected to define the global variable hexfont used below.
dofile("hexfont.lua")
-- make the font prototype available as unicode_text.hexfont
unicode_text.hexfont = hexfont

177
pixelops.lua Executable file
View File

@ -0,0 +1,177 @@
#!/usr/bin/env lua5.1
--[[
Copyright © 2023 Nils Dagsson Moskopp (erle)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
steigern. Gelegentlich packe ich sogar einen handfesten Buffer
Overflow oder eine Format String Vulnerability zwischen die anderen
Codezeilen und schreibe das auch nicht dran.
]]--
-- Namespace table for operations on pixels. Pixels are given as a
-- table of scanlines, where every scanline is a table of pixels.
pixelops = {}

-- Return a new table with the scanlines of pixels in reverse order.
-- Use this to invert the scanline order! The table given as argument
-- is not changed, the scanlines themselves are not copied.
pixelops.flip_vertical = function(pixels)
	local height = #pixels
	local flipped = {}
	for target = 1, height do
		flipped[target] = pixels[height - target + 1]
	end
	return flipped
end
-- Test helper (also used by tests further down in this file)
local tc = table.concat

-- Test: pixelops.flip_vertical() with one-byte, three-byte and
-- four-byte pixels. Flipping an image must reverse the order of its
-- scanlines, flipping it a second time must restore the order.
do
	-- concatenate all values of all pixels of one scanline
	local scanline_to_string = function(scanline)
		local pixel_strings = {}
		for index = 1, #scanline do
			pixel_strings[index] = tc(scanline[index])
		end
		return tc(pixel_strings)
	end

	local assert_scanlines = function(image, first, second, third)
		assert(
			first == scanline_to_string(image[1]) and
			second == scanline_to_string(image[2]) and
			third == scanline_to_string(image[3])
		)
	end

	local test_cases = {
		{
			-- one-byte pixels
			image = {
				{ { 1 }, { 2 }, { 3 } },
				{ { 4 }, { 5 }, { 6 } },
				{ { 7 }, { 8 }, { 9 } },
			},
			scanlines = { "123", "456", "789" },
		},
		{
			-- three-byte pixels
			image = {
				{ { 1, 1, 0 }, { 2, 2, 0 }, { 3, 3, 0 } },
				{ { 4, 4, 0 }, { 5, 5, 0 }, { 6, 6, 0 } },
				{ { 7, 7, 0 }, { 8, 8, 0 }, { 9, 9, 0 } },
			},
			scanlines = { "110220330", "440550660", "770880990" },
		},
		{
			-- four-byte pixels
			image = {
				{ { 1, 1, 0, 0 }, { 2, 2, 0, 0 }, { 3, 3, 0, 0 } },
				{ { 4, 4, 0, 0 }, { 5, 5, 0, 0 }, { 6, 6, 0, 0 } },
				{ { 7, 7, 0, 0 }, { 8, 8, 0, 0 }, { 9, 9, 0, 0 } },
			},
			scanlines = { "110022003300", "440055006600", "770088009900" },
		},
	}

	for _, test_case in ipairs(test_cases) do
		local top, middle, bottom =
			test_case.scanlines[1],
			test_case.scanlines[2],
			test_case.scanlines[3]
		-- the test data itself must be what is expected
		assert_scanlines(test_case.image, top, middle, bottom)
		local flipped = pixelops.flip_vertical(test_case.image)
		assert_scanlines(flipped, bottom, middle, top)
		local restored = pixelops.flip_vertical(flipped)
		assert_scanlines(restored, top, middle, bottom)
	end
end
-- Return a copy of pixels that is wider by amount columns on its
-- right side. Every added pixel is the table padding itself (it is
-- not copied). The table given as first argument is not changed.
--
-- Parameters:
--   pixels   table of scanlines; the width of the first scanline is
--            taken as the width of all scanlines
--   amount   number of columns to add, must be greater than zero
--   padding  pixel (a table) to use for the added columns
pixelops.right_pad = function(pixels, amount, padding)
	assert(
		"table" == type(pixels)
	)
	assert(
		"number" == type(amount) and
		amount > 0
	)
	assert(
		"table" == type(padding)
	)
	local width = #pixels[1]
	local padded = {}
	for row_index = 1, #pixels do
		local source_row = pixels[row_index]
		local padded_row = {}
		-- copy every pixel of this scanline
		for column = 1, width do
			padded_row[column] = source_row[column]
		end
		-- pad on right side
		for column = width + 1, width + amount do
			padded_row[column] = padding
		end
		padded[row_index] = padded_row
	end
	return padded
end
-- Test: pixelops.right_pad() with 1-byte pixels
do
	-- concatenate the first value of every pixel of one scanline
	local scanline_to_string = function(scanline)
		local values = {}
		for index = 1, #scanline do
			values[index] = scanline[index][1]
		end
		return tc(values)
	end

	local assert_scanlines = function(image, first, second, third)
		assert(
			first == scanline_to_string(image[1]) and
			second == scanline_to_string(image[2]) and
			third == scanline_to_string(image[3])
		)
	end

	local unpadded = {
		{ { 1 }, { 0 } },
		{ { 0 }, { 1 } },
		{ { 1 }, { 0 } },
	}
	assert_scanlines(unpadded, "10", "01", "10")

	-- pad with one column
	local padded_by_one = pixelops.right_pad(unpadded, 1, { 2 })
	assert_scanlines(padded_by_one, "102", "012", "102")

	-- pad with two columns
	local padded_by_two = pixelops.right_pad(unpadded, 2, { 3 })
	assert_scanlines(padded_by_two, "1033", "0133", "1033")
end

23
test.lua Executable file
View File

@ -0,0 +1,23 @@
#!/usr/bin/env lua5.1
-- -*- coding: utf-8 -*-
-- Manual test: renders text to TGA files in the current directory.
-- All paths except the one for unifont.hex are relative to the
-- current working directory.
dofile("init.lua")
font = unicode_text.hexfont()
font:load_glyphs( io.lines("/usr/share/unifont/unifont.hex") )
font:load_glyphs( io.lines("unifont_upper.hex") )
dofile("tga_encoder.lua")

-- render three lines of text, of which the second line is empty
local pixels = font:render_text("ABC 123 😀\
\
𐍈")
local image = tga_encoder.image(pixels)
image:save("test.tga")

-- Render a whole text file. assert() reports the reason if the file
-- can not be opened. The file is closed before the text is rendered,
-- so that the file is not left open if rendering raises an error.
local file = assert( io.open("UTF-8-demo.txt") )
local demo_text = file:read("*all")
file:close()
tga_encoder.image(
	font:render_text(
		demo_text
	)
):save("UTF-8-demo.tga")

82
utf8.lua Normal file
View File

@ -0,0 +1,82 @@
#!/usr/bin/env lua5.1
--[[
Copyright © 2023 Nils Dagsson Moskopp (erle)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
steigern. Gelegentlich packe ich sogar einen handfesten Buffer
Overflow oder eine Format String Vulnerability zwischen die anderen
Codezeilen und schreibe das auch nicht dran.
]]--
-- Namespace table for UTF-8 functions (a global variable).
utf8 = {}

-- Convert an UTF-8 string into a table with codepoints.
-- inspired by <https://lua-users.org/wiki/LuaUnicode>
--
-- Parameters:
--   text  string that contains UTF-8 encoded text
--
-- Returns a table with one number for every encoded codepoint, in the
-- order in which the codepoints occur in the text. An empty string
-- gives an empty table.
--
-- Raises the error "invalid UTF-8 sequence" if a sequence starts with
-- a byte that can not start a sequence, if the text ends in the middle
-- of a sequence or if a sequence is not continued by a byte of the
-- form 10xxxxxx. Overlong sequences and the range of the decoded
-- codepoints are not checked.
utf8.text_to_codepoints = function(text)
	assert(
		"string" == type(text)
	)
	local result = {}
	local i = 1
	while i <= #text do
		local byte_1 = string.byte(text, i)
		-- sequence_length is the total number of bytes, value starts
		-- with the payload bits of the first byte
		local sequence_length, value
		if byte_1 <= 127 then
			-- 0xxxxxxx
			sequence_length, value = 1, byte_1
		elseif byte_1 <= 191 then
			-- 10xxxxxx can only continue a sequence, not start one
			error("invalid UTF-8 sequence")
		elseif byte_1 <= 223 then
			-- 110xxxxx 10xxxxxx
			sequence_length, value = 2, byte_1 - 192
		elseif byte_1 <= 239 then
			-- 1110xxxx 10xxxxxx 10xxxxxx
			sequence_length, value = 3, byte_1 - 224
		elseif byte_1 <= 247 then
			-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			sequence_length, value = 4, byte_1 - 240
		else
			error("invalid UTF-8 sequence")
		end
		-- every continuation byte adds six payload bits
		for offset = 1, sequence_length - 1 do
			local continuation = string.byte(text, i + offset)
			if (
				nil == continuation or -- text ends within sequence
				continuation < 128 or
				continuation > 191
			) then
				error("invalid UTF-8 sequence")
			end
			value = value * 64 + (continuation - 128)
		end
		result[#result + 1] = value
		i = i + sequence_length
	end
	return result
end
-- Test: decode one codepoint for each possible sequence length
-- (one byte, two bytes, three bytes and four bytes)
do
	-- U+0077 U+00F0 U+2665 U+10348
	local decoded = utf8.text_to_codepoints("wð♥𐍈")
	assert(
		"119 240 9829 66376" == table.concat(decoded, " ")
	)
end