From 7376c3e7b77658e8ca00fcf44b08a21ea1779cbb Mon Sep 17 00:00:00 2001
From: Nils Dagsson Moskopp
Date: Fri, 17 Mar 2023 03:39:41 +0100
Subject: [PATCH] * Rewrite everything

---
 U+FFFD.hex    |   1 +
 combining.lua |  62 ++++++++
 hexfont.lua   | 423 ++++++++++++++++++++++++++++++++++++++++++++++++++
 init.lua      |  20 +++
 pixelops.lua  | 185 ++++++++++++++++++++++
 test.lua      |  23 +++
 utf8.lua      |  81 ++++++++++
 7 files changed, 795 insertions(+)
 create mode 100644 U+FFFD.hex
 create mode 100644 combining.lua
 create mode 100755 hexfont.lua
 create mode 100644 init.lua
 create mode 100755 pixelops.lua
 create mode 100755 test.lua
 create mode 100644 utf8.lua

diff --git a/U+FFFD.hex b/U+FFFD.hex
new file mode 100644
index 0000000..29a76ba
--- /dev/null
+++ b/U+FFFD.hex
@@ -0,0 +1 @@
+FFFD:0000018003C006600C301998399C7F3E7E7E3E7C1FF80E70066003C001800000
diff --git a/combining.lua b/combining.lua
new file mode 100644
index 0000000..9f6b2e8
--- /dev/null
+++ b/combining.lua
@@ -0,0 +1,62 @@
+#!/usr/bin/env lua5.1
+
+--[[
+Copyright © 2023 Nils Dagsson Moskopp (erle)
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
+steigern. Gelegentlich packe ich sogar einen handfesten Buffer
+Overflow oder eine Format String Vulnerability zwischen die anderen
+Codezeilen und schreibe das auch nicht dran.
+]]--
+
+is_combining_character = {}
+
+local add_codepoint_range = function(table_, value, first, last)
+  for codepoint = first, last do
+    table_[codepoint] = value
+  end
+end
+
+-- Combining Diacritical Marks
+add_codepoint_range(is_combining_character, true, 0x0300, 0x036F)
+
+assert( is_combining_character[0x0300] )
+assert( is_combining_character[0x0301] )
+assert( is_combining_character[0x036E] )
+assert( is_combining_character[0x036F] )
+
+-- Combining Diacritical Marks Extended
+add_codepoint_range(is_combining_character, true, 0x1AB0, 0x1AFF)
+
+-- Malayalam
+add_codepoint_range(is_combining_character, true, 0x0D00, 0x0D03)
+add_codepoint_range(is_combining_character, true, 0x0D3B, 0x0D3C)
+add_codepoint_range(is_combining_character, true, 0x0D3E, 0x0D44)
+add_codepoint_range(is_combining_character, true, 0x0D46, 0x0D48)
+add_codepoint_range(is_combining_character, true, 0x0D4A, 0x0D4E)
+is_combining_character[0x0D57] = true
+add_codepoint_range(is_combining_character, true, 0x0D62, 0x0D63)
+
+-- Thai
+is_combining_character[0x0E31] = true
+add_codepoint_range(is_combining_character, true, 0x0E34, 0x0E3A)
+add_codepoint_range(is_combining_character, true, 0x0E47, 0x0E4E)
+
+-- Combining Diacritical Marks for Symbols
+add_codepoint_range(is_combining_character, true, 0x20D0, 0x20FF)
+
+-- Combining Half Marks
+add_codepoint_range(is_combining_character, true, 0xFE20, 0xFE2F)
+
+-- Combining Diacritical Marks Supplement
+add_codepoint_range(is_combining_character, true, 0x1DC0, 0x1DFF)
+
+assert( nil == is_combining_character[0x0077] )
+assert( nil == is_combining_character[0x00F0] )
+assert( nil == is_combining_character[0x2665] )
+assert( nil == is_combining_character[0x010348] )
diff --git a/hexfont.lua b/hexfont.lua
new file mode 100755
index 0000000..2b31cd0
--- /dev/null
+++ b/hexfont.lua
@@ -0,0 +1,423 @@
+#!/usr/bin/env lua5.1
+
+--[[
+Copyright © 2023 Nils Dagsson Moskopp (erle)
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
+steigern. Gelegentlich packe ich sogar einen handfesten Buffer
+Overflow oder eine Format String Vulnerability zwischen die anderen
+Codezeilen und schreibe das auch nicht dran.
+]]--
+
+dofile("combining.lua")
+dofile("pixelops.lua")
+dofile("utf8.lua")
+
+-- a lookup table was chosen for readability
+-- DO NOT EVER REFUCKTOR IT INTO A FUNCTION!
+local hex_to_bin = {
+  ["0"] = "0000",
+  ["1"] = "0001",
+  ["2"] = "0010",
+  ["3"] = "0011",
+  ["4"] = "0100",
+  ["5"] = "0101",
+  ["6"] = "0110",
+  ["7"] = "0111",
+  ["8"] = "1000",
+  ["9"] = "1001",
+  ["A"] = "1010",
+  ["B"] = "1011",
+  ["C"] = "1100",
+  ["D"] = "1101",
+  ["E"] = "1110",
+  ["F"] = "1111",
+}
+
+-- convert a binary bitmap to pixels accepted by tga_encoder
+--
+-- bitmap_hex is the bitmap part of one .hex line: 32 uppercase
+-- hexadecimal digits for a half-width (8×16) glyph or 64 digits
+-- for a full-width (16×16) glyph.
+--
+-- properties.background_color and properties.foreground_color must
+-- have the same amount of entries. Use one entry for grayscale or
+-- colormapped (palette) output, use three entries for RGB and four
+-- entries for RGBA.
+--
+-- properties.kerning (boolean, default false) trims empty columns
+-- at the glyph edges; properties.scanline_order is "bottom-top"
+-- (default) or "top-bottom".
+--
+-- Returns a table of 16 scanlines. Every pixel is a reference to
+-- the background or the foreground color table, not a copy of it.
+--
+local bitmap_to_pixels = function(bitmap_hex, properties)
+  -- bitmap_hex must be a string of uppercase hexadecimal digits
+  assert(
+    "string" == type(bitmap_hex) and
+    bitmap_hex:match("[0123456789ABCDEF]+") == bitmap_hex
+  )
+
+  local properties = properties or {}
+  assert(
+    "table" == type(properties)
+  )
+
+  local background_color = properties.background_color or { 0 }
+  local foreground_color = properties.foreground_color or { 255 }
+  -- background and foreground color must have equal color depth
+  assert(
+    #background_color == #foreground_color
+  )
+  local colormap = {
+    background_color,
+    foreground_color,
+  }
+
+  local kerning = properties.kerning or false
+  assert(
+    "boolean" == type(kerning)
+  )
+
+  -- scanline order “bottom-top” was chosen as the default to match
+  -- the default scanline order of tga_encoder and to require users
+  -- using another file format encoder to care about scanline order
+  -- (users who “do not care about scanline order” might find their
+  -- glyphs upside down … the fault, naturally, lies with the user)
+  local scanline_order = properties.scanline_order or "bottom-top"
+  assert(
+    "bottom-top" == scanline_order or
+    "top-bottom" == scanline_order
+  )
+
+  local height = 16
+  local width = bitmap_hex:len() * 4 / height
+  assert(
+    16 == width or -- full-width character
+    8 == width -- half-width character
+  )
+
+  -- convert hexadecimal bitmap to binary bitmap
+  local bitmap_bin_table = {}
+  for i = 1, #bitmap_hex do
+    local character = bitmap_hex:sub(i,i)
+    bitmap_bin_table[i] = hex_to_bin[character]
+  end
+  local bitmap_bin = table.concat(bitmap_bin_table)
+
+  -- decode binary bitmap with “top-bottom” scanline order
+  -- (i.e. the first encoded pixel is the top left pixel)
+  local pixels = {}
+  for scanline = 1, height do
+    pixels[scanline] = {}
+    for w = 1, width do
+      local i = ( ( scanline - 1 ) * width ) + w
+      local pixel
+      pixel = colormap[tonumber(bitmap_bin:sub(i,i)) + 1]
+      pixels[scanline][w] = pixel
+    end
+  end
+
+  -- flip image upside down for “bottom-top” scanline order
+  -- (i.e. the first encoded pixel is the bottom left pixel)
+  if "bottom-top" == scanline_order then
+    pixels = pixelops.flip_vertical(pixels)
+  end
+
+  if kerning then
+    -- remove rightmost column if it is empty
+    local remove_rightmost_column = true
+    for h = 1, height do
+      if foreground_color == pixels[h][width] then
+        remove_rightmost_column = false
+      end
+    end
+    if remove_rightmost_column then
+      for h = 1, height do
+        pixels[h][width] = nil
+      end
+    end
+    -- remove leftmost column if it and the column to its right are
+    -- both empty, glyphs touch too often without the extra check
+    local remove_leftmost_column = true
+    for h = 1, height do
+      if (
+        foreground_color == pixels[h][1] or
+        foreground_color == pixels[h][2]
+      ) then
+        remove_leftmost_column = false
+      end
+    end
+    if remove_leftmost_column then
+      for h = 1, height do
+        for w = 1, width do
+          pixels[h][w] = pixels[h][w+1]
+        end
+      end
+    end
+  end
+
+  return pixels
+end
+
+-- hexfont is both the prototype of every font and its constructor:
+-- calling hexfont(...) returns a new table that falls back to
+-- hexfont for missing keys and has had its constructor() called.
+hexfont = setmetatable(
+  {},
+  {
+    __call = function(self, ...)
+      local new_hexfont = setmetatable(
+        {},
+        {
+          __index = self
+        }
+      )
+      new_hexfont:constructor(...)
+      return new_hexfont
+    end
+  }
+)
+
+-- return an iterator function over the entries of a sequence
+local iter = function(table_)
+  local index = 0
+  local total = #table_
+  return function()
+    index = index + 1
+    if index <= total then
+      return table_[index]
+    end
+  end
+end
+
+-- called for every new font; loads U+FFFD so that a fallback glyph
+-- is always available to render_line()
+hexfont.constructor = function(self)
+  local minimal_hexfont = {
+    -- U+FFFD REPLACEMENT CHARACTER
+    "FFFD:0000018003C006600C301998399C7F3E7E7E3E7C1FF80E70066003C001800000"
+  }
+  self:load_glyphs(
+    iter(minimal_hexfont)
+  )
+end
+
+-- load glyphs from an iterator that returns lines in Unifont
+-- .hex format (“CODEPOINT:BITMAP”, uppercase hexadecimal digits)
+--
+-- Blank lines are skipped; any other line that does not contain a
+-- glyph definition raises an error. A glyph that is loaded later
+-- replaces an earlier glyph for the same codepoint.
+--
+-- Usage:
+-- font:load_glyphs(io.lines("unifont.hex"))
+-- font:load_glyphs(io.lines("unifont_upper.hex"))
+hexfont.load_glyphs = function(self, iterator)
+  assert( "function" == type(iterator) )
+  for line in iterator do
+    assert("string" == type(line))
+    local codepoint_hex, bitmap_hex = line:match(
+      "([0123456789ABCDEF]+):([0123456789ABCDEF]+)"
+    )
+    if nil ~= codepoint_hex then
+      -- codepoint must be local, glyph data must not leak into _G
+      local codepoint = tonumber(codepoint_hex, 16)
+      self[codepoint] = bitmap_hex
+    elseif line:match("%S") then
+      error("invalid .hex line: " .. line)
+    end
+  end
+end
+
+-- Test: Glyphs are correctly loaded
+local font = hexfont()
+assert(
+  font[0xFFFD] == "0000018003C006600C301998399C7F3E7E7E3E7C1FF80E70066003C001800000"
+)
+font = nil
+
+-- render one line of UTF-8 text (no line breaks) into 16 scanlines
+--
+-- properties is passed on to bitmap_to_pixels(). render_line also
+-- reads properties.tabulator_size (distance between tab stops in
+-- pixels, default 64). properties.minimal_width is validated, but
+-- not used yet.
+--
+-- Codepoints without a glyph are rendered as U+FFFD. Combining
+-- characters are drawn over the right edge of the preceding glyph.
+--
+hexfont.render_line = function(self, text, properties)
+  assert(
+    "string" == type(text)
+  )
+
+  properties = properties or {}
+  assert(
+    "table" == type(properties)
+  )
+
+  -- default colors are black (0) & white (255) in 1 bit color depth
+  local background_color = properties.background_color or { 0 }
+  local foreground_color = properties.foreground_color or { 255 }
+  -- background and foreground color must have equal color depth
+  assert(
+    #background_color == #foreground_color
+  )
+
+  -- Pixels are compared by reference below, so bitmap_to_pixels()
+  -- must use these exact color tables. Without this, it creates
+  -- its own default tables and no combining character is drawn.
+  local glyph_properties = setmetatable(
+    {
+      background_color = background_color,
+      foreground_color = foreground_color,
+    },
+    {
+      __index = properties
+    }
+  )
+
+  local minimal_width = properties.minimal_width or 0
+  assert(
+    "number" == type(minimal_width)
+  )
+
+  local tabulator_size = properties.tabulator_size or 8 * 8
+  assert(
+    "number" == type(tabulator_size)
+  )
+
+  local result = {}
+  for i = 1, 16 do
+    result[i] = {}
+  end
+  local codepoints = utf8.text_to_codepoints(text)
+  -- FIXME: only works for LTR, should use UAX #9
+  for i = 1, #codepoints do
+    local codepoint = codepoints[i]
+    local result_width = #result[1]
+    if 0x0009 == codepoint then -- HT (horizontal tab)
+      local tab_stop = math.floor(
+        result_width / tabulator_size + 1
+      ) * tabulator_size
+      result = pixelops.right_pad(
+        result,
+        tab_stop - result_width,
+        background_color
+      )
+    else
+      local bitmap_hex = self[codepoint]
+      -- use U+FFFD as fallback character
+      if nil == bitmap_hex then
+        bitmap_hex = self[0xFFFD]
+      end
+      local bitmap = bitmap_to_pixels(
+        bitmap_hex,
+        glyph_properties
+      )
+      local bitmap_width = #bitmap[1]
+      if is_combining_character[codepoint] then
+        -- render combining glyph over previous glyph
+        -- FIXME: this is horrible, but seems to work
+        local offset = result_width - bitmap_width
+        for j = 1, 16 do
+          for k = 1, bitmap_width do
+            -- offset + k < 1 happens if the combining glyph is
+            -- wider than the line so far; skip these columns
+            if (
+              foreground_color == bitmap[j][k] and
+              offset + k >= 1
+            ) then
+              result[j][offset + k] = bitmap[j][k]
+            end
+          end
+        end
+      else
+        -- append current glyph at right edge of result
+        for j = 1, 16 do
+          for k = 1, bitmap_width do
+            result[j][result_width + k] = bitmap[j][k]
+          end
+        end
+      end
+    end
+  end
+  return result
+end
+
+-- render UTF-8 text with any number of lines, stacked vertically
+--
+-- The text is split at "\n"; every line is rendered with
+-- render_line() using the same properties. Shorter lines are
+-- padded on the right with the background color, so all scanlines
+-- of the result have equal width.
+--
+hexfont.render_text = function(self, text, properties)
+  local properties = properties or {}
+  assert(
+    "table" == type(properties)
+  )
+
+  local background_color = properties.background_color or { 0 }
+  local foreground_color = properties.foreground_color or { 255 }
+  -- background and foreground color must have equal color depth
+  assert(
+    #background_color == #foreground_color
+  )
+
+  local scanline_order = properties.scanline_order or "bottom-top"
+  assert(
+    "bottom-top" == scanline_order or
+    "top-bottom" == scanline_order
+  )
+
+  local result
+  -- TODO: implement UAX #14
+  for utf8_line in string.gmatch(text .. "\n", "([^\n]*)\n") do
+    local pixels = self:render_line(
+      utf8_line,
+      properties
+    )
+    assert( nil ~= pixels )
+    if nil == result then
+      result = pixels
+    else
+      local result_width = #result[1]
+      local pixels_width = #pixels[1]
+      if result_width > pixels_width then
+        pixels = pixelops.right_pad(
+          pixels,
+          result_width - pixels_width,
+          background_color
+        )
+      elseif result_width < pixels_width then
+        result = pixelops.right_pad(
+          result,
+          pixels_width - result_width,
+          background_color
+        )
+      end
+      assert(
+        #result[1] == #pixels[1]
+      )
+      if "bottom-top" == scanline_order then
+        for i = #pixels, 1, -1 do
+          table.insert(result, 1, pixels[i])
+        end
+      end
+      if "top-bottom" == scanline_order then
+        for i = 1, #pixels do
+          result[#result+1] = pixels[i]
+        end
+      end
+    end
+  end
+  return result
+end
diff --git a/init.lua b/init.lua
new file mode 100644
index 0000000..8cf4571
--- /dev/null
+++ b/init.lua
@@ -0,0 +1,20 @@
+#!/usr/bin/env lua5.1
+
+--[[
+Copyright © 2023 Nils Dagsson Moskopp (erle)
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
+steigern. Gelegentlich packe ich sogar einen handfesten Buffer
+Overflow oder eine Format String Vulnerability zwischen die anderen
+Codezeilen und schreibe das auch nicht dran.
+]]--
+
+unicode_text = {}
+
+dofile("hexfont.lua")
+unicode_text.hexfont = hexfont
diff --git a/pixelops.lua b/pixelops.lua
new file mode 100755
index 0000000..d96f126
--- /dev/null
+++ b/pixelops.lua
@@ -0,0 +1,185 @@
+#!/usr/bin/env lua5.1
+
+--[[
+Copyright © 2023 Nils Dagsson Moskopp (erle)
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
+steigern. Gelegentlich packe ich sogar einen handfesten Buffer
+Overflow oder eine Format String Vulnerability zwischen die anderen
+Codezeilen und schreibe das auch nicht dran.
+]]--
+
+pixelops = {}
+
+-- Use this to invert the scanline order!
+--
+-- Returns a new table with the scanlines of pixels in reverse
+-- order. The scanlines themselves are shared, not copied.
+pixelops.flip_vertical = function(pixels)
+  local result = {}
+  local height = #pixels
+  for h = 1, height do
+    local scanline = height - h + 1
+    result[scanline] = pixels[h]
+  end
+  return result
+end
+
+-- Test helper
+local tc = table.concat
+
+-- Test: pixelops.flip_vertical() with one-byte pixels
+local i = {
+  { { 1 }, { 2 }, { 3 } },
+  { { 4 }, { 5 }, { 6 } },
+  { { 7 }, { 8 }, { 9 } },
+}
+assert(
+  "123" == tc({i[1][1][1], i[1][2][1], i[1][3][1]}) and
+  "456" == tc({i[2][1][1], i[2][2][1], i[2][3][1]}) and
+  "789" == tc({i[3][1][1], i[3][2][1], i[3][3][1]})
+)
+local j = pixelops.flip_vertical(i)
+assert(
+  "789" == tc({j[1][1][1], j[1][2][1], j[1][3][1]}) and
+  "456" == tc({j[2][1][1], j[2][2][1], j[2][3][1]}) and
+  "123" == tc({j[3][1][1], j[3][2][1], j[3][3][1]})
+)
+local k = pixelops.flip_vertical(j)
+assert(
+  "123" == tc({k[1][1][1], k[1][2][1], k[1][3][1]}) and
+  "456" == tc({k[2][1][1], k[2][2][1], k[2][3][1]}) and
+  "789" == tc({k[3][1][1], k[3][2][1], k[3][3][1]})
+)
+i = nil
+j = nil
+k = nil
+
+-- Test: pixelops.flip_vertical() with three-byte pixels
+local i = {
+  { { 1, 1, 0 }, { 2, 2, 0 }, { 3, 3, 0 } },
+  { { 4, 4, 0 }, { 5, 5, 0 }, { 6, 6, 0 } },
+  { { 7, 7, 0 }, { 8, 8, 0 }, { 9, 9, 0 } },
+}
+assert(
+  "110220330" == tc({tc(i[1][1]), tc(i[1][2]), tc(i[1][3])}) and
+  "440550660" == tc({tc(i[2][1]), tc(i[2][2]), tc(i[2][3])}) and
+  "770880990" == tc({tc(i[3][1]), tc(i[3][2]), tc(i[3][3])})
+)
+local j = pixelops.flip_vertical(i)
+assert(
+  "770880990" == tc({tc(j[1][1]), tc(j[1][2]), tc(j[1][3])}) and
+  "440550660" == tc({tc(j[2][1]), tc(j[2][2]), tc(j[2][3])}) and
+  "110220330" == tc({tc(j[3][1]), tc(j[3][2]), tc(j[3][3])})
+)
+local k = pixelops.flip_vertical(j)
+assert(
+  "110220330" == tc({tc(k[1][1]), tc(k[1][2]), tc(k[1][3])}) and
+  "440550660" == tc({tc(k[2][1]), tc(k[2][2]), tc(k[2][3])}) and
+  "770880990" == tc({tc(k[3][1]), tc(k[3][2]), tc(k[3][3])})
+)
+i = nil
+j = nil
+k = nil
+
+-- Test: pixelops.flip_vertical() with four-byte pixels
+local i = {
+  { { 1, 1, 0, 0 }, { 2, 2, 0, 0 }, { 3, 3, 0, 0 } },
+  { { 4, 4, 0, 0 }, { 5, 5, 0, 0 }, { 6, 6, 0, 0 } },
+  { { 7, 7, 0, 0 }, { 8, 8, 0, 0 }, { 9, 9, 0, 0 } },
+}
+assert(
+  "110022003300" == tc({tc(i[1][1]), tc(i[1][2]), tc(i[1][3])}) and
+  "440055006600" == tc({tc(i[2][1]), tc(i[2][2]), tc(i[2][3])}) and
+  "770088009900" == tc({tc(i[3][1]), tc(i[3][2]), tc(i[3][3])})
+)
+local j = pixelops.flip_vertical(i)
+assert(
+  "770088009900" == tc({tc(j[1][1]), tc(j[1][2]), tc(j[1][3])}) and
+  "440055006600" == tc({tc(j[2][1]), tc(j[2][2]), tc(j[2][3])}) and
+  "110022003300" == tc({tc(j[3][1]), tc(j[3][2]), tc(j[3][3])})
+)
+local k = pixelops.flip_vertical(j)
+assert(
+  "110022003300" == tc({tc(k[1][1]), tc(k[1][2]), tc(k[1][3])}) and
+  "440055006600" == tc({tc(k[2][1]), tc(k[2][2]), tc(k[2][3])}) and
+  "770088009900" == tc({tc(k[3][1]), tc(k[3][2]), tc(k[3][3])})
+)
+i = nil
+j = nil
+k = nil
+
+-- append amount pixels of the color padding to every scanline
+--
+-- amount must be greater than zero. Returns a new table with new
+-- scanlines, pixels is not modified; the width of the first
+-- scanline is used as the width of every scanline.
+pixelops.right_pad = function(pixels, amount, padding)
+  assert(
+    "table" == type(pixels)
+  )
+  assert(
+    "number" == type(amount) and
+    amount > 0
+  )
+  assert(
+    "table" == type(padding)
+  )
+  local result = {}
+  local height = #pixels
+  local width = #pixels[1]
+  -- copy every pixel
+  for h = 1, height do
+    result[h] = {}
+    for w = 1, width do
+      result[h][w] = pixels[h][w]
+    end
+  end
+  -- pad on right side
+  for h = 1, height do
+    for a = 1, amount do
+      result[h][width + a] = padding
+    end
+  end
+  return result
+end
+
+-- Test: pixelops.right_pad() with 1-byte pixels
+local i = {
+  { { 1 }, { 0 } },
+  { { 0 }, { 1 } },
+  { { 1 }, { 0 } },
+}
+assert(
+  "10" == tc({i[1][1][1], i[1][2][1]}) and
+  "01" == tc({i[2][1][1], i[2][2][1]}) and
+  "10" == tc({i[3][1][1], i[3][2][1]})
+)
+local j = pixelops.right_pad(
+  i,
+  1,
+  { 2 }
+)
+assert(
+  "102" == tc({j[1][1][1], j[1][2][1], j[1][3][1]}) and
+  "012" == tc({j[2][1][1], j[2][2][1], j[2][3][1]}) and
+  "102" == tc({j[3][1][1], j[3][2][1], j[3][3][1]})
+)
+local k = pixelops.right_pad(
+  i,
+  2,
+  { 3 }
+)
+assert(
+  "1033" == tc({k[1][1][1], k[1][2][1], k[1][3][1], k[1][4][1]}) and
+  "0133" == tc({k[2][1][1], k[2][2][1], k[2][3][1], k[2][4][1]}) and
+  "1033" == tc({k[3][1][1], k[3][2][1], k[3][3][1], k[3][4][1]})
+)
+i = nil
+j = nil
+k = nil
diff --git a/test.lua b/test.lua
new file mode 100755
index 0000000..34b25d8
--- /dev/null
+++ b/test.lua
@@ -0,0 +1,23 @@
+#!/usr/bin/env lua5.1
+-- -*- coding: utf-8 -*-
+
+dofile("init.lua")
+
+local font = unicode_text.hexfont()
+font:load_glyphs( io.lines("/usr/share/unifont/unifont.hex") )
+font:load_glyphs( io.lines("unifont_upper.hex") )
+
+dofile("tga_encoder.lua")
+local pixels = font:render_text("ABC 123 😀\
+\
+wð♥𐍈")
+local image = tga_encoder.image(pixels)
+image:save("test.tga")
+
+-- assert() reports the reason if the demo file can not be opened
+local file = assert( io.open("UTF-8-demo.txt") )
+local demo_text = file:read("*all")
+file:close()
+tga_encoder.image(
+  font:render_text(demo_text)
+):save("UTF-8-demo.tga")
diff --git a/utf8.lua b/utf8.lua
new file mode 100644
index 0000000..dcd9b6f
--- /dev/null
+++ b/utf8.lua
@@ -0,0 +1,81 @@
+#!/usr/bin/env lua5.1
+
+--[[
+Copyright © 2023 Nils Dagsson Moskopp (erle)
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+Dieses Programm hat das Ziel, die Medienkompetenz der Leser zu
+steigern. Gelegentlich packe ich sogar einen handfesten Buffer
+Overflow oder eine Format String Vulnerability zwischen die anderen
+Codezeilen und schreibe das auch nicht dran.
+]]--
+
+-- NOTE: this global replaces the utf8 standard library of Lua 5.3+
+utf8 = {}
+
+-- convert an UTF-8 string into a table with codepoints
+--
+-- Raises "invalid UTF-8 sequence" if text contains a byte that can
+-- not start a sequence or a sequence with missing or malformed
+-- continuation bytes. Overlong encodings and surrogates are not
+-- rejected.
+--
+-- (the source that inspired this function was credited here, but
+-- the reference is missing from this copy of the patch)
+utf8.text_to_codepoints = function(text)
+  assert(
+    "string" == type(text)
+  )
+  -- payload bits of the lead byte: 7, 5, 4 or 3, by sequence length
+  local lead_modulus = { 128, 32, 16, 8 }
+  local result = {}
+  local i = 1
+  while i <= #text do
+    local byte_1 = string.byte(text, i)
+    local sequence_length
+    if byte_1 <= 127 then
+      sequence_length = 1 -- 0xxxxxxx
+    elseif byte_1 <= 191 then
+      -- 10xxxxxx is a continuation byte, it can not start a sequence
+      error("invalid UTF-8 sequence")
+    elseif byte_1 <= 223 then
+      sequence_length = 2 -- 110xxxxx 10xxxxxx
+    elseif byte_1 <= 239 then
+      sequence_length = 3 -- 1110xxxx 10xxxxxx 10xxxxxx
+    elseif byte_1 <= 247 then
+      sequence_length = 4 -- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    else
+      error("invalid UTF-8 sequence")
+    end
+    local value = byte_1 % lead_modulus[sequence_length]
+    for offset = 1, sequence_length - 1 do
+      local byte = string.byte(text, i + offset)
+      -- continuation bytes are 10xxxxxx (128 to 191), a missing
+      -- byte means that the text ends in the middle of a sequence
+      if nil == byte or byte < 128 or byte > 191 then
+        error("invalid UTF-8 sequence")
+      end
+      value = value * 64 + (byte % 64)
+    end
+    result[#result + 1] = value
+    i = i + sequence_length
+  end
+  return result
+end
+
+-- Test one codepoint for each byte length:
+local codepoints = utf8.text_to_codepoints(
+  "wð♥𐍈" -- U+0077 U+00F0 U+2665 U+10348
+)
+assert(
+  table.concat(codepoints, " ") == "119 240 9829 66376"
+)
+
+-- Test: malformed input raises an error
+assert( not pcall(utf8.text_to_codepoints, "\128") )
+assert( not pcall(utf8.text_to_codepoints, "\240\144\141") )
+assert( not pcall(utf8.text_to_codepoints, "\255") )