Changed UTF8 routines and added char fallback mechanism

This commit is contained in:
Pierre-Yves Rollo 2018-11-01 10:47:39 +01:00
parent bb24d91645
commit 95c9da849d
2 changed files with 268 additions and 86 deletions

176
font_api/fallbacks.lua Normal file

@ -0,0 +1,176 @@
--[[
font_api mod for Minetest - Library to add font display capability
to display_api mod.
(c) Pierre-Yves Rollo
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
--]]
-- This is the Unicode char fallback map. If a char is not present in a
-- font, this map indicates which char(s) to try next instead.
return {
	-- Each key is ONE UTF-8 encoded char (a single code point): the lookup is
	-- done with the bytes of one char at a time, so multi-char keys never
	-- match. Values may hold several chars ('(C)', '1/2') and may themselves
	-- have an entry in this map (e.g. 'Ǣ' -> 'Æ' -> 'AE').

	-- Lowercase chars
	['a'] = 'A', ['b'] = 'B', ['c'] = 'C', ['d'] = 'D',
	['e'] = 'E', ['f'] = 'F', ['g'] = 'G', ['h'] = 'H',
	['i'] = 'I', ['j'] = 'J', ['k'] = 'K', ['l'] = 'L',
	['m'] = 'M', ['n'] = 'N', ['o'] = 'O', ['p'] = 'P',
	['q'] = 'Q', ['r'] = 'R', ['s'] = 'S', ['t'] = 'T',
	['u'] = 'U', ['v'] = 'V', ['w'] = 'W', ['x'] = 'X',
	['y'] = 'Y', ['z'] = 'Z',

	-- Special
	['¢'] = 'c', ['£'] = 'L', ['¥'] = 'Y', ['€'] = 'E',
	['©'] = '(C)', ['®'] = '(R)', ['™'] = 'TM',
	['ª'] = 'a', ['º'] = 'o',
	['«'] = '"', ['»'] = '"', ['´'] = '\'',
	['¹'] = '1', ['²'] = '2', ['³'] = '3',
	['µ'] = 'u', ['¤'] = 'o',
	['¼'] = '1/4', ['½'] = '1/2', ['¾'] = '3/4',
	['⅛'] = '1/8', ['⅜'] = '3/8', ['⅝'] = '5/8', ['⅞'] = '7/8',
	['¿'] = '?',

	-- Upper case accents
	['À'] = 'A', ['Á'] = 'A', ['Â'] = 'A', ['Ã'] = 'A',
	['Ä'] = 'A', ['Å'] = 'A',
	['Æ'] = 'AE', ['Ç'] = 'C',
	['È'] = 'E', ['É'] = 'E', ['Ê'] = 'E', ['Ë'] = 'E',
	['Ì'] = 'I', ['Í'] = 'I', ['Î'] = 'I', ['Ï'] = 'I',
	['Ð'] = 'D', ['Ñ'] = 'N',
	['Ò'] = 'O', ['Ó'] = 'O', ['Ô'] = 'O', ['Õ'] = 'O',
	['Ö'] = 'O', ['Ø'] = 'O',
	['Ú'] = 'U', ['Ù'] = 'U', ['Û'] = 'U', ['Ü'] = 'U',
	['×'] = 'x', ['Ý'] = 'Y',

	-- Lower case accents
	['à'] = 'a', ['á'] = 'a', ['â'] = 'a',
	['ã'] = 'a', ['ä'] = 'a', ['å'] = 'a',
	['æ'] = 'ae', ['ç'] = 'c',
	['è'] = 'e', ['é'] = 'e', ['ê'] = 'e', ['ë'] = 'e',
	['ì'] = 'i', ['í'] = 'i', ['î'] = 'i', ['ï'] = 'i',
	['ð'] = 'd', ['ñ'] = 'n',
	['ò'] = 'o', ['ó'] = 'o', ['ô'] = 'o', ['õ'] = 'o',
	['ö'] = 'o', ['ø'] = 'o',
	['ù'] = 'u', ['ú'] = 'u', ['û'] = 'u', ['ü'] = 'u',
	['ý'] = 'y', ['ÿ'] = 'y',

	-- Extended latin A
	['Ā'] = 'A', ['ā'] = 'a', ['Ă'] = 'A', ['ă'] = 'a',
	['Ą'] = 'A', ['ą'] = 'a', ['Ć'] = 'C', ['ć'] = 'c',
	['Ĉ'] = 'C', ['ĉ'] = 'c', ['Ċ'] = 'C', ['ċ'] = 'c',
	['Č'] = 'C', ['č'] = 'c', ['Ď'] = 'D', ['ď'] = 'd',
	['Đ'] = 'D', ['đ'] = 'd', ['Ē'] = 'E', ['ē'] = 'e',
	['Ĕ'] = 'E', ['ĕ'] = 'e', ['Ė'] = 'E', ['ė'] = 'e',
	['Ę'] = 'E', ['ę'] = 'e', ['Ě'] = 'E', ['ě'] = 'e',
	['Ĝ'] = 'G', ['Ğ'] = 'G', ['ğ'] = 'g', ['ĝ'] = 'g',
	['Ġ'] = 'G', ['ġ'] = 'g', ['Ģ'] = 'G', ['ģ'] = 'g',
	['Ĥ'] = 'H', ['ĥ'] = 'h', ['Ħ'] = 'H', ['ħ'] = 'h',
	['Ĩ'] = 'I', ['ĩ'] = 'i', ['Ī'] = 'I', ['ī'] = 'i',
	['Ĭ'] = 'I', ['ĭ'] = 'i', ['Į'] = 'I', ['į'] = 'i',
	-- Ĳ / ĳ are the single-code-point ligatures (U+0132 / U+0133)
	['ı'] = 'i', ['İ'] = 'I', ['Ĳ'] = 'IJ', ['ĳ'] = 'ij',
	['Ĵ'] = 'J', ['ĵ'] = 'j', ['ķ'] = 'k', ['Ķ'] = 'K',
	['ĸ'] = 'k',
	['Ĺ'] = 'L', ['ĺ'] = 'l', ['Ļ'] = 'L', ['ļ'] = 'l',
	['Ľ'] = 'L', ['ľ'] = 'l', ['Ŀ'] = 'L', ['ŀ'] = 'l',
	['Ł'] = 'L', ['ł'] = 'l', ['Ń'] = 'N', ['ń'] = 'n',
	['Ņ'] = 'N', ['ņ'] = 'n', ['Ň'] = 'N', ['ň'] = 'n',
	-- ŉ is the single code point U+0149; Ŋ is upper case, so it maps to 'N'
	['ŉ'] = 'n', ['Ŋ'] = 'N', ['ŋ'] = 'n',
	['Ō'] = 'O', ['ō'] = 'o', ['Ŏ'] = 'O', ['ŏ'] = 'o',
	['ő'] = 'o', ['Ő'] = 'O', ['œ'] = 'oe', ['Œ'] = 'OE',
	['Ŕ'] = 'R', ['ŕ'] = 'r', ['Ŗ'] = 'R', ['ŗ'] = 'r',
	['Ř'] = 'R', ['ř'] = 'r', ['Ś'] = 'S', ['ś'] = 's',
	['Ŝ'] = 'S', ['ŝ'] = 's', ['Ş'] = 'S', ['ş'] = 's',
	['Š'] = 'S', ['š'] = 's', ['Ţ'] = 'T', ['ţ'] = 't',
	['ť'] = 't', ['Ŧ'] = 'T', ['Ť'] = 'T', ['ŧ'] = 't',
	['Ũ'] = 'U', ['ũ'] = 'u', ['Ū'] = 'U', ['ū'] = 'u',
	['Ŭ'] = 'U', ['ŭ'] = 'u', ['Ů'] = 'U', ['ů'] = 'u',
	['Ű'] = 'U', ['ű'] = 'u', ['Ų'] = 'U', ['ų'] = 'u',
	['Ŵ'] = 'W', ['ŵ'] = 'w', ['Ŷ'] = 'Y', ['ŷ'] = 'y',
	['Ÿ'] = 'Y',
	['Ź'] = 'Z', ['ź'] = 'z', ['Ż'] = 'Z', ['ż'] = 'z',
	['Ž'] = 'Z', ['ž'] = 'z', ['ſ'] = 's',

	-- Extended latin B
	['ƀ'] = 'b', ['Ɓ'] = 'B', ['Ƃ'] = 'B', ['ƃ'] = 'b',
	['Ɔ'] = 'O',
	['Ƈ'] = 'C', ['ƈ'] = 'c', ['Ɖ'] = 'D', ['Ɗ'] = 'D',
	['Ƌ'] = 'D', ['ƌ'] = 'd', ['Ǝ'] = 'E', ['Ə'] = 'E',
	['Ɛ'] = 'E',
	['Ƒ'] = 'F', ['ƒ'] = 'f', ['Ɠ'] = 'G',
	['ƕ'] = 'hv', ['Ɨ'] = 'I', ['Ƙ'] = 'K', ['ƙ'] = 'k',
	['ƚ'] = 'l', ['Ɯ'] = 'M', ['Ɲ'] = 'N', ['ƞ'] = 'n',
	['Ɵ'] = 'O',
	['Ơ'] = 'O', ['ơ'] = 'o', ['Ƣ'] = 'OI', ['ƣ'] = 'oi',
	['Ƥ'] = 'P', ['ƥ'] = 'p', ['Ʀ'] = 'YR',
	['Ƨ'] = 'S', ['ƨ'] = 's', ['ƫ'] = 't',
	['Ƭ'] = 'T', ['ƭ'] = 't', ['Ʈ'] = 'T',
	['Ư'] = 'U', ['ư'] = 'u', ['Ʋ'] = 'V',
	['Ƴ'] = 'Y', ['ƴ'] = 'y', ['Ƶ'] = 'Z', ['ƶ'] = 'z',
	['ƻ'] = '2', ['Ƽ'] = '5', ['ƽ'] = '5',
	-- Digraph keys below are single code points (U+01C4 .. U+01CC)
	['Ǆ'] = 'DZ', ['ǅ'] = 'Dz', ['ǆ'] = 'dz',
	['Ǉ'] = 'LJ', ['ǈ'] = 'Lj', ['ǉ'] = 'lj',
	['Ǌ'] = 'NJ', ['ǋ'] = 'Nj', ['ǌ'] = 'nj',
	['Ǎ'] = 'A', ['ǎ'] = 'a', ['Ǐ'] = 'I', ['ǐ'] = 'i',
	['Ǒ'] = 'O', ['ǒ'] = 'o', ['Ǔ'] = 'U', ['ǔ'] = 'u',
	['Ǖ'] = 'U', ['ǖ'] = 'u', ['Ǘ'] = 'U', ['ǘ'] = 'u',
	['Ǚ'] = 'U', ['ǚ'] = 'u', ['Ǜ'] = 'U', ['ǜ'] = 'u',
	['ǝ'] = 'e',
	['Ǟ'] = 'A', ['ǟ'] = 'a', ['Ǡ'] = 'A', ['ǡ'] = 'a',
	['Ǣ'] = 'Æ', ['ǣ'] = 'æ', ['Ǥ'] = 'G', ['ǥ'] = 'g',
	['Ǧ'] = 'G', ['ǧ'] = 'g', ['Ǩ'] = 'K', ['ǩ'] = 'k',
	-- Ǫ ǫ Ǭ ǭ are O with ogonek (not Q), so the base letter is O
	['Ǫ'] = 'O', ['ǫ'] = 'o', ['Ǭ'] = 'O', ['ǭ'] = 'o',
	['ǰ'] = 'j',
	-- Single code points U+01F1 .. U+01F3
	['Ǳ'] = 'DZ', ['ǲ'] = 'Dz', ['ǳ'] = 'dz',
	['Ǵ'] = 'G', ['ǵ'] = 'g', ['Ƕ'] = 'H',
	['Ǹ'] = 'N', ['ǹ'] = 'n', ['Ǻ'] = 'A', ['ǻ'] = 'a',
	['Ǽ'] = 'Æ', ['ǽ'] = 'æ', ['Ǿ'] = 'Ø', ['ǿ'] = 'ø',
	['Ȁ'] = 'A', ['ȁ'] = 'a', ['Ȃ'] = 'A', ['ȃ'] = 'a',
	['Ȅ'] = 'E', ['ȅ'] = 'e', ['Ȇ'] = 'E', ['ȇ'] = 'e',
	['Ȉ'] = 'I', ['ȉ'] = 'i', ['Ȋ'] = 'I', ['ȋ'] = 'i',
	['Ȍ'] = 'O', ['ȍ'] = 'o', ['Ȏ'] = 'O', ['ȏ'] = 'o',
	['Ȑ'] = 'R', ['ȑ'] = 'r', ['Ȓ'] = 'R', ['ȓ'] = 'r',
	['Ȕ'] = 'U', ['ȕ'] = 'u', ['Ȗ'] = 'U', ['ȗ'] = 'u',
	['Ș'] = 'S', ['ș'] = 's', ['Ț'] = 'T', ['ț'] = 't',
	['Ȟ'] = 'H', ['ȟ'] = 'h', ['Ƞ'] = 'N',
	['ȡ'] = 'd',
	['Ȣ'] = 'OU', ['ȣ'] = 'ou', ['Ȥ'] = 'Z', ['ȥ'] = 'z',
	['Ȧ'] = 'A', ['ȧ'] = 'a', ['Ȩ'] = 'E', ['ȩ'] = 'e',
	['Ȫ'] = 'O', ['ȫ'] = 'o', ['Ȭ'] = 'O', ['ȭ'] = 'o',
	['Ȯ'] = 'O', ['ȯ'] = 'o', ['Ȱ'] = 'O', ['ȱ'] = 'o',
	['Ȳ'] = 'Y', ['ȳ'] = 'y', ['ȴ'] = 'l',
	['ȵ'] = 'n', ['ȶ'] = 't', ['ȷ'] = 'j',
	['ȸ'] = 'db', ['ȹ'] = 'qp', ['Ⱥ'] = 'A',
	['Ȼ'] = 'C', ['ȼ'] = 'c', ['Ƚ'] = 'L',
	['Ⱦ'] = 'T', ['ȿ'] = 's', ['ɀ'] = 'z',
	['Ƀ'] = 'B', ['Ʉ'] = 'U', ['Ʌ'] = 'V',
	['Ɇ'] = 'E', ['ɇ'] = 'e', ['Ɉ'] = 'J', ['ɉ'] = 'j',
	['Ɋ'] = 'Q', ['ɋ'] = 'q', ['Ɍ'] = 'R', ['ɍ'] = 'r',
	['Ɏ'] = 'Y', ['ɏ'] = 'y',

	-- IPA extensions
	['ɐ'] = 'a',
	['ɓ'] = 'b', ['ɔ'] = 'o',
	['ɕ'] = 'c', ['ɖ'] = 'd', ['ɗ'] = 'd',
	['ɘ'] = 'e', ['ə'] = 'e', ['ɚ'] = 'e',
	['ɛ'] = 'e', ['ɜ'] = 'e', ['ɝ'] = 'e', ['ɞ'] = 'e',
	['ɟ'] = 'j',
	['ɠ'] = 'g', ['ɡ'] = 'g', ['ɢ'] = 'G',
	['ɥ'] = 'h', ['ɦ'] = 'h', ['ɧ'] = 'h',
	['ɨ'] = 'i', ['ɪ'] = 'I',
	['ɫ'] = 'l', ['ɬ'] = 'l', ['ɭ'] = 'l',
	['ɮ'] = 'lz',
	['ɯ'] = 'm', ['ɰ'] = 'm', ['ɱ'] = 'm',
	['ɲ'] = 'n', ['ɳ'] = 'n', ['ɴ'] = 'N',
	['ɵ'] = 'o', ['ɶ'] = 'Œ',
	['ɹ'] = 'r', ['ɺ'] = 'r', ['ɻ'] = 'r',
	['ɼ'] = 'r', ['ɽ'] = 'r', ['ɾ'] = 'r', ['ɿ'] = 'r',
}

@ -17,59 +17,44 @@
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
--]] --]]
-- Fallback table
local fallbacks = dofile(font_api.path.."/fallbacks.lua")
-- Local functions -- Local functions
------------------ ------------------
-- Table deep copy -- Returns number of UTF8 bytes of the first char of the string
local function get_char_bytes(str)
local function deep_copy(input) local msb = str:byte(1)
local output = {} if msb ~= nil then
local key, value if msb < 0x80 then return 1 end
for key, value in pairs(input) do if msb >= 0xF0 then return 4 end
if type(value) == 'table' then if msb >= 0xE0 then return 3 end
output[key] = deep_copy(value) if msb >= 0xC2 then return 2 end
else
output[key] = value
end
end end
return output
end end
-- Returns next char, managing ascii and unicode plane 0 (0000-FFFF). -- Returns the unicode codepoint of the first char of the string
local function char_to_codepoint(str)
local function get_next_char(text, pos) local bytes = get_char_bytes(str)
if bytes == 1 then
local msb = text:byte(pos) return str:byte(1)
-- 1 byte char, ascii equivalent codepoints elseif bytes == 2 then
if msb < 0x80 then return (str:byte(1) - 0xC2) * 0x40
return msb, pos + 1 + str:byte(2)
elseif bytes == 3 then
return (str:byte(1) - 0xE0) * 0x1000
+ str:byte(2) % 0x40 * 0x40
+ str:byte(3) % 0x40
elseif bytes == 4 then -- Not tested
return (str:byte(1) - 0xF0) * 0x40000
+ str:byte(2) % 0x40 * 0x1000
+ str:byte(3) % 0x40 * 0x40
+ str:byte(4) % 0x40
end end
-- 4 bytes char not managed (Only 16 bits codepoints are managed)
if msb >= 0xF0 then
return 0, pos + 4
end
-- 3 bytes char
if msb >= 0xE0 then
return (msb - 0xE0) * 0x1000
+ text:byte(pos + 1) % 0x40 * 0x40
+ text:byte(pos + 2) % 0x40,
pos + 3
end
-- 2 bytes char (little endian)
if msb >= 0xC2 then
return (msb - 0xC2) * 0x40 + text:byte(pos + 1),
pos + 2
end
-- Not an UTF char
return 0, pos + 1
end end
-- Split multiline text into array of lines, with <maxlines> maximum lines. -- Split multiline text into array of lines, with <maxlines> maximum lines.
local function split_lines(text, maxlines) local function split_lines(text, maxlines)
local splits = text:split("\n") local splits = text:split("\n")
if maxlines then if maxlines then
@ -86,42 +71,75 @@ end
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
--- Font class --- Font class
font_api.Font = {} local Font = {}
font_api.Font = Font
function font_api.Font:new(def) function Font:new(def)
if type(def) ~= "table" then if type(def) ~= "table" then
minetest.log("error", "Font definition must be a table.") minetest.log("error",
"[font_api] Font definition must be a table.")
return nil return nil
end end
if def.height == nil or def.height <= 0 then if def.height == nil or def.height <= 0 then
minetest.log("error", "Font definition must have a positive height.") minetest.log("error",
"[font_api] Font definition must have a positive height.")
return nil return nil
end end
if type(def.widths) ~= "table" then if type(def.widths) ~= "table" then
minetest.log("error", "Font definition must have a widths array.") minetest.log("error",
"[font_api] Font definition must have a widths array.")
return nil return nil
end end
if def.widths[0] == nil then if def.widths[0] == nil then
minetest.log("error", minetest.log("error",
"Font must have a char with codepoint 0 (=unknown char).") "[font_api] Font must have a char with codepoint 0 (=unknown char).")
return nil return nil
end end
local font = deep_copy(def) local font = table.copy(def)
setmetatable(font, self) setmetatable(font, self)
self.__index = self self.__index = self
return font return font
end end
--- Gets the next displayable char of a text.
-- Decodes the first UTF-8 char of <text>. If the font has no width entry for
-- it, the char is replaced by its entry in the fallback map (which may expand
-- to several chars) and decoding is retried on the resulting text. When no
-- fallback exists, codepoint 0 (unknown char) is returned instead.
-- @param text Text to read the first char from
-- @return Codepoint of first char (nil if the text does not start with a
--         complete UTF-8 char),
-- @return Remaining string without this first char ('' on error)
function Font:get_next_char(text)
	local bytes = get_char_bytes(text)

	-- A lead byte announcing more bytes than the text actually holds is
	-- rejected here too: decoding it would do arithmetic on missing (nil)
	-- bytes and raise an error instead of reporting the bad input.
	if bytes == nil or #text < bytes then
		minetest.log("warning",
			"[font_api] Encountered a non UTF char, not displaying text.")
		return nil, ''
	end

	local codepoint = char_to_codepoint(text)
	local remaining = text:sub(bytes + 1)

	-- Char known by the font: nothing more to do
	if self.widths[codepoint] ~= nil then
		return codepoint, remaining
	end

	-- Fallback mechanism: substitute the char and try again
	local replacement = fallbacks[text:sub(1, bytes)]
	if replacement then
		return self:get_next_char(replacement..remaining)
	end

	return 0, remaining -- Ultimate fallback
end
--- Returns the width of a given char --- Returns the width of a given char
-- @param char : codepoint of the char -- @param char : codepoint of the char
-- @return Char width -- @return Char width
function font_api.Font:get_char_width(char) function Font:get_char_width(char)
-- Replace chars with no texture by the NULL(0) char -- Replace chars with no texture by the NULL(0) char
if self.widths[char] ~= nil then if self.widths[char] ~= nil then
return self.widths[char] return self.widths[char]
@ -134,7 +152,7 @@ end
-- @param nb_of_lines : number of text lines (default 1) -- @param nb_of_lines : number of text lines (default 1)
-- @return Text height -- @return Text height
function font_api.Font:get_height(nb_of_lines) function Font:get_height(nb_of_lines)
if nb_of_lines == nil then nb_of_lines = 1 end if nb_of_lines == nil then nb_of_lines = 1 end
if nb_of_lines > 0 then if nb_of_lines > 0 then
@ -154,16 +172,14 @@ end
-- @param line Line of text which the width will be computed. -- @param line Line of text which the width will be computed.
-- @return Text width -- @return Text width
function font_api.Font:get_width(line) function Font:get_width(line)
local codepoint
local char
local width = 0 local width = 0
local pos = 1 line = line or ''
-- TODO: Use iterator while line ~= "" do
while pos <= #line do codepoint, line = self:get_next_char(line)
char, pos = get_next_char(line, pos) width = width + self:get_char_width(codepoint)
width = width + self:get_char_width(char)
end end
return width return width
@ -176,30 +192,21 @@ end
-- @param y Vertical position of the line in texture -- @param y Vertical position of the line in texture
-- @return Texture string -- @return Texture string
function font_api.Font:make_line_texture(line, texturew, x, y) function Font:make_line_texture(line, texturew, x, y)
local codepoint
local texture = "" local texture = ""
local char line = line or ''
local pos = 1
-- TODO: Use iterator while line ~= '' do
while pos <= #line do codepoint, line = self:get_next_char(line)
char, pos = get_next_char(line, pos)
-- Replace chars with no texture by the NULL(0) char
if self.widths[char] == nil
then
print(string.format("["..font_api.name
.."] Missing char %d (%04x)",char,char))
char = 0
end
-- Add image only if it is visible (at least partly) -- Add image only if it is visible (at least partly)
if x + self.widths[char] >= 0 and x <= texturew then if x + self.widths[codepoint] >= 0 and x <= texturew then
texture = texture.. texture = texture..
string.format(":%d,%d=font_%s_%04x.png", string.format(":%d,%d=font_%s_%04x.png",
x, y, self.name, char) x, y, self.name, codepoint)
end end
x = x + self.widths[char] x = x + self.widths[codepoint]
end end
return texture return texture
@ -215,7 +222,7 @@ end
-- @param color Color of the text (optional) -- @param color Color of the text (optional)
-- @return Texture string -- @return Texture string
function font_api.Font:make_text_texture(text, texturew, textureh, maxlines, function Font:make_text_texture(text, texturew, textureh, maxlines,
halign, valign, color) halign, valign, color)
local texture = "" local texture = ""
local lines = {} local lines = {}
@ -263,4 +270,3 @@ function font_api.Font:make_text_texture(text, texturew, textureh, maxlines,
if color then texture = texture.."^[colorize:"..color end if color then texture = texture.."^[colorize:"..color end
return texture return texture
end end