diff --git a/build/html_entities.lua b/build/html_entities.lua
index 57ebf04..60083f1 100644
--- a/build/html_entities.lua
+++ b/build/html_entities.lua
@@ -5,7 +5,7 @@ local res, code = https.request"https://html.spec.whatwg.org/entities.json"
assert(code == 200)
local entity_map = {}
for entity, chars in pairs(assert(modlib.json:read_string(res))) do
- entity_map[entity:sub(2, #entity - 1)] = table.concat(modlib.table.map(chars.codepoints, modlib.text.utf8))
+ entity_map[entity:sub(2, #entity - 1)] = table.concat(modlib.table.map(chars.codepoints, modlib.utf8.char))
end
local entries = {}
for entity, chars in pairs(entity_map) do
diff --git a/init.lua b/init.lua
index 755e9d2..c0109cf 100644
--- a/init.lua
+++ b/init.lua
@@ -45,6 +45,7 @@ for _, file in pairs{
"table",
"vararg",
"text",
+ "utf8",
"vector",
"quaternion",
"trie",
diff --git a/json.lua b/json.lua
index e4f9bb3..1168d07 100644
--- a/json.lua
+++ b/json.lua
@@ -101,7 +101,7 @@ for i = 0, 5 do
end
-- TODO SAX vs DOM
-local utf8 = modlib.text.utf8
+local utf8_char = modlib.utf8.char
function read(self, read_)
local index = 0
local char
@@ -149,7 +149,7 @@ function read(self, read_)
end
end
local function utf8_codepoint(codepoint)
- return syntax_assert(utf8(codepoint), "invalid codepoint")
+ return syntax_assert(utf8_char(codepoint), "invalid codepoint")
end
local function string()
local chars = {}
diff --git a/text.lua b/text.lua
index 5574c0f..6a1c859 100644
--- a/text.lua
+++ b/text.lua
@@ -138,27 +138,6 @@ magic_charset = "[" .. table.concat(magic_charset) .. "]"
function escape_magic_chars(text) return text:gsub("(" .. magic_charset .. ")", "%%%1") end
-function utf8(number)
- if number <= 0x007F then
- -- Single byte
- return string.char(number)
- end
- if number < 0x00A0 or number > 0x10FFFF then
- -- Out of range
- return
- end
- local result = ""
- local i = 0
- while true do
- local remainder = number % 64
- result = string.char(128 + remainder) .. result
- number = (number - remainder) / 64
- i = i + 1
- if number <= 2 ^ (8 - i - 2) then break end
- end
- return string.char(256 - 2 ^ (8 - i - 1) + number) .. result
-end
-
local keywords = modlib.table.set{"and", "break", "do", "else", "elseif", "end", "false", "for", "function", "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true", "until", "while"}
keywords["goto"] = true -- Lua 5.2 (LuaJIT) support
diff --git a/utf8.lua b/utf8.lua
new file mode 100644
index 0000000..061298f
--- /dev/null
+++ b/utf8.lua
@@ -0,0 +1,33 @@
+local string_char = string.char
+
+local utf8 = {}
+
+--- Encodes a single Unicode codepoint as a UTF-8 byte string.
+-- Surrogates (0xD800-0xDFFF) are not rejected; they are encoded like any other value.
+-- @param codepoint non-negative integer codepoint
+-- @return the UTF-8 encoding (1 to 4 bytes), or nothing if `codepoint` exceeds 0x10FFFF
+function utf8.char(codepoint)
+	if codepoint <= 0x007F then
+		-- Single byte (ASCII)
+		return string_char(codepoint)
+	end
+	if codepoint > 0x10FFFF then
+		-- Out of range
+		return -- TODO (?) error instead
+	end
+	local result = ""
+	local i = 0 -- number of continuation bytes emitted so far
+	repeat
+		-- Peel off the low 6 bits into a continuation byte (10xxxxxx)
+		local remainder = codepoint % 64
+		result = string_char(128 + remainder) .. result
+		codepoint = (codepoint - remainder) / 64
+		i = i + 1
+		-- The lead byte of an (i + 1)-byte sequence has 8 - i - 2 payload bits,
+		-- so the remaining value must be strictly below 2 ^ (8 - i - 2)
+	until codepoint < 2 ^ (8 - i - 2)
+	-- Lead byte: i + 1 high one-bits, a zero bit, then the remaining payload bits
+	return string_char(0x100 - 2 ^ (8 - i - 1) + codepoint) .. result
+end
+
+return utf8
\ No newline at end of file
diff --git a/web/html.lua b/web/html.lua
index f1b9aad..9b141d4 100644
--- a/web/html.lua
+++ b/web/html.lua
@@ -6,8 +6,8 @@ local html = setmetatable({}, {__index = function(self, key)
local function unescape(text)
return text
:gsub("&([A-Za-z]+);", named_entities) -- named
- :gsub("(%d+);", function(digits) return modlib.text.utf8(tonumber(digits)) end) -- decimal
- :gsub("(%x+);", function(digits) return modlib.text.utf8(tonumber(digits, 16)) end) -- hex
+ :gsub("(%d+);", function(digits) return modlib.utf8.char(tonumber(digits)) end) -- decimal
+ :gsub("(%x+);", function(digits) return modlib.utf8.char(tonumber(digits, 16)) end) -- hex
end
self.unescape = unescape
return unescape