mirror of
https://github.com/appgurueu/modlib.git
synced 2024-11-22 07:13:45 +01:00
Move text.utf8 to utf8.char
This commit is contained in:
parent
341dd5851b
commit
825599f2d0
@ -5,7 +5,7 @@ local res, code = https.request"https://html.spec.whatwg.org/entities.json"
|
|||||||
assert(code == 200)
|
assert(code == 200)
|
||||||
local entity_map = {}
|
local entity_map = {}
|
||||||
for entity, chars in pairs(assert(modlib.json:read_string(res))) do
|
for entity, chars in pairs(assert(modlib.json:read_string(res))) do
|
||||||
entity_map[entity:sub(2, #entity - 1)] = table.concat(modlib.table.map(chars.codepoints, modlib.text.utf8))
|
entity_map[entity:sub(2, #entity - 1)] = table.concat(modlib.table.map(chars.codepoints, modlib.utf8.char))
|
||||||
end
|
end
|
||||||
local entries = {}
|
local entries = {}
|
||||||
for entity, chars in pairs(entity_map) do
|
for entity, chars in pairs(entity_map) do
|
||||||
|
1
init.lua
1
init.lua
@ -45,6 +45,7 @@ for _, file in pairs{
|
|||||||
"table",
|
"table",
|
||||||
"vararg",
|
"vararg",
|
||||||
"text",
|
"text",
|
||||||
|
"utf8",
|
||||||
"vector",
|
"vector",
|
||||||
"quaternion",
|
"quaternion",
|
||||||
"trie",
|
"trie",
|
||||||
|
4
json.lua
4
json.lua
@ -101,7 +101,7 @@ for i = 0, 5 do
|
|||||||
end
|
end
|
||||||
|
|
||||||
-- TODO SAX vs DOM
|
-- TODO SAX vs DOM
|
||||||
local utf8 = modlib.text.utf8
|
local utf8_char = modlib.utf8.char
|
||||||
function read(self, read_)
|
function read(self, read_)
|
||||||
local index = 0
|
local index = 0
|
||||||
local char
|
local char
|
||||||
@ -149,7 +149,7 @@ function read(self, read_)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
local function utf8_codepoint(codepoint)
|
local function utf8_codepoint(codepoint)
|
||||||
return syntax_assert(utf8(codepoint), "invalid codepoint")
|
return syntax_assert(utf8_char(codepoint), "invalid codepoint")
|
||||||
end
|
end
|
||||||
local function string()
|
local function string()
|
||||||
local chars = {}
|
local chars = {}
|
||||||
|
21
text.lua
21
text.lua
@ -138,27 +138,6 @@ magic_charset = "[" .. table.concat(magic_charset) .. "]"
|
|||||||
|
|
||||||
function escape_magic_chars(text) return text:gsub("(" .. magic_charset .. ")", "%%%1") end
|
function escape_magic_chars(text) return text:gsub("(" .. magic_charset .. ")", "%%%1") end
|
||||||
|
|
||||||
--- Encode a single Unicode codepoint as a UTF-8 byte string.
-- @param number codepoint to encode (expected to be a non-negative integer)
-- @return the UTF-8 encoded string, or nothing (nil) if the codepoint is
--         rejected: values in 0x80..0x9F and values above 0x10FFFF
-- NOTE(review): rejecting 0x80..0x9F (the C1 control range) looks deliberate
-- but is stricter than UTF-8 itself - confirm with callers before relaxing.
function utf8(number)
	if number <= 0x007F then
		-- Single byte (ASCII)
		return string.char(number)
	end
	if number < 0x00A0 or number > 0x10FFFF then
		-- Out of range
		return
	end
	local result = ""
	local i = 0 -- number of continuation bytes emitted so far
	repeat
		-- Peel off the lowest 6 bits into a continuation byte (10xxxxxx)
		local remainder = number % 64
		result = string.char(128 + remainder) .. result
		number = (number - remainder) / 64
		i = i + 1
		-- The lead byte of an (i + 1)-byte sequence carries 8 - i - 2 payload bits,
		-- so the remaining value must be strictly below 2 ^ (8 - i - 2).
		-- Using <= here would truncate exact boundaries such as U+0800 and U+10000.
	until number < 2 ^ (8 - i - 2)
	-- Lead byte: (i + 1) one-bits, a zero bit, then the remaining payload
	return string.char(256 - 2 ^ (8 - i - 1) + number) .. result
end
|
|
||||||
|
|
||||||
local keywords = modlib.table.set{"and", "break", "do", "else", "elseif", "end", "false", "for", "function", "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true", "until", "while"}
|
local keywords = modlib.table.set{"and", "break", "do", "else", "elseif", "end", "false", "for", "function", "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true", "until", "while"}
|
||||||
keywords["goto"] = true -- Lua 5.2 (LuaJIT) support
|
keywords["goto"] = true -- Lua 5.2 (LuaJIT) support
|
||||||
|
|
||||||
|
25
utf8.lua
Normal file
25
utf8.lua
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
local string_char = string.char

local utf8 = {}

--- Encode a single Unicode codepoint as a UTF-8 byte string.
-- @param codepoint codepoint to encode (expected to be a non-negative integer)
-- @return the UTF-8 encoded string, or nothing (nil) if the codepoint is
--         rejected: values in 0x80..0x9F and values above 0x10FFFF
-- NOTE(review): rejecting 0x80..0x9F (the C1 control range) looks deliberate
-- but is stricter than UTF-8 itself - confirm with callers before relaxing.
function utf8.char(codepoint)
	if codepoint <= 0x007F then
		-- Single byte (ASCII)
		return string_char(codepoint)
	end
	if codepoint < 0x00A0 or codepoint > 0x10FFFF then
		-- Out of range
		return -- TODO (?) error instead
	end
	local result = ""
	local i = 0 -- number of continuation bytes emitted so far
	repeat
		-- Peel off the lowest 6 bits into a continuation byte (10xxxxxx)
		local remainder = codepoint % 64
		result = string_char(128 + remainder) .. result
		codepoint = (codepoint - remainder) / 64
		i = i + 1
		-- The lead byte of an (i + 1)-byte sequence carries 8 - i - 2 payload bits,
		-- so the remaining value must be strictly below 2 ^ (8 - i - 2).
		-- Using <= here would truncate exact boundaries such as U+0800 and U+10000.
	until codepoint < 2 ^ (8 - i - 2)
	-- Lead byte: (i + 1) one-bits, a zero bit, then the remaining payload
	return string_char(0x100 - 2 ^ (8 - i - 1) + codepoint) .. result
end
|
||||||
|
|
||||||
|
return utf8
|
@ -6,8 +6,8 @@ local html = setmetatable({}, {__index = function(self, key)
|
|||||||
local function unescape(text)
|
local function unescape(text)
|
||||||
return text
|
return text
|
||||||
:gsub("&([A-Za-z]+);", named_entities) -- named
|
:gsub("&([A-Za-z]+);", named_entities) -- named
|
||||||
:gsub("&#(%d+);", function(digits) return modlib.text.utf8(tonumber(digits)) end) -- decimal
|
:gsub("&#(%d+);", function(digits) return modlib.utf8.char(tonumber(digits)) end) -- decimal
|
||||||
:gsub("&#x(%x+);", function(digits) return modlib.text.utf8(tonumber(digits, 16)) end) -- hex
|
:gsub("&#x(%x+);", function(digits) return modlib.utf8.char(tonumber(digits, 16)) end) -- hex
|
||||||
end
|
end
|
||||||
self.unescape = unescape
|
self.unescape = unescape
|
||||||
return unescape
|
return unescape
|
||||||
|
Loading…
Reference in New Issue
Block a user