mirror of
https://github.com/appgurueu/modlib.git
synced 2024-12-22 05:12:28 +01:00
Move text.utf8 to utf8.char
This commit is contained in:
parent
341dd5851b
commit
825599f2d0
@ -5,7 +5,7 @@ local res, code = https.request"https://html.spec.whatwg.org/entities.json"
|
||||
assert(code == 200)
|
||||
local entity_map = {}
|
||||
for entity, chars in pairs(assert(modlib.json:read_string(res))) do
|
||||
entity_map[entity:sub(2, #entity - 1)] = table.concat(modlib.table.map(chars.codepoints, modlib.text.utf8))
|
||||
entity_map[entity:sub(2, #entity - 1)] = table.concat(modlib.table.map(chars.codepoints, modlib.utf8.char))
|
||||
end
|
||||
local entries = {}
|
||||
for entity, chars in pairs(entity_map) do
|
||||
|
1
init.lua
1
init.lua
@ -45,6 +45,7 @@ for _, file in pairs{
|
||||
"table",
|
||||
"vararg",
|
||||
"text",
|
||||
"utf8",
|
||||
"vector",
|
||||
"quaternion",
|
||||
"trie",
|
||||
|
4
json.lua
4
json.lua
@ -101,7 +101,7 @@ for i = 0, 5 do
|
||||
end
|
||||
|
||||
-- TODO SAX vs DOM
|
||||
local utf8 = modlib.text.utf8
|
||||
local utf8_char = modlib.utf8.char
|
||||
function read(self, read_)
|
||||
local index = 0
|
||||
local char
|
||||
@ -149,7 +149,7 @@ function read(self, read_)
|
||||
end
|
||||
end
|
||||
local function utf8_codepoint(codepoint)
|
||||
return syntax_assert(utf8(codepoint), "invalid codepoint")
|
||||
return syntax_assert(utf8_char(codepoint), "invalid codepoint")
|
||||
end
|
||||
local function string()
|
||||
local chars = {}
|
||||
|
21
text.lua
21
text.lua
@ -138,27 +138,6 @@ magic_charset = "[" .. table.concat(magic_charset) .. "]"
|
||||
|
||||
function escape_magic_chars(text) return text:gsub("(" .. magic_charset .. ")", "%%%1") end
|
||||
|
||||
--- Encode a Unicode codepoint as a UTF-8 byte string.
-- @param number codepoint to encode (expected to be a non-negative integer)
-- @return the UTF-8 encoded string, or nothing if the codepoint is rejected
function utf8(number)
	if number <= 0x007F then
		-- Single byte
		return string.char(number)
	end
	-- NOTE(review): 0x80..0x9F (C1 controls) are valid codepoints but are rejected here;
	-- kept as-is because callers may rely on it - confirm whether this is intentional.
	if number < 0x00A0 or number > 0x10FFFF then
		-- Out of range
		return
	end
	local result = ""
	-- Number of continuation bytes emitted so far
	local i = 0
	repeat
		-- Each continuation byte (10xxxxxx) carries the next 6 low bits
		local remainder = number % 64
		result = string.char(128 + remainder) .. result
		number = (number - remainder) / 64
		i = i + 1
		-- The lead byte of a sequence with i continuation bytes has only (8 - i - 2) payload bits,
		-- so the remaining value must be strictly smaller than 2 ^ (8 - i - 2).
		-- (Using <= here produced malformed output for U+0800..U+083F and U+10000..U+10FFF.)
	until number < 2 ^ (8 - i - 2)
	-- Lead byte: (i + 1) one-bits, a zero bit, then the remaining payload
	return string.char(256 - 2 ^ (8 - i - 1) + number) .. result
end
|
||||
|
||||
local keywords = modlib.table.set{"and", "break", "do", "else", "elseif", "end", "false", "for", "function", "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true", "until", "while"}
|
||||
keywords["goto"] = true -- Lua 5.2 (LuaJIT) support
|
||||
|
||||
|
25
utf8.lua
Normal file
25
utf8.lua
Normal file
@ -0,0 +1,25 @@
|
||||
-- Localized for faster access
local string_char = string.char

local utf8 = {}

--- Encode a Unicode codepoint as a UTF-8 byte string.
-- @param codepoint codepoint to encode (expected to be a non-negative integer)
-- @return the UTF-8 encoded string, or nothing if the codepoint is rejected
function utf8.char(codepoint)
	if codepoint <= 0x007F then
		-- Single byte
		return string_char(codepoint)
	end
	-- NOTE(review): 0x80..0x9F (C1 controls) are valid codepoints but are rejected here;
	-- kept as-is because callers may rely on it - confirm whether this is intentional.
	if codepoint < 0x00A0 or codepoint > 0x10FFFF then
		-- Out of range
		return -- TODO (?) error instead
	end
	local result = ""
	-- Number of continuation bytes emitted so far
	local i = 0
	repeat
		-- Each continuation byte (10xxxxxx) carries the next 6 low bits
		local remainder = codepoint % 64
		result = string_char(128 + remainder) .. result
		codepoint = (codepoint - remainder) / 64
		i = i + 1
		-- The lead byte of a sequence with i continuation bytes has only (8 - i - 2) payload bits,
		-- so the remaining value must be strictly smaller than 2 ^ (8 - i - 2).
		-- (Using <= here produced malformed output for U+0800..U+083F and U+10000..U+10FFF.)
	until codepoint < 2 ^ (8 - i - 2)
	-- Lead byte: (i + 1) one-bits, a zero bit, then the remaining payload
	return string_char(0x100 - 2 ^ (8 - i - 1) + codepoint) .. result
end
|
||||
|
||||
return utf8
|
@ -6,8 +6,8 @@ local html = setmetatable({}, {__index = function(self, key)
|
||||
local function unescape(text)
|
||||
return text
|
||||
:gsub("&([A-Za-z]+);", named_entities) -- named
|
||||
:gsub("&#(%d+);", function(digits) return modlib.text.utf8(tonumber(digits)) end) -- decimal
|
||||
:gsub("&#x(%x+);", function(digits) return modlib.text.utf8(tonumber(digits, 16)) end) -- hex
|
||||
:gsub("&#(%d+);", function(digits) return modlib.utf8.char(tonumber(digits)) end) -- decimal
|
||||
:gsub("&#x(%x+);", function(digits) return modlib.utf8.char(tonumber(digits, 16)) end) -- hex
|
||||
end
|
||||
self.unescape = unescape
|
||||
return unescape
|
||||
|
Loading…
Reference in New Issue
Block a user