Make utf8.char variadic, (re)move checks

This commit is contained in:
Lars Mueller 2022-10-01 18:45:12 +02:00
parent 4d6e5fdfb0
commit 4677b2c827

@ -1,15 +1,15 @@
local string_char = string.char local string_char, table_concat = string.char, table.concat
local utf8 = {} local utf8 = {}
function utf8.char(codepoint) function utf8.is_valid_codepoint(codepoint)
if codepoint <= 0x007F then -- Must be in bounds & must not be a surrogate
-- Single byte return codepoint <= 0x10FFFF and (codepoint < 0xD800 or codepoint > 0xDFFF)
return string_char(codepoint) end
end
if codepoint < 0x00A0 or codepoint > 0x10FFFF then local function utf8_char(codepoint)
-- Out of range if codepoint <= 0x007F then -- single byte
return -- TODO (?) error instead return string_char(codepoint) -- UTF-8 encoded string
end end
local result = "" local result = ""
local i = 0 local i = 0
@ -19,7 +19,18 @@ function utf8.char(codepoint)
codepoint = (codepoint - remainder) / 64 codepoint = (codepoint - remainder) / 64
i = i + 1 i = i + 1
until codepoint <= 2 ^ (8 - i - 2) until codepoint <= 2 ^ (8 - i - 2)
return string_char(0x100 - 2 ^ (8 - i - 1) + codepoint) .. result
return string_char(0x100 - 2 ^ (8 - i - 1) + codepoint) .. result -- UTF-8 encoded string
end
function utf8.char(...)
local n_args = select("#", ...)
if n_args == 1 then return utf8_char(...) end
local chars = {}
for i = 1, n_args do
chars[i] = utf8_char(select(i, ...))
end
return table_concat(chars)
end end
return utf8 return utf8