--- Minimal UTF-8 encoding helpers.
-- Written without integer division or bitwise operators so the module
-- also loads on Lua 5.1 / LuaJIT.

local string_char, table_concat = string.char, table.concat

local utf8 = {}

--- Check whether a number is a Unicode scalar value.
-- A valid codepoint is a non-negative integer no greater than 0x10FFFF
-- that is not a UTF-16 surrogate (0xD800 to 0xDFFF).
-- @tparam number codepoint value to check
-- @treturn boolean `true` if `codepoint` may be encoded as well-formed UTF-8
function utf8.is_valid_codepoint(codepoint)
	if codepoint < 0 or codepoint > 0x10FFFF then
		return false -- out of bounds
	end
	if codepoint % 1 ~= 0 then
		return false -- not an integer
	end
	-- Must not be a surrogate
	return codepoint < 0xD800 or codepoint > 0xDFFF
end

--- Encode a single codepoint as a UTF-8 byte string.
-- Performs no validation: surrogates and values above 0x10FFFF are encoded
-- with the same scheme. Use `utf8.is_valid_codepoint` beforehand if
-- well-formed output is required.
-- @tparam number codepoint non-negative integer codepoint
-- @treturn string UTF-8 encoded string
local function utf8_char(codepoint)
	if codepoint <= 0x007F then -- single byte
		return string_char(codepoint)
	end
	local result = ""
	local i = 0 -- number of continuation bytes emitted so far
	repeat
		-- Peel off the low 6 bits into a continuation byte (10xxxxxx)
		local remainder = codepoint % 64
		result = string_char(0x80 + remainder) .. result
		codepoint = (codepoint - remainder) / 64
		i = i + 1
		-- A lead byte followed by i continuation bytes has (6 - i) payload bits,
		-- so the remaining value must be strictly below 2 ^ (6 - i). Using `<=`
		-- here would let the payload overflow into the length prefix.
	until codepoint < 2 ^ (8 - i - 2)

	-- Lead byte: (i + 1) one-bits, a zero bit, then the remaining payload
	return string_char(0x100 - 2 ^ (8 - i - 1) + codepoint) .. result
end

--- Encode any number of codepoints and concatenate the results.
-- @tparam number ... codepoints to encode, in order
-- @treturn string UTF-8 encoded string (empty if no arguments are given)
function utf8.char(...)
	local n_args = select("#", ...)
	if n_args == 1 then return utf8_char(...) end
	-- Capture the varargs once; calling select(i, ...) per element is quadratic
	local chars = { ... }
	for i = 1, n_args do
		chars[i] = utf8_char(chars[i])
	end
	return table_concat(chars)
end

return utf8