modlib/json.lua

403 lines
10 KiB
Lua
Raw Permalink Normal View History

2021-09-07 20:52:42 +02:00
local modlib, setmetatable, pairs, assert, error, table, table_insert, table_concat, tonumber, tostring, math_huge, string, type, next
= modlib, setmetatable, pairs, assert, error, table, table.insert, table.concat, tonumber, tostring, math.huge, string, type, next
local _ENV = {}
setfenv(1, _ENV)
--! experimental
-- See https://tools.ietf.org/id/draft-ietf-json-rfc4627bis-09.html#unichars and https://json.org
2021-09-07 20:52:42 +02:00
-- Null
-- TODO consider using userdata (for ex. by using newproxy)
do
local metatable = {}
-- eq is not among the metamethods, len won't work on 5.1
for _, metamethod in pairs{"add", "sub", "mul", "div", "mod", "pow", "unm", "concat", "len", "lt", "le", "index", "newindex", "call"} do
metatable["__" .. metamethod] = function() return error("attempt to " .. metamethod .. " a null value") end
end
null = setmetatable({}, metatable)
end
local metatable = {__index = self}
_ENV.metatable = metatable
function new(self)
return setmetatable(self, metatable)
end
local whitespace = modlib.table.set{"\t", "\r", "\n", " "}
local decoding_escapes = {
['"'] = '"',
["\\"] = "\\",
["/"] = "/",
b = "\b",
f = "\f",
n = "\n",
r = "\r",
t = "\t"
-- TODO is this complete?
}
-- Set up a DFA for number syntax validations
local number_dfa
2022-02-27 17:40:11 +01:00
do -- as a RegEx: (0|(1-9)(0-9)*)[.(0-9)+[(e|E)[+|-](0-9)+]]; does not need to handle the first sign
2021-09-07 20:52:42 +02:00
-- TODO proper DFA utilities
local function set_transitions(state, transitions)
for chars, next_state in pairs(transitions) do
for char in chars:gmatch"." do
state[char] = next_state
end
end
end
local onenine = "123456789"
local digit = "0" .. onenine
local e = "eE"
local exponent = {final = true}
set_transitions(exponent, {
[digit] = exponent
})
local pre_exponent = {expected = "exponent"}
set_transitions(pre_exponent, {
[digit] = exponent
})
local exponent_sign = {expected = "exponent"}
set_transitions(exponent_sign, {
[digit] = exponent,
["+"] = exponent,
["-"] = exponent
})
local fraction_final = {final = true}
set_transitions(fraction_final, {
[digit] = fraction_final,
[e] = exponent_sign
})
local fraction = {expected = "fraction"}
set_transitions(fraction, {
[digit] = fraction_final
})
local integer = {final = true}
set_transitions(integer, {
[digit] = integer,
[e] = exponent_sign,
["."] = fraction
})
local zero = {final = true}
set_transitions(zero, {
["."] = fraction
})
number_dfa = {}
set_transitions(number_dfa, {
[onenine] = integer,
["0"] = zero
})
end
local hex_digit_values = {}
for i = 0, 9 do
hex_digit_values[tostring(i)] = i
end
for i = 0, 5 do
hex_digit_values[string.char(("a"):byte() + i)] = 10 + i
hex_digit_values[string.char(("A"):byte() + i)] = 10 + i
end
-- TODO SAX vs DOM
local utf8 = modlib.text.utf8
2021-09-07 20:52:42 +02:00
function read(self, read_)
local index = 0
local char
-- TODO support read functions which provide additional debug output (such as row:column)
local function read()
index = index + 1
char = read_()
return char
end
local function syntax_error(errmsg)
-- TODO ensure the index isn't off
error("syntax error: " .. index .. ": " .. errmsg)
end
local function syntax_assert(value, errmsg)
if not value then
syntax_error(errmsg or "assertion failed!")
end
return value
end
local function skip_whitespace()
while whitespace[char] do
read()
end
end
-- Forward declaration
local value
local function number()
local state = number_dfa
local num = {}
while true do
-- Will work for nil too
local next_state = state[char]
if not next_state then
if not state.final then
if state == number_dfa then
syntax_error"expected a number"
end
syntax_error("invalid number: expected " .. state.expected)
end
return assert(tonumber(table_concat(num)))
end
table_insert(num, char)
state = next_state
read()
end
end
local function utf8_codepoint(codepoint)
return syntax_assert(utf8(codepoint), "invalid codepoint")
end
2021-09-07 20:52:42 +02:00
local function string()
local chars = {}
local high_surrogate
2021-09-07 20:52:42 +02:00
while true do
local string_char, next_high_surrogate
2021-09-07 20:52:42 +02:00
if char == '"' then
if high_surrogate then
table_insert(chars, utf8_codepoint(high_surrogate))
end
2021-09-07 20:52:42 +02:00
return table_concat(chars)
end
if char == "\\" then
read()
if char == "u" then
local codepoint = 0
2021-09-07 20:52:42 +02:00
for i = 3, 0, -1 do
codepoint = syntax_assert(hex_digit_values[read()], "expected a hex digit") * (16 ^ i) + codepoint
end
if high_surrogate and codepoint >= 0xDC00 and codepoint <= 0xDFFF then
-- TODO strict mode: throw an error for single surrogates
codepoint = 0x10000 + (high_surrogate - 0xD800) * 0x400 + codepoint - 0xDC00
-- Don't write the high surrogate
high_surrogate = nil
end
if codepoint >= 0xD800 and codepoint <= 0xDBFF then
next_high_surrogate = codepoint
else
string_char = utf8_codepoint(codepoint)
2021-09-07 20:52:42 +02:00
end
else
string_char = syntax_assert(decoding_escapes[char], "invalid escape sequence")
2021-09-07 20:52:42 +02:00
end
else
-- TODO check whether the character is one that must be escaped ("strict" mode)
string_char = syntax_assert(char, "unclosed string")
end
if high_surrogate then
table_insert(chars, utf8_codepoint(high_surrogate))
end
high_surrogate = next_high_surrogate
if string_char then
table_insert(chars, string_char)
2021-09-07 20:52:42 +02:00
end
read()
end
end
local element
local funcs = {
['-'] = function()
return -number()
end,
['"'] = string,
["{"] = function()
local dict = {}
skip_whitespace()
if char == "}" then return dict end
while true do
syntax_assert(char == '"', "key expected")
read()
local key = string()
read()
skip_whitespace()
syntax_assert(char == ":", "colon expected, got " .. char)
local val = element()
dict[key] = val
if char == "}" then return dict end
syntax_assert(char == ",", "comma expected")
read()
skip_whitespace()
end
end,
["["] = function()
local list = {}
skip_whitespace()
if char == "]" then return list end
while true do
table_insert(list, value())
skip_whitespace()
if char == "]" then return list end
syntax_assert(char == ",", "comma expected")
read()
skip_whitespace()
end
end,
}
local function expect_word(word, value)
local msg = word .. " expected"
funcs[word:sub(1, 1)] = function()
syntax_assert(char == word:sub(2, 2), msg)
for i = 3, #word do
read()
syntax_assert(char == word:sub(i, i), msg)
end
return value
end
end
expect_word("true", true)
expect_word("false", false)
expect_word("null", self.null)
function value()
syntax_assert(char, "value expected")
local func = funcs[char]
if func then
-- Advance after first char
read()
local val = func()
-- Advance after last char
read()
return val
end
if char >= "0" and char <= "9" then
return number()
end
syntax_error"value expected"
end
function element()
read()
skip_whitespace()
local val = value()
skip_whitespace()
return val
end
-- TODO consider asserting EOF as read() == nil, perhaps controlled by a parameter
return element()
end
local encoding_escapes = modlib.table.flip(decoding_escapes)
-- Solidus does not need to be escaped
encoding_escapes["/"] = nil
-- Control characters. Note: U+0080 to U+009F and U+007F are not considered control characters.
for byte = 0, 0x1F do
encoding_escapes[string.char(byte)] = string.format("u%04X", byte)
end
modlib.table.map(encoding_escapes, function(str) return "\\" .. str end)
local function escape(str)
return str:gsub(".", encoding_escapes)
end
function write(self, value, write)
local null = self.null
local written_strings = self.cache_escaped_strings and setmetatable({}, {__index = function(self, str)
local escaped_str = escape(str)
self[str] = escaped_str
return escaped_str
end})
local function string(str)
write'"'
write(written_strings and written_strings[str] or escape(str))
return write'"'
end
local dump
local function write_kv(key, value)
assert(type(key) == "string", "not a dictionary")
string(key)
write":"
dump(value)
end
function dump(value)
if value == null then
-- TODO improve null check (checking for equality doesn't allow using nan as null, for instance)
return write"null"
end
if value == true then
return write"true"
end
if value == false then
return write"false"
end
local type_ = type(value)
if type_ == "number" then
assert(value == value, "unsupported number value: nan")
assert(value ~= math_huge, "unsupported number value: inf")
assert(value ~= -math_huge, "unsupported number value: -inf")
return write(("%.17g"):format(value))
end
if type_ == "string" then
return string(value)
end
if type_ == "table" then
local table = value
local len = #table
if len == 0 then
local first, value = next(table)
write"{"
if first ~= nil then
write_kv(first, value)
end
for key, value in next, table, first do
write","
write_kv(key, value)
end
write"}"
else
assert(modlib.table.count(table) == len, "mixed list & hash part")
write"["
for i = 1, len - 1 do
dump(table[i])
write","
end
dump(table[len])
write"]"
end
return
end
error("unsupported type: " .. type_)
end
dump(value)
end
-- TODO get rid of this paste of write_file and write_string (see modlib.luon)
function write_file(self, value, file)
return self:write(value, function(text)
file:write(text)
end)
end
function write_string(self, value)
local rope = {}
self:write(value, function(text)
table_insert(rope, text)
end)
return table_concat(rope)
end
-- TODO read_path (for other serializers too)
function read_file(self, file)
local value = self:read(function()
return file:read(1)
end)
-- TODO consider file:close()
return value
end
function read_string(self, string)
-- TODO move the string -> one char read func pattern to modlib.text
local index = 0
local value = self:read(function()
index = index + 1
if index > #string then
return
end
return string:sub(index, index)
end)
-- We just expect EOF for strings
assert(index > #string, "EOF expected")
return value
end
return _ENV