Add experimental JSON module

This commit is contained in:
Lars Mueller 2021-09-07 20:52:42 +02:00
parent 9484b78eef
commit 41feceb003
2 changed files with 394 additions and 5 deletions

377
json.lua Normal file

@ -0,0 +1,377 @@
local modlib, setmetatable, pairs, assert, error, table, table_insert, table_concat, tonumber, tostring, math_huge, string, type, next
= modlib, setmetatable, pairs, assert, error, table, table.insert, table.concat, tonumber, tostring, math.huge, string, type, next
local _ENV = {}
setfenv(1, _ENV)
--! experimental
-- Null
-- Sentinel value representing JSON null: an empty table on which every listed operation raises an error.
-- TODO consider using userdata (for ex. by using newproxy)
do
	-- eq is not among the metamethods, len won't work on 5.1
	local operations = {"add", "sub", "mul", "div", "mod", "pow", "unm", "concat", "len", "lt", "le", "index", "newindex", "call"}
	local null_metatable = {}
	for i = 1, #operations do
		local operation = operations[i]
		local message = "attempt to " .. operation .. " a null value"
		null_metatable["__" .. operation] = function()
			return error(message)
		end
	end
	null = setmetatable({}, null_metatable)
end
-- Metatable shared by all serializer instances: missing fields (read, write, null, ...)
-- are looked up in the module table. There is no `self` in scope at file level,
-- so the module table has to be referenced through `_ENV`.
local metatable = {__index = _ENV}
_ENV.metatable = metatable
--- Turns a table into a serializer instance.
-- @param self table to convert; it is modified in place
-- @return the same table, now carrying the module metatable
function new(self)
	return setmetatable(self, metatable)
end
-- Characters that are skipped between tokens while reading
local whitespace = modlib.table.set({"\t", "\r", "\n", " "})
-- Maps the character following a backslash inside a string to the character it stands for.
-- "u" escapes are not listed here; the string reader handles them separately.
-- TODO is this complete?
local decoding_escapes = {
	["b"] = "\b",
	["f"] = "\f",
	["n"] = "\n",
	["r"] = "\r",
	["t"] = "\t",
	["/"] = "/",
	["\\"] = "\\",
	['"'] = '"'
}
-- Set up a DFA for number syntax validations
-- Every state is a table mapping a single input character to the follow-up state.
-- States in which a number may end carry `final = true`; all other states carry `expected`,
-- naming what is still missing (used for error messages).
-- The start state only accepts digits: a leading minus sign has to be consumed by the caller.
local number_dfa
do
	-- TODO proper DFA utilities
	-- Registers transitions on a state; every character of a key string leads to the given next state
	local function set_transitions(state, transitions)
		for chars, next_state in pairs(transitions) do
			for char in chars:gmatch"." do
				state[char] = next_state
			end
		end
	end
	local onenine = "123456789"
	local digit = "0" .. onenine
	local e = "eE"
	-- At least one exponent digit has been read
	local exponent = {final = true}
	set_transitions(exponent, {
		[digit] = exponent
	})
	-- After the exponent sign: at least one digit has to follow
	local pre_exponent = {expected = "exponent"}
	set_transitions(pre_exponent, {
		[digit] = exponent
	})
	-- After "e" / "E": either a sign or the first exponent digit
	local exponent_sign = {expected = "exponent"}
	set_transitions(exponent_sign, {
		[digit] = exponent,
		-- A sign alone does not complete the exponent ("1e+" is invalid)
		["+"] = pre_exponent,
		["-"] = pre_exponent
	})
	-- At least one fraction digit has been read
	local fraction_final = {final = true}
	set_transitions(fraction_final, {
		[digit] = fraction_final,
		[e] = exponent_sign
	})
	-- After the decimal point: at least one digit has to follow
	local fraction = {expected = "fraction"}
	set_transitions(fraction, {
		[digit] = fraction_final
	})
	-- Integer part starting with a nonzero digit
	local integer = {final = true}
	set_transitions(integer, {
		[digit] = integer,
		[e] = exponent_sign,
		["."] = fraction
	})
	-- Integer part consisting of a single zero: no further digits allowed (no leading zeros),
	-- but a fraction and / or an exponent may follow ("0.5", "0e5")
	local zero = {final = true}
	set_transitions(zero, {
		["."] = fraction,
		[e] = exponent_sign
	})
	number_dfa = {}
	set_transitions(number_dfa, {
		[onenine] = integer,
		["0"] = zero
	})
end
-- Maps hexadecimal digit characters (0-9, a-f, A-F) to their numeric values
local hex_digit_values = {}
do
	local lowercase_digits = "0123456789abcdef"
	for value = 0, 15 do
		local digit = lowercase_digits:sub(value + 1, value + 1)
		hex_digit_values[digit] = value
		-- Decimal digits are unaffected by upper(), letters gain their uppercase variant
		hex_digit_values[digit:upper()] = value
	end
end
-- TODO SAX vs DOM
--- Reads a single JSON value from a character stream.
-- Objects become tables with string keys, arrays become list tables, strings / numbers / booleans
-- become the corresponding Lua values and `null` becomes `self.null`.
-- Whitespace after the value is consumed; whether the input ends afterwards is not checked here.
-- @param self serializer; only `self.null` is read
-- @param read_ function returning the next character on each call and nil once the input is exhausted
-- @return the decoded value
-- Raises an error of the form "syntax error: <index>: <message>" on malformed input.
function read(self, read_)
	-- Number of characters requested so far, reported in error messages
	local index = 0
	-- Current (lookahead) character; nil at end of input
	local char
	-- TODO support read functions which provide additional debug output (such as row:column)
	local function read()
		index = index + 1
		char = read_()
		return char
	end
	local function syntax_error(errmsg)
		-- TODO ensure the index isn't off
		error("syntax error: " .. index .. ": " .. errmsg)
	end
	local function syntax_assert(value, errmsg)
		if not value then
			syntax_error(errmsg or "assertion failed!")
		end
		return value
	end
	local function skip_whitespace()
		while whitespace[char] do
			read()
		end
	end
	-- Forward declaration
	local value
	-- Reads an unsigned number starting at the current character.
	-- Afterwards, the current character is the first one which is not part of the number.
	local function number()
		local state = number_dfa
		local num = {}
		while true do
			-- Will work for nil too
			local next_state = state[char]
			if not next_state then
				if not state.final then
					if state == number_dfa then
						syntax_error"expected a number"
					end
					syntax_error("invalid number: expected " .. state.expected)
				end
				return assert(tonumber(table_concat(num)))
			end
			table_insert(num, char)
			state = next_state
			read()
		end
	end
	-- Reads string contents; expects the current character to be the first one after the opening quote.
	-- Afterwards, the current character is the closing quote.
	local function string()
		local chars = {}
		while true do
			if char == '"' then
				return table_concat(chars)
			end
			if char == "\\" then
				read()
				if char == "u" then
					-- Four hex digits, most significant first
					local num = 0
					for i = 3, 0, -1 do
						num = syntax_assert(hex_digit_values[read()], "expected a hex digit") * (16 ^ i) + num
					end
					table_insert(chars, syntax_assert(modlib.text.utf8(num), "invalid codepoint"))
				else
					table_insert(chars, syntax_assert(decoding_escapes[char], "invalid escape sequence"))
				end
			else
				syntax_assert(char, "unclosed string")
				-- TODO check whether the character is one that must be escaped ("strict" mode)
				table_insert(chars, char)
			end
			read()
		end
	end
	local element
	-- Handlers by first character of a value. Each handler is called with the current character
	-- being the one after the first character and has to leave the last character of the value
	-- as current character; `value` advances past it afterwards.
	local funcs = {
		['"'] = string,
		["{"] = function()
			local dict = {}
			skip_whitespace()
			if char == "}" then return dict end
			while true do
				syntax_assert(char == '"', "key expected")
				read()
				local key = string()
				read()
				skip_whitespace()
				-- char is nil at end of input and must not be concatenated as-is
				syntax_assert(char == ":", "colon expected, got " .. (char or "end of input"))
				local val = element()
				dict[key] = val
				if char == "}" then return dict end
				syntax_assert(char == ",", "comma expected")
				read()
				skip_whitespace()
			end
		end,
		["["] = function()
			local list = {}
			skip_whitespace()
			if char == "]" then return list end
			while true do
				table_insert(list, value())
				skip_whitespace()
				if char == "]" then return list end
				syntax_assert(char == ",", "comma expected")
				read()
				skip_whitespace()
			end
		end,
	}
	-- Registers a handler for a literal word (such as true) which evaluates to the given value
	local function expect_word(word, value)
		local msg = word .. " expected"
		funcs[word:sub(1, 1)] = function()
			syntax_assert(char == word:sub(2, 2), msg)
			for i = 3, #word do
				read()
				syntax_assert(char == word:sub(i, i), msg)
			end
			return value
		end
	end
	expect_word("true", true)
	expect_word("false", false)
	expect_word("null", self.null)
	-- Reads a value starting at the current character.
	-- Afterwards, the current character is the first one after the value.
	function value()
		syntax_assert(char, "value expected")
		if char == "-" then
			-- Negative number: number() already stops on the first character after the number,
			-- so there must not be another advance afterwards (unlike for the handlers in funcs)
			read()
			return -number()
		end
		local func = funcs[char]
		if func then
			-- Advance after first char
			read()
			local val = func()
			-- Advance after last char
			read()
			return val
		end
		if char >= "0" and char <= "9" then
			return number()
		end
		syntax_error"value expected"
	end
	-- Advances by one character, then reads a value surrounded by optional whitespace
	function element()
		read()
		skip_whitespace()
		local val = value()
		skip_whitespace()
		return val
	end
	-- TODO consider asserting EOF as read() == nil, perhaps controlled by a parameter
	return element()
end
-- Maps characters which have to be escaped to their escape sequences.
-- Starts out as the inverse of the decoding table (character -> escape letter, without the backslash).
local encoding_escapes = modlib.table.flip(decoding_escapes)
-- Solidus does not need to be escaped
encoding_escapes["/"] = nil
-- Control characters. Note: U+0080 to U+009F and U+007F are not considered control characters.
for byte = 0, 0x1F do
	local char = string.char(byte)
	-- Keep the short escapes (\b, \f, \n, \r, \t) obtained above rather than replacing them with \u00XX
	if encoding_escapes[char] == nil then
		encoding_escapes[char] = string.format("u%04X", byte)
	end
end
-- Prepend the backslash to every escape; the result is discarded, so this presumably maps in place - TODO confirm
modlib.table.map(encoding_escapes, function(str) return "\\" .. str end)
-- Returns the given string with all characters that need escaping replaced by their escape sequences
local function escape(str)
	-- Parentheses drop the substitution count returned by gsub
	return (str:gsub(".", encoding_escapes))
end
--- Serializes a value as JSON, handing the output to a callback piece by piece.
-- Supported are `self.null`, booleans, finite numbers, strings and tables; tables with `#table == 0`
-- are written as objects (string keys only), all other tables as arrays (no additional keys allowed).
-- @param self serializer; `self.null` is written as null, a truthy `self.cache_escaped_strings` enables caching of escaped strings for the duration of the call
-- @param value value to serialize
-- @param write function called with each piece of output text
-- Raises an error for nan, inf, -inf, non-string object keys, mixed tables and unsupported types.
function write(self, value, write)
	local null = self.null
	-- Cache which computes & stores the escaped form of a string on first access
	local written_strings = self.cache_escaped_strings and setmetatable({}, {__index = function(cache, str)
		local escaped = escape(str)
		cache[str] = escaped
		return escaped
	end})
	-- Writes a quoted & escaped string
	local function string(str)
		write'"'
		local escaped
		if written_strings then
			escaped = written_strings[str]
		else
			escaped = escape(str)
		end
		write(escaped)
		return write'"'
	end
	local dump
	-- Writes a single "key":value pair of an object
	local function write_kv(key, value)
		assert(type(key) == "string", "not a dictionary")
		string(key)
		write":"
		dump(value)
	end
	function dump(value)
		-- TODO improve null check (checking for equality doesn't allow using nan as null, for instance)
		if value == null then
			return write"null"
		end
		local type_ = type(value)
		if type_ == "boolean" then
			return write(value and "true" or "false")
		end
		if type_ == "number" then
			assert(value == value, "unsupported number value: nan")
			assert(value ~= math_huge, "unsupported number value: inf")
			assert(value ~= -math_huge, "unsupported number value: -inf")
			return write(("%.17g"):format(value))
		end
		if type_ == "string" then
			return string(value)
		end
		if type_ ~= "table" then
			error("unsupported type: " .. type_)
		end
		local len = #value
		if len ~= 0 then
			-- Array: every entry has to be part of the list
			assert(modlib.table.count(value) == len, "mixed list & hash part")
			write"["
			for i = 1, len do
				if i > 1 then
					write","
				end
				dump(value[i])
			end
			write"]"
			return
		end
		-- Object (possibly empty)
		write"{"
		local needs_comma = false
		for key, val in next, value do
			if needs_comma then
				write","
			end
			needs_comma = true
			write_kv(key, val)
		end
		write"}"
	end
	dump(value)
end
-- TODO get rid of this paste of write_file and write_string (see modlib.luon)
--- Serializes a value, writing the output to a file.
-- @param self serializer providing the `write` method
-- @param value value to serialize
-- @param file object with a `write` method taking the text to write (such as a file handle)
-- @return whatever `self:write` returns
function write_file(self, value, file)
	local function write_text(text)
		file:write(text)
	end
	return self:write(value, write_text)
end
--- Serializes a value and returns the output as a single string.
-- @param self serializer providing the `write` method
-- @param value value to serialize
-- @return string consisting of all written pieces, concatenated in order
function write_string(self, value)
	local pieces = {}
	local function collect(text)
		pieces[#pieces + 1] = text
	end
	self:write(value, collect)
	return table_concat(pieces)
end
-- TODO read_path (for other serializers too)
--- Reads a value from a file, requesting one character at a time.
-- @param self serializer providing the `read` method
-- @param file object with a `read` method (such as a file handle); it is called with `1` for each character
-- @return the first value returned by `self:read`
function read_file(self, file)
	local function next_char()
		return file:read(1)
	end
	local value = self:read(next_char)
	-- TODO consider file:close()
	return value
end
--- Reads a value from a string, which has to be consumed entirely.
-- @param self serializer providing the `read` method
-- @param string text to read from
-- @return the first value returned by `self:read`
-- Raises "EOF expected" if the reader has not requested a character past the end of the string.
function read_string(self, string)
	-- TODO move the string -> one char read func pattern to modlib.text
	local length = #string
	local position = 0
	-- Yields the characters of the string one by one, then nothing
	local function next_char()
		position = position + 1
		if position <= length then
			return string:sub(position, position)
		end
	end
	local value = self:read(next_char)
	-- We just expect EOF for strings
	assert(position > length, "EOF expected")
	return value
end
return _ENV

@ -228,17 +228,17 @@ for _ = 1, 1000 do
assert(distance == min_distance)
end
local function serializer_test(preserve)
local function serializer_test(is_json, preserve)
local function assert_preserves(obj)
local preserved = preserve(obj)
if obj ~= obj then
assert(preserved ~= preserved)
else
assert(table.equals_references(preserved, obj), luon:write_string(preserved))
assert(table.equals_references(preserved, obj), luon:write_string(preserved) .. " vs " .. luon:write_string(obj))
end
end
-- TODO proper deep table comparison with nan support
for _, constant in pairs{true, false, huge, -huge, 0/0} do
for _, constant in pairs(is_json and {true, false} or {true, false, huge, -huge, 0/0}) do
assert_preserves(constant)
end
-- Strings
@ -254,7 +254,10 @@ local function serializer_test(preserve)
end
-- Simple tables
assert_preserves{hello = "world", welt = "hallo"}
assert_preserves{a = 1, b = "hallo", c = "true"}
assert_preserves{"hello", "hello", "hello"}
assert_preserves{1, 2, 3, true, false}
if is_json then return end
local circular = {}
circular[circular] = circular
circular[1] = circular
@ -273,9 +276,18 @@ local function serializer_test(preserve)
assert_preserves(a)
end
-- JSON
do
serializer_test(true, function(object)
return json:read_string(json:write_string(object))
end)
-- Verify spacing is accepted
assert(modlib.table.equals_noncircular(json:read_string'\t\t\n{ "a" : 1, \t"b":2, "c" : [ 1, 2 ,3 ] } \n\r\t', {a = 1, b = 2, c = {1, 2, 3}}))
end
-- luon
do
serializer_test(function(object)
serializer_test(false, function(object)
return luon:read_string(luon:write_string(object))
end)
end
@ -283,7 +295,7 @@ end
-- bluon
do
-- TODO 1.1496387980481e-07 fails due to precision issues
serializer_test(function(object)
serializer_test(false, function(object)
local rope = table.rope{}
local written, read, input
bluon:write(object, rope)