-- json.lua: JSON reader & writer built on modlib helpers.
-- Reconstructed from the "new file" part of the patch (a/json.lua -> b/json.lua).
-- The test.lua hunks of the same patch are preserved verbatim in the block comment at the end of this file.

local modlib, setmetatable, pairs, assert, error, table, table_insert, table_concat, tonumber, tostring, math_huge, string, type, next
	= modlib, setmetatable, pairs, assert, error, table, table.insert, table.concat, tonumber, tostring, math.huge, string, type, next

-- Module table; every "global" defined below ends up as a field of it
local _ENV = {}
-- Lua 5.1 / LuaJIT need setfenv to redirect globals into _ENV.
-- On Lua 5.2+ the local named _ENV already does this and setfenv resolves to nil, so the call is skipped.
if setfenv then
	setfenv(1, _ENV)
end

--! experimental

-- Null
-- TODO consider using userdata (for ex. by using newproxy)
do
	local metatable = {}
	-- eq is not among the metamethods, len won't work on 5.1
	-- Every listed operation on the null sentinel raises an error instead of silently succeeding
	for _, metamethod in pairs{"add", "sub", "mul", "div", "mod", "pow", "unm", "concat", "len", "lt", "le", "index", "newindex", "call"} do
		metatable["__" .. metamethod] = function() return error("attempt to " .. metamethod .. " a null value") end
	end
	null = setmetatable({}, metatable)
end

-- Instances created by new() fall back to this module for methods and for the default null.
-- (The previous `__index = self` referenced an undefined global here, i.e. nil, so nothing was inherited.)
local metatable = {__index = _ENV}
_ENV.metatable = metatable

-- Turns the given table into a JSON (de)serializer instance.
-- self: table of overrides; `null` and `cache_escaped_strings` are the fields read by this module
-- Returns the same table with the module metatable set.
function new(self)
	return setmetatable(self, metatable)
end

-- Characters skipped between tokens (presumably modlib.table.set builds a lookup set; it is indexed by character below)
local whitespace = modlib.table.set{"\t", "\r", "\n", " "}
-- Maps the character following a backslash to the character it stands for
local decoding_escapes = {
	['"'] = '"',
	["\\"] = "\\",
	["/"] = "/",
	b = "\b",
	f = "\f",
	n = "\n",
	r = "\r",
	t = "\t"
	-- TODO is this complete?
}

-- Set up a DFA for number syntax validations
-- Each state is a table mapping the next character to the next state.
-- `final` marks accepting states, `expected` names what is missing in non-accepting ones.
-- The leading minus sign is handled by the reader, not by the DFA.
local number_dfa
do
	-- TODO proper DFA utilities
	-- Adds a transition for every single character of each key string
	local function set_transitions(state, transitions)
		for chars, next_state in pairs(transitions) do
			for char in chars:gmatch"." do
				state[char] = next_state
			end
		end
	end
	local onenine = "123456789"
	local digit = "0" .. onenine
	local e = "eE"
	-- At least one exponent digit has been read
	local exponent = {final = true}
	set_transitions(exponent, {
		[digit] = exponent
	})
	-- After the exponent sign: a digit is mandatory
	local pre_exponent = {expected = "exponent"}
	set_transitions(pre_exponent, {
		[digit] = exponent
	})
	-- After "e" / "E": optional sign or first exponent digit.
	-- The sign must lead to pre_exponent (not exponent), otherwise "1e+" would be accepted.
	local exponent_sign = {expected = "exponent"}
	set_transitions(exponent_sign, {
		[digit] = exponent,
		["+"] = pre_exponent,
		["-"] = pre_exponent
	})
	-- At least one fraction digit has been read
	local fraction_final = {final = true}
	set_transitions(fraction_final, {
		[digit] = fraction_final,
		[e] = exponent_sign
	})
	-- After ".": a digit is mandatory
	local fraction = {expected = "fraction"}
	set_transitions(fraction, {
		[digit] = fraction_final
	})
	-- Integer part starting with 1 - 9
	local integer = {final = true}
	set_transitions(integer, {
		[digit] = integer,
		[e] = exponent_sign,
		["."] = fraction
	})
	-- Integer part "0": no further digits allowed, but fraction and exponent are (e.g. 0.5, 0e5)
	local zero = {final = true}
	set_transitions(zero, {
		[e] = exponent_sign,
		["."] = fraction
	})
	number_dfa = {}
	set_transitions(number_dfa, {
		[onenine] = integer,
		["0"] = zero
	})
end

-- Maps hex digit characters (0-9, a-f, A-F) to their numeric value
local hex_digit_values = {}
for i = 0, 9 do
	hex_digit_values[tostring(i)] = i
end
for i = 0, 5 do
	hex_digit_values[string.char(("a"):byte() + i)] = 10 + i
	hex_digit_values[string.char(("A"):byte() + i)] = 10 + i
end

-- TODO SAX vs DOM
-- Reads a single JSON element from a character stream.
-- self: serializer instance; self.null is returned for JSON null
-- read_: function returning the next character on each call and nil at the end of the input
-- Returns the decoded Lua value; raises "syntax error: <index>: <message>" on malformed input.
-- Exactly one character past the element and its trailing whitespace is consumed.
function read(self, read_)
	local index = 0
	-- Current lookahead character, nil at the end of the input
	local char
	-- TODO support read functions which provide additional debug output (such as row:column)
	local function read()
		index = index + 1
		char = read_()
		return char
	end
	local function syntax_error(errmsg)
		-- TODO ensure the index isn't off
		error("syntax error: " .. index .. ": " .. errmsg)
	end
	local function syntax_assert(value, errmsg)
		if not value then
			syntax_error(errmsg or "assertion failed!")
		end
		return value
	end
	local function skip_whitespace()
		while whitespace[char] do
			read()
		end
	end
	-- Forward declaration
	local value
	-- Parses an unsigned number starting at the current character.
	-- On return, char is the first character after the number.
	local function number()
		local state = number_dfa
		local num = {}
		while true do
			-- Will work for nil too
			local next_state = state[char]
			if not next_state then
				if not state.final then
					if state == number_dfa then
						syntax_error"expected a number"
					end
					syntax_error("invalid number: expected " .. state.expected)
				end
				return assert(tonumber(table_concat(num)))
			end
			table_insert(num, char)
			state = next_state
			read()
		end
	end
	-- Parses a string body; expects char to be the first character after the opening quote.
	-- On return, char is the closing quote.
	local function string()
		local chars = {}
		while true do
			if char == '"' then
				return table_concat(chars)
			end
			if char == "\\" then
				read()
				if char == "u" then
					-- Four hex digits, most significant first
					-- NOTE(review): UTF-16 surrogate pairs are not combined; each \u escape is converted on its own
					local num = 0
					for i = 3, 0, -1 do
						num = syntax_assert(hex_digit_values[read()], "expected a hex digit") * (16 ^ i) + num
					end
					table_insert(chars, syntax_assert(modlib.text.utf8(num), "invalid codepoint"))
				else
					table_insert(chars, syntax_assert(decoding_escapes[char], "invalid escape sequence"))
				end
			else
				syntax_assert(char, "unclosed string")
				-- TODO check whether the character is one that must be escaped ("strict" mode)
				table_insert(chars, char)
			end
			read()
		end
	end
	local element
	-- Handlers by first character of a value. They are called with char on the second character
	-- of the value and must return with char on the last character of the value.
	local funcs = {
		['"'] = string,
		["{"] = function()
			local dict = {}
			skip_whitespace()
			if char == "}" then return dict end
			while true do
				syntax_assert(char == '"', "key expected")
				read()
				local key = string()
				read()
				skip_whitespace()
				if char ~= ":" then
					-- char is nil at the end of the input; don't attempt to concatenate nil
					syntax_error("colon expected, got " .. (char or "end of input"))
				end
				local val = element()
				dict[key] = val
				if char == "}" then return dict end
				syntax_assert(char == ",", "comma expected")
				read()
				skip_whitespace()
			end
		end,
		["["] = function()
			local list = {}
			skip_whitespace()
			if char == "]" then return list end
			while true do
				table_insert(list, value())
				skip_whitespace()
				if char == "]" then return list end
				syntax_assert(char == ",", "comma expected")
				read()
				skip_whitespace()
			end
		end,
	}
	-- Registers a handler for a literal keyword (true, false, null)
	local function expect_word(word, value)
		local msg = word .. " expected"
		funcs[word:sub(1, 1)] = function()
			syntax_assert(char == word:sub(2, 2), msg)
			for i = 3, #word do
				read()
				syntax_assert(char == word:sub(i, i), msg)
			end
			return value
		end
	end
	expect_word("true", true)
	expect_word("false", false)
	expect_word("null", self.null)
	-- Parses a value starting at the current character; on return, char is the first character after the value
	function value()
		syntax_assert(char, "value expected")
		if char == "-" then
			-- number() already stops on the first character after the number,
			-- so no trailing read() may follow (it would swallow e.g. the "," in [-1,2])
			read()
			return -number()
		end
		local func = funcs[char]
		if func then
			-- Advance after first char
			read()
			local val = func()
			-- Advance after last char
			read()
			return val
		end
		if char >= "0" and char <= "9" then
			return number()
		end
		syntax_error"value expected"
	end
	-- Reads the next character, then parses a value surrounded by optional whitespace
	function element()
		read()
		skip_whitespace()
		local val = value()
		skip_whitespace()
		return val
	end
	-- TODO consider asserting EOF as read() == nil, perhaps controlled by a parameter
	return element()
end

-- Maps characters which have to be escaped to their escape sequence (including the backslash)
local encoding_escapes = modlib.table.flip(decoding_escapes)
-- Solidus does not need to be escaped
encoding_escapes["/"] = nil
-- Control characters. Note: U+0080 to U+009F and U+007F are not considered control characters.
for byte = 0, 0x1F do
	local control_char = string.char(byte)
	-- Keep the short escapes (\b, \f, \n, \r, \t) taken over from decoding_escapes
	if encoding_escapes[control_char] == nil then
		encoding_escapes[control_char] = string.format("u%04X", byte)
	end
end
-- NOTE(review): relies on modlib.table.map modifying the table in place (the result is not assigned)
modlib.table.map(encoding_escapes, function(str) return "\\" .. str end)
-- Returns str with all characters that need escaping replaced by their escape sequence
local function escape(str)
	-- Parentheses drop the match count returned by gsub
	return (str:gsub(".", encoding_escapes))
end

-- Serializes a value as JSON.
-- self: serializer instance; self.null is written as null, self.cache_escaped_strings enables memoizing escaped strings
-- value: null, boolean, finite number, string or table. Tables with #table == 0 are written as objects
--        (all keys must be strings), all other tables as arrays (must not have a hash part)
-- write: function called with each chunk of output text
-- Raises an error for nan, inf, mixed tables, non-string object keys and unsupported types.
function write(self, value, write)
	local null = self.null
	-- Lazily filled cache str -> escaped str, or a falsy value if caching is disabled
	local written_strings = self.cache_escaped_strings and setmetatable({}, {__index = function(self, str)
		local escaped_str = escape(str)
		self[str] = escaped_str
		return escaped_str
	end})
	local function string(str)
		write'"'
		write(written_strings and written_strings[str] or escape(str))
		return write'"'
	end
	local dump
	local function write_kv(key, value)
		assert(type(key) == "string", "not a dictionary")
		string(key)
		write":"
		dump(value)
	end
	function dump(value)
		if value == null then
			-- TODO improve null check (checking for equality doesn't allow using nan as null, for instance)
			return write"null"
		end
		if value == true then
			return write"true"
		end
		if value == false then
			return write"false"
		end
		local type_ = type(value)
		if type_ == "number" then
			assert(value == value, "unsupported number value: nan")
			assert(value ~= math_huge, "unsupported number value: inf")
			assert(value ~= -math_huge, "unsupported number value: -inf")
			return write(("%.17g"):format(value))
		end
		if type_ == "string" then
			return string(value)
		end
		if type_ == "table" then
			local table = value
			local len = #table
			if len == 0 then
				-- Object (or empty table): write the first pair without a leading comma, then continue after it
				local first, value = next(table)
				write"{"
				if first ~= nil then
					write_kv(first, value)
				end
				for key, value in next, table, first do
					write","
					write_kv(key, value)
				end
				write"}"
			else
				assert(modlib.table.count(table) == len, "mixed list & hash part")
				write"["
				for i = 1, len - 1 do
					dump(table[i])
					write","
				end
				dump(table[len])
				write"]"
			end
			return
		end
		error("unsupported type: " .. type_)
	end
	dump(value)
end

-- TODO get rid of this paste of write_file and write_string (see modlib.luon)

-- Serializes value and writes the text to file using file:write (the file is not closed)
function write_file(self, value, file)
	return self:write(value, function(text)
		file:write(text)
	end)
end

-- Serializes value and returns the JSON text as a string
function write_string(self, value)
	local rope = {}
	self:write(value, function(text)
		table_insert(rope, text)
	end)
	return table_concat(rope)
end

-- TODO read_path (for other serializers too)

-- Reads a single JSON element from file, one character (file:read(1)) at a time (the file is not closed)
function read_file(self, file)
	local value = self:read(function()
		return file:read(1)
	end)
	-- TODO consider file:close()
	return value
end

-- Parses a string which must consist of exactly one JSON element (surrounding whitespace allowed).
-- Raises "EOF expected" if anything else follows the element.
function read_string(self, string)
	-- TODO move the string -> one char read func pattern to modlib.text
	local index = 0
	local value = self:read(function()
		index = index + 1
		if index > #string then
			return
		end
		return string:sub(index, index)
	end)
	-- We just expect EOF for strings
	assert(index > #string, "EOF expected")
	return value
end

return _ENV

--[==[
Remainder of the patch (test.lua hunks), kept for reference:

diff --git a/test.lua b/test.lua
index 57c2d7d..ff98529 100644
--- a/test.lua
+++ b/test.lua
@@ -228,17 +228,17 @@ for _ = 1, 1000 do
 	assert(distance == min_distance)
 end
 
-local function serializer_test(preserve)
+local function serializer_test(is_json, preserve)
 	local function assert_preserves(obj)
 		local preserved = preserve(obj)
 		if obj ~= obj then
 			assert(preserved ~= preserved)
 		else
-			assert(table.equals_references(preserved, obj), luon:write_string(preserved))
+			assert(table.equals_references(preserved, obj), luon:write_string(preserved) .. " vs " .. luon:write_string(obj))
 		end
 	end
 	-- TODO proper deep table comparison with nan support
-	for _, constant in pairs{true, false, huge, -huge, 0/0} do
+	for _, constant in pairs(is_json and {true, false} or {true, false, huge, -huge, 0/0}) do
 		assert_preserves(constant)
 	end
 	-- Strings
@@ -254,7 +254,10 @@ local function serializer_test(preserve)
 	end
 	-- Simple tables
 	assert_preserves{hello = "world", welt = "hallo"}
+	assert_preserves{a = 1, b = "hallo", c = "true"}
 	assert_preserves{"hello", "hello", "hello"}
+	assert_preserves{1, 2, 3, true, false}
+	if is_json then return end
 	local circular = {}
 	circular[circular] = circular
 	circular[1] = circular
@@ -273,9 +276,18 @@ local function serializer_test(preserve)
 	assert_preserves(a)
 end
 
+-- JSON
+do
+	serializer_test(true, function(object)
+		return json:read_string(json:write_string(object))
+	end)
+	-- Verify spacing is accepted
+	assert(modlib.table.equals_noncircular(json:read_string'\t\t\n{ "a" : 1, \t"b":2, "c" : [ 1, 2 ,3 ] } \n\r\t', {a = 1, b = 2, c = {1, 2, 3}}))
+end
+
 -- luon
 do
-	serializer_test(function(object)
+	serializer_test(false, function(object)
 		return luon:read_string(luon:write_string(object))
 	end)
 end
@@ -283,7 +295,7 @@ end
 -- bluon
 do
 	-- TODO 1.1496387980481e-07 fails due to precision issues
-	serializer_test(function(object)
+	serializer_test(false, function(object)
 		local rope = table.rope{}
 		local written, read, input
 		bluon:write(object, rope)
]==]