Add SQLite3 database persistence

This commit is contained in:
Lars Mueller 2021-07-14 21:52:13 +02:00
parent daa0251632
commit 4f59279591
6 changed files with 619 additions and 197 deletions

2
.gitignore vendored

@ -2,3 +2,5 @@
character.b3d.lua character.b3d.lua
# generated by Lua logfile test # generated by Lua logfile test
logfile.test.lua logfile.test.lua
# generated by SQLite3 persistence test
database.test.sqlite3

@ -36,6 +36,48 @@ persistence.lua_log_file.new(mod.get_resource"logfile.test.lua", {}, false)
This will prevent strings from being referenced, possibly bloating file size, but saving memory. This will prevent strings from being referenced, possibly bloating file size, but saving memory.
#### SQLite3 Database Persistence
Uses a SQLite3 database to persistently store a Lua table. **Experimental.** Obtaining it is a bit trickier, as it requires access to the `lsqlite3` library, which may be passed:
```lua
local modlib_sqlite3 = persistence.sqlite3(require"lsqlite3")
```
(assuming `require` is that of an insecure environment if Minetest is used)
Alternatively, if you are not running Minetest, mod security is disabled, you have (temporarily) provided `require` globally, or added `modlib` to `secure.trusted_mods`, you can simply do the following:
```lua
local modlib_sqlite3 = persistence.sqlite3()
```
Modlib will then simply call `require"lsqlite3"` for you.
Then, you can proceed to create a new database:
```lua
local database = modlib_sqlite3.new(mod.get_resource"database.test.sqlite3", {})
-- Create or load
database:init()
-- Use it
database:set_root("key", {nested = true})
database:close()
```
It uses a similar API to Lua log files:
* `new(filename, root)` - without `reference_strings` however (strings aren't referenced currently)
* `init`
* `set`
* `set_root`
* `rewrite`
* `close`
The advantage over Lua log files is that the SQLite3 database keeps disk usage minimal. Unused tables are dropped from the database immediately through reference counting. The downside is that this, combined with the overhead of using SQLite3, takes time, making updates to the SQLite3 database slower than Lua log file updates (which just append to an append-only file).
As simple and fast reference counting doesn't handle cycles, an additional `collectgarbage` stop-the-world method performing a full garbage collection on the database is provided which is called during `init`.
The method `defragment_ids` should not have to be used in practice (if it has to be, it happens automatically) and should be used solely for debugging purposes (neater IDs).
### Bluon ### Bluon
Binary Lua object notation. **Experimental.** Handling of subnormal numbers (very small floats) may be broken. Binary Lua object notation. **Experimental.** Handling of subnormal numbers (very small floats) may be broken.

@ -1,199 +1,17 @@
-- Localize globals -- TODO consider moving serializers in this namespace
local assert, error, io, loadfile, math, minetest, modlib, pairs, setfenv, setmetatable, type local function load(module_name)
= assert, error, io, loadfile, math, minetest, modlib, pairs, setfenv, setmetatable, type return assert(loadfile(modlib.mod.get_resource(modlib.modname, "persistence", module_name .. ".lua")))
-- Set environment
local _ENV = {}
setfenv(1, _ENV)
lua_log_file = {
-- default value
reference_strings = true
}
-- Note: keys may not be marked as weak references: garbage collected log files wouldn't close the file:
-- The `__gc` metamethod doesn't work for tables in Lua 5.1; a hack using `newproxy` would be needed
-- See https://stackoverflow.com/questions/27426704/lua-5-1-workaround-for-gc-metamethod-for-tables)
-- Therefore, :close() must be called on log files to remove them from the `files` table
local files = {}
local metatable = {__index = lua_log_file}
function lua_log_file.new(file_path, root, reference_strings)
local self = setmetatable({
file_path = assert(file_path),
root = root,
reference_strings = reference_strings
}, metatable)
if minetest then
files[self] = true
end end
return self local _ENV = setmetatable({}, {__index = function(_ENV, module_name)
if module_name == "lua_log_file" then
local module = load(module_name)()
_ENV[module_name] = module
return module
end end
if module_name == "sqlite3" then
local function set_references(self, table) local module = load(module_name)
-- Weak table keys to allow the collection of dead reference tables _ENV[module_name] = module
-- TODO garbage collect strings in the references table return module
self.references = setmetatable(table, {__mode = "k"})
end end
end})
-- Executes the log file and restores the root table and the reference counter from it.
-- Raises an error if the file cannot be loaded.
function lua_log_file:load()
	-- Bytecode is blocked by the engine
	local chunk = assert(loadfile(self.file_path))
	-- math.huge is serialized to inf
	local sandbox = {inf = math.huge}
	setfenv(chunk, sandbox)
	chunk()
	local references = sandbox.R or {{}}
	sandbox.R = references
	-- The counter must end up at the largest reference in use,
	-- even if the reference list has "holes" (nil values)
	local highest = #references
	for reference in pairs(references) do
		if reference > highest then
			highest = reference
		end
	end
	self.reference_count = highest
	self.root = references[1]
	set_references(self, {})
end
-- Opens the log file for appending and stores the handle in `self.file`.
-- Raises an error carrying the message from `io.open` if the file cannot be opened,
-- instead of leaving `self.file` as nil and failing later on the first write.
function lua_log_file:open()
	self.file = assert(io.open(self.file_path, "a+"))
end
-- Loads and compacts an existing log file, or starts a new one.
-- Note that for a new file the root is reset to an empty table.
function lua_log_file:init()
	if not modlib.file.exists(self.file_path) then
		-- No log yet: start with an empty root
		self:open()
		self.root = {}
		self:_write()
		return
	end
	-- Existing log: read it, write it back in compact form, then continue appending
	self:load()
	self:_rewrite()
	self:open()
end
-- Appends a single statement, terminated by a newline, to the open log file.
function lua_log_file:log(statement)
	local file = self.file
	file:write(statement)
	file:write"\n"
end
-- Flushes buffered writes of the open log file.
function lua_log_file:flush()
	local file = self.file
	file:flush()
end
-- Closes the log file and removes this object from the `files` table used by the shutdown hook.
function lua_log_file:close()
	local file = self.file
	file:close()
	self.file = nil
	files[self] = nil
end
if minetest then
	-- Close every log file which is still registered when the server shuts down
	minetest.register_on_shutdown(function()
		for self in pairs(files) do
			-- Objects are registered by `new`, but the file is only opened by `init`:
			-- skip objects which never opened a file instead of indexing nil
			local file = self.file
			if file then
				file:close()
			end
		end
	end)
end
-- Reserved words can't be written using the `table.name` short key syntax
local keywords = {}
for keyword in ("and break do else elseif end false for function goto if in local nil not or repeat return then true until while"):gmatch"%a+" do
	keywords[keyword] = true
end
-- Serializes `value` to a Lua expression, logging the statements needed to define
-- referenced strings and tables (including all of their entries) first.
-- value: nil, boolean, number, string or table; other types raise an error
-- is_key: whether the expression is going to be used as a table key
-- Returns the expression; a second return value of `true` means the expression
-- is a plain identifier which may be used as `table.identifier`
function lua_log_file:_dump(value, is_key)
	if value == nil then
		return "nil"
	end
	if value == true then
		return "true"
	end
	if value == false then
		return "false"
	end
	if value ~= value then
		-- nan
		return "0/0"
	end
	local _type = type(value)
	if _type == "number" then
		return ("%.17g"):format(value)
	end
	local reference = self.references[value]
	if reference then
		return "R[" .. reference .."]"
	end
	reference = self.reference_count + 1
	local key = "R[" .. reference .."]"
	local function create_reference()
		self.reference_count = reference
		self.references[value] = reference
	end
	if _type == "string" then
		local reference_strings = self.reference_strings
		if is_key and ((not reference_strings) or value:len() <= key:len())
			and value:match"^[%a_][%a%d_]*$" and not keywords[value] then
			-- Short key
			return value, true
		end
		local formatted = ("%q"):format(value)
		if (not reference_strings) or formatted:len() <= key:len() then
			-- Short string
			return formatted
		end
		-- Use reference
		create_reference()
		self:log(key .. "=" .. formatted)
	elseif _type == "table" then
		-- Tables always need a reference before they are traversed to prevent infinite recursion
		create_reference()
		-- TODO traverse tables to determine whether this is actually needed
		self:log(key .. "={}")
		-- Every entry has to be written, including those of the list part:
		-- there is no other place where list entries would be logged
		for k, v in pairs(value) do
			local dumped, short = self:_dump(k, true)
			self:log(key .. (short and ("." .. dumped) or ("[" .. dumped .. "]")) .. "=" .. self:_dump(v))
		end
	else
		error("unsupported type: " .. _type)
	end
	return key
end
-- Sets `table[key] = value` and appends the corresponding assignment to the log.
-- `table` must already be known to the log file, otherwise "orphan table" is raised.
function lua_log_file:set(table, key, value)
	if not self.references[table] then
		error"orphan table"
	end
	if table[key] == value then
		-- No change
		return
	end
	table[key] = value
	local target = self:_dump(table)
	local dumped_key, short_key = self:_dump(key, true)
	local accessor
	if short_key then
		accessor = "." .. dumped_key
	else
		accessor = "[" .. dumped_key .. "]"
	end
	self:log(target .. accessor .. "=" .. self:_dump(value))
end
-- Shorthand for `set` on the root table.
function lua_log_file:set_root(key, value)
	local root = self.root
	return self:set(root, key, value)
end
-- Writes the complete current state to the open file, starting from fresh references.
function lua_log_file:_write()
	self.reference_count = 0
	set_references(self, {})
	self:log"R={}"
	self:_dump(self.root)
end
-- Truncates the log file and writes the current state to it; the file is closed afterwards.
-- Raises an error carrying the message from `io.open` if the file cannot be opened for writing.
function lua_log_file:_rewrite()
	self.file = assert(io.open(self.file_path, "w+"))
	self:_write()
	self.file:close()
end
-- Compacts the log: closes the file if it is open, rewrites it from the current state
-- and reopens it for appending.
function lua_log_file:rewrite()
	local file = self.file
	if file then
		file:close()
	end
	self:_rewrite()
	self:open()
end
-- Export environment
return _ENV return _ENV

@ -0,0 +1,198 @@
-- Localize globals
local assert, error, io, loadfile, math, minetest, modlib, pairs, setfenv, setmetatable, type
= assert, error, io, loadfile, math, minetest, modlib, pairs, setfenv, setmetatable, type
-- Set environment
local _ENV = {}
setfenv(1, _ENV)
-- Default value
reference_strings = true
-- Note: keys may not be marked as weak references: garbage collected log files wouldn't close the file:
-- The `__gc` metamethod doesn't work for tables in Lua 5.1; a hack using `newproxy` would be needed
-- See https://stackoverflow.com/questions/27426704/lua-5-1-workaround-for-gc-metamethod-for-tables)
-- Therefore, :close() must be called on log files to remove them from the `files` table
local files = {}
local metatable = {__index = _ENV}
_ENV.metatable = metatable
-- Creates a log file object. No file is opened or read until `init` is called.
-- file_path: path of the log file (required)
-- root: initial root table
-- reference_strings: whether long strings are written once and referenced afterwards
function new(file_path, root, reference_strings)
	assert(file_path)
	local instance = {
		file_path = file_path,
		root = root,
		reference_strings = reference_strings
	}
	setmetatable(instance, metatable)
	if minetest then
		-- Remember the object so that its file can be closed on shutdown
		files[instance] = true
	end
	return instance
end
-- Installs `table` as the lookup from referenced values to reference numbers of `self`
local function set_references(self, table)
	-- Weak table keys to allow the collection of dead reference tables
	-- TODO garbage collect strings in the references table
	local weak_keys = {__mode = "k"}
	self.references = setmetatable(table, weak_keys)
end
-- Executes the log file and restores the root table and the reference counter from it.
-- Raises an error if the file cannot be loaded.
function load(self)
	-- Bytecode is blocked by the engine
	local chunk = assert(loadfile(self.file_path))
	-- math.huge is serialized to inf
	local sandbox = {inf = math.huge}
	setfenv(chunk, sandbox)
	chunk()
	local references = sandbox.R or {{}}
	sandbox.R = references
	-- The counter must end up at the largest reference in use,
	-- even if the reference list has "holes" (nil values)
	local highest = #references
	for reference in pairs(references) do
		if reference > highest then
			highest = reference
		end
	end
	self.reference_count = highest
	self.root = references[1]
	set_references(self, {})
end
-- Opens the log file for appending and stores the handle in `self.file`.
-- Raises an error carrying the message from `io.open` if the file cannot be opened,
-- instead of leaving `self.file` as nil and failing later on the first write.
function open(self)
	self.file = assert(io.open(self.file_path, "a+"))
end
-- Loads and compacts an existing log file, or starts a new one.
-- Note that for a new file the root is reset to an empty table.
function init(self)
	if not modlib.file.exists(self.file_path) then
		-- No log yet: start with an empty root
		self:open()
		self.root = {}
		self:_write()
		return
	end
	-- Existing log: read it, write it back in compact form, then continue appending
	self:load()
	self:_rewrite()
	self:open()
end
-- Appends a single statement, terminated by a newline, to the open log file.
function log(self, statement)
	local file = self.file
	file:write(statement)
	file:write"\n"
end
-- Flushes buffered writes of the open log file.
function flush(self)
	local file = self.file
	file:flush()
end
-- Closes the log file and removes this object from the `files` table used by the shutdown hook.
function close(self)
	local file = self.file
	file:close()
	self.file = nil
	files[self] = nil
end
if minetest then
	-- Close every log file which is still registered when the server shuts down
	minetest.register_on_shutdown(function()
		for self in pairs(files) do
			-- Objects are registered by `new`, but the file is only opened by `init`:
			-- skip objects which never opened a file instead of indexing nil
			local file = self.file
			if file then
				file:close()
			end
		end
	end)
end
-- Reserved words can't be written using the `table.name` short key syntax
local keywords = {}
for keyword in ("and break do else elseif end false for function goto if in local nil not or repeat return then true until while"):gmatch"%a+" do
	keywords[keyword] = true
end
-- Serializes `value` to a Lua expression, logging the statements needed to define
-- referenced strings and tables (including all of their entries) first.
-- value: nil, boolean, number, string or table; other types raise an error
-- is_key: whether the expression is going to be used as a table key
-- Returns the expression; a second return value of `true` means the expression
-- is a plain identifier which may be used as `table.identifier`
local function _dump(self, value, is_key)
	if value == nil then
		return "nil"
	end
	if value == true then
		return "true"
	end
	if value == false then
		return "false"
	end
	if value ~= value then
		-- nan
		return "0/0"
	end
	local _type = type(value)
	if _type == "number" then
		return ("%.17g"):format(value)
	end
	local reference = self.references[value]
	if reference then
		return "R[" .. reference .."]"
	end
	reference = self.reference_count + 1
	local key = "R[" .. reference .."]"
	local function create_reference()
		self.reference_count = reference
		self.references[value] = reference
	end
	if _type == "string" then
		local reference_strings = self.reference_strings
		if is_key and ((not reference_strings) or value:len() <= key:len())
			and value:match"^[%a_][%a%d_]*$" and not keywords[value] then
			-- Short key
			return value, true
		end
		local formatted = ("%q"):format(value)
		if (not reference_strings) or formatted:len() <= key:len() then
			-- Short string
			return formatted
		end
		-- Use reference
		create_reference()
		self:log(key .. "=" .. formatted)
	elseif _type == "table" then
		-- Tables always need a reference before they are traversed to prevent infinite recursion
		create_reference()
		-- TODO traverse tables to determine whether this is actually needed
		self:log(key .. "={}")
		-- Every entry has to be written, including those of the list part:
		-- there is no other place where list entries would be logged
		for k, v in pairs(value) do
			local dumped, short = _dump(self, k, true)
			self:log(key .. (short and ("." .. dumped) or ("[" .. dumped .. "]")) .. "=" .. _dump(self, v))
		end
	else
		error("unsupported type: " .. _type)
	end
	return key
end
-- Sets `table[key] = value` and appends the corresponding assignment to the log.
-- `table` must already be known to the log file, otherwise "orphan table" is raised.
function set(self, table, key, value)
	if not self.references[table] then
		error"orphan table"
	end
	if table[key] == value then
		-- No change
		return
	end
	table[key] = value
	local target = _dump(self, table)
	local dumped_key, short_key = _dump(self, key, true)
	local accessor
	if short_key then
		accessor = "." .. dumped_key
	else
		accessor = "[" .. dumped_key .. "]"
	end
	self:log(target .. accessor .. "=" .. _dump(self, value))
end
-- Shorthand for `set` on the root table.
function set_root(self, key, value)
	local root = self.root
	return self:set(root, key, value)
end
-- Writes the complete current state to the open file, starting from fresh references.
function _write(self)
	self.reference_count = 0
	set_references(self, {})
	self:log"R={}"
	_dump(self, self.root)
end
-- Truncates the log file and writes the current state to it; the file is closed afterwards.
-- Raises an error carrying the message from `io.open` if the file cannot be opened for writing.
function _rewrite(self)
	self.file = assert(io.open(self.file_path, "w+"))
	self:_write()
	self.file:close()
end
-- Compacts the log: closes the file if it is open, rewrites it from the current state
-- and reopens it for appending.
function rewrite(self)
	local file = self.file
	if file then
		file:close()
	end
	self:_rewrite()
	self:open()
end
-- Export environment
return _ENV

316
persistence/sqlite3.lua Normal file

@ -0,0 +1,316 @@
local assert, error, math_huge, modlib, minetest, setmetatable, type, table_insert, table_sort, pairs, ipairs
= assert, error, math.huge, modlib, minetest, setmetatable, type, table.insert, table.sort, pairs, ipairs
local sqlite3 = ... or require"lsqlite3"
--! experimental
--[[
Currently uses reference counting to immediately delete tables which aren't reachable from the root table anymore, which has two issues:
1. Deletion might trigger a large deletion chain
TODO defer deletion, clean up unused tables on startup, delete & iterate tables partially
2. Reference counting is unable to handle cycles. `:collectgarbage()` implements a tracing "stop-the-world" garbage collector which handles cycles.
TODO take advantage of Lua's garbage collection by keeping a bunch of "twin" objects in a weak structure using proxies (Lua 5.1) or the __gc metamethod (Lua 5.2)
See https://wiki.c2.com/?ReferenceCountingCanHandleCycles, https://www.memorymanagement.org/mmref/recycle.html#mmref-recycle and https://wiki.c2.com/?GenerationalGarbageCollectio
Weak tables are of no use here, as we need to be notified when a reference is dropped
]]
local _ENV = {}
setfenv(1, _ENV)
local metatable = {__index = _ENV}
_ENV.metatable = metatable
-- Note: keys may not be marked as weak references: wouldn't close the database: see persistence/lua_log_file.lua
local databases = {}
-- Numeric tags for the supported Lua types, bound as the type of a key or value
-- in front of its content when writing an entry and compared against when reading one.
-- These numbers end up in the database file.
local types = {
boolean = 1,
number = 2,
string = 3,
table = 4
}
-- Reserves and returns a table ID which isn't in use yet.
-- Defragments the IDs first if they would grow too large.
local function increment_highest_table_id(self)
	local id = self.highest_table_id + 1
	if id > 2^50 then
		-- IDs are approaching double precision limit (52 bits mantissa), defragment them
		self:defragment_ids()
		-- Defragmenting leaves `highest_table_id` at the highest ID still in use:
		-- the ID handed out has to be the one following it
		id = self.highest_table_id + 1
	end
	self.highest_table_id = id
	return id
end
-- Opens the SQLite3 database at `file_path` and wraps it in a persistence object.
-- `init` has to be called before the object can be used.
-- root: table to be stored if the database doesn't contain a root table yet
-- Raises an error if the database can't be opened.
function new(file_path, root)
	-- On failure, lsqlite3 returns nil, an error code and an error message
	local database, _, errmsg = sqlite3.open(file_path)
	if not database then
		error(errmsg or "failed to open database")
	end
	return setmetatable({
		database = database,
		root = root
	}, metatable)
end
-- Turns an existing table with `database` and `root` fields into a persistence object.
-- Returns the table itself.
function _ENV.setmetatable(self)
	assert(self.database and self.root)
	-- `setmetatable` refers to the localized builtin here, not to this function
	setmetatable(self, metatable)
	return self
end
local set
-- Counts a new reference to `table`; values which aren't tables are ignored.
-- A table seen for the first time is assigned an ID and all of its entries are stored.
local function add_table(self, table)
	if type(table) ~= "table" then return end
	local counts = self.counts
	local count = counts[table]
	if count then
		-- Already stored, just another reference
		counts[table] = count + 1
		return
	end
	-- ID and count are registered before the entries are stored, so that cycles terminate
	self.table_ids[table] = increment_highest_table_id(self)
	counts[table] = 1
	for k, v in pairs(table) do
		set(self, table, k, v)
	end
end
local decrement_reference_count
-- Removes `table` from the bookkeeping and the database,
-- releasing the references held by its keys and values.
local function delete_table(self, table)
	local table_id = assert(self.table_ids[table])
	-- Forget the table first, so that references back to it are ignored below
	self.table_ids[table], self.counts[table] = nil, nil
	for k, v in pairs(table) do
		decrement_reference_count(self, k)
		decrement_reference_count(self, v)
	end
	-- The statement is only bound after the recursion above, which may use it as well
	local delete = self._prepared.delete_table
	delete:bind(1, table_id)
	delete:step()
	delete:reset()
end
-- Releases one reference to `table`; the table is deleted once no reference is left.
-- Values which aren't tables and tables which aren't counted are ignored.
function decrement_reference_count(self, table)
	if type(table) ~= "table" then return end
	local counts = self.counts
	local count = counts[table]
	if not count then return end
	if count == 1 then
		-- Last reference dropped
		return delete_table(self, table)
	end
	counts[table] = count - 1
end
-- Writes the entry `table[key] = value` to the database, or deletes it if `value` is nil.
-- New references held by the entry are counted; the reference held by the key is
-- released on deletion. The value previously stored under `key` is not known here
-- (this function is also used to store the entries of newly added tables)
-- and is therefore not released by this function.
-- `table` must have a table ID. `table` itself is not modified.
function set(self, table, key, value)
	local deletion = value == nil
	if deletion then
		-- Ignores keys which aren't tables
		decrement_reference_count(self, key)
	else
		add_table(self, key)
		add_table(self, value)
	end
	-- Statements are only bound after the recursion above, which uses them as well
	local statement = self._prepared[deletion and "delete" or "insert"]
	-- Binds the type tag to parameter `n` and the content to parameter `n + 1`
	local function bind_type_and_content(n, content)
		local type_ = type(content)
		statement:bind(n, assert(types[type_]))
		if type_ == "boolean" then
			statement:bind(n + 1, content and 1 or 0)
		elseif type_ == "number" then
			-- Special floats are stored as strings
			local stored = content
			if content ~= content then
				stored = "nan"
			elseif content == math_huge then
				stored = "inf"
			elseif content == -math_huge then
				stored = "-inf"
			end
			statement:bind(n + 1, stored)
		elseif type_ == "string" then
			-- Use bind_blob instead of bind as Lua strings are effectively byte strings
			statement:bind_blob(n + 1, content)
		elseif type_ == "table" then
			statement:bind(n + 1, self.table_ids[content])
		end
	end
	statement:bind(1, assert(self.table_ids[table]))
	bind_type_and_content(2, key)
	if not deletion then
		bind_type_and_content(4, value)
	end
	statement:step()
	statement:reset()
end
-- Executes `sql` on the database of `self`, raising the database's error message on failure
local function exec(self, sql)
	local database = self.database
	local status = database:exec(sql)
	if status == sqlite3.OK then return end
	error(database:errmsg())
end
-- Creates the schema if necessary, prepares the statements and loads the stored tables.
-- If the database contains a root table (ID 1), it replaces `self.root` and unreachable
-- tables are garbage collected; otherwise `self.root` is stored as the root table.
function init(self)
	local database = self.database
	local function prepare(sql)
		local stmt = database:prepare(sql)
		if not stmt then error(database:errmsg()) end
		return stmt
	end
	-- The table has to exist before statements referring to it can be prepared:
	-- preparing them first fails on a fresh database
	exec(self, [[
CREATE TABLE IF NOT EXISTS table_entries (
	table_id INTEGER NOT NULL,
	key_type INTEGER NOT NULL,
	key BLOB NOT NULL,
	value_type INTEGER NOT NULL,
	value BLOB NOT NULL,
	PRIMARY KEY (table_id, key_type, key)
)]])
	self._prepared = {
		insert = prepare"INSERT OR REPLACE INTO table_entries(table_id, key_type, key, value_type, value) VALUES (?, ?, ?, ?, ?)",
		delete = prepare"DELETE FROM table_entries WHERE table_id = ? AND key_type = ? AND key = ?",
		delete_table = prepare"DELETE FROM table_entries WHERE table_id = ?",
		update = {
			id = prepare"UPDATE table_entries SET table_id = ? WHERE table_id = ?",
			keys = prepare("UPDATE table_entries SET key = ? WHERE key_type = " .. types.table .. " AND key = ?"),
			values = prepare("UPDATE table_entries SET value = ? WHERE value_type = " .. types.table .. " AND value = ?")
		}
	}
	-- Default value
	self.highest_table_id = 0
	for id in database:urows"SELECT MAX(table_id) FROM table_entries" do
		-- Gets a single value
		self.highest_table_id = id
	end
	increment_highest_table_id(self)
	-- Tables by ID
	local tables = {}
	-- Reference counts by table
	local counts = {}
	self.counts = counts
	-- Gets the table with the given ID, creating it if it hasn't been encountered yet
	local function get_table(id)
		local table = tables[id]
		if not table then
			table = {}
			tables[id] = table
		end
		return table
	end
	local function get_value(type_, content)
		if type_ == types.boolean then
			if content == 0 then return false end
			if content == 1 then return true end
			error("invalid boolean value: " .. content)
		end
		if type_ == types.number then
			if content == "nan" then
				return 0/0
			end
			if content == "inf" then
				return math_huge
			end
			if content == "-inf" then
				return -math_huge
			end
			assert(type(content) == "number")
			return content
		end
		if type_ == types.string then
			assert(type(content) == "string")
			return content
		end
		if type_ == types.table then
			-- Table reference: every stored reference counts,
			-- matching the counts kept while the database is in use
			local table = get_table(content)
			counts[table] = (counts[table] or 0) + 1
			return table
		end
		-- Null is unused
		error("unsupported type: " .. type_)
	end
	-- Order by key_content to have retrieve list parts in the correct order, making it easier for Lua
	for table_id, key_type, key, value_type, value in database:urows"SELECT * FROM table_entries ORDER BY table_id, key_type, key" do
		-- The table is registered before its entry is decoded,
		-- so that an entry referencing its own table resolves to the very same table
		local table = get_table(table_id)
		table[get_value(key_type, key)] = get_value(value_type, value)
	end
	if tables[1] then
		self.root = tables[1]
		-- The root is referenced by the database object itself
		counts[self.root] = (counts[self.root] or 0) + 1
		self.table_ids = modlib.table.flip(tables)
		self:collectgarbage()
	else
		self.highest_table_id = 0
		self.table_ids = {}
		add_table(self, self.root)
	end
	databases[self] = true
end
-- Deletes all stored entries and stores the current root table from scratch.
function rewrite(self)
	exec(self, "DELETE FROM table_entries")
	-- Start over with empty bookkeeping
	self.counts = {}
	self.table_ids = {}
	self.highest_table_id = 0
	add_table(self, self.root)
end
-- Sets `table[key] = value`, both in memory and in the database.
-- `table` must be stored in the database (reachable from the root table).
-- Tables which are no longer referenced as a result are deleted from the database.
function _ENV.set(self, table, key, value)
	local previous_value = table[key]
	if previous_value == value then
		-- no change
		return
	end
	set(self, table, key, value)
	table[key] = value
	if previous_value ~= nil and value ~= nil then
		-- Overwriting an existing entry: the internal `set` counted the key once more,
		-- although the entry already held a reference to it
		decrement_reference_count(self, key)
	end
	-- The entry no longer references the value it used to hold. The internal `set` doesn't
	-- know the previous value, so it is released here (ignores values which aren't tables)
	decrement_reference_count(self, previous_value)
end
-- Shorthand for `set` on the root table.
function set_root(self, key, value)
	local root = self.root
	return _ENV.set(self, root, key, value)
end
-- Full garbage collection: marks every table reachable from the root table
-- (through keys as well as values), then deletes all stored tables which weren't marked.
function collectgarbage(self)
	local marked = {}
	-- Marked tables whose entries still have to be visited
	local pending = {}
	local function mark(value)
		if type(value) == "table" and not marked[value] then
			marked[value] = true
			pending[#pending + 1] = value
		end
	end
	mark(self.root)
	while #pending > 0 do
		local table = pending[#pending]
		pending[#pending] = nil
		for k, v in pairs(table) do
			mark(k)
			mark(v)
		end
	end
	-- Sweep
	for table in pairs(self.table_ids) do
		if not marked[table] then
			delete_table(self, table)
		end
	end
end
-- Renumbers the table IDs to 1, 2, ..., keeping their order,
-- both in the database and in `self.table_ids`.
function defragment_ids(self)
	local table_ids = self.table_ids
	local old_ids = {}
	for _, id in pairs(table_ids) do
		old_ids[#old_ids + 1] = id
	end
	table_sort(old_ids)
	local tables_by_id = modlib.table.flip(table_ids)
	local statements = self._prepared.update
	-- In ascending order, no new ID is larger than the ID it replaces
	for new_id, old_id in ipairs(old_ids) do
		-- Update the ID wherever it occurs: as table ID, as key and as value
		for _, statement in pairs(statements) do
			statement:bind_values(new_id, old_id)
			statement:step()
			statement:reset()
		end
		table_ids[tables_by_id[old_id]] = new_id
	end
	self.highest_table_id = #old_ids
end
-- Finalizes all statements in `table`, descending into nested tables.
-- Fails with the error code if a statement can't be finalized.
local function finalize_statements(table)
	for _, entry in pairs(table) do
		if type(entry) ~= "table" then
			local errcode = entry:finalize()
			assert(errcode == sqlite3.OK, errcode)
		else
			-- Group of statements
			finalize_statements(entry)
		end
	end
end
-- Finalizes the prepared statements and closes the database.
-- Also works for objects which were created using `new` but never initialized,
-- as `new` already opens the database.
function close(self)
	local prepared = self._prepared
	if prepared then
		finalize_statements(prepared)
		self._prepared = nil
	end
	self.database:close()
	databases[self] = nil
end
if minetest then
	-- Closes all databases which are still registered
	local function close_all()
		for database in pairs(databases) do
			database:close()
		end
	end
	minetest.register_on_shutdown(close_all)
end
return _ENV

@ -281,6 +281,7 @@ test_from_string("#694269", 0x694269FF)
test_from_string("#11223344", 0x11223344) test_from_string("#11223344", 0x11223344)
assert(colorspec.from_string"#694269":to_string() == "694269") assert(colorspec.from_string"#694269":to_string() == "694269")
-- Persistence
local function test_logfile(reference_strings) local function test_logfile(reference_strings)
local logfile = persistence.lua_log_file.new(mod.get_resource"logfile.test.lua", {}, reference_strings) local logfile = persistence.lua_log_file.new(mod.get_resource"logfile.test.lua", {}, reference_strings)
logfile:init() logfile:init()
@ -305,6 +306,51 @@ local function test_logfile(reference_strings)
end end
test_logfile(true) test_logfile(true)
test_logfile(false) test_logfile(false)
-- SQLite3
-- Exercises the SQLite3 persistence: sets and deletes entries, checks the rows
-- left in the database, then reopens the database and checks the loaded root table
do
local sqlite3 = persistence.sqlite3(require"lsqlite3")
local p = sqlite3.new("database.test.sqlite3", {})
p:init()
-- Start from an empty database, whatever a previous run left behind
p:rewrite()
p:set_root("key", "value")
assert(p.root.key == "value")
p:set_root("other key", "other value")
p:set_root("key", "other value")
p:set_root("key", nil)
-- The same table is referenced twice, then both references are removed
local x = {x = 1, y = 2}
p:set_root("x1", x)
p:set_root("x2", x)
p:set_root("x2", nil)
p:set_root("x1", nil)
-- Nested tables, removed again
p:set_root("key", {a = 1, b = 2, c = {a = 1}})
p:set_root("key", nil)
p:set_root("key", {a = 1, b = 2, c = 3})
-- A table referencing itself, removed again
local cyclic = {}
cyclic.cycle = cyclic
p:set_root("cyclic", cyclic)
p:set_root("cyclic", nil)
p:collectgarbage()
p:defragment_ids()
local rows = {}
for row in p.database:rows"SELECT * FROM table_entries ORDER BY table_id, key_type, key" do
_G.table.insert(rows, row)
end
-- Expected rows: table_id, key_type, key, value_type, value
assert(modlib.table.equals(rows, {
{1, 3, "key", 4, 2},
{1, 3, "other key", 3, "other value"},
{2, 3, "a", 2, 1},
{2, 3, "b", 2, 2},
{2, 3, "c", 2, 3}
}))
p:close()
-- Reopen: the root table has to be loaded from the database
p = sqlite3.new("database.test.sqlite3", {})
p:init()
assert(modlib.table.equals(p.root, {
key = {a = 1, b = 2, c = 3},
["other key"] = "other value"
}))
p:close()
end
-- in-game tests & b3d testing -- in-game tests & b3d testing
local tests = { local tests = {