2021-01-13 10:45:00 +00:00
|
|
|
local bitser = require("ffi/bitser")
|
2021-04-13 16:11:39 +00:00
|
|
|
local buffer = require("string.buffer")
|
2021-01-13 10:45:00 +00:00
|
|
|
local dump = require("dump")
|
2021-04-13 22:35:20 +00:00
|
|
|
local ffi = require("ffi")
|
2022-10-27 00:25:51 +00:00
|
|
|
local ffiUtil = require("ffi/util")
|
2021-01-13 10:45:00 +00:00
|
|
|
local lfs = require("libs/libkoreader-lfs")
|
|
|
|
local logger = require("logger")
|
Logger: Use serpent instead of dump (#9588)
* Persist: support serpent, and use by default over dump (as we assume consistency > readability in Persist).
* Logger/Dbg: Use serpent instead of dump to dump tables (it's slightly more compact, honors __tostring, and will tag tables with their ref, which can come in handy when debugging).
* Dbg: Don't duplicate Logger's log function, just use it directly.
* Fontlist/ConfigDialog: Use serpent for the debug dump.
* Call `os.setlocale(C, "numeric")` on startup instead of peppering it around dump calls. It's process-wide, so it didn't make much sense.
* Trapper: Use LuaJIT's serde facilities instead of dump. They're more reliable in the face of funky input, much faster, and in this case, the data never makes it to human eyes, so a human-readable format didn't gain us anything.
2022-10-06 00:21:03 +00:00
|
|
|
local serpent = require("ffi/serpent")
|
2021-04-13 22:35:20 +00:00
|
|
|
local zstd = require("ffi/zstd")
|
|
|
|
|
|
|
|
local C = ffi.C
|
2022-10-27 00:25:51 +00:00
|
|
|
require("ffi/posix_h")
|
2021-01-13 10:45:00 +00:00
|
|
|
|
|
|
|
--- Read the content of a file in binary mode.
-- @string file path of the file to read
-- @param[opt] bytes read format or byte count handed to `file:read`;
-- the whole file ("*a") is read when omitted
-- @treturn[1] string the data that was read
-- @treturn[2] nil
-- @treturn[2] ?string error message from `io.open` or `file:read`
local function readFile(file, bytes)
    local handle, open_err = io.open(file, "rb")
    if handle == nil then
        return nil, open_err
    end

    local contents, read_err = handle:read(bytes or "*a")
    -- The handle is released before the result is inspected, so it is closed on every path.
    handle:close()

    if contents then
        return contents
    end
    return nil, read_err
end
|
|
|
|
|
|
|
|
--- Turn the results of `pcall(chunk)` into the codec return convention.
-- On success every value returned by the chunk is passed through untouched;
-- on failure the error is reported as `nil, message` instead of being raised.
local function protectedResults(ok, ...)
    if not ok then
        return nil, "malformed serialized data (" .. tostring((...)) .. ")"
    end
    return ...
end

--- Serialization backends, keyed by codec id.
-- Each codec exposes:
--   * id: the codec name;
--   * reads_from_file: when true, `deserialize` takes a file path, otherwise it takes the serialized string;
--   * writes_to_file: when true, `serialize` writes to the path itself and returns `true, size`,
--     otherwise it returns the serialized string;
--   * serialize / deserialize: both return `nil, err` on failure.
local codecs = {
    -- bitser: binary format, fast encode/decode, low size. Not human readable.
    bitser = {
        id = "bitser",
        reads_from_file = false,
        writes_to_file = false,

        serialize = function(t)
            local ok, str = pcall(bitser.dumps, t)
            if not ok then
                -- tostring: the error value is not guaranteed to be a string
                return nil, "cannot serialize " .. tostring(t) .. " (" .. tostring(str) .. ")"
            end
            return str
        end,

        deserialize = function(str)
            local ok, t = pcall(bitser.loads, str)
            if not ok then
                return nil, "malformed serialized data: " .. tostring(t)
            end
            return t
        end,
    },
    -- luajit: binary format, optimized for speed, not size (combine w/ zstd if necessary). Not human readable.
    -- Slightly larger on-disk representation than bitser, *much* faster to decode, slightly faster to encode.
    luajit = {
        id = "luajit",
        reads_from_file = false,
        writes_to_file = false,

        serialize = function(t)
            local ok, str = pcall(buffer.encode, t)
            if not ok then
                return nil, "cannot serialize " .. tostring(t) .. " (" .. tostring(str) .. ")"
            end
            return str
        end,

        deserialize = function(str)
            local ok, t = pcall(buffer.decode, str)
            if not ok then
                return nil, "malformed serialized data (" .. tostring(t) .. ")"
            end
            return t
        end,
    },
    -- zstd: luajit, but compressed w/ zstd ;). Much smaller, at a very small performance cost (decompressing is *fast*).
    zstd = {
        id = "zstd",
        reads_from_file = true,
        writes_to_file = true,

        serialize = function(t, as_bytecode, path)
            local ok, str = pcall(buffer.encode, t)
            if not ok then
                return nil, "cannot serialize " .. tostring(t) .. " (" .. tostring(str) .. ")"
            end

            -- Compress *before* opening the destination: "wb" truncates the file,
            -- so a failure at this stage must not cost us the previous content.
            -- NOTE(review): pcall is defensive, in case the zstd helper raises on error -- confirm.
            local compressed, cbuff, clen = pcall(zstd.zstd_compress, str, #str)
            if not compressed then
                return nil, "cannot compress " .. tostring(t) .. " (" .. tostring(cbuff) .. ")"
            end

            local f = C.fopen(path, "wb")
            if f == nil then
                -- Build the message first, so that free() can't clobber errno
                local err = "fopen: " .. ffi.string(C.strerror(ffi.errno()))
                C.free(cbuff)
                return nil, err
            end

            local written = C.fwrite(cbuff, 1, clen, f)
            C.free(cbuff)
            -- stdio is buffered: a short payload may only hit the disk (and fail) on flush.
            if written < clen or C.fflush(f) ~= 0 then
                C.fclose(f)
                return nil, "failed to write file"
            end
            C.fsync(C.fileno(f))
            C.fclose(f)

            --- @note: Slight API extension for TileCacheItem, which needs to know the on-disk size, and saves us a :size() call
            return true, clen
        end,

        deserialize = function(path)
            local f = C.fopen(path, "rb")
            if f == nil then
                return nil, "fopen: " .. ffi.string(C.strerror(ffi.errno()))
            end
            local size = lfs.attributes(path, "size")
            if not size then
                -- No size to allocate for: bail out instead of handing nil to malloc
                C.fclose(f)
                return nil, "failed to stat file"
            end
            -- NOTE: In a perfect world, we'd just mmap the file.
            --       But that's problematic on a portability level: while mmap is POSIX, implementations differ,
            --       and some old platforms don't support mmap-on-vfat (Legacy Kindle) :'(.
            local data = C.malloc(size)
            if data == nil then
                C.fclose(f)
                return nil, "failed to allocate read buffer"
            end
            if C.fread(data, 1, size, f) < size or C.ferror(f) ~= 0 then
                C.free(data)
                C.fclose(f)
                return nil, "failed to read file"
            end
            C.fclose(f)

            -- Protected, so that the read buffer is released even if decompression raises
            -- NOTE(review): assumes the zstd helper signals corrupt input by raising -- confirm.
            local ok, buff, ulen = pcall(zstd.zstd_uncompress, data, size)
            C.free(data)
            if not ok then
                return nil, "malformed serialized data (" .. tostring(buff) .. ")"
            end

            local str = ffi.string(buff, ulen)
            C.free(buff)

            local decoded, t = pcall(buffer.decode, str)
            if not decoded then
                return nil, "malformed serialized data (" .. tostring(t) .. ")"
            end
            return t
        end,
    },
    -- dump: human readable, pretty printed, fast enough for most use cases.
    dump = {
        id = "dump",
        reads_from_file = true,
        writes_to_file = false,

        serialize = function(t, as_bytecode)
            local source = "return " .. dump(t)
            if as_bytecode then
                local bytecode, err = load(source)
                if bytecode then
                    return string.dump(bytecode, true)
                end
                logger.warn("cannot convert table to bytecode", err, "fallback to text")
            end
            return source
        end,

        -- NOTE: this *executes* the content as Lua code: only feed it data we wrote ourselves.
        deserialize = function(str)
            -- str is tried as a file path first, then as a chunk of Lua source
            local t, err = loadfile(str)
            if not t then
                t, err = loadstring(str)
            end
            if not t then
                return nil, err
            end
            -- Run protected: like the other codecs, report a broken file instead of raising
            return protectedResults(pcall(t))
        end,
    },
    -- serpent: human readable (-ish), more thorough than dump (in particular, supports serializing functions)
    -- NOTE: if you want pretty printing, pass { sortkeys = true, compact = false, indent = " " } to serpent's second arg.
    serpent = {
        id = "serpent",
        reads_from_file = false,
        writes_to_file = false,

        serialize = function(t)
            local ok, str = pcall(serpent.dump, t)
            if not ok then
                return nil, "cannot serialize " .. tostring(t) .. " (" .. tostring(str) .. ")"
            end
            return str
        end,

        deserialize = function(str)
            local ok, t = serpent.load(str)
            if not ok then
                return nil, "malformed serialized data (" .. tostring(t) .. ")"
            end
            return t
        end,
    }
}
|
|
|
|
|
|
|
|
local Persist = {}

--- Create a persistence handle bound to a file path.
-- @tparam[opt] table o initial fields; `path` (string) is mandatory,
-- `codec` falls back to "serpent" when unset
-- @treturn table `o` itself, with the receiver installed as its metatable
function Persist:new(o)
    if not o then
        o = {}
    end
    if type(o.path) ~= "string" then
        -- Level 0: no position prefix, i.e., the exact message assert() would raise
        error("path is required", 0)
    end
    o.codec = o.codec or "serpent"

    local instance = setmetatable(o, self)
    self.__index = self
    return instance
end
|
|
|
|
|
|
|
|
--- Check whether the backing path is a regular file.
-- @return true when `lfs.attributes` reports the mode "file", false for any other mode,
-- and nothing at all when no mode is reported for the path
function Persist:exists()
    local mode = lfs.attributes(self.path, "mode")
    if not mode then
        return
    end
    return mode == "file"
end
|
|
|
|
|
|
|
|
--- Query the "modification" attribute of the backing path.
-- @return whatever `lfs.attributes` returns for that attribute, passed through as-is
function Persist:timestamp()
    local path = self.path
    return lfs.attributes(path, "modification")
end
|
|
|
|
|
|
|
|
--- Query the "size" attribute of the backing path.
-- @return whatever `lfs.attributes` returns for that attribute, passed through as-is
function Persist:size()
    local path = self.path
    return lfs.attributes(path, "size")
end
|
|
|
|
|
|
|
|
--- Load and decode the content of the backing path with the configured codec.
-- Sets `self.loaded` on success.
-- @return[1] the decoded value
-- @treturn[2] nil
-- @return[2] the error reported while reading or decoding
function Persist:load()
    local codec = codecs[self.codec]

    -- Codecs that read from the file themselves are handed the path,
    -- the others are handed the file's content.
    local input = self.path
    if not codec.reads_from_file then
        local content, read_err = readFile(self.path)
        if not content then
            return nil, read_err
        end
        input = content
    end

    local result, err = codec.deserialize(input)
    if not result then
        return nil, err
    end

    self.loaded = true
    return result
end
|
|
|
|
|
|
|
|
--- Encode a value with the configured codec and write it to the backing path.
-- Sets `self.loaded` on success, after syncing the containing directory
-- if this instance had not loaded or saved the file before.
-- @param t value to serialize
-- @param[opt] as_bytecode forwarded to the codec's `serialize`
-- @treturn[1] boolean true
-- @return[1] the on-disk size when the codec writes the file itself, nil otherwise
-- @treturn[2] nil
-- @return[2] the error reported while serializing, opening or writing the file
function Persist:save(t, as_bytecode)
    local codec = codecs[self.codec]
    local ok, err
    if codec.writes_to_file then
        ok, err = codec.serialize(t, as_bytecode, self.path)
        if not ok then
            return nil, err
        end
    else
        local content
        content, err = codec.serialize(t, as_bytecode)
        if not content then
            return nil, err
        end
        local file
        file, err = io.open(self.path, "wb")
        if not file then
            return nil, err
        end
        -- A failed write (e.g., no space left) must not be reported as a successful save
        local written, write_err = file:write(content)
        if not written then
            file:close()
            return nil, write_err
        end
        ffiUtil.fsyncOpenedFile(file)
        file:close()
    end

    -- If we've just created the file, fsync the directory, too
    if not self.loaded then
        ffiUtil.fsyncDirectory(self.path)
        self.loaded = true
    end

    -- c.f., note above, err is the on-disk size when writes_to_file is supported
    return true, err
end
|
|
|
|
|
|
|
|
--- Remove the backing file, if there is one.
-- @return the results of `os.remove`, or nothing when `self:exists()` is not truthy
function Persist:delete()
    if self:exists() then
        return os.remove(self.path)
    end
end
|
|
|
|
|
|
|
|
--- Look up a codec by name.
-- @param name codec id
-- @treturn table the matching codec, or the "serpent" codec when there is no codec by that name
function Persist.getCodec(name)
    -- codecs is keyed by codec name: index it directly instead of scanning it
    return codecs[name] or codecs["serpent"]
end
|
|
|
|
|
|
|
|
return Persist
|