2
0
mirror of https://github.com/koreader/koreader synced 2024-11-10 01:10:34 +00:00
koreader/frontend/persist.lua

288 lines
8.3 KiB
Lua
Raw Normal View History

2021-01-13 10:45:00 +00:00
local bitser = require("ffi/bitser")
local buffer = require("string.buffer")
2021-01-13 10:45:00 +00:00
local dump = require("dump")
local ffi = require("ffi")
local ffiUtil = require("ffi/util")
2021-01-13 10:45:00 +00:00
local lfs = require("libs/libkoreader-lfs")
local logger = require("logger")
local serpent = require("ffi/serpent")
local zstd = require("ffi/zstd")
local C = ffi.C
require("ffi/posix_h")
2021-01-13 10:45:00 +00:00
--- Read a file in binary mode.
-- @param file path of the file to read
-- @param bytes optional read format/byte count handed to file:read (defaults to "*a", i.e. everything)
-- @return the data read, or nil plus the error reported by io.open / file:read
local function readFile(file, bytes)
    local handle, open_err = io.open(file, "rb")
    if not handle then
        return nil, open_err
    end

    local content, read_err = handle:read(bytes or "*a")
    -- The handle is released before looking at the result, whatever it is.
    handle:close()

    if content then
        return content
    end
    return nil, read_err
end
local codecs = {
-- bitser: binary format, fast encode/decode, low size. Not human readable.
-- Operates on strings only (reads_from_file / writes_to_file are both false).
bitser = {
    id = "bitser",
    reads_from_file = false,
    writes_to_file = false,
    -- Encode `t` with bitser.dumps.
    -- Returns the encoded string, or nil plus an error message.
    serialize = function(t)
        local encoded, result = pcall(bitser.dumps, t)
        if encoded then
            return result
        end
        return nil, "cannot serialize " .. tostring(t) .. " (" .. result .. ")"
    end,
    -- Decode `str` with bitser.loads.
    -- Returns the decoded value, or nil plus an error message.
    deserialize = function(str)
        local decoded, result = pcall(bitser.loads, str)
        if decoded then
            return result
        end
        return nil, "malformed serialized data: " .. result
    end,
},
-- luajit: binary format, optimized for speed, not size (combine w/ zstd if necessary). Not human readable.
-- Slightly larger on-disk representation than bitser, *much* faster to decode, slightly faster to encode.
luajit = {
    id = "luajit",
    reads_from_file = false,
    writes_to_file = false,
    -- Encode `t` via string.buffer's encode.
    -- Returns the encoded string, or nil plus an error message.
    serialize = function(t)
        local success, payload = pcall(buffer.encode, t)
        if success then
            return payload
        end
        return nil, "cannot serialize " .. tostring(t) .. " (" .. payload .. ")"
    end,
    -- Decode `str` via string.buffer's decode.
    -- Returns the decoded value, or nil plus an error message.
    deserialize = function(str)
        local success, payload = pcall(buffer.decode, str)
        if success then
            return payload
        end
        return nil, "malformed serialized data (" .. payload .. ")"
    end,
},
-- zstd: luajit, but compressed w/ zstd ;). Much smaller, at a very small performance cost (decompressing is *fast*).
-- Unlike the string-based codecs, this one reads and writes the file itself through C stdio.
zstd = {
    id = "zstd",
    reads_from_file = true,
    writes_to_file = true,
    -- Encode `t`, compress it, and write the result to `path`.
    -- `as_bytecode` is unused here; it only keeps the argument order shared with the other codecs.
    -- Returns true plus the compressed (on-disk) size, or nil plus an error message.
    serialize = function(t, as_bytecode, path)
        local ok, str = pcall(buffer.encode, t)
        if not ok then
            return nil, "cannot serialize " .. tostring(t) .. " (" .. str .. ")"
        end

        -- Compress *before* opening the target: fopen(..., "wb") truncates it, so doing things
        -- in this order means a compressor that raises can neither clobber the previous
        -- file content nor leave a FILE* dangling.
        -- NOTE(review): zstd_compress' failure mode is not visible from here -- confirm.
        local cbuff, clen = zstd.zstd_compress(str, #str)

        local f = C.fopen(path, "wb")
        if f == nil then
            -- Capture errno before any other libc call gets a chance to change it.
            local errmsg = ffi.string(C.strerror(ffi.errno()))
            C.free(cbuff)
            return nil, "fopen: " .. errmsg
        end

        local written = C.fwrite(cbuff, 1, clen, f)
        -- The compressed buffer is no longer needed, whatever the outcome of the write.
        C.free(cbuff)
        if written < clen then
            C.fclose(f)
            return nil, "failed to write file"
        end

        C.fflush(f)
        C.fsync(C.fileno(f))
        C.fclose(f)

        --- @note: Slight API extension for TileCacheItem, which needs to know the on-disk size, and saves us a :size() call
        return true, clen
    end,
    -- Read `path`, decompress it, and decode the result.
    -- Returns the decoded value, or nil plus an error message.
    deserialize = function(path)
        local f = C.fopen(path, "rb")
        if f == nil then
            return nil, "fopen: " .. ffi.string(C.strerror(ffi.errno()))
        end

        local size, size_err = lfs.attributes(path, "size")
        if not size then
            -- e.g., the file vanished between fopen and stat; don't hand nil to malloc.
            C.fclose(f)
            return nil, "failed to stat file (" .. tostring(size_err) .. ")"
        end
        if size == 0 then
            -- Nothing to decompress; this also avoids malloc(0), whose result is implementation-defined.
            C.fclose(f)
            return nil, "file is empty"
        end

        -- NOTE: In a perfect world, we'd just mmap the file.
        --       But that's problematic on a portability level: while mmap is POSIX, implementations differ,
        --       and some old platforms don't support mmap-on-vfat (Legacy Kindle) :'(.
        local data = C.malloc(size)
        if data == nil then
            C.fclose(f)
            return nil, "failed to allocate read buffer"
        end
        if C.fread(data, 1, size, f) < size or C.ferror(f) ~= 0 then
            C.free(data)
            C.fclose(f)
            return nil, "failed to read file"
        end
        C.fclose(f)

        -- Protected call, so that the raw read buffer is released even if decompression raises.
        local uncompressed, buff, ulen = pcall(zstd.zstd_uncompress, data, size)
        C.free(data)
        if not uncompressed then
            return nil, "failed to decompress data (" .. tostring(buff) .. ")"
        end

        -- Copy the payload into a Lua string, so the C buffer can be released right away.
        local str = ffi.string(buff, ulen)
        C.free(buff)

        local ok, t = pcall(buffer.decode, str)
        if not ok then
            return nil, "malformed serialized data (" .. t .. ")"
        end
        return t
    end,
},
-- dump: human readable, pretty printed, fast enough for most use cases.
-- Deserialization is handed the file path (reads_from_file is true); serialization returns a string.
dump = {
    id = "dump",
    reads_from_file = true,
    writes_to_file = false,
    -- Render `t` as a Lua chunk ("return <table>").
    -- With `as_bytecode` set, the chunk is compiled and dumped as stripped bytecode instead;
    -- if it fails to compile, a warning is logged and the textual form is returned.
    serialize = function(t, as_bytecode)
        if as_bytecode then
            local chunk, err = load("return " .. dump(t))
            if chunk then
                return string.dump(chunk, true)
            end
            logger.warn("cannot convert table to bytecode", err, "fallback to text")
        end
        return "return " .. dump(t)
    end,
    -- Load `str` as a file path first and, failing that, as a chunk string, then run it.
    -- Returns whatever the chunk returns, or nil plus the loader's error message.
    deserialize = function(str)
        local chunk, err = loadfile(str)
        if not chunk then
            chunk, err = loadstring(str)
            if not chunk then
                return nil, err
            end
        end
        return chunk()
    end,
},
-- serpent: human readable (-ish), more thorough than dump (in particular, supports serializing functions)
-- NOTE: if you want pretty printing, pass { sortkeys = true, compact = false, indent = "  " } to serpent's second arg.
serpent = {
    id = "serpent",
    reads_from_file = false,
    writes_to_file = false,
    -- Serialize `t` with serpent.dump.
    -- Returns the resulting string, or nil plus an error message.
    serialize = function(t)
        local dumped, result = pcall(serpent.dump, t)
        if dumped then
            return result
        end
        return nil, "cannot serialize " .. tostring(t) .. " (" .. result .. ")"
    end,
    -- Deserialize `str` with serpent.load, which reports its own status as first return value.
    -- Returns the loaded value, or nil plus an error message.
    deserialize = function(str)
        local loaded, result = serpent.load(str)
        if loaded then
            return result
        end
        return nil, "malformed serialized data (" .. result .. ")"
    end,
}
}
local Persist = {}

--- Create a Persist instance.
-- @param o table of fields; `path` (string) is mandatory, `codec` defaults to "serpent"
-- @return `o` itself, with Persist installed as its metatable
function Persist:new(o)
    local instance = o or {}
    assert(type(instance.path) == "string", "path is required")
    instance.codec = instance.codec or "serpent"

    -- Method lookups on the instance fall through to this table.
    self.__index = self
    return setmetatable(instance, self)
end
--- Check whether the backing path is a regular file.
-- @return true if the path's mode is "file", false for any other mode,
-- and nothing at all when no mode could be queried
function Persist:exists()
    local mode = lfs.attributes(self.path, "mode")
    if not mode then
        return
    end
    return mode == "file"
end
--- Query the "modification" attribute of the backing file.
-- @return whatever lfs.attributes returns for that attribute
function Persist:timestamp()
    local path = self.path
    return lfs.attributes(path, "modification")
end
--- Query the "size" attribute of the backing file.
-- @return whatever lfs.attributes returns for that attribute
function Persist:size()
    local path = self.path
    return lfs.attributes(path, "size")
end
--- Load and decode the data stored at self.path with the configured codec.
-- @return the decoded value, or nil plus an error message
function Persist:load()
    local codec = codecs[self.codec]

    -- Codecs that read the file themselves get the path; the others get the file's content.
    local input = self.path
    if not codec.reads_from_file then
        local content, read_err = readFile(self.path)
        if not content then
            return nil, read_err
        end
        input = content
    end

    local t, err = codec.deserialize(input)
    if not t then
        return nil, err
    end

    -- Remember that the file exists on disk (save checks this flag).
    self.loaded = true
    return t
end
--- Serialize `t` with the configured codec and store it at self.path.
-- @param t the value to persist
-- @param as_bytecode forwarded to the codec's serialize function
-- @return true on success, followed by the on-disk size for codecs that write the file
-- themselves (nil otherwise); nil plus an error message on failure
function Persist:save(t, as_bytecode)
    local codec = codecs[self.codec]
    local ok, err
    if codec.writes_to_file then
        -- The codec handles the file I/O itself; on success, err carries the on-disk size.
        ok, err = codec.serialize(t, as_bytecode, self.path)
        if not ok then
            return nil, err
        end
    else
        local str
        str, err = codec.serialize(t, as_bytecode)
        if not str then
            return nil, err
        end
        local file
        file, err = io.open(self.path, "wb")
        if not file then
            return nil, err
        end
        -- Don't report success if the data could not actually be written (e.g., disk full).
        ok, err = file:write(str)
        if not ok then
            file:close()
            return nil, err
        end
        ffiUtil.fsyncOpenedFile(file)
        -- Buffered data may only hit an error when the handle is flushed on close.
        ok, err = file:close()
        if not ok then
            return nil, err
        end
    end

    -- If we've just created the file, fsync the directory, too
    if not self.loaded then
        ffiUtil.fsyncDirectory(self.path)
        self.loaded = true
    end

    -- err is the on-disk size when the codec writes the file itself, nil otherwise
    return true, err
end
--- Remove the backing file, provided self:exists() reports it as present.
-- @return the results of os.remove, or nothing when there was no file to remove
function Persist:delete()
    if self:exists() then
        return os.remove(self.path)
    end
end
--- Look up a codec by name.
-- @param name codec identifier (a key of the codecs table)
-- @return the matching codec table, or the serpent codec when `name` is unknown (or nil)
function Persist.getCodec(name)
    -- codecs is keyed by name, so a direct index replaces scanning it with pairs.
    return codecs[name] or codecs["serpent"]
end
return Persist