2
0
mirror of https://github.com/koreader/koreader synced 2024-11-10 01:10:34 +00:00
koreader/plugins/calibre.koplugin/metadata.lua

285 lines
8.4 KiB
Lua
Raw Normal View History

2020-10-12 15:20:16 +00:00
--[[--
This module implements functions for loading, saving and editing calibre metadata files.
2020-10-12 15:20:16 +00:00
Calibre uses JSON to store metadata on device after each wired transfer.
In wireless transfers calibre sends the same metadata to the client, which is in charge
of storing it.
@module koplugin.calibre.metadata
--]]--
local lfs = require("libs/libkoreader-lfs")
local rapidjson = require("rapidjson")
local logger = require("logger")
local parser = require("parser")
local util = require("util")
local time = require("ui/time")
local used_metadata = {
"uuid",
"lpath",
"last_modified",
"size",
"title",
"authors",
"tags",
"series",
"series_index"
}
-- The search metadata cache requires an even smaller subset
local search_used_metadata = {
"lpath",
"size",
"title",
"authors",
"tags",
"series",
"series_index"
}
local function slim(book, is_search)
local slim_book = rapidjson.object({})
for _, k in ipairs(is_search and search_used_metadata or used_metadata) do
if k == "series" or k == "series_index" then
slim_book[k] = book[k] or rapidjson.null
elseif k == "tags" or k == "authors" then
slim_book[k] = book[k] or rapidjson.array({})
else
slim_book[k] = book[k]
end
end
return slim_book
end
-- This is the max file size we attempt to decode using rapidjson.
-- For larger files we use a sax parser to avoid OOM errors
local MAX_JSON_FILESIZE = 50 * 1024 * 1024
--- find calibre files for a given dir
local function findCalibreFiles(dir)
local function existOrLast(file)
local fullname
local options = { file, "." .. file }
for _, option in ipairs(options) do
fullname = dir .. "/" .. option
if util.fileExists(fullname) then
return true, fullname
end
end
return false, fullname
end
local ok_meta, file_meta = existOrLast("metadata.calibre")
local ok_drive, file_drive = existOrLast("driveinfo.calibre")
return ok_meta, ok_drive, file_meta, file_drive
end
local CalibreMetadata = {
-- info about the library itself. It should
-- hold a table with the contents of "driveinfo.calibre"
drive = rapidjson.array({}),
-- info about the books in this library. It should
-- hold a table with the contents of "metadata.calibre"
books = rapidjson.array({}),
}
--- loads driveinfo from JSON file
function CalibreMetadata:loadDeviceInfo(file)
if not file then file = self.driveinfo end
local json, err = rapidjson.load(file)
if not json then
logger.warn("Unable to load device info from JSON file:", err)
return {}
end
return json
end
-- saves driveinfo to JSON file
function CalibreMetadata:saveDeviceInfo(arg)
-- keep previous device name. This allow us to identify the calibre driver used.
-- "Folder" is used by connect to folder
-- "KOReader" is used by smart device app
-- "Amazon", "Kobo", "Bq" ... are used by platform device drivers
local previous_name = self.drive.device_name
self.drive = arg
if previous_name then
self.drive.device_name = previous_name
end
rapidjson.dump(self.drive, self.driveinfo)
end
-- loads books' metadata from JSON file
function CalibreMetadata:loadBookList()
local attr = lfs.attributes(self.metadata)
if not attr then
logger.warn("Unable to get file attributes from JSON file:", self.metadata)
return rapidjson.array({})
end
local valid = attr.mode == "file" and attr.size > 0
if not valid then
logger.warn("File is invalid", self.metadata)
return rapidjson.array({})
end
local books, err
local impl = G_reader_settings:readSetting("calibre_json_parser") or attr.size > MAX_JSON_FILESIZE and "safe" or "fast"
if impl == "fast" then
books, err = rapidjson.load_calibre(self.metadata)
elseif impl == "safe" then
books, err = parser.parseFile(self.metadata)
else
books, err = rapidjson.load(self.metadata)
end
if not books then
logger.warn(string.format("Unable to load library from json file %s: \n%s",
self.metadata, err))
return rapidjson.array({})
end
return books
end
-- saves books' metadata to JSON file
function CalibreMetadata:saveBookList()
local file = self.metadata
local books = self.books
rapidjson.dump(books, file, { pretty = true })
end
-- add a book to our books table
function CalibreMetadata:addBook(book)
-- prevent duplicate entries
local _, index = self:getBookUuid(book.lpath)
if index then
self.books[index] = slim(book)
else
table.insert(self.books, #self.books + 1, slim(book))
end
end
-- remove a book from our books table
function CalibreMetadata:removeBook(lpath)
local function drop_lpath(t, i, j)
return t[i].lpath ~= lpath
end
util.arrayRemove(self.books, drop_lpath)
end
-- gets the uuid and index of a book from its path
function CalibreMetadata:getBookUuid(lpath)
for index, book in ipairs(self.books) do
if book.lpath == lpath then
return book.uuid, index
end
end
return "none"
end
-- gets the book id at the given index
function CalibreMetadata:getBookId(index)
local book = {}
book.priKey = index
for _, key in ipairs({"uuid", "lpath", "last_modified"}) do
book[key] = self.books[index][key]
end
return book
end
-- gets the book metadata at the given index
function CalibreMetadata:getBookMetadata(index)
return self.books[index]
end
-- removes deleted books from table
function CalibreMetadata:prune()
local count = 0
for index, book in ipairs(self.books) do
local path = self.path .. "/" .. book.lpath
if not util.fileExists(path) then
logger.dbg("prunning book from DB at index", index, "path", path)
self:removeBook(book.lpath)
count = count + 1
end
end
if count > 0 then
self:saveBookList()
end
return count
end
-- removes unused metadata from books
function CalibreMetadata:cleanUnused(is_search)
for index, book in ipairs(self.books) do
self.books[index] = slim(book, is_search)
end
-- We don't want to stomp on the library's actual JSON db for metadata searches.
if is_search then
return
end
self:saveBookList()
end
-- cleans all temp data stored for current library.
function CalibreMetadata:clean()
self.books = rapidjson.array({})
self.drive = rapidjson.array({})
self.path = nil
self.driveinfo = nil
self.metadata = nil
end
-- get keys from driveinfo.calibre
function CalibreMetadata:getDeviceInfo(dir, kind)
if not dir or not kind then return end
local _, ok_drive, __, driveinfo = findCalibreFiles(dir)
if not ok_drive then return end
local drive = self:loadDeviceInfo(driveinfo)
if drive then
return drive[kind]
end
end
-- initialize a directory as a calibre library.
-- This is the main function. Call it to initialize a calibre library
-- in a given path. It will find calibre files if they're on disk and
-- try to load info from them.
The great Input/GestureDetector/TimeVal spring cleanup (a.k.a., a saner main loop) (#7415) * ReaderDictionary: Port delay computations to TimeVal * ReaderHighlight: Port delay computations to TimeVal * ReaderView: Port delay computations to TimeVal * Android: Reset gesture detection state on APP_CMD_TERM_WINDOW. This prevents potentially being stuck in bogus gesture states when switching apps. * GestureDetector: * Port delay computations to TimeVal * Fixed delay computations to handle time warps (large and negative deltas). * Simplified timed callback handling to invalidate timers much earlier, preventing accumulating useless timers that no longer have any chance of ever detecting a gesture. * Fixed state clearing to handle the actual effective slots, instead of hard-coding slot 0 & slot 1. * Simplified timed callback handling in general, and added support for a timerfd backend for better performance and accuracy. * The improved timed callback handling allows us to detect and honor (as much as possible) the three possible clock sources usable by Linux evdev events. The only case where synthetic timestamps are used (and that only to handle timed callbacks) is limited to non-timerfd platforms where input events use a clock source that is *NOT* MONOTONIC. AFAICT, that's pretty much... PocketBook, and that's it? * Input: * Use the <linux/input.h> FFI module instead of re-declaring every constant * Fixed (verbose) debug logging of input events to actually translate said constants properly. * Completely reset gesture detection state on suspend. This should prevent bogus gesture detection on resume. * Refactored the waitEvent loop to make it easier to comprehend (hopefully) and much more efficient. Of specific note, it no longer does a crazy select spam every 100µs, instead computing and relying on sane timeouts, as afforded by switching the UI event/input loop to the MONOTONIC time base, and the refactored timed callbacks in GestureDetector. * reMarkable: Stopped enforcing synthetic timestamps on input events, as it should no longer be necessary. * TimeVal: * Refactored and simplified, especially as far as metamethods are concerned (based on <bsd/sys/time.h>). * Added a host of new methods to query the various POSIX clock sources, and made :now default to MONOTONIC. * Removed the debug guard in __sub, as time going backwards can be a perfectly normal occurrence. * New methods: * Clock sources: :realtime, :monotonic, :monotonic_coarse, :realtime_coarse, :boottime * Utility: :tonumber, :tousecs, :tomsecs, :fromnumber, :isPositive, :isZero * UIManager: * Ported event loop & scheduling to TimeVal, and switched to the MONOTONIC time base. This ensures reliable and consistent scheduling, as time is ensured never to go backwards. * Added a :getTime() method, that returns a cached TimeVal:now(), updated at the top of every UI frame. It's used throughout the codebase to cadge a syscall in circumstances where we are guaranteed that a syscall would return a mostly identical value, because very few time has passed. The only code left that does live syscalls does it because it's actually necessary for accuracy, and the only code left that does that in a REALTIME time base is code that *actually* deals with calendar time (e.g., Statistics). * DictQuickLookup: Port delay computations to TimeVal * FootNoteWidget: Port delay computations to TimeVal * HTMLBoxWidget: Port delay computations to TimeVal * Notification: Port delay computations to TimeVal * TextBoxWidget: Port delay computations to TimeVal * AutoSuspend: Port to TimeVal * AutoTurn: * Fix it so that settings are actually honored. * Port to TimeVal * BackgroundRunner: Port to TimeVal * Calibre: Port benchmarking code to TimeVal * BookInfoManager: Removed unnecessary yield in the metadata extraction subprocess now that subprocesses get scheduled properly. * All in all, these changes reduced the CPU cost of a single tap by a factor of ten (!), and got rid of an insane amount of weird poll/wakeup cycles that must have been hell on CPU schedulers and batteries..
2021-03-30 00:57:59 +00:00
-- NOTE: Take special notice of the books table, because it could be huge.
-- If you're not working with the metadata directly (ie: in wireless connections)
-- you should copy relevant data to another table and free this one to keep things tidy.
function CalibreMetadata:init(dir, is_search)
if not dir then return end
local start_time = time.now()
self.path = dir
local ok_meta, ok_drive, file_meta, file_drive = findCalibreFiles(dir)
self.driveinfo = file_drive
if ok_drive then
self.drive = self:loadDeviceInfo()
end
self.metadata = file_meta
if ok_meta then
self.books = self:loadBookList()
elseif is_search then
-- no metadata to search
return false
end
local msg
if is_search then
self:cleanUnused(is_search)
The great Input/GestureDetector/TimeVal spring cleanup (a.k.a., a saner main loop) (#7415) * ReaderDictionary: Port delay computations to TimeVal * ReaderHighlight: Port delay computations to TimeVal * ReaderView: Port delay computations to TimeVal * Android: Reset gesture detection state on APP_CMD_TERM_WINDOW. This prevents potentially being stuck in bogus gesture states when switching apps. * GestureDetector: * Port delay computations to TimeVal * Fixed delay computations to handle time warps (large and negative deltas). * Simplified timed callback handling to invalidate timers much earlier, preventing accumulating useless timers that no longer have any chance of ever detecting a gesture. * Fixed state clearing to handle the actual effective slots, instead of hard-coding slot 0 & slot 1. * Simplified timed callback handling in general, and added support for a timerfd backend for better performance and accuracy. * The improved timed callback handling allows us to detect and honor (as much as possible) the three possible clock sources usable by Linux evdev events. The only case where synthetic timestamps are used (and that only to handle timed callbacks) is limited to non-timerfd platforms where input events use a clock source that is *NOT* MONOTONIC. AFAICT, that's pretty much... PocketBook, and that's it? * Input: * Use the <linux/input.h> FFI module instead of re-declaring every constant * Fixed (verbose) debug logging of input events to actually translate said constants properly. * Completely reset gesture detection state on suspend. This should prevent bogus gesture detection on resume. * Refactored the waitEvent loop to make it easier to comprehend (hopefully) and much more efficient. Of specific note, it no longer does a crazy select spam every 100µs, instead computing and relying on sane timeouts, as afforded by switching the UI event/input loop to the MONOTONIC time base, and the refactored timed callbacks in GestureDetector. * reMarkable: Stopped enforcing synthetic timestamps on input events, as it should no longer be necessary. * TimeVal: * Refactored and simplified, especially as far as metamethods are concerned (based on <bsd/sys/time.h>). * Added a host of new methods to query the various POSIX clock sources, and made :now default to MONOTONIC. * Removed the debug guard in __sub, as time going backwards can be a perfectly normal occurrence. * New methods: * Clock sources: :realtime, :monotonic, :monotonic_coarse, :realtime_coarse, :boottime * Utility: :tonumber, :tousecs, :tomsecs, :fromnumber, :isPositive, :isZero * UIManager: * Ported event loop & scheduling to TimeVal, and switched to the MONOTONIC time base. This ensures reliable and consistent scheduling, as time is ensured never to go backwards. * Added a :getTime() method, that returns a cached TimeVal:now(), updated at the top of every UI frame. It's used throughout the codebase to cadge a syscall in circumstances where we are guaranteed that a syscall would return a mostly identical value, because very few time has passed. The only code left that does live syscalls does it because it's actually necessary for accuracy, and the only code left that does that in a REALTIME time base is code that *actually* deals with calendar time (e.g., Statistics). * DictQuickLookup: Port delay computations to TimeVal * FootNoteWidget: Port delay computations to TimeVal * HTMLBoxWidget: Port delay computations to TimeVal * Notification: Port delay computations to TimeVal * TextBoxWidget: Port delay computations to TimeVal * AutoSuspend: Port to TimeVal * AutoTurn: * Fix it so that settings are actually honored. * Port to TimeVal * BackgroundRunner: Port to TimeVal * Calibre: Port benchmarking code to TimeVal * BookInfoManager: Removed unnecessary yield in the metadata extraction subprocess now that subprocesses get scheduled properly. * All in all, these changes reduced the CPU cost of a single tap by a factor of ten (!), and got rid of an insane amount of weird poll/wakeup cycles that must have been hell on CPU schedulers and batteries..
2021-03-30 00:57:59 +00:00
msg = string.format("(search) in %.3f milliseconds: %d books",
time.to_ms(time.since(start_time)), #self.books)
else
local deleted_count = self:prune()
self:cleanUnused()
The great Input/GestureDetector/TimeVal spring cleanup (a.k.a., a saner main loop) (#7415) * ReaderDictionary: Port delay computations to TimeVal * ReaderHighlight: Port delay computations to TimeVal * ReaderView: Port delay computations to TimeVal * Android: Reset gesture detection state on APP_CMD_TERM_WINDOW. This prevents potentially being stuck in bogus gesture states when switching apps. * GestureDetector: * Port delay computations to TimeVal * Fixed delay computations to handle time warps (large and negative deltas). * Simplified timed callback handling to invalidate timers much earlier, preventing accumulating useless timers that no longer have any chance of ever detecting a gesture. * Fixed state clearing to handle the actual effective slots, instead of hard-coding slot 0 & slot 1. * Simplified timed callback handling in general, and added support for a timerfd backend for better performance and accuracy. * The improved timed callback handling allows us to detect and honor (as much as possible) the three possible clock sources usable by Linux evdev events. The only case where synthetic timestamps are used (and that only to handle timed callbacks) is limited to non-timerfd platforms where input events use a clock source that is *NOT* MONOTONIC. AFAICT, that's pretty much... PocketBook, and that's it? * Input: * Use the <linux/input.h> FFI module instead of re-declaring every constant * Fixed (verbose) debug logging of input events to actually translate said constants properly. * Completely reset gesture detection state on suspend. This should prevent bogus gesture detection on resume. * Refactored the waitEvent loop to make it easier to comprehend (hopefully) and much more efficient. Of specific note, it no longer does a crazy select spam every 100µs, instead computing and relying on sane timeouts, as afforded by switching the UI event/input loop to the MONOTONIC time base, and the refactored timed callbacks in GestureDetector. * reMarkable: Stopped enforcing synthetic timestamps on input events, as it should no longer be necessary. * TimeVal: * Refactored and simplified, especially as far as metamethods are concerned (based on <bsd/sys/time.h>). * Added a host of new methods to query the various POSIX clock sources, and made :now default to MONOTONIC. * Removed the debug guard in __sub, as time going backwards can be a perfectly normal occurrence. * New methods: * Clock sources: :realtime, :monotonic, :monotonic_coarse, :realtime_coarse, :boottime * Utility: :tonumber, :tousecs, :tomsecs, :fromnumber, :isPositive, :isZero * UIManager: * Ported event loop & scheduling to TimeVal, and switched to the MONOTONIC time base. This ensures reliable and consistent scheduling, as time is ensured never to go backwards. * Added a :getTime() method, that returns a cached TimeVal:now(), updated at the top of every UI frame. It's used throughout the codebase to cadge a syscall in circumstances where we are guaranteed that a syscall would return a mostly identical value, because very few time has passed. The only code left that does live syscalls does it because it's actually necessary for accuracy, and the only code left that does that in a REALTIME time base is code that *actually* deals with calendar time (e.g., Statistics). * DictQuickLookup: Port delay computations to TimeVal * FootNoteWidget: Port delay computations to TimeVal * HTMLBoxWidget: Port delay computations to TimeVal * Notification: Port delay computations to TimeVal * TextBoxWidget: Port delay computations to TimeVal * AutoSuspend: Port to TimeVal * AutoTurn: * Fix it so that settings are actually honored. * Port to TimeVal * BackgroundRunner: Port to TimeVal * Calibre: Port benchmarking code to TimeVal * BookInfoManager: Removed unnecessary yield in the metadata extraction subprocess now that subprocesses get scheduled properly. * All in all, these changes reduced the CPU cost of a single tap by a factor of ten (!), and got rid of an insane amount of weird poll/wakeup cycles that must have been hell on CPU schedulers and batteries..
2021-03-30 00:57:59 +00:00
msg = string.format("in %.3f milliseconds: %d books. %d pruned",
time.to_ms(time.since(start_time)), #self.books, deleted_count)
end
logger.info(string.format("calibre info loaded from disk %s", msg))
return true
end
return CalibreMetadata