Cache: Fix a whole lot of things.

  * Minor updates to the min & max cache sizes (16 & 64MB), mostly to satisfy my power-of-two OCD.
  * Purge broken on-disk cache files.
  * Optimize free RAM computations.
  * Start dropping LRU items when running low on memory, before pre-rendering (hinting) pages in non-reflowable documents.
  * Make serialize dump the most recently *displayed* page, as the actual MRU item is the most recently *hinted* page, not the current one.
  * Use more accurate item size estimations across the whole codebase (a rough sketch of the conventions follows this list).
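
For reference, here is a minimal sketch (not code from this commit; the helper names are made up) of the two estimation conventions the diff below applies: roughly two machine words per key/value pair for small Lua tables, and stride × height plus a small constant for anything wrapping a blitbuffer.

    -- Illustrative only; helper names are hypothetical.
    -- 1) Plain Lua tables: assume a key/value pair costs ~2 machine words (~16 bytes on a 64-bit build).
    local function estimateTableSize(nb_pairs)
        return nb_pairs * 16
    end

    -- 2) Blitbuffer-backed items (tiles, glyphs): the pixel buffer dominates, so count
    --    stride * height (in bytes) plus a small constant for the wrapper table.
    local function estimateBBSize(bb)
        return tonumber(bb.stride) * bb.h + 512
    end

    print(estimateTableSize(4)) --> 64, i.e., a Geom-like table sits comfortably under CacheItem's new 128-byte default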

TileCacheItem:

  * Drop lua-serialize in favor of Persist.

KoptInterface:

  * Drop lua-serialize in favor of Persist.
  * Make KOPTContext caching actually work by ensuring its hash is stable (see the sketch below).
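
To illustrate that last point, a minimal sketch (not KOReader code; the real fix simply swaps pairs() for ffiUtil.orderedPairs() in Configurable:hash, as shown in the diff below) of why an unordered traversal breaks cache keys:

    -- pairs() iteration order is unspecified, so the same settings table can hash to
    -- different strings across runs or instances, and KOPTContext cache lookups silently miss.
    local settings = { text_wrap = 1, page_margin = 0.06, quality = 1.0 }

    local function unstable_hash(t, sep)
        local hash = ""
        for _, value in pairs(t) do -- arbitrary order
            hash = hash .. sep .. tostring(value)
        end
        return hash
    end

    -- Visiting keys in a sorted order yields a deterministic hash, which is the same
    -- idea ffiUtil.orderedPairs() implements.
    local function stable_hash(t, sep)
        local keys = {}
        for key in pairs(t) do table.insert(keys, key) end
        table.sort(keys)
        local hash = ""
        for _, key in ipairs(keys) do
            hash = hash .. sep .. tostring(t[key])
        end
        return hash
    end

    print(stable_hash(settings, "|")) -- always the same string, unlike unstable_hash(settings, "|")
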
reviewable/pr7635/r1
NiLuJe 3 years ago
parent e7acec1526
commit ce624be8b8

@@ -29,3 +29,12 @@ Android won't have a crash.log file because Google restricts what apps can log,
 Please try to include the relevant sections in your issue description.
 You can upload the whole `crash.log` file on GitHub by dragging and
 dropping it onto this textbox.
+If you instead opt to inline it, please do so behind a spoiler tag:
+<details>
+  <summary>crash.log</summary>
+
+```
+<Paste crash.log content here>
+```
+
+</details>

@@ -27,13 +27,13 @@ DHINTCOUNT = 1
 DRENDER_MODE = 0 -- 0 is COLOUR
 -- minimum cache size
-DGLOBAL_CACHE_SIZE_MINIMUM = 1024*1024*10
+DGLOBAL_CACHE_SIZE_MINIMUM = 1024*1024*16
 -- proportion of system free memory used as global cache
 DGLOBAL_CACHE_FREE_PROPORTION = 0.4
 -- maximum cache size
-DGLOBAL_CACHE_SIZE_MAXIMUM = 1024*1024*60
+DGLOBAL_CACHE_SIZE_MAXIMUM = 1024*1024*64
 -- background colour in non scroll mode: 8 = gray, 0 = white, 15 = black
 DBACKGROUND_COLOR = 0

@@ -458,9 +458,9 @@ function ReaderZooming:getZoom(pageno)
                 or self.zoom_factor
         zoom = zoom_w * zoom_factor
     end
-    if zoom and zoom > 10 and not Cache:willAccept(zoom * (self.dimen.w * self.dimen.h + 64)) then
+    if zoom and zoom > 10 and not Cache:willAccept(zoom * (self.dimen.w * self.dimen.h + 512)) then
         logger.dbg("zoom too large, adjusting")
-        while not Cache:willAccept(zoom * (self.dimen.w * self.dimen.h + 64)) do
+        while not Cache:willAccept(zoom * (self.dimen.w * self.dimen.h + 512)) do
             if zoom > 100 then
                 zoom = zoom - 50
             elseif zoom > 10 then

@@ -733,13 +733,13 @@ function ReaderUI:onClose(full_refresh)
     if self.dialog ~= self then
         self:saveSettings()
     end
-    -- serialize last used items for later launch
-    Cache:serialize()
     if self.document ~= nil then
         logger.dbg("closing document")
         self:notifyCloseDocument()
     end
     UIManager:close(self.dialog, full_refresh and "full")
+    -- serialize last used items for later launch
+    Cache:serialize()
     if _running_instance == self then
         _running_instance = nil
     end

@@ -12,28 +12,99 @@ if CanvasContext.should_restrict_JIT then
     jit.off(true, true)
 end
 
+-- For documentation purposes, here's a battle-tested shell version of calcFreeMem
+--[[
+if grep -q 'MemAvailable' /proc/meminfo ; then
+    # We'll settle for 85% of available memory to leave a bit of breathing room
+    tmpfs_size="$(awk '/MemAvailable/ {printf "%d", $2 * 0.85}' /proc/meminfo)"
+elif grep -q 'Inactive(file)' /proc/meminfo ; then
+    # Basically try to emulate the kernel's computation, c.f., https://unix.stackexchange.com/q/261247
+    # Again, 85% of available memory
+    tmpfs_size="$(awk -v low=$(grep low /proc/zoneinfo | awk '{k+=$2}END{printf "%d", k}') \
+        '{a[$1]=$2}
+        END{
+            printf "%d", (a["MemFree:"]+a["Active(file):"]+a["Inactive(file):"]+a["SReclaimable:"]-(12*low))*0.85;
+        }' /proc/meminfo)"
+else
+    # Ye olde crap workaround of Free + Buffers + Cache...
+    # Take it with a grain of salt, and settle for 80% of that...
+    tmpfs_size="$(awk \
+        '{a[$1]=$2}
+        END{
+            printf "%d", (a["MemFree:"]+a["Buffers:"]+a["Cached:"])*0.80;
+        }' /proc/meminfo)"
+fi
+--]]
+-- And here's our simplified Lua version...
 local function calcFreeMem()
+    local memtotal, memfree, memavailable, buffers, cached
+
     local meminfo = io.open("/proc/meminfo", "r")
-    local freemem = 0
     if meminfo then
         for line in meminfo:lines() do
-            local free, buffer, cached, n
-            free, n = line:gsub("^MemFree:%s-(%d+) kB", "%1")
-            if n ~= 0 then freemem = freemem + tonumber(free)*1024 end
-            buffer, n = line:gsub("^Buffers:%s-(%d+) kB", "%1")
-            if n ~= 0 then freemem = freemem + tonumber(buffer)*1024 end
-            cached, n = line:gsub("^Cached:%s-(%d+) kB", "%1")
-            if n ~= 0 then freemem = freemem + tonumber(cached)*1024 end
+            if not memtotal then
+                memtotal = line:match("^MemTotal:%s-(%d+) kB")
+                if memtotal then
+                    -- Next!
+                    goto continue
+                end
+            end
+
+            if not memfree then
+                memfree = line:match("^MemFree:%s-(%d+) kB")
+                if memfree then
+                    -- Next!
+                    goto continue
+                end
+            end
+
+            if not memavailable then
+                memavailable = line:match("^MemAvailable:%s-(%d+) kB")
+                if memavailable then
+                    -- Best case scenario, we're done :)
+                    break
+                end
+            end
+
+            if not buffers then
+                buffers = line:match("^Buffers:%s-(%d+) kB")
+                if buffers then
+                    -- Next!
+                    goto continue
+                end
+            end
+
+            if not cached then
+                cached = line:match("^Cached:%s-(%d+) kB")
+                if cached then
+                    -- Ought to be the last entry we care about, we're done
+                    break
+                end
+            end
+
+            ::continue::
         end
         meminfo:close()
+    else
+        -- Not on Linux?
+        return 0, 0
     end
-    return freemem
+
+    if memavailable then
+        -- Leave a bit of margin, and report 85% of that...
+        return math.floor(memavailable * 0.85) * 1024, memtotal * 1024
+    else
+        -- Crappy Free + Buffers + Cache version, because the zoneinfo approach is a tad hairy...
+        -- So, leave an even larger margin, and only report 75% of that...
+        return math.floor((memfree + buffers + cached) * 0.75) * 1024, memtotal * 1024
+    end
 end
 
 local function calcCacheMemSize()
     local min = DGLOBAL_CACHE_SIZE_MINIMUM
     local max = DGLOBAL_CACHE_SIZE_MAXIMUM
-    local calc = calcFreeMem()*(DGLOBAL_CACHE_FREE_PROPORTION or 0)
+    local calc = calcFreeMem() * (DGLOBAL_CACHE_FREE_PROPORTION or 0)
     return math.min(max, math.max(min, calc))
 end
@@ -45,7 +116,7 @@ local cache_path = DataStorage:getDataDir() .. "/cache/"
 local function getDiskCache()
     local cached = {}
     for key_md5 in lfs.dir(cache_path) do
-        local file = cache_path..key_md5
+        local file = cache_path .. key_md5
         if lfs.attributes(file, "mode") == "file" then
             cached[key_md5] = file
         end
@@ -78,13 +149,13 @@ function Cache:_unref(key)
     for i = #self.cache_order, 1, -1 do
         if self.cache_order[i] == key then
             table.remove(self.cache_order, i)
+            break
         end
     end
 end
 
 -- internal: free cache item
 function Cache:_free(key)
+    if not self.cache[key] then return end
+
     self.current_memsize = self.current_memsize - self.cache[key].size
     self.cache[key]:onFree()
     self.cache[key] = nil
@@ -92,6 +163,8 @@
 -- drop an item named via key from the cache
 function Cache:drop(key)
+    if not self.cache[key] then return end
+
     self:_unref(key)
     self:_free(key)
 end
@@ -99,31 +172,37 @@
 function Cache:insert(key, object)
     -- make sure that one key only exists once: delete existing
     self:drop(key)
-    -- guarantee that we have enough memory in cache
-    if (object.size > self.max_memsize) then
-        logger.warn("too much memory claimed for", key)
+    -- If this object is single-handedly too large for the cache, we're done
+    if object.size > self.max_memsize then
+        logger.warn("Too much memory would be claimed by caching", key)
         return
     end
-    -- delete objects that least recently used
+    -- If inserting this object would blow the cache's watermark,
+    -- start dropping least recently used items first.
     -- (they are at the end of the cache_order array)
     while self.current_memsize + object.size > self.max_memsize do
         local removed_key = table.remove(self.cache_order)
-        self:_free(removed_key)
+        if removed_key then
+            self:_free(removed_key)
+        else
+            logger.warn("Cache accounting is broken")
+            break
+        end
     end
-    -- insert new object in front of the LRU order
+    -- Insert new object in front of the LRU order
     table.insert(self.cache_order, 1, key)
     self.cache[key] = object
     self.current_memsize = self.current_memsize + object.size
 end
 
 --[[
--- check for cache item for key
+-- check for cache item by key
 -- if ItemClass is given, disk cache is also checked.
 --]]
 function Cache:check(key, ItemClass)
     if self.cache[key] then
         if self.cache_order[1] ~= key then
-            -- put key in front of the LRU list
+            -- Move key in front of the LRU list (i.e., MRU)
             self:_unref(key)
             table.insert(self.cache_order, 1, key)
         end
@@ -137,57 +216,77 @@ function Cache:check(key, ItemClass)
                 self:insert(key, item)
                 return item
             else
-                logger.warn("discard cache", msg)
+                logger.warn("Failed to load on-disk cache:", msg)
+                --- It's apparently unusable, purge it and refresh the snapshot.
+                os.remove(cached)
+                self:refreshSnapshot()
             end
         end
     end
 end
 
 function Cache:willAccept(size)
-    -- we only allow single objects to fill 75% of the cache
-    if size*4 < self.max_memsize*3 then
-        return true
-    end
+    -- We only allow single objects to fill 75% of the cache
+    return size*4 < self.max_memsize*3
 end
 
 function Cache:serialize()
-    -- calculate disk cache size
+    -- Calculate the current disk cache size
     local cached_size = 0
     local sorted_caches = {}
     for _, file in pairs(self.cached) do
         table.insert(sorted_caches, {file=file, time=lfs.attributes(file, "access")})
         cached_size = cached_size + (lfs.attributes(file, "size") or 0)
     end
-    table.sort(sorted_caches, function(v1,v2) return v1.time > v2.time end)
+    table.sort(sorted_caches, function(v1, v2) return v1.time > v2.time end)
 
-    -- only serialize the most recently used cache
-    local cache_size = 0
+    -- Only serialize the second most recently used cache item (as the MRU would be the *hinted* page).
+    local mru_key
+    local mru_found = 0
     for _, key in ipairs(self.cache_order) do
         local cache_item = self.cache[key]
-        -- only dump cache item that requests serialization explicitly
+
+        -- Only dump cache items that actually request persistence
         if cache_item.persistent and cache_item.dump then
-            local cache_full_path = cache_path..md5(key)
-            local cache_file_exists = lfs.attributes(cache_full_path)
-            if cache_file_exists then break end
-            logger.dbg("dump cache item", key)
-            cache_size = cache_item:dump(cache_full_path) or 0
-            if cache_size > 0 then break end
+            mru_key = key
+            mru_found = mru_found + 1
+            if mru_found >= 2 then
+                -- We found the second MRU item, i.e., the *displayed* page
+                break
+            end
         end
     end
+    if mru_key then
+        local cache_full_path = cache_path .. md5(mru_key)
+        local cache_file_exists = lfs.attributes(cache_full_path)
+
+        if not cache_file_exists then
+            logger.dbg("Dumping cache item", mru_key)
+            local cache_item = self.cache[mru_key]
+            local cache_size = cache_item:dump(cache_full_path)
+            if cache_size then
+                cached_size = cached_size + cache_size
+            end
+        end
+    end
 
-    -- set disk cache the same limit as memory cache
-    while cached_size + cache_size - self.max_memsize > 0 do
+    -- Allocate the same amount of storage to the disk cache as to the memory cache
+    while cached_size > self.max_memsize do
         -- discard the least recently used cache
         local discarded = table.remove(sorted_caches)
-        cached_size = cached_size - lfs.attributes(discarded.file, "size")
-        os.remove(discarded.file)
+        if discarded then
+            cached_size = cached_size - lfs.attributes(discarded.file, "size")
+            os.remove(discarded.file)
+        else
+            logger.warn("Cache accounting is broken")
+            break
+        end
     end
-    -- disk cache may have changes so need to refresh disk cache snapshot
-    self.cached = getDiskCache()
+    -- We may have updated the disk cache's content, so refresh its state
+    self:refreshSnapshot()
 end
 
--- blank the cache
+-- Blank the cache
 function Cache:clear()
     for k, _ in pairs(self.cache) do
         self.cache[k]:onFree()
@@ -197,9 +296,41 @@ function Cache:clear()
     self.current_memsize = 0
 end
 
+-- Terribly crappy workaround: evict half the cache if we appear to be redlining on free RAM...
+function Cache:memoryPressureCheck()
+    local memfree, memtotal = calcFreeMem()
+
+    -- Nonsensical values? (!Linux), skip this.
+    if memtotal == 0 then
+        return
+    end
+
+    -- If less than 20% of the total RAM is free, drop half the Cache...
+    if memfree / memtotal < 0.20 then
+        logger.warn("Running low on memory, evicting half of the cache...")
+        for i = #self.cache_order / 2, 1, -1 do
+            local removed_key = table.remove(self.cache_order)
+            self:_free(removed_key)
+        end
+
+        -- And finish by forcing a GC sweep now...
+        collectgarbage()
+        collectgarbage()
+    end
+end
+
 -- Refresh the disk snapshot (mainly used by ui/data/onetime_migration)
 function Cache:refreshSnapshot()
     self.cached = getDiskCache()
 end
 
+-- Evict the disk cache (ditto)
+function Cache:clearDiskCache()
+    for _, file in pairs(self.cached) do
+        os.remove(file)
+    end
+
+    self:refreshSnapshot()
+end
+
 return Cache

@@ -3,8 +3,11 @@ Inheritable abstraction for cache items
 --]]
 
 local CacheItem = {
-    size = 64, -- some reasonable default for simple Lua values / small tables
+    size = 128, -- some reasonable default for a small table.
 }
 
+--- NOTE: As far as size estimations go, the assumption is that a key/value pair should roughly take two words,
+---       and the most common items we cache are Geom-like tables (i.e., 4 key/value pairs).
+---       That's generally a low estimation, especially for larger tables, where memory allocation trickery may be happening.
 function CacheItem:new(o)
     o = o or {}

@@ -1,3 +1,5 @@
+local ffiUtil = require("ffi/util")
+
 local Configurable = {}
 
 function Configurable:new(o)
@@ -8,7 +10,7 @@ function Configurable:new(o)
 end
 
 function Configurable:reset()
-    for key,value in pairs(self) do
+    for key, value in pairs(self) do
         local value_type = type(value)
         if value_type == "number" or value_type == "string" then
             self[key] = nil
@@ -18,7 +20,7 @@
 function Configurable:hash(sep)
     local hash = ""
-    for key,value in pairs(self) do
+    for key, value in ffiUtil.orderedPairs(self) do
         local value_type = type(value)
         if value_type == "number" or value_type == "string" then
             hash = hash..sep..value
@@ -31,7 +33,7 @@ function Configurable:loadDefaults(config_options)
     -- reset configurable before loading new options
     self:reset()
     local prefix = config_options.prefix.."_"
-    for i=1,#config_options do
+    for i=1, #config_options do
         local options = config_options[i].options
         for j=1,#options do
             local key = options[j].name
@@ -46,7 +48,7 @@ function Configurable:loadDefaults(config_options)
 end
 
 function Configurable:loadSettings(settings, prefix)
-    for key,value in pairs(self) do
+    for key, value in pairs(self) do
         local value_type = type(value)
         if value_type == "number" or value_type == "string"
             or value_type == "table" then
@@ -59,7 +61,7 @@ function Configurable:loadSettings(settings, prefix)
 end
 
 function Configurable:saveSettings(settings, prefix)
-    for key,value in pairs(self) do
+    for key, value in pairs(self) do
         local value_type = type(value)
         if value_type == "number" or value_type == "string"
             or value_type == "table" then

@@ -93,9 +93,19 @@
 -- this might be overridden by a document implementation
 function Document:close()
     local DocumentRegistry = require("document/documentregistry")
-    if self.is_open and DocumentRegistry:closeDocument(self.file) == 0 then
-        self.is_open = false
-        self._document:close()
+    if self.is_open then
+        if DocumentRegistry:closeDocument(self.file) == 0 then
+            self.is_open = false
+            self._document:close()
+            self._document = nil
+
+            -- NOTE: DocumentRegistry:openDocument will force a GC sweep the next time we open a Document.
+            --       MµPDF will also do a bit of spring cleaning of its internal cache when opening a *different* document.
+        else
+            logger.warn("Tried to close a document with *multiple* remaining hot references")
+        end
+    else
+        logger.warn("Tried to close an already closed document")
     end
 end
@@ -375,7 +385,7 @@ function Document:renderPage(pageno, rect, zoom, rotation, gamma, render_mode)
     -- this will be the size we actually render
     local size = page_size
     -- we prefer to render the full page, if it fits into cache
-    if not Cache:willAccept(size.w * size.h + 64) then
+    if not Cache:willAccept(size.w * size.h * (self.render_color and 4 or 1) + 512) then
         -- whole page won't fit into cache
         logger.dbg("rendering only part of the page")
         --- @todo figure out how to better segment the page
@@ -392,11 +402,11 @@ function Document:renderPage(pageno, rect, zoom, rotation, gamma, render_mode)
     -- prepare cache item with contained blitbuffer
     tile = TileCacheItem:new{
         persistent = true,
-        size = size.w * size.h + 64, -- estimation
         excerpt = size,
         pageno = pageno,
         bb = Blitbuffer.new(size.w, size.h, self.render_color and self.color_bb_type or nil)
     }
+    tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
 
     -- create a draw context
     local dc = DrawContext.new()
@@ -429,6 +439,9 @@
 -- a hint for the cache engine to paint a full page to the cache
 --- @todo this should trigger a background operation
 function Document:hintPage(pageno, zoom, rotation, gamma, render_mode)
+    --- @note: Crappy safeguard around memory issues like in #7627: if we're eating too much RAM, drop half the cache...
+    Cache:memoryPressureCheck()
+
     logger.dbg("hinting page", pageno)
     self:renderPage(pageno, nil, zoom, rotation, gamma, render_mode)
 end

@@ -10,9 +10,9 @@ local DEBUG = require("dbg")
 local Document = require("document/document")
 local Geom = require("ui/geometry")
 local KOPTContext = require("ffi/koptcontext")
+local Persist = require("persist")
 local TileCacheItem = require("document/tilecacheitem")
 local logger = require("logger")
-local serial = require("serialize")
 local util = require("ffi/util")
 
 local KoptInterface = {
@@ -36,16 +36,41 @@
 function ContextCacheItem:dump(filename)
     if self.kctx:isPreCache() == 0 then
-        logger.dbg("dumping koptcontext to", filename)
-        return serial.dump(self.size, KOPTContext.totable(self.kctx), filename)
+        logger.dbg("Dumping KOPTContext to", filename)
+
+        local cache_file = Persist:new{
+            path = filename,
+            codec = "zstd",
+        }
+
+        local t = KOPTContext.totable(self.kctx)
+        t.cache_size = self.size
+
+        local ok, size = cache_file:save(t)
+        if ok then
+            return size
+        else
+            logger.warn("Failed to dump KOPTContext")
+            return nil
+        end
     end
 end
 
 function ContextCacheItem:load(filename)
-    logger.dbg("loading koptcontext from", filename)
-    local size, kc_table = serial.load(filename)
-    self.size = size
-    self.kctx = KOPTContext.fromtable(kc_table)
+    logger.dbg("Loading KOPTContext from", filename)
+
+    local cache_file = Persist:new{
+        path = filename,
+        codec = "zstd",
+    }
+
+    local t = cache_file:load(filename)
+    if t then
+        self.size = t.cache_size
+        self.kctx = KOPTContext.fromtable(t)
+    else
+        logger.warn("Failed to load KOPTContext")
+    end
 end
 
 local OCREngine = CacheItem:new{}
@@ -154,7 +179,8 @@ Auto detect bbox.
 function KoptInterface:getAutoBBox(doc, pageno)
     local native_size = Document.getNativePageDimensions(doc, pageno)
     local bbox = {
-        x0 = 0, y0 = 0,
+        x0 = 0,
+        y0 = 0,
         x1 = native_size.w,
         y1 = native_size.h,
     }
@@ -172,7 +198,7 @@ function KoptInterface:getAutoBBox(doc, pageno)
         else
             bbox = Document.getPageBBox(doc, pageno)
         end
-        Cache:insert(hash, CacheItem:new{ autobbox = bbox })
+        Cache:insert(hash, CacheItem:new{ autobbox = bbox, size = 160 })
         page:close()
         kc:free()
         return bbox
@@ -207,7 +233,7 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
             auto_bbox = bbox
         end
         page:close()
-        Cache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox })
+        Cache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox, size = 160 })
         kc:free()
         return auto_bbox
     else
@@ -240,7 +266,7 @@ function KoptInterface:getCachedContext(doc, pageno)
         --self:logReflowDuration(pageno, dur)
         local fullwidth, fullheight = kc:getPageDim()
         logger.dbg("reflowed page", pageno, "fullwidth:", fullwidth, "fullheight:", fullheight)
-        self.last_context_size = fullwidth * fullheight + 128 -- estimation
+        self.last_context_size = fullwidth * fullheight + 3072 -- estimation
         Cache:insert(kctx_hash, ContextCacheItem:new{
             persistent = true,
             size = self.last_context_size,
@@ -251,7 +277,7 @@
         -- wait for background thread
         local kc = self:waitForContext(cached.kctx)
         local fullwidth, fullheight = kc:getPageDim()
-        self.last_context_size = fullwidth * fullheight + 128 -- estimation
+        self.last_context_size = fullwidth * fullheight + 3072 -- estimation
         return kc
     end
 end
@@ -312,20 +338,20 @@ function KoptInterface:renderReflowedPage(doc, pageno, rect, zoom, rotation, ren
     local cached = Cache:check(renderpg_hash)
     if not cached then
-        -- do the real reflowing if kctx is not been cached yet
+        -- do the real reflowing if kctx has not been cached yet
         local kc = self:getCachedContext(doc, pageno)
         local fullwidth, fullheight = kc:getPageDim()
-        if not Cache:willAccept(fullwidth * fullheight / 2) then
+        if not Cache:willAccept(fullwidth * fullheight) then
             -- whole page won't fit into cache
             error("aborting, since we don't have enough cache for this page")
         end
         -- prepare cache item with contained blitbuffer
         local tile = TileCacheItem:new{
-            size = fullwidth * fullheight + 64, -- estimation
             excerpt = Geom:new{ w = fullwidth, h = fullheight },
             pageno = pageno,
         }
         tile.bb = kc:dstToBlitBuffer()
+        tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
         Cache:insert(renderpg_hash, tile)
         return tile
     else
@@ -363,7 +389,6 @@ function KoptInterface:renderOptimizedPage(doc, pageno, rect, zoom, rotation, re
         -- prepare cache item with contained blitbuffer
         local tile = TileCacheItem:new{
             persistent = true,
-            size = fullwidth * fullheight + 64, -- estimation
             excerpt = Geom:new{
                 x = 0, y = 0,
                 w = fullwidth,
@@ -372,6 +397,7 @@ function KoptInterface:renderOptimizedPage(doc, pageno, rect, zoom, rotation, re
             pageno = pageno,
         }
         tile.bb = kc:dstToBlitBuffer()
+        tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
         kc:free()
         Cache:insert(renderpg_hash, tile)
         return tile
@@ -478,8 +504,8 @@ function KoptInterface:getReflowedTextBoxes(doc, pageno)
             local kc = self:waitForContext(cached.kctx)
             --kc:setDebug()
             local fullwidth, fullheight = kc:getPageDim()
-            local boxes = kc:getReflowedWordBoxes("dst", 0, 0, fullwidth, fullheight)
-            Cache:insert(hash, CacheItem:new{ rfpgboxes = boxes })
+            local boxes, nr_word = kc:getReflowedWordBoxes("dst", 0, 0, fullwidth, fullheight)
+            Cache:insert(hash, CacheItem:new{ rfpgboxes = boxes, size = 192 * nr_word }) -- estimation
             return boxes
         end
     else
@@ -502,8 +528,8 @@ function KoptInterface:getNativeTextBoxes(doc, pageno)
             local kc = self:waitForContext(cached.kctx)
             --kc:setDebug()
             local fullwidth, fullheight = kc:getPageDim()
-            local boxes = kc:getNativeWordBoxes("dst", 0, 0, fullwidth, fullheight)
-            Cache:insert(hash, CacheItem:new{ nativepgboxes = boxes })
+            local boxes, nr_word = kc:getNativeWordBoxes("dst", 0, 0, fullwidth, fullheight)
+            Cache:insert(hash, CacheItem:new{ nativepgboxes = boxes, size = 192 * nr_word }) -- estimation
             return boxes
         end
     else
@@ -529,8 +555,8 @@ function KoptInterface:getReflowedTextBoxesFromScratch(doc, pageno)
             local fullwidth, fullheight = reflowed_kc:getPageDim()
             local kc = self:createContext(doc, pageno)
             kc:copyDestBMP(reflowed_kc)
-            local boxes = kc:getNativeWordBoxes("dst", 0, 0, fullwidth, fullheight)
-            Cache:insert(hash, CacheItem:new{ scratchrfpgboxes = boxes })
+            local boxes, nr_word = kc:getNativeWordBoxes("dst", 0, 0, fullwidth, fullheight)
+            Cache:insert(hash, CacheItem:new{ scratchrfpgboxes = boxes, size = 192 * nr_word }) -- estimation
             kc:free()
             return boxes
         end
@@ -575,8 +601,8 @@ function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno)
         kc:setZoom(1.0)
         local page = doc._document:openPage(pageno)
         page:getPagePix(kc)
-        local boxes = kc:getNativeWordBoxes("src", 0, 0, page_size.w, page_size.h)
-        Cache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes })
+        local boxes, nr_word = kc:getNativeWordBoxes("src", 0, 0, page_size.w, page_size.h)
+        Cache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes, size = 192 * nr_word }) -- estimation
         page:close()
         kc:free()
         return boxes
@@ -607,7 +633,7 @@ function KoptInterface:getPageBlock(doc, pageno, x, y)
         local page = doc._document:openPage(pageno)
         page:getPagePix(kc)
         kc:findPageBlocks()
-        Cache:insert(hash, CacheItem:new{ kctx = kc })
+        Cache:insert(hash, CacheItem:new{ kctx = kc, size = 3072 }) -- estimation
         page:close()
         kctx = kc
     else
@@ -621,7 +647,7 @@ Get word from OCR providing selected word box.
 --]]
 function KoptInterface:getOCRWord(doc, pageno, wbox)
     if not Cache:check(self.ocrengine) then
-        Cache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new() })
+        Cache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
     end
     if doc.configurable.text_wrap == 1 then
         return self:getReflewOCRWord(doc, pageno, wbox.sbox)
@@ -648,7 +674,7 @@ function KoptInterface:getReflewOCRWord(doc, pageno, rect)
                 kc.getTOCRWord, kc, "dst",
                 rect.x, rect.y, rect.w, rect.h,
                 self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
-            Cache:insert(hash, CacheItem:new{ rfocrword = word })
+            Cache:insert(hash, CacheItem:new{ rfocrword = word, size = #word + 64 }) -- estimation
             return word
         end
     else
@@ -681,7 +707,7 @@ function KoptInterface:getNativeOCRWord(doc, pageno, rect)
             kc.getTOCRWord, kc, "src",
             0, 0, word_w, word_h,
             self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
-        Cache:insert(hash, CacheItem:new{ ocrword = word })
+        Cache:insert(hash, CacheItem:new{ ocrword = word, size = #word + 64 }) -- estimation
         logger.dbg("word", word)
         page:close()
         kc:free()
@@ -696,7 +722,7 @@ Get text from OCR providing selected text boxes.
 --]]
 function KoptInterface:getOCRText(doc, pageno, tboxes)
     if not Cache:check(self.ocrengine) then
-        Cache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new() })
+        Cache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
     end
     logger.info("Not implemented yet")
 end

@@ -152,9 +152,9 @@ function PdfDocument:getUsedBBox(pageno)
     if used.x1 > pwidth then used.x1 = pwidth end
     if used.y0 < 0 then used.y0 = 0 end
     if used.y1 > pheight then used.y1 = pheight end
-    --- @todo Give size for cacheitem? 02.12 2012 (houqp)
     Cache:insert(hash, CacheItem:new{
         ubbox = used,
+        size = 256, -- might be closer to 160
     })
     page:close()
     return used
@@ -170,6 +170,7 @@ function PdfDocument:getPageLinks(pageno)
     local links = page:getPageLinks()
     Cache:insert(hash, CacheItem:new{
         links = links,
+        size = 64 + (8 * 32 * #links),
     })
     page:close()
     return links

@@ -1,6 +1,6 @@
 local Blitbuffer = require("ffi/blitbuffer")
 local CacheItem = require("cacheitem")
-local serial = require("serialize")
+local Persist = require("persist")
 local logger = require("logger")
 
 local TileCacheItem = CacheItem:new{}
@@ -12,19 +12,65 @@ function TileCacheItem:onFree()
     end
 end
 
+--- @note: Perhaps one day we'll be able to teach bitser or string.buffer about custom structs with pointers to buffers,
+---        so we won't have to do the BB tostring/fromstring dance anymore...
+function TileCacheItem:totable()
+    local t = {
+        size = self.size,
+        pageno = self.pageno,
+        excerpt = self.excerpt,
+        persistent = self.persistent,
+        bb = {
+            w = self.bb.w,
+            h = self.bb.h,
+            stride = tonumber(self.bb.stride),
+            fmt = self.bb:getType(),
+            data = Blitbuffer.tostring(self.bb),
+        },
+    }
+
+    return t
+end
+
 function TileCacheItem:dump(filename)
-    logger.dbg("dumping tile cache to", filename, self.excerpt)
-    return serial.dump(self.size, self.excerpt, self.pageno,
-            self.bb.w, self.bb.h, tonumber(self.bb.stride), self.bb:getType(),
-            Blitbuffer.tostring(self.bb), filename)
+    logger.dbg("Dumping tile cache to", filename, self.excerpt)
+
+    local cache_file = Persist:new{
+        path = filename,
+        codec = "zstd",
+    }
+
+    local ok, size = cache_file:save(self:totable())
+    if ok then
+        return size
+    else
+        logger.warn("Failed to dump tile cache")
+        return nil
+    end
+end
+
+function TileCacheItem:fromtable(t)
+    self.size = t.size
+    self.pageno = t.pageno
+    self.excerpt = t.excerpt
+    self.persistent = t.persistent
+
+    self.bb = Blitbuffer.fromstring(t.bb.w, t.bb.h, t.bb.fmt, t.bb.data, t.bb.stride)
 end
 
 function TileCacheItem:load(filename)
-    local w, h, stride, bb_type, bb_data
-    self.size, self.excerpt, self.pageno,
-            w, h, stride, bb_type, bb_data = serial.load(filename)
-    self.bb = Blitbuffer.fromstring(w, h, bb_type, bb_data, stride)
-    logger.dbg("loading tile cache from", filename, self)
+    local cache_file = Persist:new{
+        path = filename,
+        codec = "zstd",
+    }
+
+    local t = cache_file:load(filename)
+    if t then
+        self:fromtable(t)
+
+        logger.dbg("Loaded tile cache from", filename, self)
+    else
+        logger.warn("Failed to load tile cache from", filename)
+    end
 end
 
 return TileCacheItem

@@ -94,7 +94,8 @@ local codecs = {
             C.fclose(f)
             C.free(cbuff)
 
-            return true
+            --- @note: Slight API extension for TileCacheItem, which needs to know the on-disk size, and saves us a :size() call
+            return true, clen
         end,
         deserialize = function(path)
@@ -216,6 +217,9 @@ function Persist:save(t, as_bytecode)
         if not ok then
             return nil, err
         end
+
+        -- c.f., note above, err is the on-disk size
+        return true, err
     else
         local str, err = codecs[self.codec].serialize(t, as_bytecode)
         if not str then

@@ -7,7 +7,7 @@ local lfs = require("libs/libkoreader-lfs")
 local logger = require("logger")
 
 -- Date at which the last migration snippet was added
-local CURRENT_MIGRATION_DATE = 20210414
+local CURRENT_MIGRATION_DATE = 20210503
 
 -- Retrieve the date of the previous migration, if any
 local last_migration_date = G_reader_settings:readSetting("last_migration_date", 0)
@@ -209,5 +209,13 @@ if last_migration_date < 20210414 then
     end
 end
 
+-- Cache, migration to Persist, https://github.com/koreader/koreader/pull/7624
+if last_migration_date < 20210503 then
+    logger.info("Performing one-time migration for 20210503")
+
+    local Cache = require("cache")
+    Cache:clearDiskCache()
+end
+
 -- We're done, store the current migration date
 G_reader_settings:saveSetting("last_migration_date", CURRENT_MIGRATION_DATE)

@@ -113,7 +113,7 @@ function RenderText:getGlyph(face, charcode, bold)
         return
     end
     glyph = CacheItem:new{rendered_glyph}
-    glyph.size = glyph[1].bb:getWidth() * glyph[1].bb:getHeight() / 2 + 32
+    glyph.size = tonumber(glyph[1].bb.stride) * glyph[1].bb.h + 320
     GlyphCache:insert(hash, glyph)
     return rendered_glyph
 end
@@ -314,7 +314,7 @@ function RenderText:getGlyphByIndex(face, glyphindex, bold)
         return
     end
     glyph = CacheItem:new{rendered_glyph}
-    glyph.size = glyph[1].bb:getWidth() * glyph[1].bb:getHeight() / 2 + 32
+    glyph.size = tonumber(glyph[1].bb.stride) * glyph[1].bb.h + 320
     GlyphCache:insert(hash, glyph)
     return rendered_glyph
 end

@@ -39,7 +39,7 @@
 local DPI_SCALE = get_dpi_scale()
 
 local ImageCache = Cache:new{
-    max_memsize = 5*1024*1024, -- 5M of image cache
+    max_memsize = 8*1024*1024, -- 8M of image cache
     current_memsize = 0,
     cache = {},
     -- this will hold the LRU order of the cache

@@ -30,7 +30,7 @@ local CatalogCacheItem = CacheItem:new{
 -- cache catalog parsed from feed xml
 local CatalogCache = Cache:new{
-    max_memsize = 20*1024, -- keep only 20 cache items
+    max_memsize = 20*1024, -- keep only 20 items
     current_memsize = 0,
     cache = {},
     cache_order = {},
