2
0
mirror of https://github.com/koreader/koreader synced 2024-11-10 01:10:34 +00:00
koreader/frontend/document/document.lua
NiLuJe 46a5d20513
Document: Do not cache panel-zoom tiles to disk and fix their caching and rendering (#12303)
* Use a dedicated cache hash for partial tiles from panel-zoom
* Never dump them to disk, as it confuses DocCache's crappy heuristics that rewinds the cache to skip over the hinted page to try to dump the on-screen page to disk.
* Apply the zoom factor in the exact same way as any other page rect (i.e., floor coordinates, ceil dimensions), and make sure said rect is actually a Geom so it doesn't break the cache hash, which relies on Geom's custom tostring method for rects. Said scaling method *also* belongs to the Geom class anyway.
* Handle such pre-scaled rects properly in renderPage, so as not to apply the zoom factor to the full page, which would attempt to create a gigantic buffer.
* And now that the rect is rendered properly in an appropriately-sized buffer, use the rendered tile as-is, no need to blit it to another (potentially way too large because of the above issue) blank BB.
* The zoom factor is now computed for a scale to best-fit (honoring `imageviewer_rotate_auto_for_best_fit`), ensuring the best efficiency (ImageViewer won't have to re-scale).
* Cache: Reduce the maximum item size to 50% of the cache, instead of 75%.
* Warn about the legacy ReaderRotation module, as it turned out to be horribly broken. The whole machinery (which is spread over *a lot* of various codepaths) is left as-is, peppered with notes & fixmes hinting at the problem. Thankfully, that's not how we actually handle rotation, so it was probably hardly ever used (which possibly explains why nobody ever noticed it breaking, and that nugget possibly dates back to the inception of the kpv -> ko refactor!). (#12309)
2024-08-08 04:52:24 +02:00

638 lines
20 KiB
Lua

local Blitbuffer = require("ffi/blitbuffer")
local CacheItem = require("cacheitem")
local Configurable = require("configurable")
local DocCache = require("document/doccache")
local DrawContext = require("ffi/drawcontext")
local CanvasContext = require("document/canvascontext")
local Geom = require("ui/geometry")
local Math = require("optmath")
local TileCacheItem = require("document/tilecacheitem")
local lfs = require("libs/libkoreader-lfs")
local logger = require("logger")
--[[
This is an abstract interface to a document
]]--
local Document = {
-- file name
file = nil,
-- engine instance
_document = nil,
links = nil, -- table
GAMMA_NO_GAMMA = 1.0,
-- override bbox from orignal page's getUsedBBox
bbox = nil, -- table
-- flag to show whether the document was opened successfully
is_open = false,
-- flag to show that the document needs to be unlocked by a password
is_locked = false,
-- flag to show that the document is edited and needs to write back to disk
is_edited = false,
-- whether this document can be rendered in color
is_color_capable = true,
-- bb type needed by engine for color rendering
color_bb_type = Blitbuffer.TYPE_BBRGB32,
-- image content stats, if supported by the engine
_drawn_images_count = nil,
_drawn_images_surface_ratio = nil,
}
function Document:extend(subclass_prototype)
local o = subclass_prototype or {}
setmetatable(o, self)
self.__index = self
return o
end
function Document:new(o)
o = self:extend(o)
if o._init then o:_init() end
if o.init then o:init() end
return o
end
-- base document initialization should be called on each document init
function Document:_init()
self.links = {}
self.bbox = {}
self.configurable = Configurable:new{}
self.info = {
-- whether the document is pageable
has_pages = false,
-- whether words can be provided
has_words = false,
-- whether hyperlinks can be provided
has_hyperlinks = false,
-- whether (native to format) annotations can be provided
has_annotations = false,
-- whether pages can be rotated
is_rotatable = false,
number_of_pages = 0,
-- if not pageable, length of the document in pixels
doc_height = 0,
-- other metadata
title = "",
author = "",
date = ""
}
-- Should be updated by a call to Document.updateColorRendering(self) in subclasses
self.render_color = false
-- Those may be updated via KOptOptions, or the DitheringUpdate event.
-- Whether HW dithering is enabled
self.hw_dithering = false
-- Whether SW dithering is enabled
self.sw_dithering = false
-- Zero-init those to be able to drop the nil guards at runtime
self._drawn_images_count = 0
self._drawn_images_surface_ratio = 0
end
-- override this method to open a document
function Document:init()
end
-- this might be overridden by a document implementation
function Document:unlock(password)
-- return true instead when the password provided unlocked the document
return false
end
-- this might be overridden by a document implementation
-- (in which case, do make sure it calls this one, too, to avoid refcounting mismatches in DocumentRegistry!)
-- Returns true if the Document instance needs to be destroyed (no more live refs),
-- false if not (i.e., we've just decreased the refcount, so, leave internal engine data alone).
-- nil if all hell broke loose.
function Document:close()
local DocumentRegistry = require("document/documentregistry")
if self.is_open then
local refcount = DocumentRegistry:closeDocument(self.file)
if refcount == 0 then
self.is_open = false
self._document:close()
self._document = nil
-- NOTE: DocumentRegistry:openDocument will force a GC sweep the next time we open a Document.
-- MµPDF will also do a bit of spring cleaning of its internal cache when opening a *different* document.
return true
else
-- This can happen in perfectly sane contexts (i.e., Reader -> History -> View fullsize cover on the *same* book).
logger.dbg("Document: Decreased refcount to", refcount, "for", self.file)
return false
end
else
logger.warn("Tried to close an already closed document:", self.file)
return nil
end
end
-- check if document is edited and needs to write to disk
function Document:isEdited()
return self.is_edited
end
-- discard change will set is_edited flag to false and implematation of Document
-- should check the is_edited flag before writing document
function Document:discardChange()
self.is_edited = false
end
-- this might be overridden by a document implementation
function Document:getNativePageDimensions(pageno)
local hash = "pgdim|"..self.file.."|"..self.mod_time.."|"..pageno
local cached = DocCache:check(hash)
if cached then
return cached[1]
end
local page = self._document:openPage(pageno)
local page_size_w, page_size_h = page:getSize(self.dc_null)
local page_size = Geom:new{ w = page_size_w, h = page_size_h }
DocCache:insert(hash, CacheItem:new{ page_size })
page:close()
return page_size
end
function Document:getDocumentProps()
-- pdfdocument, djvudocument
return self._document:getMetadata()
-- credocument, picdocument - overridden by a document implementation
end
function Document:getProps(cached_doc_metadata)
local function makeNilIfEmpty(str)
if str == "" then
return nil
end
return str
end
local props = cached_doc_metadata or self:getDocumentProps()
local title = makeNilIfEmpty(props.title or props.Title)
local authors = makeNilIfEmpty(props.authors or props.author or props.Author)
local series = makeNilIfEmpty(props.series or props.Series)
local series_index
if series and string.find(series, "#") then
-- If there's a series index in there, split it off to series_index, and only store the name in series.
-- This property is currently only set by:
-- * DjVu, for which I couldn't find a real standard for metadata fields
-- (we currently use Series for this field, c.f., https://exiftool.org/TagNames/DjVu.html).
-- * CRe, which could offer us a split getSeriesName & getSeriesNumber...
-- except getSeriesNumber does an atoi, so it'd murder decimal values.
-- So, instead, parse how it formats the whole thing as a string ;).
local series_name
series_name, series_index = series:match("(.*) #(%d+%.?%d-)$")
if series_index then
series = series_name
series_index = tonumber(series_index)
end
end
local language = makeNilIfEmpty(props.language or props.Language)
local keywords = makeNilIfEmpty(props.keywords or props.Keywords)
local description = makeNilIfEmpty(props.description or props.Description or props.subject)
local identifiers = makeNilIfEmpty(props.identifiers)
return {
title = title,
authors = authors,
series = series,
series_index = series_index,
language = language,
keywords = keywords,
description = description,
identifiers = identifiers,
}
end
function Document:_readMetadata()
self.mod_time = lfs.attributes(self.file, "modification")
self.info.number_of_pages = self._document:getPages()
return true
end
function Document:getPageCount()
return self.info.number_of_pages
end
-- Some functions that look quite silly, but they can be
-- overridden for document types that support separate flows
-- (e.g. CreDocument)
function Document:hasNonLinearFlows()
return false
end
function Document:hasHiddenFlows()
return false
end
function Document:getNextPage(page)
local new_page = page + 1
return (new_page > 0 and new_page <= self.info.number_of_pages) and new_page or 0
end
function Document:getPrevPage(page)
if page == 0 then return self.info.number_of_pages end
local new_page = page - 1
return (new_page > 0 and new_page <= self.info.number_of_pages) and new_page or 0
end
function Document:getTotalPagesLeft(page)
return self.info.number_of_pages - page
end
function Document:getPageFlow(page)
return 0
end
function Document:getFirstPageInFlow(flow)
return 1
end
function Document:getTotalPagesInFlow(flow)
return self.info.number_of_pages
end
function Document:getPageNumberInFlow(page)
return page
end
-- Transform a given rect according to the specified zoom & rotation
function Document:transformRect(native_rect, zoom, rotation)
local rect = native_rect:copy()
if rotation == 90 or rotation == 270 then
-- switch orientation
rect.w, rect.h = rect.h, rect.w
end
-- Apply the zoom factor, and round to integer in a sensible manner
rect:transformByScale(zoom)
return rect
end
-- Ditto, but we get the input rect from the full page dimensions for a given page number
function Document:getPageDimensions(pageno, zoom, rotation)
local native_rect = self:getNativePageDimensions(pageno)
return self:transformRect(native_rect, zoom, rotation)
end
function Document:getPageBBox(pageno)
local bbox = self.bbox[pageno] -- exact
if bbox ~= nil then
return bbox
else
local oddEven = Math.oddEven(pageno)
bbox = self.bbox[oddEven] -- odd/even
end
if bbox ~= nil then -- last used up to this page
return bbox
else
for i = 0,pageno do
bbox = self.bbox[ pageno - i ]
if bbox ~= nil then
return bbox
end
end
end
if bbox == nil then -- fallback bbox
bbox = self:getUsedBBox(pageno)
end
return bbox
end
--[[
This method returns pagesize if bbox is corrupted
--]]
function Document:getUsedBBoxDimensions(pageno, zoom, rotation)
local bbox = self:getPageBBox(pageno)
-- clipping page bbox
if bbox.x0 < 0 then bbox.x0 = 0 end
if bbox.y0 < 0 then bbox.y0 = 0 end
if bbox.x1 and bbox.x1 < 0 then bbox.x1 = 0 end
if bbox.y1 and bbox.y1 < 0 then bbox.y1 = 0 end
local ubbox_dimen
if (not bbox.x1 or bbox.x0 >= bbox.x1) or (not bbox.y1 or bbox.y0 >= bbox.y1) then
-- if document's bbox info is corrupted, we use the page size
ubbox_dimen = self:getPageDimensions(pageno, zoom, rotation)
else
ubbox_dimen = Geom:new{
x = bbox.x0,
y = bbox.y0,
w = bbox.x1 - bbox.x0,
h = bbox.y1 - bbox.y0,
}
--- @note: Should we round this regardless of zoom?
if zoom ~= 1 then
ubbox_dimen:transformByScale(zoom)
end
end
return ubbox_dimen
end
function Document:getToc()
return self._document:getToc()
end
function Document:canHaveAlternativeToc()
return false
end
function Document:isTocAlternativeToc()
return false
end
function Document:getPageLinks(pageno)
return nil
end
function Document:getLinkFromPosition(pageno, pos)
return nil
end
function Document:getImageFromPosition(pos)
return nil
end
function Document:getTextFromPositions(pos0, pos1)
return nil
end
function Document:getTextBoxes(pageno)
return nil
end
function Document:getOCRWord(pageno, rect)
return nil
end
function Document:getCoverPageImage()
return nil
end
function Document:findText()
return nil
end
function Document:findAllText()
return nil
end
function Document:updateColorRendering()
if self.is_color_capable and CanvasContext.is_color_rendering_enabled then
self.render_color = true
else
self.render_color = false
end
end
function Document:getTileCacheValidity()
return self.tile_cache_validity_ts
end
function Document:setTileCacheValidity(ts)
self.tile_cache_validity_ts = ts
end
function Document:resetTileCacheValidity()
self.tile_cache_validity_ts = os.time()
end
function Document:getFullPageHash(pageno, zoom, rotation, gamma)
return "renderpg|"..self.file.."|"..self.mod_time.."|"..pageno.."|"
..zoom.."|"
..rotation.."|"..gamma.."|"..self.render_mode..(self.render_color and "|color" or "|bw")
..(self.reflowable_font_size and "|"..self.reflowable_font_size or "")
end
function Document:getPagePartHash(pageno, zoom, rotation, gamma, rect)
return "renderpgpart|"..self.file.."|"..self.mod_time.."|"..pageno.."|"
..tostring(rect).."|"..zoom.."|"..tostring(rect.scaled_rect).."|"
..rotation.."|"..gamma.."|"..self.render_mode..(self.render_color and "|color" or "|bw")
..(self.reflowable_font_size and "|"..self.reflowable_font_size or "")
end
function Document:renderPage(pageno, rect, zoom, rotation, gamma, hinting)
-- If rect contains a nested scaled_rect object, our caller handled scaling itself (e.g., drawPagePart)
local is_prescaled = rect and rect.scaled_rect ~= nil or false
local hash, hash_excerpt, tile
if is_prescaled then
hash = self:getPagePartHash(pageno, zoom, rotation, gamma, rect)
tile = DocCache:check(hash, TileCacheItem)
else
hash = self:getFullPageHash(pageno, zoom, rotation, gamma)
tile = DocCache:check(hash, TileCacheItem)
-- In the is_prescaled branch above, we're *already* only rendering part of the page
if not tile and rect then
hash_excerpt = hash.."|"..tostring(rect)
tile = DocCache:check(hash_excerpt)
end
end
if tile then
if self.tile_cache_validity_ts then
if tile.created_ts and tile.created_ts >= self.tile_cache_validity_ts then
return tile
end
logger.dbg("discarding stale cached tile")
else
return tile
end
end
if hinting then
CanvasContext:enableCPUCores(2)
end
local page_size = self:getPageDimensions(pageno, zoom, rotation)
-- This will be the actual render size (i.e., the BB's dimensions)
local size
if is_prescaled then
-- Our caller already handled the scaling, honor it.
-- And we don't particulalry care whether DocCache will be able to cache it in RAM, so no need to double-check.
size = rect.scaled_rect
else
-- We prefer to render the full page, if it fits into cache...
size = page_size
if not DocCache:willAccept(size.w * size.h * (self.render_color and 4 or 1) + 512) then
-- ...and if it doesn't...
logger.dbg("Attempting to render only part of the page:", rect)
--- @todo figure out how to better segment the page
if not rect then
logger.warn("No render region was specified, we won't render the page at all!")
-- no rect specified, abort
if hinting then
CanvasContext:enableCPUCores(1)
end
return
end
-- ...only render the requested rect
hash = hash_excerpt
size = rect
end
end
-- Prepare our BB, and wrap it in a cache item for DocCache
tile = TileCacheItem:new{
persistent = not is_prescaled, -- we don't want to dump page fragments to disk (unnecessary, and it would confuse DocCache's heuristics)
doc_path = self.file,
created_ts = os.time(),
excerpt = size,
pageno = pageno,
bb = Blitbuffer.new(size.w, size.h, self.render_color and self.color_bb_type or nil)
}
tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
-- We need a draw context
local dc = DrawContext.new()
dc:setRotate(rotation)
-- Make the context match the rotation,
-- by pointing at the rotated origin via coordinates offsets.
-- NOTE: We rotate our *Screen* bb on rotation (SetRotationMode), not the document,
-- so we hardly ever exercize this codepath...
-- AFAICT, the only thing that will *ever* (attempt to) rotate the document is ReaderRotation's key bindings (RotationUpdate).
--- @fixme: And whaddayano, it's broken ;). The aptly named key binds in question are J/K, I shit you not.
if rotation == 90 then
dc:setOffset(page_size.w, 0)
elseif rotation == 180 then
dc:setOffset(page_size.w, page_size.h)
elseif rotation == 270 then
dc:setOffset(0, page_size.h)
end
dc:setZoom(zoom)
if gamma ~= self.GAMMA_NO_GAMMA then
dc:setGamma(gamma)
end
-- And finally, render the page in our BB
local page = self._document:openPage(pageno)
page:draw(dc, tile.bb, size.x, size.y, self.render_mode)
page:close()
DocCache:insert(hash, tile)
if hinting then
CanvasContext:enableCPUCores(1)
end
return tile
end
-- a hint for the cache engine to paint a full page to the cache
--- @todo this should trigger a background operation
function Document:hintPage(pageno, zoom, rotation, gamma)
logger.dbg("hinting page", pageno)
self:renderPage(pageno, nil, zoom, rotation, gamma, true)
end
--[[
Draw page content to blitbuffer.
1. find tile in cache
2. if not found, call renderPage
@target: target blitbuffer
@rect: visible_area inside document page
--]]
function Document:drawPage(target, x, y, rect, pageno, zoom, rotation, gamma)
local tile = self:renderPage(pageno, rect, zoom, rotation, gamma)
-- Enable SW dithering if requested (only available in koptoptions)
if self.sw_dithering then
target:ditherblitFrom(tile.bb,
x, y,
rect.x - tile.excerpt.x,
rect.y - tile.excerpt.y,
rect.w, rect.h)
else
target:blitFrom(tile.bb,
x, y,
rect.x - tile.excerpt.x,
rect.y - tile.excerpt.y,
rect.w, rect.h)
end
end
function Document:getDrawnImagesStatistics()
-- For now, only set by CreDocument in CreDocument:drawCurrentView()
-- Returns 0, 0 (as per Document:init) otherwise.
return self._drawn_images_count, self._drawn_images_surface_ratio
end
function Document:drawPagePart(pageno, native_rect, rotation)
-- native_rect is straight from base, so not a Geom
local rect = Geom:new(native_rect)
local canvas_size = CanvasContext:getSize()
-- Compute a zoom in order to scale to best fit, so that ImageViewer doesn't have to rescale further.
-- Optionally, based on ImageViewer settings, we'll auto-rotate for the best resolution.
local rotate = false
if G_reader_settings:isTrue("imageviewer_rotate_auto_for_best_fit") then
rotate = (canvas_size.w > canvas_size.h) ~= (rect.w > rect.h)
end
local zoom = rotate and math.min(canvas_size.w / rect.h, canvas_size.h / rect.w) or math.min(canvas_size.w / rect.w, canvas_size.h / rect.h)
local scaled_rect = self:transformRect(rect, zoom, rotation)
-- Stuff it inside rect so renderPage knows we're handling scaling ourselves
rect.scaled_rect = scaled_rect
-- Enable SMP via the hinting flag
local tile = self:renderPage(pageno, rect, zoom, rotation, 1.0, true)
return tile.bb, rotate
end
function Document:getPageText(pageno)
-- is this worth caching? not done yet.
local page = self._document:openPage(pageno)
local text = page:getPageText()
page:close()
return text
end
function Document:saveHighlight(pageno, item)
return nil
end
function Document:deleteHighlight(pageno, item)
return nil
end
function Document:updateHighlightContents(pageno, item, contents)
return nil
end
--[[
helper functions
--]]
function Document:logMemoryUsage(pageno)
local status_file = io.open("/proc/self/status", "r")
local log_file = io.open("mem_usage_log.txt", "a+")
local data = -1
if status_file then
for line in status_file:lines() do
local s, n
s, n = line:gsub("VmData:%s-(%d+) kB", "%1")
if n ~= 0 then data = tonumber(s) end
if data ~= -1 then break end
end
status_file:close()
end
if log_file then
if log_file:seek("end") == 0 then -- write the header only once
log_file:write("PAGE\tMEM\n")
end
log_file:write(string.format("%s\t%s\n", pageno, data))
log_file:close()
end
end
return Document