mirror of
https://github.com/koreader/koreader
synced 2024-11-18 03:25:46 +00:00
602 lines
18 KiB
Lua
602 lines
18 KiB
Lua
local Blitbuffer = require("ffi/blitbuffer")
|
|
local CacheItem = require("cacheitem")
|
|
local Configurable = require("configurable")
|
|
local DocCache = require("document/doccache")
|
|
local DrawContext = require("ffi/drawcontext")
|
|
local CanvasContext = require("document/canvascontext")
|
|
local Geom = require("ui/geometry")
|
|
local Math = require("optmath")
|
|
local TileCacheItem = require("document/tilecacheitem")
|
|
local lfs = require("libs/libkoreader-lfs")
|
|
local logger = require("logger")
|
|
|
|
--[[
|
|
This is an abstract interface to a document
|
|
]]--
|
|
local Document = {
|
|
-- file name
|
|
file = nil,
|
|
-- engine instance
|
|
_document = nil,
|
|
|
|
links = nil, -- table
|
|
|
|
GAMMA_NO_GAMMA = 1.0,
|
|
|
|
-- override bbox from orignal page's getUsedBBox
|
|
bbox = nil, -- table
|
|
|
|
-- flag to show whether the document was opened successfully
|
|
is_open = false,
|
|
|
|
-- flag to show that the document needs to be unlocked by a password
|
|
is_locked = false,
|
|
|
|
-- flag to show that the document is edited and needs to write back to disk
|
|
is_edited = false,
|
|
|
|
-- whether this document can be rendered in color
|
|
is_color_capable = true,
|
|
-- bb type needed by engine for color rendering
|
|
color_bb_type = Blitbuffer.TYPE_BBRGB32,
|
|
|
|
-- image content stats, if supported by the engine
|
|
_drawn_images_count = nil,
|
|
_drawn_images_surface_ratio = nil,
|
|
}
|
|
|
|
function Document:extend(subclass_prototype)
|
|
local o = subclass_prototype or {}
|
|
setmetatable(o, self)
|
|
self.__index = self
|
|
return o
|
|
end
|
|
|
|
function Document:new(o)
|
|
o = self:extend(o)
|
|
if o._init then o:_init() end
|
|
if o.init then o:init() end
|
|
return o
|
|
end
|
|
|
|
-- base document initialization should be called on each document init
|
|
function Document:_init()
|
|
self.links = {}
|
|
self.bbox = {}
|
|
self.configurable = Configurable:new{}
|
|
self.info = {
|
|
-- whether the document is pageable
|
|
has_pages = false,
|
|
-- whether words can be provided
|
|
has_words = false,
|
|
-- whether hyperlinks can be provided
|
|
has_hyperlinks = false,
|
|
-- whether (native to format) annotations can be provided
|
|
has_annotations = false,
|
|
|
|
-- whether pages can be rotated
|
|
is_rotatable = false,
|
|
|
|
number_of_pages = 0,
|
|
-- if not pageable, length of the document in pixels
|
|
doc_height = 0,
|
|
|
|
-- other metadata
|
|
title = "",
|
|
author = "",
|
|
date = ""
|
|
}
|
|
|
|
-- Should be updated by a call to Document.updateColorRendering(self) in subclasses
|
|
self.render_color = false
|
|
|
|
-- Those may be updated via KOptOptions, or the DitheringUpdate event.
|
|
-- Whether HW dithering is enabled
|
|
self.hw_dithering = false
|
|
-- Whether SW dithering is enabled
|
|
self.sw_dithering = false
|
|
|
|
-- Zero-init those to be able to drop the nil guards at runtime
|
|
self._drawn_images_count = 0
|
|
self._drawn_images_surface_ratio = 0
|
|
end
|
|
|
|
-- override this method to open a document
|
|
function Document:init()
|
|
end
|
|
|
|
-- this might be overridden by a document implementation
|
|
function Document:unlock(password)
|
|
-- return true instead when the password provided unlocked the document
|
|
return false
|
|
end
|
|
|
|
-- this might be overridden by a document implementation
|
|
-- (in which case, do make sure it calls this one, too, to avoid refcounting mismatches in DocumentRegistry!)
|
|
-- Returns true if the Document instance needs to be destroyed (no more live refs),
|
|
-- false if not (i.e., we've just decreased the refcount, so, leave internal engine data alone).
|
|
-- nil if all hell broke loose.
|
|
function Document:close()
|
|
local DocumentRegistry = require("document/documentregistry")
|
|
if self.is_open then
|
|
local refcount = DocumentRegistry:closeDocument(self.file)
|
|
if refcount == 0 then
|
|
self.is_open = false
|
|
self._document:close()
|
|
self._document = nil
|
|
|
|
-- NOTE: DocumentRegistry:openDocument will force a GC sweep the next time we open a Document.
|
|
-- MµPDF will also do a bit of spring cleaning of its internal cache when opening a *different* document.
|
|
return true
|
|
else
|
|
-- This can happen in perfectly sane contexts (i.e., Reader -> History -> View fullsize cover on the *same* book).
|
|
logger.dbg("Document: Decreased refcount to", refcount, "for", self.file)
|
|
return false
|
|
end
|
|
else
|
|
logger.warn("Tried to close an already closed document:", self.file)
|
|
return nil
|
|
end
|
|
end
|
|
|
|
-- check if document is edited and needs to write to disk
|
|
function Document:isEdited()
|
|
return self.is_edited
|
|
end
|
|
|
|
-- discard change will set is_edited flag to false and implematation of Document
|
|
-- should check the is_edited flag before writing document
|
|
function Document:discardChange()
|
|
self.is_edited = false
|
|
end
|
|
|
|
-- this might be overridden by a document implementation
|
|
function Document:getNativePageDimensions(pageno)
|
|
local hash = "pgdim|"..self.file.."|"..self.mod_time.."|"..pageno
|
|
local cached = DocCache:check(hash)
|
|
if cached then
|
|
return cached[1]
|
|
end
|
|
local page = self._document:openPage(pageno)
|
|
local page_size_w, page_size_h = page:getSize(self.dc_null)
|
|
local page_size = Geom:new{ w = page_size_w, h = page_size_h }
|
|
DocCache:insert(hash, CacheItem:new{ page_size })
|
|
page:close()
|
|
return page_size
|
|
end
|
|
|
|
function Document:getDocumentProps()
|
|
-- pdfdocument, djvudocument
|
|
return self._document:getMetadata()
|
|
-- credocument, picdocument - overridden by a document implementation
|
|
end
|
|
|
|
function Document:getProps(cached_doc_metadata)
|
|
local function makeNilIfEmpty(str)
|
|
if str == "" then
|
|
return nil
|
|
end
|
|
return str
|
|
end
|
|
local props = cached_doc_metadata or self:getDocumentProps()
|
|
local title = makeNilIfEmpty(props.title or props.Title)
|
|
local authors = makeNilIfEmpty(props.authors or props.author or props.Author)
|
|
local series = makeNilIfEmpty(props.series or props.Series)
|
|
local series_index
|
|
if series and string.find(series, "#") then
|
|
-- If there's a series index in there, split it off to series_index, and only store the name in series.
|
|
-- This property is currently only set by:
|
|
-- * DjVu, for which I couldn't find a real standard for metadata fields
|
|
-- (we currently use Series for this field, c.f., https://exiftool.org/TagNames/DjVu.html).
|
|
-- * CRe, which could offer us a split getSeriesName & getSeriesNumber...
|
|
-- except getSeriesNumber does an atoi, so it'd murder decimal values.
|
|
-- So, instead, parse how it formats the whole thing as a string ;).
|
|
local series_name
|
|
series_name, series_index = series:match("(.*) #(%d+%.?%d-)$")
|
|
if series_index then
|
|
series = series_name
|
|
series_index = tonumber(series_index)
|
|
end
|
|
end
|
|
local language = makeNilIfEmpty(props.language or props.Language)
|
|
local keywords = makeNilIfEmpty(props.keywords or props.Keywords)
|
|
local description = makeNilIfEmpty(props.description or props.Description or props.subject)
|
|
return {
|
|
title = title,
|
|
authors = authors,
|
|
series = series,
|
|
series_index = series_index,
|
|
language = language,
|
|
keywords = keywords,
|
|
description = description,
|
|
}
|
|
end
|
|
|
|
function Document:_readMetadata()
|
|
self.mod_time = lfs.attributes(self.file, "modification")
|
|
self.info.number_of_pages = self._document:getPages()
|
|
return true
|
|
end
|
|
|
|
function Document:getPageCount()
|
|
return self.info.number_of_pages
|
|
end
|
|
|
|
-- Some functions that look quite silly, but they can be
|
|
-- overridden for document types that support separate flows
|
|
-- (e.g. CreDocument)
|
|
function Document:hasNonLinearFlows()
|
|
return false
|
|
end
|
|
|
|
function Document:hasHiddenFlows()
|
|
return false
|
|
end
|
|
|
|
function Document:getNextPage(page)
|
|
local new_page = page + 1
|
|
return (new_page > 0 and new_page <= self.info.number_of_pages) and new_page or 0
|
|
end
|
|
|
|
function Document:getPrevPage(page)
|
|
if page == 0 then return self.info.number_of_pages end
|
|
local new_page = page - 1
|
|
return (new_page > 0 and new_page <= self.info.number_of_pages) and new_page or 0
|
|
end
|
|
|
|
function Document:getTotalPagesLeft(page)
|
|
return self.info.number_of_pages - page
|
|
end
|
|
|
|
function Document:getPageFlow(page)
|
|
return 0
|
|
end
|
|
|
|
function Document:getFirstPageInFlow(flow)
|
|
return 1
|
|
end
|
|
|
|
function Document:getTotalPagesInFlow(flow)
|
|
return self.info.number_of_pages
|
|
end
|
|
|
|
function Document:getPageNumberInFlow(page)
|
|
return page
|
|
end
|
|
|
|
-- calculates page dimensions
|
|
function Document:getPageDimensions(pageno, zoom, rotation)
|
|
local native_dimen = self:getNativePageDimensions(pageno):copy()
|
|
if rotation == 90 or rotation == 270 then
|
|
-- switch orientation
|
|
native_dimen.w, native_dimen.h = native_dimen.h, native_dimen.w
|
|
end
|
|
-- Apply the zoom factor, and round to integer in a sensible manner
|
|
native_dimen:transformByScale(zoom)
|
|
return native_dimen
|
|
end
|
|
|
|
function Document:getPageBBox(pageno)
|
|
local bbox = self.bbox[pageno] -- exact
|
|
if bbox ~= nil then
|
|
return bbox
|
|
else
|
|
local oddEven = Math.oddEven(pageno)
|
|
bbox = self.bbox[oddEven] -- odd/even
|
|
end
|
|
if bbox ~= nil then -- last used up to this page
|
|
return bbox
|
|
else
|
|
for i = 0,pageno do
|
|
bbox = self.bbox[ pageno - i ]
|
|
if bbox ~= nil then
|
|
return bbox
|
|
end
|
|
end
|
|
end
|
|
if bbox == nil then -- fallback bbox
|
|
bbox = self:getUsedBBox(pageno)
|
|
end
|
|
return bbox
|
|
end
|
|
|
|
--[[
|
|
This method returns pagesize if bbox is corrupted
|
|
--]]
|
|
function Document:getUsedBBoxDimensions(pageno, zoom, rotation)
|
|
local bbox = self:getPageBBox(pageno)
|
|
-- clipping page bbox
|
|
if bbox.x0 < 0 then bbox.x0 = 0 end
|
|
if bbox.y0 < 0 then bbox.y0 = 0 end
|
|
if bbox.x1 and bbox.x1 < 0 then bbox.x1 = 0 end
|
|
if bbox.y1 and bbox.y1 < 0 then bbox.y1 = 0 end
|
|
local ubbox_dimen
|
|
if (not bbox.x1 or bbox.x0 >= bbox.x1) or (not bbox.y1 or bbox.y0 >= bbox.y1) then
|
|
-- if document's bbox info is corrupted, we use the page size
|
|
ubbox_dimen = self:getPageDimensions(pageno, zoom, rotation)
|
|
else
|
|
ubbox_dimen = Geom:new{
|
|
x = bbox.x0,
|
|
y = bbox.y0,
|
|
w = bbox.x1 - bbox.x0,
|
|
h = bbox.y1 - bbox.y0,
|
|
}
|
|
--- @note: Should we round this regardless of zoom?
|
|
if zoom ~= 1 then
|
|
ubbox_dimen:transformByScale(zoom)
|
|
end
|
|
end
|
|
return ubbox_dimen
|
|
end
|
|
|
|
function Document:getToc()
|
|
return self._document:getToc()
|
|
end
|
|
|
|
function Document:canHaveAlternativeToc()
|
|
return false
|
|
end
|
|
|
|
function Document:isTocAlternativeToc()
|
|
return false
|
|
end
|
|
|
|
function Document:getPageLinks(pageno)
|
|
return nil
|
|
end
|
|
|
|
function Document:getLinkFromPosition(pageno, pos)
|
|
return nil
|
|
end
|
|
|
|
function Document:getImageFromPosition(pos)
|
|
return nil
|
|
end
|
|
|
|
function Document:getTextFromPositions(pos0, pos1)
|
|
return nil
|
|
end
|
|
|
|
function Document:getTextBoxes(pageno)
|
|
return nil
|
|
end
|
|
|
|
function Document:getOCRWord(pageno, rect)
|
|
return nil
|
|
end
|
|
|
|
function Document:getCoverPageImage()
|
|
return nil
|
|
end
|
|
|
|
function Document:findText()
|
|
return nil
|
|
end
|
|
|
|
function Document:findAllText()
|
|
return nil
|
|
end
|
|
|
|
function Document:updateColorRendering()
|
|
if self.is_color_capable and CanvasContext.is_color_rendering_enabled then
|
|
self.render_color = true
|
|
else
|
|
self.render_color = false
|
|
end
|
|
end
|
|
|
|
function Document:preRenderPage()
|
|
return nil
|
|
end
|
|
|
|
function Document:postRenderPage()
|
|
return nil
|
|
end
|
|
|
|
function Document:getTileCacheValidity()
|
|
return self.tile_cache_validity_ts
|
|
end
|
|
|
|
function Document:setTileCacheValidity(ts)
|
|
self.tile_cache_validity_ts = ts
|
|
end
|
|
|
|
function Document:resetTileCacheValidity()
|
|
self.tile_cache_validity_ts = os.time()
|
|
end
|
|
|
|
function Document:getFullPageHash(pageno, zoom, rotation, gamma, render_mode, color)
|
|
return "renderpg|"..self.file.."|"..self.mod_time.."|"..pageno.."|"
|
|
..zoom.."|"..rotation.."|"..gamma.."|"..render_mode..(color and "|color" or "")
|
|
..(self.reflowable_font_size and "|"..self.reflowable_font_size or "")
|
|
end
|
|
|
|
function Document:renderPage(pageno, rect, zoom, rotation, gamma, render_mode, hinting)
|
|
local hash_excerpt
|
|
local hash = self:getFullPageHash(pageno, zoom, rotation, gamma, render_mode, self.render_color)
|
|
local tile = DocCache:check(hash, TileCacheItem)
|
|
if not tile then
|
|
hash_excerpt = hash.."|"..tostring(rect)
|
|
tile = DocCache:check(hash_excerpt)
|
|
end
|
|
if tile then
|
|
if self.tile_cache_validity_ts then
|
|
if tile.created_ts and tile.created_ts >= self.tile_cache_validity_ts then
|
|
return tile
|
|
end
|
|
logger.dbg("discarding stale cached tile")
|
|
else
|
|
return tile
|
|
end
|
|
end
|
|
|
|
if hinting then
|
|
CanvasContext:enableCPUCores(2)
|
|
end
|
|
self:preRenderPage()
|
|
|
|
local page_size = self:getPageDimensions(pageno, zoom, rotation)
|
|
-- this will be the size we actually render
|
|
local size = page_size
|
|
-- we prefer to render the full page, if it fits into cache
|
|
if not DocCache:willAccept(size.w * size.h * (self.render_color and 4 or 1) + 512) then
|
|
-- whole page won't fit into cache
|
|
logger.dbg("rendering only part of the page")
|
|
--- @todo figure out how to better segment the page
|
|
if not rect then
|
|
logger.warn("aborting, since we do not have a specification for that part")
|
|
-- required part not given, so abort
|
|
if hinting then
|
|
CanvasContext:enableCPUCores(1)
|
|
end
|
|
return
|
|
end
|
|
-- only render required part
|
|
hash = hash_excerpt
|
|
size = rect
|
|
end
|
|
|
|
-- prepare cache item with contained blitbuffer
|
|
tile = TileCacheItem:new{
|
|
persistent = true,
|
|
doc_path = self.file,
|
|
created_ts = os.time(),
|
|
excerpt = size,
|
|
pageno = pageno,
|
|
bb = Blitbuffer.new(size.w, size.h, self.render_color and self.color_bb_type or nil)
|
|
}
|
|
tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
|
|
|
|
-- create a draw context
|
|
local dc = DrawContext.new()
|
|
|
|
dc:setRotate(rotation)
|
|
-- correction of rotation
|
|
if rotation == 90 then
|
|
dc:setOffset(page_size.w, 0)
|
|
elseif rotation == 180 then
|
|
dc:setOffset(page_size.w, page_size.h)
|
|
elseif rotation == 270 then
|
|
dc:setOffset(0, page_size.h)
|
|
end
|
|
dc:setZoom(zoom)
|
|
|
|
if gamma ~= self.GAMMA_NO_GAMMA then
|
|
dc:setGamma(gamma)
|
|
end
|
|
|
|
-- render
|
|
local page = self._document:openPage(pageno)
|
|
page:draw(dc, tile.bb, size.x, size.y, render_mode)
|
|
page:close()
|
|
DocCache:insert(hash, tile)
|
|
|
|
self:postRenderPage()
|
|
if hinting then
|
|
CanvasContext:enableCPUCores(1)
|
|
end
|
|
return tile
|
|
end
|
|
|
|
-- a hint for the cache engine to paint a full page to the cache
|
|
--- @todo this should trigger a background operation
|
|
function Document:hintPage(pageno, zoom, rotation, gamma, render_mode)
|
|
logger.dbg("hinting page", pageno)
|
|
self:renderPage(pageno, nil, zoom, rotation, gamma, render_mode, true)
|
|
end
|
|
|
|
--[[
|
|
Draw page content to blitbuffer.
|
|
1. find tile in cache
|
|
2. if not found, call renderPage
|
|
|
|
@target: target blitbuffer
|
|
@rect: visible_area inside document page
|
|
--]]
|
|
function Document:drawPage(target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
|
|
local tile = self:renderPage(pageno, rect, zoom, rotation, gamma, render_mode)
|
|
-- Enable SW dithering if requested (only available in koptoptions)
|
|
if self.sw_dithering then
|
|
target:ditherblitFrom(tile.bb,
|
|
x, y,
|
|
rect.x - tile.excerpt.x,
|
|
rect.y - tile.excerpt.y,
|
|
rect.w, rect.h)
|
|
else
|
|
target:blitFrom(tile.bb,
|
|
x, y,
|
|
rect.x - tile.excerpt.x,
|
|
rect.y - tile.excerpt.y,
|
|
rect.w, rect.h)
|
|
end
|
|
end
|
|
|
|
function Document:getDrawnImagesStatistics()
|
|
-- For now, only set by CreDocument in CreDocument:drawCurrentView()
|
|
-- Returns 0, 0 (as per Document:init) otherwise.
|
|
return self._drawn_images_count, self._drawn_images_surface_ratio
|
|
end
|
|
|
|
function Document:getPagePart(pageno, rect, rotation)
|
|
local canvas_size = CanvasContext:getSize()
|
|
local zoom = math.min(canvas_size.w*2 / rect.w, canvas_size.h*2 / rect.h)
|
|
-- it's really, really important to do math.floor, otherwise we get image projection
|
|
local scaled_rect = {
|
|
x = math.floor(rect.x * zoom),
|
|
y = math.floor(rect.y * zoom),
|
|
w = math.floor(rect.w * zoom),
|
|
h = math.floor(rect.h * zoom),
|
|
}
|
|
local tile = self:renderPage(pageno, scaled_rect, zoom, rotation, 1, 0)
|
|
local target = Blitbuffer.new(scaled_rect.w, scaled_rect.h, self.render_color and self.color_bb_type or nil)
|
|
target:blitFrom(tile.bb, 0, 0, scaled_rect.x, scaled_rect.y, scaled_rect.w, scaled_rect.h)
|
|
return target
|
|
end
|
|
|
|
function Document:getPageText(pageno)
|
|
-- is this worth caching? not done yet.
|
|
local page = self._document:openPage(pageno)
|
|
local text = page:getPageText()
|
|
page:close()
|
|
return text
|
|
end
|
|
|
|
function Document:saveHighlight(pageno, item)
|
|
return nil
|
|
end
|
|
|
|
function Document:deleteHighlight(pageno, item)
|
|
return nil
|
|
end
|
|
|
|
function Document:updateHighlightContents(pageno, item, contents)
|
|
return nil
|
|
end
|
|
|
|
--[[
|
|
helper functions
|
|
--]]
|
|
function Document:logMemoryUsage(pageno)
|
|
local status_file = io.open("/proc/self/status", "r")
|
|
local log_file = io.open("mem_usage_log.txt", "a+")
|
|
local data = -1
|
|
if status_file then
|
|
for line in status_file:lines() do
|
|
local s, n
|
|
s, n = line:gsub("VmData:%s-(%d+) kB", "%1")
|
|
if n ~= 0 then data = tonumber(s) end
|
|
if data ~= -1 then break end
|
|
end
|
|
status_file:close()
|
|
end
|
|
if log_file then
|
|
if log_file:seek("end") == 0 then -- write the header only once
|
|
log_file:write("PAGE\tMEM\n")
|
|
end
|
|
log_file:write(string.format("%s\t%s\n", pageno, data))
|
|
log_file:close()
|
|
end
|
|
end
|
|
|
|
return Document
|