2
0
mirror of https://github.com/koreader/koreader synced 2024-11-10 01:10:34 +00:00
koreader/frontend/document/pdfdocument.lua

406 lines
15 KiB
Lua
Raw Normal View History

2024-08-30 20:10:43 +00:00
local BlitBuffer = require("ffi/blitbuffer")
2013-10-18 20:38:07 +00:00
local CacheItem = require("cacheitem")
local CanvasContext = require("document/canvascontext")
local DocCache = require("document/doccache")
local DocSettings = require("docsettings")
2013-10-18 20:38:07 +00:00
local Document = require("document/document")
2013-12-31 05:12:56 +00:00
local DrawContext = require("ffi/drawcontext")
local logger = require("logger")
local util = require("util")
local ffi = require("ffi")
local C = ffi.C
local pdf = nil
2013-10-18 20:38:07 +00:00
Clarify our OOP semantics across the codebase (#9586) Basically: * Use `extend` for class definitions * Use `new` for object instantiations That includes some minor code cleanups along the way: * Updated `Widget`'s docs to make the semantics clearer. * Removed `should_restrict_JIT` (it's been dead code since https://github.com/koreader/android-luajit-launcher/pull/283) * Minor refactoring of LuaSettings/LuaData/LuaDefaults/DocSettings to behave (mostly, they are instantiated via `open` instead of `new`) like everything else and handle inheritance properly (i.e., DocSettings is now a proper LuaSettings subclass). * Default to `WidgetContainer` instead of `InputContainer` for stuff that doesn't actually setup key/gesture events. * Ditto for explicit `*Listener` only classes, make sure they're based on `EventListener` instead of something uselessly fancier. * Unless absolutely necessary, do not store references in class objects, ever; only values. Instead, always store references in instances, to avoid both sneaky inheritance issues, and sneaky GC pinning of stale references. * ReaderUI: Fix one such issue with its `active_widgets` array, with critical implications, as it essentially pinned *all* of ReaderUI's modules, including their reference to the `Document` instance (i.e., that was a big-ass leak). * Terminal: Make sure the shell is killed on plugin teardown. * InputText: Fix Home/End/Del physical keys to behave sensibly. * InputContainer/WidgetContainer: If necessary, compute self.dimen at paintTo time (previously, only InputContainers did, which might have had something to do with random widgets unconcerned about input using it as a baseclass instead of WidgetContainer...). * OverlapGroup: Compute self.dimen at *init* time, because for some reason it needs to do that, but do it directly in OverlapGroup instead of going through a weird WidgetContainer method that it was the sole user of. 
* ReaderCropping: Under no circumstances should a Document instance member (here, self.bbox) risk being `nil`ed! * Kobo: Minor code cleanups.
2022-10-06 00:14:48 +00:00
-- MuPDF-backed document class. The entries below are class-level defaults.
local PdfDocument = Document:extend{
    -- Provider identification.
    provider = "mupdf",
    provider_name = "MuPDF",
    is_pdf = true,
    -- Engine handle; stays `false` until init() has opened the file.
    _document = false,
    -- Assigned by init() to the "document/koptinterface" module.
    koptinterface = nil,
    -- Draw context handed to page:getSize() in getUsedBBox().
    dc_null = DrawContext.new(),
}
--- Opens `self.file` through MuPDF and initialises the instance state.
-- Loads the MuPDF binding on first use, hooks up koptinterface, applies the
-- colour-rendering setting and lays the document out. Metadata is read right
-- away unless the document reports that it needs a password; in that case
-- `is_locked` is set and unlock() has to be called before metadata is read.
-- Raises the error produced by pdf.openDocument when the file cannot be opened.
function PdfDocument:init()
    if not pdf then pdf = require("ffi/mupdf") end
    self.koptinterface = require("document/koptinterface")
    self.koptinterface:setDefaultConfigurable(self.configurable)
    -- Hold the pcall result in a local: on failure the second value is an
    -- error message, and it must not end up in self._document, which other
    -- methods test for truthiness to know whether a document is open.
    local ok, doc_or_err = pcall(pdf.openDocument, self.file)
    if not ok then
        error(doc_or_err) -- will contain error message
    end
    self._document = doc_or_err
    self:updateColorRendering()
    self.is_reflowable = self._document:isDocumentReflowable()
    self.reflowable_font_size = self:convertKoptToReflowableFontSize()
    -- no-op on PDF
    self:layoutDocument()
    self.is_open = true
    self.info.has_pages = true
    self.info.configurable = true
    self.render_mode = 0
    if self._document:needsPassword() then
        self.is_locked = true
    else
        self:_readMetadata()
    end
end
--- Refreshes `self.render_color` via the base class and, when a document is
-- open, forwards the new value to the MuPDF document.
function PdfDocument:updateColorRendering()
    Document.updateColorRendering(self) -- will set self.render_color
    local doc = self._document
    if not doc then
        -- Nothing opened yet: only the flag on self is updated.
        return
    end
    doc:setColorRendering(self.render_color)
end
--- Lays the document out for the current canvas dimensions.
-- @param font_size optional; when given it replaces `self.reflowable_font_size`
--   before the layout call.
function PdfDocument:layoutDocument(font_size)
    if font_size then
        self.reflowable_font_size = font_size
    end
    local doc = self._document
    local canvas_w = CanvasContext:getWidth()
    local canvas_h = CanvasContext:getHeight()
    -- The font size is passed through CanvasContext:scaleBySize() first.
    doc:layoutDocument(canvas_w, canvas_h,
        CanvasContext:scaleBySize(self.reflowable_font_size))
end
-- Base size that koptreader font-size factors are multiplied by.
local default_font_size = 22

--- Converts a koptreader font-size factor into a regular font size.
-- the koptreader config goes from 0.1 to 3.0, but we want a regular font size
-- @param font_size optional factor; when given it is used directly.
-- @return the factor times `default_font_size`. Without an argument the factor
--   is taken from, in order: the document's sidecar settings, the global reader
--   settings, the defaults; if none has one, `default_font_size` is returned.
function PdfDocument:convertKoptToReflowableFontSize(font_size)
    if font_size then
        return font_size * default_font_size
    end

    -- Per-document setting, only consulted when a sidecar file exists.
    local doc_factor
    if DocSettings:hasSidecarFile(self.file) then
        doc_factor = DocSettings:open(self.file):readSetting("kopt_font_size")
    end
    if doc_factor then
        return doc_factor * default_font_size
    end

    if G_reader_settings:readSetting("kopt_font_size") then
        return G_reader_settings:readSetting("kopt_font_size") * default_font_size
    end

    if G_defaults:readSetting("DKOPTREADER_CONFIG_FONT_SIZE") then
        return G_defaults:readSetting("DKOPTREADER_CONFIG_FONT_SIZE") * default_font_size
    end

    return default_font_size
end
--- Tries to unlock the document with `password`.
-- @return true when the password was accepted (the lock flag is cleared and
--   metadata is read), false otherwise.
function PdfDocument:unlock(password)
    local accepted = self._document:authenticatePassword(password)
    if accepted then
        self.is_locked = false
        self:_readMetadata()
        return true
    end
    return false
end
--- Delegates to koptinterface:comparePositions, passing this document along.
function PdfDocument:comparePositions(pos1, pos2)
    local kopt = self.koptinterface
    return kopt:comparePositions(self, pos1, pos2)
end
--- Returns the result of page:getPageText() for page `pageno`, cached in DocCache.
-- The cache key includes the current reflowable font size, like the keys used
-- by getUsedBBox() and getPageLinks(), so that text cached for a previous
-- layout is not handed back after the font size has changed.
-- @param pageno page number passed to openPage()
-- @return the page text object (from the cache when available)
function PdfDocument:getPageTextBoxes(pageno)
    local hash = "textbox|"..self.file.."|"..self.reflowable_font_size.."|"..pageno
    local cached = DocCache:check(hash)
    if cached then
        return cached.text
    end
    local page = self._document:openPage(pageno)
    local text = page:getPageText()
    -- The page is closed before the result is stored in the cache.
    page:close()
    DocCache:insert(hash, CacheItem:new{text=text, size=text.size})
    return text
end
--- Delegates to koptinterface:getPanelFromPage, passing this document along.
function PdfDocument:getPanelFromPage(pageno, pos)
    local kopt = self.koptinterface
    return kopt:getPanelFromPage(self, pageno, pos)
end
--- Delegates to koptinterface:getWordFromPosition, passing this document along.
function PdfDocument:getWordFromPosition(spos)
    local kopt = self.koptinterface
    return kopt:getWordFromPosition(self, spos)
end
--- Delegates to koptinterface:getTextFromPositions, passing this document along.
function PdfDocument:getTextFromPositions(spos0, spos1)
    local kopt = self.koptinterface
    return kopt:getTextFromPositions(self, spos0, spos1)
end
--- Delegates to koptinterface:getTextBoxes, passing this document along.
function PdfDocument:getTextBoxes(pageno)
    local kopt = self.koptinterface
    return kopt:getTextBoxes(self, pageno)
end
--- Delegates to koptinterface:getPageBoxesFromPositions, passing this document along.
function PdfDocument:getPageBoxesFromPositions(pageno, ppos0, ppos1)
    local kopt = self.koptinterface
    return kopt:getPageBoxesFromPositions(self, pageno, ppos0, ppos1)
end
--- Delegates to koptinterface:nativeToPageRectTransform, passing this document along.
function PdfDocument:nativeToPageRectTransform(pageno, rect)
    local kopt = self.koptinterface
    return kopt:nativeToPageRectTransform(self, pageno, rect)
end
--- Delegates to koptinterface:getSelectedWordContext.
-- NOTE(review): unlike the sibling delegations, the document itself is not
-- passed as an argument here; presumably intentional, confirm against koptinterface.
function PdfDocument:getSelectedWordContext(word, nb_words, pos)
    local kopt = self.koptinterface
    return kopt:getSelectedWordContext(word, nb_words, pos)
end
--- Delegates to koptinterface:getOCRWord, passing this document along.
function PdfDocument:getOCRWord(pageno, wbox)
    local kopt = self.koptinterface
    return kopt:getOCRWord(self, pageno, wbox)
end
--- Delegates to koptinterface:getOCRText, passing this document along.
function PdfDocument:getOCRText(pageno, tboxes)
    local kopt = self.koptinterface
    return kopt:getOCRText(self, pageno, tboxes)
end
--- Delegates to koptinterface:getPageBlock, passing this document along.
function PdfDocument:getPageBlock(pageno, x, y)
    local kopt = self.koptinterface
    return kopt:getPageBlock(self, pageno, x, y)
end
--- Returns the used bounding box of page `pageno`, clamped to the page size.
-- Results are cached in DocCache under a key made of the file, the current
-- reflowable font size and the page number.
-- @return table with fields x0, y0, x1, y1
function PdfDocument:getUsedBBox(pageno)
    local hash = "pgubbox|"..self.file.."|"..self.reflowable_font_size.."|"..pageno
    local hit = DocCache:check(hash)
    if hit then
        return hit.ubbox
    end

    local page = self._document:openPage(pageno)
    local x0, y0, x1, y1 = page:getUsedBBox()
    local page_w, page_h = page:getSize(self.dc_null)
    page:close()

    -- clamp to page BBox
    if x0 < 0 then x0 = 0 end
    if x1 > page_w then x1 = page_w end
    if y0 < 0 then y0 = 0 end
    if y1 > page_h then y1 = page_h end

    local bbox = { x0 = x0, y0 = y0, x1 = x1, y1 = y1 }
    DocCache:insert(hash, CacheItem:new{
        ubbox = bbox,
        size = 256, -- might be closer to 160
    })
    return bbox
end
--- Returns the links of page `pageno` as produced by page:getPageLinks().
-- Results are cached in DocCache under a key made of the file, the current
-- reflowable font size and the page number.
function PdfDocument:getPageLinks(pageno)
    local hash = "pglinks|"..self.file.."|"..self.reflowable_font_size.."|"..pageno
    local hit = DocCache:check(hash)
    if hit then
        return hit.links
    end

    local page = self._document:openPage(pageno)
    local page_links = page:getPageLinks()
    page:close()

    -- Cache size estimate: fixed overhead plus a per-link amount.
    local estimated_size = 64 + (8 * 32 * #page_links)
    DocCache:insert(hash, CacheItem:new{
        links = page_links,
        size = estimated_size,
    })
    return page_links
end
--- Tells whether the underlying file is a PDF that can be opened for writing.
-- The probe result is remembered in `self.is_writable`, so the file is only
-- opened on the first call.
-- @return nil if the file suffix is not "pdf" (case-insensitive), true if the
--   file could be opened in 'r+b' mode, false otherwise
function PdfDocument:_checkIfWritable()
    local suffix = util.getFileNameSuffix(self.file)
    if string.lower(suffix) ~= "pdf" then
        return nil
    end
    if self.is_writable ~= nil then
        return self.is_writable
    end
    local handle = io.open(self.file, 'r+b')
    if handle then
        self.is_writable = true
        handle:close()
    else
        self.is_writable = false
    end
    return self.is_writable
end
--- Builds an FFI `fz_quad` array from a list of boxes with x, y, w, h fields.
-- @param pboxes array of boxes
-- @return the cdata array (0-based) and the number of quads in it
local function _quadpointsFromPboxes(pboxes)
    -- will also need mupdf_h.lua to be evaluated once
    -- but this is guaranteed at this point
    local count = #pboxes
    local quads = ffi.new("fz_quad[?]", count)
    for idx = 1, count do
        local box = pboxes[idx]
        local quad = quads[idx - 1] -- reference into the array
        local left, top = box.x, box.y
        local right = box.x + box.w - 1
        local bottom = box.y + box.h - 1
        -- The order must be left bottom, right bottom, left top, right top.
        -- https://bugs.ghostscript.com/show_bug.cgi?id=695130
        quad.ll.x = left
        quad.ll.y = bottom
        quad.lr.x = right
        quad.lr.y = bottom
        quad.ul.x = left
        quad.ul.y = top
        quad.ur.x = right
        quad.ur.y = top
    end
    return quads, count
end
--- Converts `n` quads (indexed from 0) back into a list of x/y/w/h boxes.
-- Each box is derived from the quad's upper-left (`ul`) and lower-right (`lr`)
-- corners; this is the reverse of _quadpointsFromPboxes.
-- @param quadpoints 0-based indexable collection of quads
-- @param n number of quads to convert
-- @return array (1-based) of boxes
local function _quadpointsToPboxes(quadpoints, n)
    local boxes = {}
    for idx = 1, n do
        local quad = quadpoints[idx - 1]
        local upper_left, lower_right = quad.ul, quad.lr
        boxes[idx] = {
            x = upper_left.x,
            y = upper_left.y,
            w = lower_right.x - upper_left.x + 1,
            h = lower_right.y - upper_left.y + 1,
        }
    end
    return boxes
end
2014-02-01 16:16:51 +00:00
--- Adds a markup annotation for `item` to page `pageno` of the document.
-- Does nothing when the file is not a writable PDF. On success the document is
-- flagged as edited, `item.pboxes` is replaced with the (possibly adjusted)
-- coordinates and the tile cache validity is reset.
-- @param pageno page number
-- @param item highlight with `pboxes`, `drawer` and `color` fields
-- @return nil or false (the result of _checkIfWritable) when nothing is
--   written; nothing on success
function PdfDocument:saveHighlight(pageno, item)
    local writable = self:_checkIfWritable()
    if writable ~= true then
        return writable
    end
    self.is_edited = true

    local quads, count = _quadpointsFromPboxes(item.pboxes)
    local page = self._document:openPage(pageno)

    -- "lighten" and any unrecognised drawer both use a highlight annotation.
    local kind = C.PDF_ANNOT_HIGHLIGHT
    local color = BlitBuffer.colorFromName(item.color)
    local drawer = item.drawer
    if drawer == "underscore" then
        kind = C.PDF_ANNOT_UNDERLINE
    elseif drawer == "strikeout" then
        kind = C.PDF_ANNOT_STRIKE_OUT
    end

    -- NOTE: For highlights, display style may differ compared to ReaderView:drawHighlightRect...
    --       (e.g., we do a MUL blend, MuPDF currently appears to do an OVER blend).
    page:addMarkupAnnotation(quads, count, kind, color) -- may update/adjust quadpoints
    -- Update pboxes with the possibly adjusted coordinates (this will have it updated
    -- in self.view.highlight.saved[page])
    item.pboxes = _quadpointsToPboxes(quads, count)
    page:close()
    self:resetTileCacheValidity()
end
--- Removes the markup annotation matching `item.pboxes` from page `pageno`.
-- Does nothing when the file is not a writable PDF. The document is flagged
-- as edited before the annotation lookup; the tile cache validity is only
-- reset when an annotation was found and deleted.
-- @return nil or false (the result of _checkIfWritable) when nothing is
--   attempted; nothing otherwise
function PdfDocument:deleteHighlight(pageno, item)
    local writable = self:_checkIfWritable()
    if writable ~= true then
        return writable
    end
    self.is_edited = true

    local quads, count = _quadpointsFromPboxes(item.pboxes)
    local page = self._document:openPage(pageno)
    local annot = page:getMarkupAnnotation(quads, count)
    -- Explicit comparison against nil is kept on purpose (presumably the
    -- lookup may return an FFI pointer, where NULL is truthy but equals nil).
    local found = annot ~= nil
    if found then
        page:deleteMarkupAnnotation(annot)
        self:resetTileCacheValidity()
    end
    page:close()
end
--- Updates the markup annotation matching `item.pboxes` on page `pageno`
-- with `contents`.
-- Does nothing when the file is not a writable PDF. The document is flagged
-- as edited before the annotation lookup; the tile cache validity is only
-- reset when an annotation was found and updated.
-- @return nil or false (the result of _checkIfWritable) when nothing is
--   attempted; nothing otherwise
function PdfDocument:updateHighlightContents(pageno, item, contents)
    local writable = self:_checkIfWritable()
    if writable ~= true then
        return writable
    end
    self.is_edited = true

    local quads, count = _quadpointsFromPboxes(item.pboxes)
    local page = self._document:openPage(pageno)
    local annot = page:getMarkupAnnotation(quads, count)
    -- Explicit comparison against nil is kept on purpose (presumably the
    -- lookup may return an FFI pointer, where NULL is truthy but equals nil).
    local found = annot ~= nil
    if found then
        page:updateMarkupAnnotation(annot, contents)
        self:resetTileCacheValidity()
    end
    page:close()
end
2014-02-01 16:16:51 +00:00
--- Writes the document back to `self.file`, logging the target path first.
function PdfDocument:writeDocument()
    local target = self.file
    logger.info("writing document to", target)
    self._document:writeDocument(target)
end
--- Closes the document, writing pending edits to disk first when this is the
-- last registered reference to the file.
function PdfDocument:close()
    -- NOTE: We can't just rely on Document:close's return code for that, as we need self._document
    --       in :writeDocument, and it would have been destroyed.
    local DocumentRegistry = require("document/documentregistry")
    local is_last_reference = DocumentRegistry:getReferenceCount(self.file) == 1
    -- Only the final reference to this Document instance flushes edits.
    if is_last_reference and self.is_edited then
        self:writeDocument()
    end
    Document.close(self)
end
--- Delegates to koptinterface:getLinkFromPosition, passing this document along.
function PdfDocument:getLinkFromPosition(pageno, pos)
    local kopt = self.koptinterface
    return kopt:getLinkFromPosition(self, pageno, pos)
end
--- Delegates to koptinterface:clipPagePNGFile, passing this document along.
function PdfDocument:clipPagePNGFile(pos0, pos1, pboxes, drawer, filename)
    local kopt = self.koptinterface
    return kopt:clipPagePNGFile(self, pos0, pos1, pboxes, drawer, filename)
end
--- Delegates to koptinterface:clipPagePNGString, passing this document along.
function PdfDocument:clipPagePNGString(pos0, pos1, pboxes, drawer)
    local kopt = self.koptinterface
    return kopt:clipPagePNGString(self, pos0, pos1, pboxes, drawer)
end
2013-04-14 07:16:42 +00:00
--- Delegates to koptinterface:getPageBBox, passing this document along.
function PdfDocument:getPageBBox(pageno)
    local kopt = self.koptinterface
    return kopt:getPageBBox(self, pageno)
end
--- Delegates to koptinterface:getPageDimensions, passing this document along.
function PdfDocument:getPageDimensions(pageno, zoom, rotation)
    local kopt = self.koptinterface
    return kopt:getPageDimensions(self, pageno, zoom, rotation)
end
--- Delegates to koptinterface:getCoverPageImage, passing this document along.
function PdfDocument:getCoverPageImage()
    local kopt = self.koptinterface
    return kopt:getCoverPageImage(self)
end
--- Delegates to koptinterface:findText, passing this document along.
function PdfDocument:findText(pattern, origin, reverse, case_insensitive, page)
    local kopt = self.koptinterface
    return kopt:findText(self, pattern, origin, reverse, case_insensitive, page)
end
--- Delegates to koptinterface:findAllText, passing this document along.
function PdfDocument:findAllText(pattern, case_insensitive, nb_context_words, max_hits)
    local kopt = self.koptinterface
    return kopt:findAllText(self, pattern, case_insensitive, nb_context_words, max_hits)
end
2024-07-14 20:00:32 +00:00
--- Delegates to koptinterface:renderPage, passing this document along.
function PdfDocument:renderPage(pageno, rect, zoom, rotation, gamma, hinting)
    local kopt = self.koptinterface
    return kopt:renderPage(self, pageno, rect, zoom, rotation, gamma, hinting)
end
2024-07-14 20:00:32 +00:00
--- Delegates to koptinterface:hintPage, passing this document along.
function PdfDocument:hintPage(pageno, zoom, rotation, gamma)
    local kopt = self.koptinterface
    return kopt:hintPage(self, pageno, zoom, rotation, gamma)
end
2024-07-14 20:00:32 +00:00
--- Delegates to koptinterface:drawPage, passing this document along.
function PdfDocument:drawPage(target, x, y, rect, pageno, zoom, rotation, gamma)
    local kopt = self.koptinterface
    return kopt:drawPage(self, target, x, y, rect, pageno, zoom, rotation, gamma)
end
2013-10-18 20:38:07 +00:00
--- Registers this class with `registry` as a provider for every file type
-- listed below, by calling registry:addProvider(extension, mimetype, self, number)
-- once per entry, in the listed order.
-- @param registry object exposing addProvider()
function PdfDocument:register(registry)
    -- Each entry is { extension, mimetype, number passed as last argument }.
    local providers = {
        --- Document types ---
        { "cbt", "application/vnd.comicbook+tar", 100 },
        { "cbz", "application/vnd.comicbook+zip", 100 },
        { "cbz", "application/x-cbz", 100 }, -- Alternative mimetype for OPDS.
        { "cfb", "application/octet-stream", 80 }, -- Compound File Binary, a Microsoft general-purpose file with a file-system-like structure.
        { "docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", 80 },
        { "epub", "application/epub+zip", 50 },
        { "epub3", "application/epub+zip", 50 },
        { "fb2", "application/fb2", 80 },
        { "htm", "text/html", 90 },
        { "html", "text/html", 90 },
        { "mobi", "application/x-mobipocket-ebook", 80 },
        { "pdf", "application/pdf", 100 },
        { "pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", 80 },
        { "tar", "application/x-tar", 10 },
        { "txt", "text/plain", 80 },
        { "xhtml", "application/xhtml+xml", 90 },
        { "xml", "application/xml", 10 },
        { "xps", "application/oxps", 100 },
        { "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 80 },
        { "zip", "application/zip", 20 },
        --- Picture types ---
        { "gif", "image/gif", 90 },
        -- MS HD Photo == JPEG XR
        { "hdp", "image/vnd.ms-photo", 90 },
        { "j2k", "image/jp2", 90 },
        { "jp2", "image/jp2", 90 },
        { "jpeg", "image/jpeg", 90 },
        { "jpg", "image/jpeg", 90 },
        -- JPEG XR
        { "jxr", "image/jxr", 90 },
        { "pam", "image/x-portable-arbitrarymap", 90 },
        -- NOTE(review): pbm/pgm/pnm/ppm all share the same unhyphenated
        -- mimetype string below; confirm whether that is intentional.
        { "pbm", "image/xportablebitmap", 90 },
        { "pgm", "image/xportablebitmap", 90 },
        { "png", "image/png", 90 },
        { "pnm", "image/xportablebitmap", 90 },
        { "ppm", "image/xportablebitmap", 90 },
        { "svg", "image/svg+xml", 80 },
        { "tif", "image/tiff", 90 },
        { "tiff", "image/tiff", 90 },
        -- Windows Media Photo == JPEG XR
        { "wdp", "image/vnd.ms-photo", 90 },
    }
    for idx = 1, #providers do
        local entry = providers[idx]
        registry:addProvider(entry[1], entry[2], self, entry[3])
    end
end
return PdfDocument