2
0
mirror of https://github.com/koreader/koreader synced 2024-11-18 03:25:46 +00:00
koreader/frontend/document/koptinterface.lua

366 lines
11 KiB
Lua
Raw Normal View History

require "dbg"
require "cache"
require "ui/geometry"
require "ui/device"
require "ui/reader/readerconfig"
KoptInterface = {
tessocr_data = "data",
ocr_lang = "eng",
2013-04-30 05:52:10 +00:00
ocr_type = 3, -- default 0, for more accuracy use 3
}
ContextCacheItem = CacheItem:new{}
function ContextCacheItem:onFree()
if self.kctx.free then
DEBUG("free koptcontext", self.kctx)
self.kctx:free()
end
end
2013-04-30 05:52:10 +00:00
OCREngine = CacheItem:new{}
function OCREngine:onFree()
if self.ocrengine.freeOCR then
DEBUG("free OCREngine", self.ocrengine)
self.ocrengine:freeOCR()
end
end
function KoptInterface:waitForContext(kc)
2013-04-08 07:38:04 +00:00
-- if koptcontext is being processed in background thread
-- the isPreCache will return 1.
while kc and kc:isPreCache() == 1 do
DEBUG("waiting for background rendering")
util.usleep(100000)
end
return kc
end
--[[
get reflow context
--]]
function KoptInterface:createContext(doc, pageno, bbox)
-- Now koptcontext keeps track of its dst bitmap reflowed by libk2pdfopt.
-- So there is no need to check background context when creating new context.
local kc = KOPTContext.new()
2013-02-25 13:29:41 +00:00
local screen_size = Screen:getSize()
kc:setTrim(doc.configurable.trim_page)
kc:setWrap(doc.configurable.text_wrap)
kc:setIndent(doc.configurable.detect_indent)
kc:setRotate(doc.configurable.screen_rotation)
kc:setColumns(doc.configurable.max_columns)
2013-02-10 16:51:55 +00:00
kc:setDeviceDim(screen_size.w, screen_size.h)
kc:setDeviceDPI(doc.screen_dpi)
kc:setStraighten(doc.configurable.auto_straighten)
kc:setJustification(doc.configurable.justification)
kc:setZoom(doc.configurable.font_size)
kc:setMargin(doc.configurable.page_margin)
kc:setQuality(doc.configurable.quality)
kc:setContrast(doc.configurable.contrast)
kc:setDefectSize(doc.configurable.defect_size)
kc:setLineSpacing(doc.configurable.line_spacing)
kc:setWordSpacing(doc.configurable.word_spacing)
kc:setLanguage(doc.configurable.doc_language)
kc:setBBox(bbox.x0, bbox.y0, bbox.x1, bbox.y1)
if Dbg.is_on then kc:setDebug() end
return kc
end
2013-02-25 13:29:41 +00:00
function KoptInterface:getContextHash(doc, pageno, bbox)
local screen_size = Screen:getSize()
local screen_size_hash = screen_size.w.."|"..screen_size.h
local bbox_hash = bbox.x0.."|"..bbox.y0.."|"..bbox.x1.."|"..bbox.y1
return doc.file.."|"..pageno.."|"..doc.configurable:hash("|").."|"..bbox_hash.."|"..screen_size_hash
end
2013-04-14 07:16:42 +00:00
function KoptInterface:getAutoBBox(doc, pageno)
2013-07-19 18:51:17 +00:00
local native_size = Document.getNativePageDimensions(doc, pageno)
local bbox = {
x0 = 0, y0 = 0,
2013-07-19 18:51:17 +00:00
x1 = native_size.w,
y1 = native_size.h,
}
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "autobbox|"..context_hash
local cached = Cache:check(hash)
if not cached then
local page = doc._document:openPage(pageno)
local kc = self:createContext(doc, pageno, bbox)
bbox.x0, bbox.y0, bbox.x1, bbox.y1 = page:getAutoBBox(kc)
DEBUG("Auto detected bbox", bbox)
page:close()
Cache:insert(hash, CacheItem:new{ autobbox = bbox })
return bbox
else
return cached.autobbox
end
end
function KoptInterface:getSemiAutoBBox(doc, pageno)
-- use manual bbox
local bbox = Document.getPageBBox(doc, pageno)
2013-04-14 07:16:42 +00:00
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "semiautobbox|"..context_hash
2013-04-14 07:16:42 +00:00
local cached = Cache:check(hash)
if not cached then
local page = doc._document:openPage(pageno)
local kc = self:createContext(doc, pageno, bbox)
local auto_bbox = {}
auto_bbox.x0, auto_bbox.y0, auto_bbox.x1, auto_bbox.y1 = page:getAutoBBox(kc)
auto_bbox.x0 = auto_bbox.x0 + bbox.x0
auto_bbox.y0 = auto_bbox.y0 + bbox.y0
auto_bbox.x1 = auto_bbox.x1 + bbox.x0
auto_bbox.y1 = auto_bbox.y1 + bbox.y0
DEBUG("Semi-auto detected bbox", auto_bbox)
2013-04-14 07:16:42 +00:00
page:close()
Cache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox })
return auto_bbox
2013-04-14 07:16:42 +00:00
else
return cached.semiautobbox
end
end
function KoptInterface:getReflewTextBoxes(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "rfpgboxes|"..context_hash
local cached = Cache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
local cached = Cache:check(kctx_hash)
if cached then
local kc = self:waitForContext(cached.kctx)
--kc:setDebug()
local lang = doc.configurable.doc_language
if lang == "chi_sim" or lang == "chi_tra" or
lang == "jpn" or lang == "kor" then
kc:setCJKChar()
end
kc:setLanguage(lang)
local fullwidth, fullheight = kc:getPageDim()
local boxes = kc:getWordBoxes(0, 0, fullwidth, fullheight)
Cache:insert(hash, CacheItem:new{ rfpgboxes = boxes })
return boxes
end
else
return cached.rfpgboxes
2013-04-14 07:16:42 +00:00
end
end
function KoptInterface:getTextBoxes(doc, pageno)
local hash = "pgboxes|"..doc.file.."|"..pageno
local cached = Cache:check(hash)
if not cached then
local kc_hash = "kctx|"..doc.file.."|"..pageno
local kc = KOPTContext.new()
kc:setDebug()
local lang = doc.configurable.doc_language
if lang == "chi_sim" or lang == "chi_tra" or
lang == "jpn" or lang == "kor" then
kc:setCJKChar()
end
kc:setLanguage(lang)
local page = doc._document:openPage(pageno)
page:getPagePix(kc)
local fullwidth, fullheight = kc:getPageDim()
local boxes = kc:getWordBoxes(0, 0, fullwidth, fullheight)
Cache:insert(hash, CacheItem:new{ pgboxes = boxes })
Cache:insert(kc_hash, ContextCacheItem:new{ kctx = kc })
return boxes
else
return cached.pgboxes
end
end
--[[
get word from OCR in reflew page
--]]
function KoptInterface:getReflewOCRWord(doc, pageno, rect)
2013-04-30 05:52:10 +00:00
local ocrengine = "ocrengine"
if not Cache:check(ocrengine) then
local dummy = KOPTContext.new()
Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
end
self.ocr_lang = doc.configurable.doc_language
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "rfocrword|"..context_hash..rect.x..rect.y..rect.w..rect.h
local cached = Cache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
local cached = Cache:check(kctx_hash)
if cached then
local kc = self:waitForContext(cached.kctx)
local ok, word = pcall(
kc.getTOCRWord, kc,
rect.x, rect.y, rect.w, rect.h,
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
Cache:insert(hash, CacheItem:new{ rfocrword = word })
return word
end
else
return cached.rfocrword
end
end
--[[
get word from OCR in non-reflew page
--]]
function KoptInterface:getOCRWord(doc, pageno, rect)
local ocrengine = "ocrengine"
if not Cache:check(ocrengine) then
local dummy = KOPTContext.new()
Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
end
self.ocr_lang = doc.configurable.doc_language
local hash = "ocrword|"..doc.file.."|"..pageno..rect.x..rect.y..rect.w..rect.h
local cached = Cache:check(hash)
if not cached then
local pgboxes_hash = "pgboxes|"..doc.file.."|"..pageno
local pgboxes_cached = Cache:check(pgboxes_hash)
local kc_hash = "kctx|"..doc.file.."|"..pageno
local kc_cashed = Cache:check(kc_hash)
if pgboxes_cached and kc_cashed then
local kc = kc_cashed.kctx
local ok, word = pcall(
2013-04-30 05:52:10 +00:00
kc.getTOCRWord, kc,
rect.x, rect.y, rect.w, rect.h,
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
Cache:insert(hash, CacheItem:new{ ocrword = word })
return word
end
else
return cached.ocrword
end
end
--[[
get cached koptcontext for centain page. if context doesn't exist in cache make
2013-07-01 06:41:33 +00:00
new context and reflow the src page immediatly, or wait background thread for
reflowed context.
--]]
function KoptInterface:getCachedContext(doc, pageno)
local bbox = doc:getPageBBox(pageno)
2013-02-25 13:29:41 +00:00
local context_hash = self:getContextHash(doc, pageno, bbox)
local kctx_hash = "kctx|"..context_hash
local cached = Cache:check(kctx_hash)
if not cached then
-- If kctx is not cached, create one and get reflowed bmp in foreground.
local kc = self:createContext(doc, pageno, bbox)
local page = doc._document:openPage(pageno)
-- reflow page
2013-03-11 08:24:27 +00:00
--local secs, usecs = util.gettime()
page:reflow(kc, 0)
page:close()
2013-03-11 08:24:27 +00:00
--local nsecs, nusecs = util.gettime()
--local dur = nsecs - secs + (nusecs - usecs) / 1000000
--DEBUG("Reflow duration:", dur)
--self:logReflowDuration(pageno, dur)
local fullwidth, fullheight = kc:getPageDim()
DEBUG("reflowed page", pageno, "fullwidth:", fullwidth, "fullheight:", fullheight)
Cache:insert(kctx_hash, ContextCacheItem:new{ kctx = kc })
return kc
else
-- wait for background thread
return self:waitForContext(cached.kctx)
end
end
--[[
get reflowed page dimensions
--]]
function KoptInterface:getPageDimensions(doc, pageno, zoom, rotation)
local kc = self:getCachedContext(doc, pageno)
local fullwidth, fullheight = kc:getPageDim()
return Geom:new{ w = fullwidth, h = fullheight }
end
--[[
inherited from common document interface
render reflowed page into tile cache.
--]]
function KoptInterface:renderPage(doc, pageno, rect, zoom, rotation, render_mode)
doc.render_mode = render_mode
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local renderpg_hash = "renderpg|"..context_hash
local cached = Cache:check(renderpg_hash)
if not cached then
-- do the real reflowing if kctx is not been cached yet
local kc = self:getCachedContext(doc, pageno)
local fullwidth, fullheight = kc:getPageDim()
if not Cache:willAccept(fullwidth * fullheight / 2) then
-- whole page won't fit into cache
error("aborting, since we don't have enough cache for this page")
end
local page = doc._document:openPage(pageno)
2013-04-08 07:38:04 +00:00
-- prepare cache item with contained blitbuffer
local tile = CacheItem:new{
size = fullwidth * fullheight / 2 + 64, -- estimation
excerpt = Geom:new{ w = fullwidth, h = fullheight },
pageno = pageno,
bb = Blitbuffer.new(fullwidth, fullheight)
}
page:rfdraw(kc, tile.bb)
page:close()
Cache:insert(renderpg_hash, tile)
return tile
else
return cached
end
end
--[[
2013-07-01 06:41:33 +00:00
inherited from common document interface render reflowed page into cache in
background thread. this method returns immediatly leaving the precache flag on
in context. subsequent usage of this context should wait for the precache flag
off by calling self:waitForContext(kctx)
--]]
function KoptInterface:hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local kctx_hash = "kctx|"..context_hash
local cached = Cache:check(kctx_hash)
if not cached then
local kc = self:createContext(doc, pageno, bbox)
local page = doc._document:openPage(pageno)
DEBUG("hinting page", pageno, "in background")
-- reflow will return immediately and running in background thread
kc:setPreCache()
page:reflow(kc, 0)
page:close()
Cache:insert(kctx_hash, ContextCacheItem:new{ kctx = kc })
end
end
--[[
inherited from common document interface
draw cached tile pixels into target blitbuffer.
--]]
function KoptInterface:drawPage(doc, target, x, y, rect, pageno, zoom, rotation, render_mode)
local tile = self:renderPage(doc, pageno, rect, zoom, rotation, render_mode)
2013-02-10 16:51:55 +00:00
--DEBUG("now painting", tile, rect)
target:blitFrom(tile.bb,
2013-04-08 07:38:04 +00:00
x, y,
rect.x - tile.excerpt.x,
rect.y - tile.excerpt.y,
rect.w, rect.h)
end
--[[
helper functions
--]]
function KoptInterface:logReflowDuration(pageno, dur)
local file = io.open("reflowlog.txt", "a+")
if file then
if file:seek("end") == 0 then -- write the header only once
file:write("PAGE\tDUR\n")
end
file:write(string.format("%s\t%s\n", pageno, dur))
file:close()
end
end