local JSON = require("json")
local logger = require("logger")
local util = require("ffi/util")
local _ = require("gettext")
local T = require("ffi/util").template

--[[
-- Query wikipedia using Wikimedia Web API.
-- https://en.wikipedia.org/w/api.php?format=jsonfm&action=query&generator=search&gsrnamespace=0&gsrsearch=ereader&gsrlimit=10&prop=extracts&exintro&explaintext&exlimit=max
-- https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=jsonfm&explaintext=&redirects=&titles=E-reader
--
-- To get parsed HTML :
-- https://en.wikipedia.org/w/api.php?action=parse&page=E-book
-- https://en.wikipedia.org/w/api.php?action=parse&page=E-book&prop=text|sections|displaytitle|revid&disablelimitreport=&disableeditsection
-- https://www.mediawiki.org/wiki/API:Parsing_wikitext#parse
--]]

local Wikipedia = {
    -- "%s" is replaced by the language code (see getWikiServer())
    wiki_server = "https://%s.wikipedia.org",
    wiki_path = "/w/api.php",
    -- Parameters used to fetch the full plain text of a single page
    wiki_params = {
        action = "query",
        prop = "extracts",
        format = "json",
        -- exintro = nil, -- get more than only the intro
        explaintext = "",
        redirects = "",
        -- title = nil, -- text to lookup, will be added below
    },
    default_lang = "en",
    -- Search query for better results
    -- see https://www.mediawiki.org/wiki/API:Main_page
    wiki_search_params = {
        action = "query",
        generator = "search",
        gsrnamespace = "0",
        -- gsrsearch = nil, -- text to lookup, will be added below
        gsrlimit = 20, -- max nb of results to get
        exlimit = "max",
        prop = "extracts|info", -- 'extracts' to get text, 'info' to get full page length
        format = "json",
        explaintext = "",
        exintro = "",
        -- We have to use 'exintro=' to get extracts for ALL results
        -- (otherwise, we get the full text for only the first result, and
        -- no text at all for the others)
    },
    -- Parameters used to fetch the parsed HTML of a single page
    wiki_phtml_params = {
        action = "parse",
        format = "json",
        -- we only need the following informations
        prop = "text|sections|displaytitle|revid",
        -- page = nil, -- text to lookup, will be added below
        -- disabletoc = "", -- if we want to remove toc IN html
        disablelimitreport = "",
        disableeditsection = "",
    },
    -- allow for disabling prettifying full page text
    wiki_prettify = G_reader_settings:nilOrTrue("wikipedia_prettify"),
}

-- Return the base URL of the Wikipedia site for language code `lang`
-- (self.default_lang is used when `lang` is nil or false).
function Wikipedia:getWikiServer(lang)
    return string.format(self.wiki_server, lang or self.default_lang)
end

-- Possible values for page_type parameter to loadPage()
local WIKIPEDIA_INTRO = 1
local WIKIPEDIA_FULL = 2
local WIKIPEDIA_PHTML = 3

-- Serialize a table of API parameters as "key=value&key=value&".
-- The trailing "&" is kept on purpose so the lookup parameter can be
-- appended directly. Values are emitted as-is (not url-escaped).
local function buildQueryPrefix(params)
    local parts = {}
    for k, v in pairs(params) do
        parts[#parts + 1] = string.format("%s=%s&", k, v)
    end
    return table.concat(parts)
end

--[[
-- Query the Wikipedia API and return the decoded JSON table.
--
-- text      : search text or page title (url-escaped here)
-- lang      : language code, passed to getWikiServer()
-- page_type : one of WIKIPEDIA_INTRO, WIKIPEDIA_FULL, WIKIPEDIA_PHTML
-- plain     : for INTRO and FULL requests, whether to ask for plain text
--             extracts ('explaintext'); ignored for PHTML
--
-- Returns the decoded table, or nil when page_type is unknown, when the
-- HTTP code is not 200, or when the body is not decodable JSON.
-- Raises "Network is unreachable" when the request returned no headers.
--]]
function Wikipedia:loadPage(text, lang, page_type, plain)
    local socket = require('socket')
    local url = require('socket.url')
    local http = require('socket.http')
    local https = require('ssl.https')
    local ltn12 = require('ltn12')

    -- Pick the parameter set and the name of the parameter carrying `text`
    local params, lookup_key
    if page_type == WIKIPEDIA_INTRO then -- search query
        params, lookup_key = self.wiki_search_params, "gsrsearch"
        -- NOTE: this updates the shared parameter table, as it always did
        params.explaintext = plain and "" or nil
    elseif page_type == WIKIPEDIA_FULL then -- full page content
        params, lookup_key = self.wiki_params, "titles"
        params.explaintext = plain and "" or nil
    elseif page_type == WIKIPEDIA_PHTML then -- parsed html page content
        params, lookup_key = self.wiki_phtml_params, "page"
    else
        return
    end

    local parsed = url.parse(self:getWikiServer(lang))
    parsed.path = self.wiki_path
    parsed.query = buildQueryPrefix(params) .. lookup_key .. "=" .. url.escape(text)

    -- HTTP request
    local sink = {}
    local request = {
        url = url.build(parsed),
        method = 'GET',
        sink = ltn12.sink.table(sink),
    }
    http.TIMEOUT, https.TIMEOUT = 10, 10
    local httpRequest = parsed.scheme == 'http' and http.request or https.request
    -- The generic request form returns 1, code, headers, status on success
    -- (nil, error message on failure); skip() drops the first value.
    local code, headers, status = socket.skip(1, httpRequest(request))

    -- raise error message when network is unavailable
    if headers == nil then
        error("Network is unreachable")
    end

    -- Compare the numeric code rather than the whole status line, which
    -- may legitimately differ (HTTP version, reason phrase) for a 200.
    if code ~= 200 then
        logger.warn("HTTP status not okay:", status)
        return
    end

    local content = table.concat(sink)
    if content == "" or string.sub(content, 1, 1) ~= "{" then
        logger.warn("not JSON from wiki response:", content)
        return
    end
    local ok, result = pcall(JSON.decode, content)
    if ok and result then
        logger.dbg("wiki result", result)
        return result
    end
    logger.warn("wiki error:", result)
end

-- search wikipedia and get intros for results
-- Returns the 'pages' table of the query result, or nil.
function Wikipedia:wikintro(text, lang)
    local result = self:loadPage(text, lang, WIKIPEDIA_INTRO, true)
    local query = result and result.query
    if query then
        return query.pages
    end
end

-- get full content of a wiki page
-- Returns the 'pages' table of the query result (extracts prettified
-- when self.wiki_prettify is set), or nil.
function Wikipedia:wikifull(text, lang)
    local result = self:loadPage(text, lang, WIKIPEDIA_FULL, true)
    local query = result and result.query
    if not query then
        return
    end
    -- Guard against a query result without 'pages' (pairs(nil) would raise)
    if self.wiki_prettify and query.pages then
        -- Prettification of the plain text full page
        for pageid, page in pairs(query.pages) do
            if page.extract then
                page.extract = self:prettifyText(page.extract)
            end
        end
    end
    return query.pages
end

-- get parsed html content and other infos of a wiki page
-- Returns the 'parse' table of the result.
-- Raises an error when nothing could be loaded, or with the API provided
-- error info when the API reported one. Returns nil when the result has
-- neither 'parse' nor an error info.
function Wikipedia:wikiphtml(text, lang)
    local result = self:loadPage(text, lang, WIKIPEDIA_PHTML, true)
    if result == nil then
        -- loadPage() returned nothing (bad HTTP code, non-JSON body...):
        -- raise an explicit error instead of failing on indexing nil.
        error("No valid response from Wikipedia")
    end
    if result.parse then
        return result.parse
    end
    if result.error and result.error.info then
        error(result.error.info)
    end
end

-- UTF8 of unicode geometrical shapes we can use to replace
-- the "=== title ===" of wikipedia plaintext pages
-- These chosen ones are available in
-- most fonts (prettier symbols
-- exist in unicode, but are available in a few fonts only) and
-- have a quite consistent size/weight in all fonts.
local th1_sym = "\xE2\x96\x88" -- full block (big black rectangle) (never met, only for web page title?)
local th2_sym = "\xE2\x96\x89" -- big black square
local th3_sym = "\xC2\xA0\xE2\x97\x86" -- black diamond (indented, nicer)
local th4_sym = "\xE2\x97\xA4" -- black upper left triangle
local th5_sym = "\xE2\x9C\xBF" -- black florette
local th6_sym = "\xE2\x9D\x96" -- black diamond minus white x
-- Others available in most fonts
-- local thX_sym = "\xE2\x9C\x9A" -- heavy greek cross
-- local thX_sym = "\xE2\x97\xA2" -- black lower right triangle
-- local thX_sym = "\xE2\x97\x89" -- fish eye
-- local thX_sym = "\xE2\x96\x97" -- quadrant lower right

-- Ordered list of { pattern, replacement } pairs applied by prettifyText().
-- The order matters: later rules rely on what earlier ones produced.
local prettify_rules = {
    -- Opening heading markers ("\n== Title") get replaced by a symbol.
    -- We use \a for an additional leading \n that we don't want shortened later
    { "\n= ", "\n\a"..th1_sym.." " }, -- 2 empty lines before
    { "\n== ", "\n\a"..th2_sym.." " }, -- 2 empty lines before
    { "\n=== ", "\n"..th3_sym.." " },
    { "\n==== ", "\n"..th4_sym.." " },
    { "\n===== ", "\n"..th5_sym.." " },
    { "\n====== ", "\n"..th6_sym.." " },
    { "Modifier ==", " ==" }, -- fr wikipedia fix for some articles modified by clumsy editors
    { "==$", "==\n" }, -- so a closing heading marker at end of text is matched by the next rules
    { " ===?\n+", "\n\n" }, -- closing " ==" or " ===" : empty line after
    { " ====+\n+", "\n" }, -- closing " ====" or longer : single \n, no empty line
    { "\n\n+\xE2\x80\x94", "\n\xE2\x80\x94" }, -- em dash, used for quote author, make it stick to prev text
    { "\n +\n", "\n" }, -- trim lines full of only spaces (often seen in math formulas)
    { "^\n*", "" }, -- trim new lines at start
    { "\n*$", "" }, -- trim new lines at end
    { "\n\n+", "\n\n" }, -- shorten multiple new lines
    { "\a", "\n" }, -- re-add our wished \n
}

-- For optional prettification of the plain text full page
-- Takes the plain text extract of a page and returns it with the
-- "== title ==" heading markers replaced by symbols and with blank
-- lines normalized.
function Wikipedia:prettifyText(text)
    for i = 1, #prettify_rules do
        local rule = prettify_rules[i]
        -- gsub also returns the number of matches: keep only the string
        text = text:gsub(rule[1], rule[2])
    end
    return text
end

-- Fetch the content at `url` (http:// or https:// only) with a GET request.
-- `timeout` is in seconds and defaults to 10.
-- Returns true and the response body on success, or false and an error
-- message on failure.
local function getUrlContent(url, timeout)
    local socket = require('socket')
    local ltn12 = require('ltn12')
    local requester
    if url:sub(1,7) == "http://" then
        requester = require('socket.http')
    elseif url:sub(1,8) == "https://" then
        requester = require('ssl.https')
    else
        return false, "Unsupported protocol"
    end
    requester.TIMEOUT = timeout or 10
    local sink = {}
    local request = {
        url = url,
        method = 'GET',
        sink = ltn12.sink.table(sink),
    }
    -- The generic request form returns 1, code, headers, status on success
    -- (nil, error message on failure); skip() drops the first value.
    local code, headers, status = socket.skip(1, requester.request(request))
    if headers == nil then
        logger.warn("No HTTP headers")
        return false, "Network unavailable"
    end
    -- Compare the numeric code rather than the whole status line, which
    -- may legitimately differ (HTTP version, reason phrase) for a 200.
    if code ~= 200 then
        logger.warn("HTTP status not okay:", status)
        return false, "Network unavailable"
    end
    return true, table.concat(sink)
end

-- UTF8 of unicode geometrical shapes we'll prepend to wikipedia section headers,
-- to help identifying hierarchy (otherwise, the small font size differences help).
-- Best if identical to the ones used above for prettifying full plain text page.
-- These chosen ones are available in most fonts (prettier symbols -- exist in unicode, but are available in a few fonts only) and -- have a quite consistent size/weight in all fonts. local h1_sym = "\xE2\x96\x88" -- full block (big black rectangle) (never met, only for web page title?) local h2_sym = "\xE2\x96\x89" -- big black square local h3_sym = "\xE2\x97\x86" -- black diamond local h4_sym = "\xE2\x97\xA4" -- black upper left triangle local h5_sym = "\xE2\x9C\xBF" -- black florette local h6_sym = "\xE2\x9D\x96" -- black diamond minus white x -- Other available ones in most fonts -- local hXsym = "\xE2\x9C\x9A" -- heavy greek cross -- local hXsym = "\xE2\x97\xA2" -- black lower right triangle -- local hXsym = "\xE2\x97\x89" -- fish eye -- local hXsym = "\xE2\x96\x97" -- quadrant lower right local ext_to_mimetype = { png = "image/png", jpg = "image/jpeg", jpeg = "image/jpeg", gif = "image/gif", svg = "image/svg+xml", html= "application/xhtml+xml", xhtml= "application/xhtml+xml", ncx = "application/x-dtbncx+xml", js = "text/javascript", css = "text/css", otf = "application/opentype", ttf = "application/truetype", woff = "application/font-woff", } -- Create an epub file (with possibly images) function Wikipedia:createEpub(epub_path, page, lang, with_images) -- Use Trapper to display progress and ask questions through the UI. -- We need to have been Trapper.wrap()'ed for UI to be used, otherwise -- Trapper:info() and Trapper:confirm() will just use logger. 
local UI = require("ui/trapper") UI:info(_("Fetching Wikipedia page…")) local ok, phtml = pcall(self.wikiphtml, self, page, lang) if not ok then UI:info(phtml) -- display error in InfoMessage -- Sleep a bit to make that error seen util.sleep(2) UI:reset() return false end -- We may need to build absolute urls for non-absolute links and images urls local wiki_base_url = self:getWikiServer(lang) -- Get infos from wikipedia result -- (see example at https://en.wikipedia.org/w/api.php?action=parse&page=E-book&prop=text|sections|displaytitle|revid&disablelimitreport=&disableeditsection) local cancelled = false local html = phtml.text["*"] -- html content local page_cleaned = page:gsub("_", " ") -- page title local page_htmltitle = phtml.displaytitle -- page title with possible tags local sections = phtml.sections -- Wikipedia provided TOC local bookid = string.format("wikipedia_%s_%s_%s", lang, phtml.pageid, phtml.revid) -- Not sure if this bookid may ever be used by indexing software/calibre, but if it is, -- should it changes if content is updated (as now, including the wikipedia revisionId), -- or should it stays the same even if revid changes (content of the same book updated). -- We need to find images in HTML to tell how many when asking user if they should be included local images = {} local seen_images = {} local imagenum = 1 local cover_imgid = "" -- best candidate for cover among our images local processImg = function(img_tag) local src = img_tag:match([[src="([^"]*)"]]) if src == nil or src == "" then logger.info("no src found in ", img_tag) return nil end if src:sub(1,2) == "//" then src = "https:" .. src -- Wikipedia redirects from http to https, so use https elseif src:sub(1,1) == "/" then -- non absolute url src = wiki_base_url .. 
src end local cur_image if seen_images[src] then -- already seen cur_image = seen_images[src] else local ext = src:match(".*%.(%S+)") if ext == nil or ext == "" then -- we won't know what mimetype to use, ignore it logger.info("no file extension found in ", src) return nil end ext = ext:lower() local imgid = string.format("img%05d", imagenum) local imgpath = string.format("images/%s.%s", imgid, ext) local mimetype = ext_to_mimetype[ext] or "" local width = tonumber(img_tag:match([[width="([^"]*)"]])) local height = tonumber(img_tag:match([[height="([^"]*)"]])) -- Get higher resolution (2x) image url local src2x = nil local srcset = img_tag:match([[srcset="([^"]*)"]]) if srcset then srcset = " "..srcset.. ", " -- for next pattern to possibly match 1st or last item src2x = srcset:match([[ (%S+) 2x, ]]) if src2x then if src2x:sub(1,2) == "//" then src2x = "https:" .. src2x elseif src2x:sub(1,1) == "/" then -- non absolute url src2x = wiki_base_url .. src2x end end end cur_image = { imgid = imgid, imgpath = imgpath, src = src, src2x = src2x, mimetype = mimetype, width = width, height = height, } table.insert(images, cur_image) seen_images[src] = cur_image -- Use first image of reasonable size (not an icon) and portrait-like as cover-image if cover_imgid == "" and width and width > 50 and height and height > 50 and height > width then cover_imgid = imgid end imagenum = imagenum + 1 end -- crengine will NOT use width and height attributes, but it will use -- those found in a style attribute. -- If we get src2x images, crengine will scale them down to the 1x image size -- (less space wasted by images while reading), but the 2x quality will be -- there when image is viewed full screen with ImageViewer widget. 
local style_props = {} if cur_image.width then table.insert(style_props, string.format("width: %spx", cur_image.width)) end if cur_image.height then table.insert(style_props, string.format("height: %spx", cur_image.height)) end local style = table.concat(style_props, "; ") return string.format([[]], cur_image.imgpath, style) end html = html:gsub("(<%s*img [^>]*>)", processImg) logger.dbg("Images found in html:", images) -- See what to do with images local include_images = false local use_img_2x = false if with_images then -- If no UI (Trapper:wrap() not called), UI:confirm() will answer true if #images > 0 then include_images = UI:confirm(T(_("The page contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include")) if include_images then use_img_2x = UI:confirm(_("Would you like to use slightly higher quality images? This will result in a bigger file size."), _("Standard quality"), _("Higher quality")) end else UI:info(_("The page does not contain any images.")) util.sleep(1) -- Let the user see that end end if not include_images then -- Remove img tags to avoid little blank squares of missing images html = html:gsub("<%s*img [^>]*>", "") -- We could remove the whole image container
, -- but it's a lot of nested
and not easy to do. -- So the user will see the image legends and know a bit about -- the images he chose to not get. end UI:info(_("Building EPUB…")) -- Open the zip file (with .tmp for now, as crengine may still -- have a handle to the final epub_path, and we don't want to -- delete a good one if we fail/cancel later) local epub_path_tmp = epub_path .. ".tmp" local ZipWriter = require("ffi/zipwriter") local epub = ZipWriter:new{} if not epub:open(epub_path_tmp) then return false end -- We now create and add all the required epub files -- ---------------------------------------------------------------- -- /mimetype : always "application/epub+zip" epub:add("mimetype", "application/epub+zip") -- ---------------------------------------------------------------- -- /META-INF/container.xml : always the same content epub:add("META-INF/container.xml", [[ ]]) -- ---------------------------------------------------------------- -- OEBPS/content.opf : metadata + list of other files (paths relative to OEBPS/ directory) -- Other possible items in this file that are of no interest to crengine : -- In : -- -- -- (crengine only uses to get the cover image) -- In : -- -- And a section : -- -- -- -- local koreader_version = "KOReader" if lfs.attributes("git-rev", "mode") == "file" then koreader_version = "KOReader "..io.open("git-rev", "r"):read() end local content_opf_parts = {} -- head table.insert(content_opf_parts, string.format([[ %s Wikipedia %s %s %s %s ]], page_cleaned, lang:upper(), bookid, lang, koreader_version, cover_imgid)) -- images files if include_images then for inum, img in ipairs(images) do table.insert(content_opf_parts, string.format([[ %s]], img.imgid, img.imgpath, img.mimetype, "\n")) end end -- tail table.insert(content_opf_parts, [[ ]]) epub:add("OEBPS/content.opf", table.concat(content_opf_parts)) -- ---------------------------------------------------------------- -- OEBPS/stylesheet.css -- crengine will use its own data/epub.css, we just add/fix a few 
styles -- to look more alike wikipedia web pages (that the user can ignore -- with "Embedded Style" off) epub:add("OEBPS/stylesheet.css", [[ /* make section headers looks left aligned and avoid some page breaks */ h1, h2 { text-align: left; } h3, h4, h5, h6, h7 { page-break-before: avoid; page-break-after: avoid; text-align: left; } /* avoid page breaks around our centered titles on first page */ h1.koreaderwikifrontpage, h5.koreaderwikifrontpage { page-break-before: avoid; page-break-inside: avoid; page-break-after: avoid; text-align: center; margin-top: 0em; } p.koreaderwikifrontpage { font-style: italic; font-size: 90%; margin-left: 2em; margin-right: 2em; margin-top: 1em; margin-bottom: 1em; } hr.koreaderwikifrontpage { margin-left: 20%; margin-right: 20%; margin-bottom: 1.2em; } /* So many links, make them look like normal text except for underline */ a { display:inline; text-decoration: underline; color: black, font-weight: normal; } /* No underline for links without their href that we removed */ a.newwikinonexistent { text-decoration: none; } /* show a box around image thumbnails */ div.thumb { width: 80%; border: dotted 1px black; margin-top: 0.5em; margin-bottom: 0.5em; margin-left: 2.5em; margin-right: 2.5em; padding-top: ]].. (include_images and "0.5em" or "0.15em") .. 
[[; padding-bottom: 0.2em; padding-left: 0.5em; padding-right: 0.5em; text-align: center; font-size: 90%; } /* don't waste left margin for notes and list of pages */ ul, ol { margin-left: 0em; } /* helps crengine to not display them as block elements */ time, abbr, sup { display: inline; } ]]) -- ---------------------------------------------------------------- -- OEBPS/toc.ncx : table of content local toc_ncx_parts = {} local depth = 0 local cur_level = 0 local np_end = [[]] local num = 1 -- Add our own first section for first page, with page name as title table.insert(toc_ncx_parts, string.format([[%s]], num, num, page_cleaned)) table.insert(toc_ncx_parts, np_end) -- Wikipedia sections items seem to be already sorted by index, so no need to sort for isec, s in ipairs(sections) do num = num + 1 local s_anchor = s.anchor local s_title = string.format("%s %s", s.number, s.line) s_title = (s_title:gsub("(%b<>)", "")) -- titles may include and other html tags local s_level = s.toclevel if s_level > depth then depth = s_level -- max depth required in toc.ncx end if s_level == cur_level then table.insert(toc_ncx_parts, np_end) -- close same-level previous navPoint elseif s_level < cur_level then table.insert(toc_ncx_parts, np_end) -- close same-level previous navPoint while s_level < cur_level do -- close all in-between navPoint table.insert(toc_ncx_parts, np_end) cur_level = cur_level - 1 end elseif s_level > cur_level + 1 then -- a jump from level N to level N+2 or more ... should not happen -- per epub spec, but we don't know about wikipedia... 
-- so we create missing intermediate navPoints with same anchor as current section while s_level > cur_level + 1 do table.insert(toc_ncx_parts, "\n"..(" "):rep(cur_level)) table.insert(toc_ncx_parts, string.format([[-]], num, num, s_anchor)) cur_level = cur_level + 1 num = num + 1 end -- elseif s_level == cur_level + 1 then -- sublevel, nothing to close, nothing to add end cur_level = s_level table.insert(toc_ncx_parts, "\n"..(" "):rep(cur_level)) -- indentation, in case a person looks at it table.insert(toc_ncx_parts, string.format([[%s]], num, num, s_title, s_anchor)) end -- close nested while cur_level > 0 do table.insert(toc_ncx_parts, np_end) cur_level = cur_level - 1 end -- Prepend NCX head table.insert(toc_ncx_parts, 1, string.format([[ %s ]], bookid, depth, page_cleaned)) -- Append NCX tail table.insert(toc_ncx_parts, [[ ]]) epub:add("OEBPS/toc.ncx", table.concat(toc_ncx_parts)) -- ---------------------------------------------------------------- -- OEBPS/content.html -- Some small fixes to Wikipedia HTML to make crengine and the user happier -- Most images are in a link to the image info page, which is a useless -- external link for us, so let's remove this link. html = html:gsub("]*>%s*(<%s*img [^>]*>)%s*", "%1") -- For some
, which include nested divs, although -- perfectly balanced, crengine seems to miss some closing
and we -- end up having our image bordered box including the remaining main wiki text. -- It looks like this code is supposed to deal with class= containing multiple -- class names : -- https://github.com/koreader/crengine/commit/0930ec7230e720c148fd6f231d69558832b4d53a -- and that it may stumble on some cases. -- It's all perfectly fine if we make all these div with a single class name -- html = html:gsub([[
]], [[
]]) -- -- But we may as well make all class= have a single name to avoid other problems -- (no real risk with that, as we don't define any style for wikipedia class names, -- except div.thumb that always appears first). html = html:gsub([[(<[^>]* class="[^ "]+)%s+[^"]*"]], [[%1"]]) -- crengine seems to consider unknown tag as 'block' elements, so we may -- want to remove or replace those that should be considered 'inline' elements html = html:gsub("]*>", "") -- Fix internal wikipedia links with full server url (including lang) so -- ReaderLink can notice them and deal with them with a LookupWikipedia event. -- html = html:gsub([[href="/wiki/]], [[href="]]..wiki_base_url..[[/wiki/]]) -- -- Also, crengine deals strangely with percent encoded utf8 : -- if the link in the html is : -- we get from credocument:getLinkFromPosition() : http://fr.wikipedia.org/wiki/Fran____oix -- These are bytes "\xc3\x83\xc2\xa7", that is U+C3 and U+A7 encoded as UTF8, -- when we should have get "\xc3\xa7" ... -- We can avoid that by putting in the url plain unencoded UTF8 local hex_to_char = function(x) return string.char(tonumber(x, 16)) end local fixEncodedWikiPageTitle = function(wiki_page) wiki_page = wiki_page:gsub("%%(%x%x)", hex_to_char) return string.format([[href="%s/wiki/%s"]], wiki_base_url, wiki_page) end html = html:gsub([[href="/wiki/([^"]*)"]], fixEncodedWikiPageTitle) -- Remove href from links to non existant wiki page so they are not clickable : -- PageTitle____on -- (removal of the href="" will make them non clickable) html = html:gsub([[]* class="new"[^>]*>]], [[]]) -- Fix some other protocol-less links to wikipedia (href="//fr.wikipedia.org/w/index.php..) html = html:gsub([[href="//]], [[href="https://]]) -- crengine does not return link if multiple class names in () -- it would be no problem as we can't follow them, but when the user tap -- on it, the tap is propagated to other widgets and page change happen... 
-- html = html:gsub([[ (if it starts a line) or after (if it -- ends a line or a block) by wrapping it with U+200B ZERO WIDTH SPACE which will -- make the DOM tree walking code to find a link stop at it. -- html = html:gsub("(<[aA])", "\xE2\x80\x8B%1") -- html = html:gsub("()", "%1\xE2\x80\x8B") -- Fixed in crengine lvtinydom. if self.wiki_prettify then -- Prepend some symbols to section titles for a better visual feeling of hierarchy html = html:gsub("

", "

"..h1_sym.." ") html = html:gsub("

", "

"..h2_sym.." ") html = html:gsub("

", "

"..h3_sym.." ") html = html:gsub("

", "

"..h4_sym.." ") html = html:gsub("

", "
"..h5_sym.." ") html = html:gsub("
", "
"..h6_sym.." ") end -- Note: in all the gsub patterns above, we used lowercase for tags and attributes -- because it's how they are in wikipedia HTML and it makes the pattern simple. -- If one day this changes, they'll have to be replaced with href => [Hh][Rr][Ee][Ff] ... -- We can finally build the final HTML with some header of our own local saved_on = T(_("Saved on %1"), os.date("%b %d, %Y %H:%M:%S")) local online_version_htmllink = string.format([[%s]], wiki_base_url, page:gsub(" ", "_"), _("online version")) local see_online_version = T(_("See %1 for up-to-date content"), online_version_htmllink) epub:add("OEBPS/content.html", string.format([[ %s

%s

Wikipedia %s

%s
%s


%s ]], page_cleaned, page_htmltitle, lang:upper(), saved_on, see_online_version, html)) -- ---------------------------------------------------------------- -- OEBPS/images/* if include_images then local nb_images = #images for inum, img in ipairs(images) do -- Process can be interrupted at this point between each image download -- by tapping while the InfoMessage is displayed local go_on = UI:info(T(_("Fetching image %1 / %2 …"), inum, nb_images)) if not go_on then cancelled = true break end local src = img.src if use_img_2x and img.src2x then src = img.src2x end logger.dbg("Getting img ", src) local success, content = getUrlContent(src) -- success, content = getUrlContent(src..".unexistant") -- to simulate failure if success then logger.dbg("success, size:", #content) else logger.info("failed fetching:", src) end if success then epub:add("OEBPS/"..img.imgpath, content) else go_on = UI:confirm(T(_("Downloading image %1 failed. Continue anyway?"), inum), _("Stop"), _("Continue")) if not go_on then cancelled = true break end end end end -- Done with adding files if cancelled then if UI:confirm(_("Download did not complete.\nDo you want to create an EPUB with the already downloaded images?"), _("Don't create"), _("Create")) then cancelled = false end end if cancelled then UI:info(_("Canceled. Cleaning up…")) else UI:info(_("Packing EPUB…")) end epub:close() -- This was nearly a no-op, so sleep a bit to make that progress step seen util.usleep(300000) UI:reset() -- close last InfoMessage if cancelled then -- Build was cancelled, remove half created .epub if lfs.attributes(epub_path_tmp, "mode") == "file" then os.remove(epub_path_tmp) end return false end -- Finally move the .tmp to the final file os.rename(epub_path_tmp, epub_path) logger.info("successfully created:", epub_path) return true end -- Wrap Wikipedia:createEpub() with UI progress info, provided -- by Trapper module. 
function Wikipedia:createEpubWithUI(epub_path, page, lang, result_callback)
    -- Building the EPUB involves UI interactions (progress messages,
    -- confirmations). Trapper takes care of running our code in a
    -- coroutine that gets suspended while waiting for the user, and
    -- resumed by the UI widgets callbacks.
    local Trapper = require("ui/trapper")

    -- The job run by Trapper: build the EPUB (with images requested) and
    -- report the outcome as a boolean through result_callback.
    local function buildAndReport()
        Trapper:setPausedText("Download paused")
        -- An error raised by Wikipedia:createEpub() would just abort the
        -- coroutine (no reader crash, no error logged), so we catch it
        -- with pcall to be able to log it and report the failure.
        local no_error, created = pcall(self.createEpub, self, epub_path, page, lang, true)
        if no_error and created then
            result_callback(true)
            return
        end
        Trapper:reset() -- close any last widget not cleaned if error
        logger.warn("Wikipedia.createEpub pcall:", no_error, created)
        result_callback(false)
    end

    Trapper:wrap(buildAndReport)
end

return Wikipedia