diff --git a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua
index 20663095f..adfd93924 100644
--- a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua
+++ b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua
@@ -1,10 +1,15 @@
-local NewsHelpers = require("http_utilities")
 local Version = require("version")
+local ffiutil = require("ffi/util")
+local http = require("socket.http")
 local logger = require("logger")
+local ltn12 = require("ltn12")
+local socket = require("socket")
 local socket_url = require("socket.url")
+local socketutil = require("socketutil")
 local _ = require("gettext")
+local T = ffiutil.template

-local EpubBuilder = {
+local EpubDownloadBackend = {
     -- Can be set so HTTP requests will be done under Trapper and
     -- be interruptible
     trap_widget = nil,
@@ -12,89 +17,8 @@ local EpubBuilder = {
     -- and error() with this code. We make the value of this error
     -- accessible here so that caller can know it's a user dismiss.
     dismissed_error_code = "Interrupted by user",
-    title = nil,
-    ncx_toc = nil,
-    ncx_manifest = nil,
-    ncx_contents = nil,
-    ncx_images = nil,
 }
-
-function EpubBuilder:new(o)
-    o = o or {}
-    self.__index = self
-    setmetatable(o, self)
-
-    return o
-end
-
-function EpubBuilder:build(abs_output_path)
-    -- Open the zip file (with .tmp for now, as crengine may still
-    -- have a handle to the final epub_path, and we don't want to
-    -- delete a good one if we fail/cancel later)
-    local tmp_path = abs_output_path .. ".tmp"
-    local ZipWriter = require("ffi/zipwriter")
-    local epub = ZipWriter:new{}
-
-    if not epub:open(tmp_path) then
-        logger.dbg("Failed to open tmp_path")
-        return false
-    end
-
-    epub:add("mimetype", "application/epub+zip")
-    epub:add("META-INF/container.xml", [[<?xml version="1.0"?>
-<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
-  <rootfiles>
-    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
-  </rootfiles>
-</container>]])
-
-    -- Add the manifest.
-    if not self.ncx_manifest or #self.ncx_manifest == 0 then
-        error("EPUB does not contain a valid manifest.")
-    end
-    --logger.dbg("Adding Manifest:", self.ncx_manifest)
-    epub:add("OEBPS/content.opf", table.concat(self.ncx_manifest))
-
-    -- Add the table of contents.
-    if not self.ncx_toc or #self.ncx_toc == 0 then
-        error("EPUB does not contain a valid table of contents.")
-    end
-    --logger.dbg("Adding TOC:", self.ncx_toc)
-    epub:add("OEBPS/toc.ncx", table.concat(self.ncx_toc))
-
-    -- Add the contents.
-    if not self.ncx_contents or #self.ncx_manifest == 0 then
-        error("EPUB does not contain any content.")
-    end
-    --logger.dbg("Adding Content:", self.ncx_contents)
-
-    for index, content in ipairs(self.ncx_contents) do
-        epub:add("OEBPS/" .. content.filename, content.html)
-    end
-
-    -- Add the images.
-    --logger.dbg("Adding Images:", self.ncx_images)
-    if self.ncx_images then
-        for index, image in ipairs(self.ncx_images) do
-            epub:add(
-                "OEBPS/" .. image.path,
-                image.content,
-                image.no_compression
-            )
-        end
-    end
-
-    epub:close()
-    os.rename(tmp_path, abs_output_path)
-
-    collectgarbage()
-
-end
-
-function EpubBuilder:release()
-    -- Stub for cleanup methods
-end
+local max_redirects = 5; -- prevent infinite redirects

 -- filter HTML using CSS selector
 local function filter(text, element)
@@ -144,9 +68,79 @@ local function filter(text, element)
     return "<!DOCTYPE html><html><head></head><body>" .. filtered .. "</body></html>"
 end

-function EpubBuilder:getResponseAsString(url)
-    logger.dbg("EpubBuilder:getResponseAsString(", url, ")")
-    local success, content = NewsHelpers:getUrlContent(url)
+-- Get URL content
+local function getUrlContent(url, timeout, maxtime, redirectCount)
+    logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")")
+    if not redirectCount then
+        redirectCount = 0
+    elseif redirectCount == max_redirects then
+        error("EpubDownloadBackend: reached max redirects: " .. redirectCount)
+    end
+
+    if not timeout then timeout = 10 end
+    logger.dbg("timeout:", timeout)
+
+    local sink = {}
+    local parsed = socket_url.parse(url)
+    socketutil:set_timeout(timeout, maxtime or 30)
+    local request = {
+        url = url,
+        method = "GET",
+        sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink),
+    }
+    logger.dbg("request:", request)
+    local code, headers, status = socket.skip(1, http.request(request))
+    socketutil:reset_timeout()
+    logger.dbg("After http.request")
+    local content = table.concat(sink) -- empty or content accumulated till now
+    logger.dbg("type(code):", type(code))
+    logger.dbg("code:", code)
+    logger.dbg("headers:", headers)
+    logger.dbg("status:", status)
+    logger.dbg("#content:", #content)
+
+    if code == socketutil.TIMEOUT_CODE or
+       code == socketutil.SSL_HANDSHAKE_CODE or
+       code == socketutil.SINK_TIMEOUT_CODE
+    then
+        logger.warn("request interrupted:", code)
+        return false, code
+    end
+    if headers == nil then
+        logger.warn("No HTTP headers:", code, status)
+        return false, "Network or remote server unavailable"
+    end
+    if not code or string.sub(code, 1, 1) ~= "2" then -- all 200..299 HTTP codes are OK
+        if code and code > 299 and code < 400 and headers and headers.location then -- handle 301, 302...
+            local redirected_url = headers.location
+            local parsed_redirect_location = socket_url.parse(redirected_url)
+            if not parsed_redirect_location.host then
+                parsed_redirect_location.host = parsed.host
+                parsed_redirect_location.scheme = parsed.scheme
+                redirected_url = socket_url.build(parsed_redirect_location)
+            end
+            logger.dbg("getUrlContent: Redirecting to url: ", redirected_url)
+            return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
+        else
+            logger.warn("HTTP status not okay:", code, status)
+            error("EpubDownloadBackend: Don't know how to handle HTTP response status: " .. tostring(status))
+        end
+    end
+    if headers and headers["content-length"] then
+        -- Check we really got the announced content size
+        local content_length = tonumber(headers["content-length"])
+        if #content ~= content_length then
+            return false, "Incomplete content received"
+        end
+    end
+    logger.dbg("Returning content ok")
+    return true, content
+end
+
+function EpubDownloadBackend:getResponseAsString(url)
+    logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")")
+    local success, content = getUrlContent(url)
     if (success) then
         return content
     else
@@ -154,14 +148,38 @@ function EpubBuilder:getResponseAsString(url)
     end
 end

-function EpubBuilder:setTrapWidget(trap_widget)
+function EpubDownloadBackend:setTrapWidget(trap_widget)
     self.trap_widget = trap_widget
 end

-function EpubBuilder:resetTrapWidget()
+function EpubDownloadBackend:resetTrapWidget()
     self.trap_widget = nil
 end

+function EpubDownloadBackend:loadPage(url)
+    local completed, success, content
+    if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget()
+        local Trapper = require("ui/trapper")
+        local timeout, maxtime = 30, 60
+        -- We use dismissableRunInSubprocess with complex return values:
+        completed, success, content = Trapper:dismissableRunInSubprocess(function()
+            return getUrlContent(url, timeout, maxtime)
+        end, self.trap_widget)
+        if not completed then
+            error(self.dismissed_error_code) -- "Interrupted by user"
+        end
+    else
+        local timeout, maxtime = 10, 60
+        success, content = getUrlContent(url, timeout, maxtime)
+    end
+    logger.dbg("success:", success, "type(content):", type(content), "content:", content:sub(1, 500), "...")
+    if not success then
+        error(content)
+    else
+        return content
+    end
+end
+
 local ext_to_mimetype = {
     png = "image/png",
     jpg = "image/jpeg",
@@ -177,15 +195,29 @@ local ext_to_mimetype = {
     ttf = "application/truetype",
     woff = "application/font-woff",
 }

--- GetPublishableHtml
-function EpubBuilder:getImagesAndHtml(html, url, include_images, filter_enable, filter_element)
+-- Create an epub file (possibly with images)
+function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, message, filter_enable, filter_element)
+    logger.dbg("EpubDownloadBackend:createEpub(", epub_path, ")")
+    -- Use Trapper to display progress and ask questions through the UI.
+    -- We need to have been Trapper.wrap()'ed for UI to be used, otherwise
+    -- Trapper:info() and Trapper:confirm() will just use logger.
+    local UI = require("ui/trapper")
+
     -- We may need to build absolute urls for non-absolute links and images urls
     local base_url = socket_url.parse(url)
+
+    local cancelled = false
+    local page_htmltitle = html:match([[<title>(.*)</title>]])
+    logger.dbg("page_htmltitle is ", page_htmltitle)
+--    local sections = html.sections -- Wikipedia provided TOC
+    local bookid = "bookid_placeholder" --string.format("wikipedia_%s_%s_%s", lang, phtml.pageid, phtml.revid)
+    -- Not sure if this bookid may ever be used by indexing software/calibre, but if it is,
+    -- should it change if content is updated (as now, including the wikipedia revisionId),
+    -- or should it stay the same even if revid changes (content of the same book updated).
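The trap-widget mechanism used by loadPage() is easiest to see from the caller's side. A minimal sketch, assuming an InfoMessage as the dismiss target and a placeholder URL (neither is part of this patch):

```lua
local Trapper = require("ui/trapper")
local InfoMessage = require("ui/widget/infomessage")
local UIManager = require("ui/uimanager")
local EpubDownloadBackend = require("epubdownloadbackend")

Trapper:wrap(function()
    -- Any widget shown during the download can serve as the dismiss target;
    -- tapping it interrupts the subprocess started by loadPage().
    local info = InfoMessage:new{ text = "Downloading article…" }
    UIManager:show(info)
    EpubDownloadBackend:setTrapWidget(info)
    local ok, result = pcall(function()
        return EpubDownloadBackend:loadPage("https://example.com/article") -- placeholder URL
    end)
    EpubDownloadBackend:resetTrapWidget()
    UIManager:close(info)
    if not ok and tostring(result):find(EpubDownloadBackend.dismissed_error_code, 1, true) then
        return -- user tapped: a cancellation, not a network error
    end
end)
```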
+    if filter_enable then html = filter(html, filter_element) end
     local images = {}
     local seen_images = {}
     local imagenum = 1
     local cover_imgid = nil -- best candidate for cover among our images
-    html = filter_enable and filter(html, filter_element) or html
-
     local processImg = function(img_tag)
         local src = img_tag:match([[src="([^"]*)"]])
         if src == nil or src == "" then
@@ -240,20 +272,13 @@ function EpubBuilder:getImagesAndHtml(html, url, include_images, filter_enable,
             width = width,
             height = height,
         }
-
+        table.insert(images, cur_image)
         seen_images[src] = cur_image
         -- Use first image of reasonable size (not an icon) and portrait-like as cover-image
         if not cover_imgid and width and width > 50 and height and height > 50 and height > width then
             logger.dbg("Found a suitable cover image")
             cover_imgid = imgid
-            cur_image["cover_image"] = true
         end
-
-        table.insert(
-            images,
-            cur_image
-        )
-
         imagenum = imagenum + 1
     end
     -- crengine will NOT use width and height attributes, but it will use
@@ -271,53 +296,130 @@ function EpubBuilder:getImagesAndHtml(html, url, include_images, filter_enable,
         local style = table.concat(style_props, "; ")
         return string.format([[<img src="%s" style="%s" alt=""/>]], cur_image.imgpath, style)
     end
+    html = html:gsub("(<%s*img [^>]*>)", processImg)
+    logger.dbg("Images found in html:", images)

-    if include_images then
-        html = html:gsub("(<%s*img [^>]*>)", processImg)
-    else
+    -- See what to do with images
+    local use_img_2x = false
+    if not include_images then
         -- Remove img tags to avoid little blank squares of missing images
         html = html:gsub("<%s*img [^>]*>", "")
         -- We could remove the whole image container <div class="thumb"...>,
        -- but it's a lot of nested <div>
and not easy to do. -- So the user will see the image legends and know a bit about - -- the images they chose to not get. + -- the images he chose to not get. end - -- Force a GC to free the memory we used (the second call may help - -- reclaim more memory). - collectgarbage() - collectgarbage() - return images, html -end + UI:info(T(_("%1\n\nBuilding EPUB…"), message)) + -- Open the zip file (with .tmp for now, as crengine may still + -- have a handle to the final epub_path, and we don't want to + -- delete a good one if we fail/cancel later) + local epub_path_tmp = epub_path .. ".tmp" + local ZipWriter = require("ffi/zipwriter") + local epub = ZipWriter:new{} + if not epub:open(epub_path_tmp) then + logger.dbg("Failed to open epub_path_tmp") + return false + end -function EpubBuilder:setTitle(title) - self.title = title -end + -- We now create and add all the required epub files + -- ---------------------------------------------------------------- + -- /mimetype : always "application/epub+zip" + epub:add("mimetype", "application/epub+zip") -function EpubBuilder:addToc(chapters) + -- ---------------------------------------------------------------- + -- /META-INF/container.xml : always the same content + epub:add("META-INF/container.xml", [[ + + + + + +]]) + logger.dbg("Added META-INF/container.xml") + + -- ---------------------------------------------------------------- + -- OEBPS/content.opf : metadata + list of other files (paths relative to OEBPS/ directory) + -- Other possible items in this file that are of no interest to crengine : + -- In : + -- + -- + -- (crengine only uses to get the cover image) + -- In : + -- + -- And a section : + -- + -- + -- + -- + local content_opf_parts = {} + -- head + local meta_cover = "" + if include_images and cover_imgid then + meta_cover = string.format([[]], cover_imgid) + end + logger.dbg("meta_cover:", meta_cover) + table.insert(content_opf_parts, string.format([[ + + + + %s + KOReader %s + %s + + + + + +]], page_htmltitle, Version:getCurrentRevision(), meta_cover)) + -- images files + if include_images then + for inum, img in ipairs(images) do + table.insert(content_opf_parts, string.format([[ %s]], img.imgid, img.imgpath, img.mimetype, "\n")) + end + end + -- tail + table.insert(content_opf_parts, [[ + + + + + +]]) + epub:add("OEBPS/content.opf", table.concat(content_opf_parts)) + logger.dbg("Added OEBPS/content.opf") + + -- ---------------------------------------------------------------- + -- OEBPS/stylesheet.css + --- @todo We told it we'd include a stylesheet.css, so it's probably best + -- that we do. In theory, we could try to fetch any *.css files linked in + -- the main html. + epub:add("OEBPS/stylesheet.css", [[ +/* Empty */ +]]) + logger.dbg("Added OEBPS/stylesheet.css") + + -- ---------------------------------------------------------------- + -- OEBPS/toc.ncx : table of content local toc_ncx_parts = {} local depth = 0 - local num = 0 - - for index, chapter in ipairs(chapters) do - -- Add nav part for each chapter. 
- table.insert( - toc_ncx_parts, - string.format([[%s]], - num, - num, - chapter.title, - chapter.md5 - ) - ) - num = num + 1 + local cur_level = 0 + local np_end = [[]] + local num = 1 + -- Add our own first section for first page, with page name as title + table.insert(toc_ncx_parts, string.format([[%s]], num, num, page_htmltitle)) + table.insert(toc_ncx_parts, np_end) + --- @todo Not essential for most articles, but longer articles might benefit + -- from parsing tags and constructing a proper TOC + while cur_level > 0 do + table.insert(toc_ncx_parts, np_end) + cur_level = cur_level - 1 end - -- Prepend NCX head. - table.insert( - toc_ncx_parts, - 1, - string.format([[ + -- Prepend NCX head + table.insert(toc_ncx_parts, 1, string.format([[ @@ -331,172 +433,99 @@ function EpubBuilder:addToc(chapters) %s -]], -"placeholder_bookid", -depth, -self.title - ) - ) - -- Append NCX tail. - table.insert( - toc_ncx_parts, - [[ +]], bookid, depth, page_htmltitle)) + -- Append NCX tail + table.insert(toc_ncx_parts, [[ -]] - ) - self.ncx_toc = toc_ncx_parts -end +]]) + epub:add("OEBPS/toc.ncx", table.concat(toc_ncx_parts)) + logger.dbg("Added OEBPS/toc.ncx") -function EpubBuilder:addManifest(chapters, images) - local content_opf_parts = {} - local spine_parts = {} - local meta_cover = "" + -- ---------------------------------------------------------------- + -- OEBPS/content.html + epub:add("OEBPS/content.html", html) + logger.dbg("Added OEBPS/content.html") - if #images > 0 then - for inum, image in ipairs(images) do - table.insert( - content_opf_parts, - string.format([[%s]], - image.imgid, - image.imgpath, - image.mimetype, - "\n" - ) - ) - -- See if the image has the tag we previously set indicating - -- it can be used as a cover image. - if image.cover_image then - meta_cover = string.format([[]], image.imgid) + -- Force a GC to free the memory we used till now (the second call may + -- help reclaim more memory). + collectgarbage() + collectgarbage() + + -- ---------------------------------------------------------------- + -- OEBPS/images/* + if include_images then + local nb_images = #images + for inum, img in ipairs(images) do + -- Process can be interrupted at this point between each image download + -- by tapping while the InfoMessage is displayed + -- We use the fast_refresh option from image #2 for a quicker download + local go_on = UI:info(T(_("%1\n\nRetrieving image %2 / %3 …"), message, inum, nb_images), inum >= 2) + if not go_on then + logger.dbg("cancelled") + cancelled = true + break + end + local src = img.src + if use_img_2x and img.src2x then + src = img.src2x + end + logger.dbg("Getting img ", src) + local success, content = getUrlContent(src) + -- success, content = getUrlContent(src..".unexistant") -- to simulate failure + if success then + logger.dbg("success, size:", #content) + else + logger.dbg("failed fetching:", src) + end + if success then + -- Images do not need to be compressed, so spare some cpu cycles + local no_compression = true + if img.mimetype == "image/svg+xml" then -- except for SVG images (which are XML text) + no_compression = false + end + epub:add("OEBPS/"..img.imgpath, content, no_compression) + logger.dbg("Adding OEBPS/"..img.imgpath) + else + go_on = UI:confirm(T(_("Downloading image %1 failed. 
Continue anyway?"), inum), _("Stop"), _("Continue")) + if not go_on then + cancelled = true + break + end end end end - if #chapters > 0 then - for index, chapter in ipairs(chapters) do - table.insert( - content_opf_parts, - string.format([[%s]], - chapter.md5, - chapter.md5, - "\n" - ) - ) - table.insert( - spine_parts, - string.format([[%s]], - chapter.md5, - "\n" - ) - ) + -- Done with adding files + if cancelled then + if UI:confirm(_("Download did not complete.\nDo you want to create an EPUB with the already downloaded images?"), _("Don't create"), _("Create")) then + cancelled = false end end - - logger.dbg("meta_cover:", meta_cover) - - table.insert( - content_opf_parts, - 1, - string.format([[ - - - %s - KOReader %s - %s - - - -]], self.title, Version:getCurrentRevision(), meta_cover) - ) - -- tail - table.insert( - content_opf_parts, - string.format([[ - - -%s - - -]], table.concat(spine_parts) - ) - ) - - self.ncx_manifest = content_opf_parts -end - -function EpubBuilder:addContents(chapters) - local contents = {} - - for index, chapter in ipairs(chapters) do - table.insert( - contents, - { - filename = chapter.md5 .. ".html", - html = chapter.html, - } - ) + if cancelled then + UI:info(_("Canceled. Cleaning up…")) + else + UI:info(T(_("%1\n\nPacking EPUB…"), message)) end + epub:close() - self.ncx_contents = contents -end - -function EpubBuilder:addImages(images) - local images_table = {} - - for index, image in ipairs(images) do - if not image.src then - return - end - - local src = image.src - local success, content = NewsHelpers:getUrlContent(src) - -- success, content = NewsHelpers:getUrlContent(src..".unexistant") -- to simulate failure - if success then - logger.dbg("EpubBuilder:addImages = success, size:", #content) - else - logger.dbg("EpubBuilder:addImages = failure fetching:", src) - end - - if success then - -- Images do not need to be compressed, so spare some cpu cycles - local no_compression = true - if image.mimetype == "image/svg+xml" then -- except for SVG images (which are XML text) - no_compression = false - end - table.insert( - images_table, - { - path = image.imgpath, - content = content, - compression = no_compression - } - ) + if cancelled then + -- Build was cancelled, remove half created .epub + if lfs.attributes(epub_path_tmp, "mode") == "file" then + os.remove(epub_path_tmp) end + return false end - self.ncx_images = images_table + -- Finally move the .tmp to the final file + os.rename(epub_path_tmp, epub_path) + logger.dbg("successfully created:", epub_path) + -- Force a GC to free the memory we used (the second call may help + -- reclaim more memory). + collectgarbage() + collectgarbage() + return true end --- There can be multiple links. --- For now we just assume the first link is probably the right one. ---- @todo Write unit tests. --- Some feeds that can be used for unit test. --- http://fransdejonge.com/feed/ for multiple links. --- https://github.com/koreader/koreader/commits/master.atom for single link with attributes. 
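For reference, these are the three link shapes the getFeedLink() helper below has to cope with (values are made up):

```lua
-- A plain string link:
local single = "https://example.com/article"
-- A single <link href="…"/> parsed into a table with attributes:
local with_attr = { _attr = { href = "https://example.com/article" } }
-- Multiple <link> elements; the first one wins:
local multiple = {
    { _attr = { href = "https://example.com/article" } },
    { _attr = { href = "https://example.com/comments" } },
}
```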
-function EpubBuilder:getFeedLink(possible_link) - local E = {} - logger.dbg("Possible link", possible_link) - if type(possible_link) == "string" then - return possible_link - elseif (possible_link._attr or E).href then - return possible_link._attr.href - elseif ((possible_link[1] or E)._attr or E).href then - return possible_link[1]._attr.href - end -end - - -return EpubBuilder +return EpubDownloadBackend diff --git a/plugins/newsdownloader.koplugin/feed_source.lua b/plugins/newsdownloader.koplugin/feed_source.lua deleted file mode 100644 index 4f7a50b36..000000000 --- a/plugins/newsdownloader.koplugin/feed_source.lua +++ /dev/null @@ -1,409 +0,0 @@ -local BD = require("ui/bidi") -local DownloadBackend = require("epubdownloadbackend") -local NewsHelpers = require("http_utilities") -local dateparser = require("lib.dateparser") -local logger = require("logger") -local md5 = require("ffi/sha2").md5 -local util = require("util") -local _ = require("gettext") -local N_ = _.ngettext -local FFIUtil = require("ffi/util") -local T = FFIUtil.template - -local FeedSource = { - file_extension = ".epub" -} - -function FeedSource:new(o) - o = o or {} - self.__index = self - setmetatable(o, self) - return o -end - -function FeedSource:getInitializedFeeds(feed_list, progress_callback, error_callback) - local initialized_feeds = {} - local unsupported_feeds_urls = {} - - for idx, feed in ipairs(feed_list) do - local url = feed[1] - -- Show a UI update - progress_callback(T( - _("Setting up feed %1 of %2."), - idx, - url - )) - -- Initialize the feed - local ok, response = pcall(function() - return self:initializeDocument( - self:fetchDocumentByUrl(url) - ) - end) - -- If the initialization worked, add the feed - -- to a list of initialized feeds - if ok and response then - table.insert(initialized_feeds, { - config = feed, - document = response, - }) - else - table.insert(unsupported_feeds_urls, { - url .. ": " .. response - }) - end - end - - if #unsupported_feeds_urls > 0 then - -- When some errors are present, we get a sour message that includes - -- information about the source of the error. - local unsupported_urls = "" - for key, value in pairs(unsupported_feeds_urls) do - -- Create the error message. - -- unsupported_urls = unsupported_urls .. " " .. value[1] .. " " .. value[2] - unsupported_urls = value[1] .. "\n\n" - -- Not sure what this does. - if key ~= #unsupported_feeds_urls then - unsupported_urls = BD.url(unsupported_urls) .. ", " - end - end - error_callback( - T(N_("Could not initialize a feed:\n\n%2\n\nPlease review your feed configuration.", "Could not initialize %1 feeds:\n\n%2\n\nPlease review your feed configurations.", #unsupported_feeds_urls), - #unsupported_feeds_urls, unsupported_urls) - ) - end - - return initialized_feeds -end - --- This function contacts the feed website and attempts to get --- the RSS/Atom document with a list of the latest items. -function FeedSource:fetchDocumentByUrl(url) - local document - -- Get the XML document representing the feed - local ok, response = pcall(function() - local success, content = NewsHelpers:getUrlContent(url) - if (success) then - return content - else - error("Failed to download content for url: " .. url, 0) - end - end) - -- Check to see if a response is available to deserialize. 
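The deserialization step mentioned here goes through LuaXML's simpleTreeHandler (see http_utilities.lua further down). A minimal sketch of what it yields, fed with a made-up feed:

```lua
local treehdl = require("lib/handler")
local libxml = require("lib/xml")

local xml_str = [[<rss><channel><title>Example feed</title>
<item><title>First</title><link>https://example.com/1</link></item>
</channel></rss>]]

local xmlhandler = treehdl.simpleTreeHandler()
libxml.xmlParser(xmlhandler):parse(xml_str)
local document = xmlhandler.root
-- document.rss.channel.title      --> "Example feed"
-- document.rss.channel.item.title --> "First" (a single item is not wrapped in a list,
-- which is why the feed-type checks below probe for item[1] before treating it as an array)
```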
- if ok then - -- Deserialize the XML document into something Lua can use - document = NewsHelpers:deserializeXMLString(response) - end - -- Return the document or any errors that may have occured - if ok or document then - return document - else - if not ok then - error("(Reason: Failed to download feed document)", 0) - else - error("(Reason: Error during feed document deserialization)", 0) - end - end -end - --- Supply this method with the XML document returned by the feed, --- and it will initialized the document by extracting the feed title, --- feed items, and items count. -function FeedSource:initializeDocument(document) - local feed_title - local feed_items - local total_items - - local ok = pcall(function() - return self:getFeedType( - document, - function() - -- RSS callback - feed_title = util.htmlEntitiesToUtf8(document.rss.channel.title) - feed_items = document.rss.channel.item - total_items = #document.rss.channel.item - end, - function() - -- Atom callback - feed_title = FeedSource:getFeedTitle(document.feed.title) - feed_items = document.feed.entry - total_items = #document.feed.entry - end, - function() - -- RDF callback - feed_title = util.htmlEntitiesToUtf8(document["rdf:RDF"].channel.title) - feed_items = document["rdf:RDF"].item - total_items = #document["rdf:RDF"].item - end - ) - end) - - if ok then - document.title = feed_title - document.items = feed_items - document.total_items = total_items - return document - else - error(_("Could not initialize feed document"), 0) - end -end - -function FeedSource:getItemsContent(feed, progress_callback, error_callback) - local limit = tonumber(feed.config.limit) - local total_items = (limit == 0) and - feed.document.total_items or - limit - local initialized_feed_items = {} - -- Download each ite0m in the feed - for index, item in pairs(feed.document.items) do - -- If limit has been met, stop downloading feed. - if limit ~= 0 and index - 1 == limit then - break - end - -- Display feedback to user. - progress_callback(T( - _("%3\n Downloading item %1 of %2"), - index, - total_items, - feed.document.title - )) - -- Download the article's HTML. - local ok, response = pcall(function() - return self:initializeItemHtml( - feed, - self:getItemHtml( - item, - feed.config.download_full_article - ) - ) - end) - - -- Add the result to our table, or send a - -- result to the error callback. - if ok then - table.insert(initialized_feed_items, { - html = response.html, - images = response.images, - item_slug = FeedSource:getItemTitleWithDate(item), - item_title = item.title, - md5 = md5(item.title), - feed_title = feed.document.title, - }) - else - error_callback( - T(_("Could not get content for: %1"), feed.document.title) - ) - end - - end - - if #initialized_feed_items > 0 then - return initialized_feed_items - else - return nil - end -end - -function FeedSource:initializeItemHtml(feed, html) - local url = feed.config[1] - -- local download_full_article = feed.config.download_full_article ~= false - local include_images = feed.config.include_images ~= false - local filter_element = feed.config.filter_element or - feed.config.filter_element == nil - local enable_filter = feed.config.enable_filter ~= false - local item_images, item_html = DownloadBackend:getImagesAndHtml( - html, - url, - include_images, - enable_filter, - filter_element - ) - return { - html = item_html, - images = item_images - } -end - -function FeedSource:getFeedType(document, rss_cb, atom_cb, rdf_cb) - -- Check to see if the feed uses RSS. 
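Minimal, hypothetical document shapes that satisfy each of the duck-typing checks below:

```lua
local rss_doc  = { rss = { channel = { title = "t", item = { { title = "i", link = "l" } } } } }
local atom_doc = { feed = { title = "t", entry = { { title = "i", link = "l" } } } }
local rdf_doc  = { ["rdf:RDF"] = { channel = { title = "t" }, item = {} } }
```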
- local is_rss = document.rss and - document.rss.channel and - document.rss.channel.title and - document.rss.channel.item and - document.rss.channel.item[1] and - document.rss.channel.item[1].title and - document.rss.channel.item[1].link - -- Check to see if the feed uses Atom. - local is_atom = document.feed and - document.feed.title and - document.feed.entry[1] and - document.feed.entry[1].title and - document.feed.entry[1].link - -- Setup the feed values based on feed type - local is_rdf = document["rdf:RDF"] and - document["rdf:RDF"].channel and - document["rdf:RDF"].channel.title - if is_atom then - return atom_cb() - elseif is_rss then - return rss_cb() - elseif is_rdf then - return rdf_cb() - end - -- Return the values through our callback, or call an - -- error message if the feed wasn't RSS or Atom - if not is_rss or not is_atom or not is_rdf then - local error_message - if not is_rss or not is_rdf then - error_message = _("(Reason: Couldn't process RSS)") - elseif not is_atom then - error_message = _("(Reason: Couldn't process Atom)") - end - error(error_message) - end -end - -function FeedSource:getItemHtml(item, download_full_article) - if download_full_article then - return NewsHelpers:loadPage( - FeedSource:getFeedLink(item.link) - ) - else - local feed_description = item.description or item.summary - local footer = _("This is just a description of the feed. To download the full article instead, go to the News Downloader settings and change 'download_full_article' to 'true'.") - return string.format([[ - -%s -
-<header><h2>%s</h2></header>
-<article>%s</article>
-<br><footer><small>%s</small></footer>
- -]], item.title, item.title, feed_description, footer) - end -end - --- @todo: move this elsewhere -function FeedSource:getEpubOutputDir(download_dir, sub_dir, epub_title) - - local feed_output_dir = ("%s%s/"):format( - download_dir, - util.getSafeFilename(util.htmlEntitiesToUtf8(sub_dir))) - - -- Create the output directory if it doesn't exist. - if not lfs.attributes(feed_output_dir, "mode") then - lfs.mkdir(feed_output_dir) - end - - local file_name = FeedSource:getFeedTitle(epub_title) - - return ("%s%s%s"):format( - feed_output_dir, - file_name, - self.file_extension - ) -end - -function FeedSource:createEpub(title, chapters, abs_output_path, progress_callback, error_callback) - - local file_exists = lfs.attributes(abs_output_path, "mode") - - if file_exists then - logger.dbg("NewsDownloader: Skipping. EPUB file already exists", abs_output_path) - return true - end - - if #chapters == 0 then - error(_("Error: chapters contains 0 items"), 0) - end - - local images = {} - - for index, chapter in ipairs(chapters) do - for jndex, image in ipairs(chapter.images) do - table.insert( - images, - image - ) - end - end - - local epub = DownloadBackend:new{} - - progress_callback(T(_("Building EPUB %1"), title)) - epub:setTitle(title) - epub:addToc(chapters) - epub:addManifest(chapters, images) - - progress_callback(T(_("Building EPUB %1: %2"), title, _("Adding contents"))) - epub:addContents(chapters) - - progress_callback(T(_("Building EPUB %1: %2"), title, _("Adding images"))) - epub:addImages(images) - - progress_callback(T(_("Building EPUB %1: %2"), title, _("Writing EPUB to disk"))) - local ok = pcall(function() - return epub:build(abs_output_path) - end) - - if ok then - if lfs.attributes(abs_output_path, "mode") then - return true - end - end - - return false -end - -local function parseDate(dateTime) - -- Uses lua-feedparser https://github.com/slact/lua-feedparser - -- feedparser is available under the (new) BSD license. - -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser - local date = dateparser.parse(dateTime) - return os.date("%y-%m-%d_%H-%M_", date) -end - -function FeedSource:getFeedTitleWithDate(feed) - local title = util.getSafeFilename(FeedSource:getFeedTitle(feed.document.title)) - return os.date("%y-%m-%d_%H-%M_") .. title -end - --- Creates a title with date from a feed item. -function FeedSource:getItemTitleWithDate(item) - local title = util.getSafeFilename(FeedSource:getFeedTitle(item.title)) - if item.updated then - title = parseDate(item.updated) .. title - elseif item.pubDate then - title = parseDate(item.pubDate) .. title - elseif item.published then - title = parseDate(item.published) .. title - end - return title -end - --- If a title looks like blabla it'll just be feed.title. --- If a title looks like blabla then we get a table --- where [1] is the title string and the attributes are also available. -function FeedSource:getFeedTitle(possible_title) - if type(possible_title) == "string" then - return util.htmlEntitiesToUtf8(possible_title) - elseif possible_title[1] and type(possible_title[1]) == "string" then - return util.htmlEntitiesToUtf8(possible_title[1]) - end -end --- There can be multiple links. --- For now we just assume the first link is probably the right one. ---- @todo Write unit tests. --- Some feeds that can be used for unit test. --- http://fransdejonge.com/feed/ for multiple links. --- https://github.com/koreader/koreader/commits/master.atom for single link with attributes. 
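The date-prefixed filenames produced by parseDate() and getItemTitleWithDate() above come out roughly like this (input date made up):

```lua
local dateparser = require("lib.dateparser")
local timestamp = dateparser.parse("Tue, 10 May 2022 07:00:00 GMT") -- RSS pubDate format
print(os.date("%y-%m-%d_%H-%M_", timestamp) .. "Article_Title" .. ".epub")
-- e.g. "22-05-10_07-00_Article_Title.epub" (the hour follows the local timezone)
```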
-function FeedSource:getFeedLink(possible_link) - local E = {} - if type(possible_link) == "string" then - return possible_link - elseif (possible_link._attr or E).href then - return possible_link._attr.href - elseif ((possible_link[1] or E)._attr or E).href then - return possible_link[1]._attr.href - end -end - - -return FeedSource diff --git a/plugins/newsdownloader.koplugin/feed_view.lua b/plugins/newsdownloader.koplugin/feed_view.lua index b52f30f6d..2e68d637e 100644 --- a/plugins/newsdownloader.koplugin/feed_view.lua +++ b/plugins/newsdownloader.koplugin/feed_view.lua @@ -7,10 +7,7 @@ local FeedView = { DOWNLOAD_FULL_ARTICLE = "download_full_article", INCLUDE_IMAGES = "include_images", ENABLE_FILTER = "enable_filter", - FILTER_ELEMENT = "filter_element", - VOLUMIZE = "volumize", - ACTION_RESET_HISTORY = "reset_history", - ACTION_DELETE_FEED = "delete_feed", + FILTER_ELEMENT = "filter_element" } function FeedView:getList(feed_config, callback, edit_feed_attribute_callback, delete_feed_callback) @@ -52,7 +49,7 @@ function FeedView:getList(feed_config, callback, edit_feed_attribute_callback, d return view_content end -function FeedView:getItem(id, feed, edit_feed_callback, feed_action_callback) +function FeedView:getItem(id, feed, edit_feed_callback, delete_feed_callback) logger.dbg("NewsDownloader:", feed) @@ -70,7 +67,6 @@ function FeedView:getItem(id, feed, edit_feed_callback, feed_action_callback) local include_images = feed.include_images ~= false local enable_filter = feed.enable_filter ~= false local filter_element = feed.filter_element - local volumize = feed.volumize ~= false local vc = { { @@ -140,22 +136,11 @@ function FeedView:getItem(id, feed, edit_feed_callback, feed_action_callback) ) end }, - { - _("Volumize feed"), - volumize, - callback = function() - edit_feed_callback( - id, - FeedView.VOLUMIZE, - volumize - ) - end - }, } - -- These actions only pertain to initiated feeds, so we don't always - -- display them. - if feed_action_callback then + -- We don't always display this. For instance: if a feed + -- is being created, this button is not necessary. 
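Each entry in the view content built above pairs a label and its current value with an edit callback, along these lines (a sketch, not the exact table):

```lua
{
    _("Download full article"),
    download_full_article, -- current boolean value, shown as the entry's value
    callback = function()
        edit_feed_callback(id, FeedView.DOWNLOAD_FULL_ARTICLE, download_full_article)
    end
},
```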
+ if delete_feed_callback then table.insert( vc, "---" @@ -166,22 +151,8 @@ function FeedView:getItem(id, feed, edit_feed_callback, feed_action_callback) _("Delete feed"), "", callback = function() - feed_action_callback( - id, - FeedView.ACTION_DELETE_FEED - ) - end - } - ) - table.insert( - vc, - { - _("Reset feed history"), - "", - callback = function() - feed_action_callback( - url, - FeedView.ACTION_RESET_HISTORY + delete_feed_callback( + id ) end } diff --git a/plugins/newsdownloader.koplugin/http_utilities.lua b/plugins/newsdownloader.koplugin/http_utilities.lua deleted file mode 100644 index d79fc830e..000000000 --- a/plugins/newsdownloader.koplugin/http_utilities.lua +++ /dev/null @@ -1,126 +0,0 @@ -local logger = require("logger") -local http = require("socket.http") -local socketutil = require("socketutil") -local socket_url = require("socket.url") -local socket = require("socket") -local ltn12 = require("ltn12") - -local NewsHelpers = { -} - -local max_redirects = 5; --prevent infinite redirects - --- Get URL content -function NewsHelpers:getUrlContent(url, timeout, maxtime, redirectCount) - logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")") - if not redirectCount then - redirectCount = 0 - elseif redirectCount == max_redirects then - error("EpubDownloadBackend: reached max redirects: ", redirectCount) - end - - if not timeout then timeout = 10 end - logger.dbg("timeout:", timeout) - - local sink = {} - local parsed = socket_url.parse(url) - socketutil:set_timeout(timeout, maxtime or 30) - local request = { - url = url, - method = "GET", - sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink), - } - logger.dbg("request:", request) - local code, headers, status = socket.skip(1, http.request(request)) - socketutil:reset_timeout() - logger.dbg("After http.request") - local content = table.concat(sink) -- empty or content accumulated till now - logger.dbg("type(code):", type(code)) - logger.dbg("code:", code) - logger.dbg("headers:", headers) - logger.dbg("status:", status) - logger.dbg("#content:", #content) - - if code == socketutil.TIMEOUT_CODE or - code == socketutil.SSL_HANDSHAKE_CODE or - code == socketutil.SINK_TIMEOUT_CODE - then - logger.warn("request interrupted:", code) - return false, code - end - if headers == nil then - logger.warn("No HTTP headers:", code, status) - return false, "Network or remote server unavailable" - end - if not code or string.sub(code, 1, 1) ~= "2" then -- all 200..299 HTTP codes are OK - if code and code > 299 and code < 400 and headers and headers.location then -- handle 301, 302... - local redirected_url = headers.location - local parsed_redirect_location = socket_url.parse(redirected_url) - if not parsed_redirect_location.host then - parsed_redirect_location.host = parsed.host - parsed_redirect_location.scheme = parsed.scheme - redirected_url = socket_url.build(parsed_redirect_location) - end - logger.dbg("getUrlContent: Redirecting to url: ", redirected_url) - return self:getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1) - else - -- error("EpubDownloadBackend: Don't know how to handle HTTP response status: " .. 
status) - -- error("EpubDownloadBackend: Don't know how to handle HTTP response status.") - logger.warn("HTTP status not okay:", code, status) - return false, status - end - end - if headers and headers["content-length"] then - -- Check we really got the announced content size - local content_length = tonumber(headers["content-length"]) - if #content ~= content_length then - return false, "Incomplete content received" - end - end - logger.dbg("Returning content ok") - return true, content -end - -function NewsHelpers:loadPage(url) - logger.dbg("Load page: ", url) - local success, content ---[[ if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget() - local Trapper = require("ui/trapper") - local timeout, maxtime = 30, 60 - -- We use dismissableRunInSubprocess with complex return values: - completed, success, content = Trapper:dismissableRunInSubprocess(function() - return NewsHelpers:getUrlContent(url, timeout, maxtime) - end, self.trap_widget) - if not completed then - error(self.dismissed_error_code) -- "Interrupted by user" - end - else]]-- - local timeout, maxtime = 10, 60 - success, content = NewsHelpers:getUrlContent(url, timeout, maxtime) --- end - logger.dbg("success:", success, "type(content):", type(content), "content:", content:sub(1, 500), "...") - if not success then - error(content) - else - return content - end -end - -function NewsHelpers:deserializeXMLString(xml_str) - -- uses LuaXML https://github.com/manoelcampos/LuaXML - -- The MIT License (MIT) - -- Copyright (c) 2016 Manoel Campos da Silva Filho - -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML - local treehdl = require("lib/handler") - local libxml = require("lib/xml") - -- Instantiate the object that parses the XML file as a Lua table. - local xmlhandler = treehdl.simpleTreeHandler() - -- Instantiate the object that parses the XML to a Lua table. 
- local ok = pcall(function() - libxml.xmlParser(xmlhandler):parse(xml_str) - end) - if not ok then return end - return xmlhandler.root -end - -return NewsHelpers diff --git a/plugins/newsdownloader.koplugin/main.lua b/plugins/newsdownloader.koplugin/main.lua index 6db0ed90f..5ddf53c8b 100644 --- a/plugins/newsdownloader.koplugin/main.lua +++ b/plugins/newsdownloader.koplugin/main.lua @@ -1,9 +1,11 @@ local BD = require("ui/bidi") local DataStorage = require("datastorage") +--local DownloadBackend = require("internaldownloadbackend") +--local DownloadBackend = require("luahttpdownloadbackend") +local DownloadBackend = require("epubdownloadbackend") local ReadHistory = require("readhistory") local FFIUtil = require("ffi/util") local FeedView = require("feed_view") -local FeedSource = require("feed_source") local InfoMessage = require("ui/widget/infomessage") local LuaSettings = require("frontend/luasettings") local UIManager = require("ui/uimanager") @@ -13,6 +15,7 @@ local MultiConfirmBox = require("ui/widget/multiconfirmbox") local NetworkMgr = require("ui/network/manager") local Persist = require("persist") local WidgetContainer = require("ui/widget/container/widgetcontainer") +local dateparser = require("lib.dateparser") local logger = require("logger") local util = require("util") local _ = require("gettext") @@ -24,11 +27,10 @@ local NewsDownloader = WidgetContainer:new{ feed_config_file = "feed_config.lua", feed_config_path = nil, news_config_file = "news_settings.lua", - news_history_file = "news_history.lua", settings = nil, - history = nil, download_dir_name = "news", download_dir = nil, + file_extension = ".epub", config_key_custom_dl_dir = "custom_dl_dir", empty_feed = { [1] = "https://", @@ -36,12 +38,46 @@ local NewsDownloader = WidgetContainer:new{ download_full_article = true, include_images = true, enable_filter = false, - filter_element = "", - volumize = false + filter_element = "" }, kv = {} } +local FEED_TYPE_RSS = "rss" +local FEED_TYPE_ATOM = "atom" + +--local initialized = false +--local feed_config_file_name = "feed_config.lua" +--local news_downloader_config_file = "news_downloader_settings.lua + +-- If a title looks like blabla it'll just be feed.title. +-- If a title looks like blabla then we get a table +-- where [1] is the title string and the attributes are also available. +local function getFeedTitle(possible_title) + if type(possible_title) == "string" then + return util.htmlEntitiesToUtf8(possible_title) + elseif possible_title[1] and type(possible_title[1]) == "string" then + return util.htmlEntitiesToUtf8(possible_title[1]) + end +end + +-- There can be multiple links. +-- For now we just assume the first link is probably the right one. +--- @todo Write unit tests. +-- Some feeds that can be used for unit test. +-- http://fransdejonge.com/feed/ for multiple links. +-- https://github.com/koreader/koreader/commits/master.atom for single link with attributes. 
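The empty_feed template above mirrors what one entry of the user's feed_config.lua looks like; the file is loaded with dofile and must return a table of such entries. An illustrative, made-up example:

```lua
return {
    {
        "https://example.com/feed.xml", -- [1]: the feed URL
        limit = 5,                      -- 0 downloads every item
        download_full_article = true,   -- fetch each linked page rather than the RSS description
        include_images = true,
        enable_filter = false,
        filter_element = "div.article-content", -- CSS selector passed to filter()
    },
}
```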
+local function getFeedLink(possible_link) + local E = {} + if type(possible_link) == "string" then + return possible_link + elseif (possible_link._attr or E).href then + return possible_link._attr.href + elseif ((possible_link[1] or E)._attr or E).href then + return possible_link[1]._attr.href + end +end + function NewsDownloader:init() self.ui.menu:registerToMainMenu(self) end @@ -69,39 +105,7 @@ function NewsDownloader:getSubMenuItems() text = _("Sync news feeds"), keep_menu_open = true, callback = function(touchmenu_instance) - NetworkMgr:runWhenOnline( - function() self:syncAllFeedsWithUI( - touchmenu_instance, - function(feed_message) - -- Callback to fire after sync is finished - local UI = require("ui/trapper") - -- This callback is called after the - -- processing is complete. - -- - -- Clear the info widgets before displaying the next ui widget. - -- UI:clear() - -- Ask the user if they want to go to their downloads folder - -- or if they'd rather remain at the menu. - feed_message = feed_message _("Go to downloaders folder?") - local should_go_to_downloads = UI:confirm( - feed_message, - _("Close"), - _("Go to downloads") - ) - if should_go_to_downloads then - -- Go to downloads folder. - UI:clear() - self:openDownloadsFolder() - touchmenu_instance:closeMenu() - NetworkMgr:afterWifiAction() - return - else - -- Return to the menu. - NetworkMgr:afterWifiAction() - return - end - end - ) end) + NetworkMgr:runWhenOnline(function() self:loadConfigAndProcessFeedsWithUI(touchmenu_instance) end) end, }, { @@ -123,6 +127,17 @@ function NewsDownloader:getSubMenuItems() keep_menu_open = true, callback = function() self:setCustomDownloadDirectory() end, }, + { + text = _("Never download images"), + keep_menu_open = true, + checked_func = function() + return self.settings:isTrue("never_download_images") + end, + callback = function() + self.settings:toggle("never_download_images") + self.settings:flush() + end, + }, { text = _("Delete all downloaded items"), keep_menu_open = true, @@ -136,9 +151,6 @@ function NewsDownloader:getSubMenuItems() ) if should_delete then self:removeNewsButKeepFeedConfig() - -- Move user to the downloads folder to avoid an error where they - -- are within a feed folder which we have just deleted. - self:openDownloadsFolder() Trapper:reset() else Trapper:reset() @@ -161,7 +173,7 @@ function NewsDownloader:getSubMenuItems() } return sub_item_table end --- lazyInitialization sets up our variables to point to the +-- lazyInitialization sets up variables that point to the -- Downloads folder and the feeds configuration file. function NewsDownloader:lazyInitialization() if not self.initialized then @@ -176,8 +188,6 @@ function NewsDownloader:lazyInitialization() DataStorage:getFullDataDir(), self.download_dir_name) end - logger.dbg("NewsDownloader: initializing download history") - self.history = LuaSettings:open(("%s/%s"):format(DataStorage:getSettingsDir(), self.news_history_file)) logger.dbg("NewsDownloader: Custom directory set to:", self.download_dir) -- If the directory doesn't exist we will create it. if not lfs.attributes(self.download_dir, "mode") then @@ -195,16 +205,15 @@ function NewsDownloader:lazyInitialization() self.initialized = true end end --- This function loads the config file. If the config is not available --- then this function includes prompts for handling that. 
-function NewsDownloader:loadConfig() + +function NewsDownloader:loadConfigAndProcessFeeds(touchmenu_instance) local UI = require("ui/trapper") logger.dbg("force repaint due to upcoming blocking calls") - -- Check if the feed config file exists + local ok, feed_config = pcall(dofile, self.feed_config_path) if not ok or not feed_config then UI:info(T(_("Invalid configuration file. Detailed error message:\n%1"), feed_config)) - return false + return end -- If the file contains no table elements, then the user hasn't set any feeds. if #feed_config <= 0 then @@ -228,198 +237,344 @@ function NewsDownloader:loadConfig() feed_item_vc ) end - return false + return + end + + local never_download_images = self.settings:isTrue("never_download_images") + local unsupported_feeds_urls = {} + local total_feed_entries = #feed_config + local feed_message + + for idx, feed in ipairs(feed_config) do + local url = feed[1] + local limit = feed.limit + local download_full_article = feed.download_full_article == nil or feed.download_full_article + local include_images = not never_download_images and feed.include_images + local enable_filter = feed.enable_filter or feed.enable_filter == nil + local filter_element = feed.filter_element or feed.filter_element == nil + -- Check if the two required attributes are set. + if url and limit then + feed_message = T(_("Processing %1/%2:\n%3"), idx, total_feed_entries, BD.url(url)) + UI:info(feed_message) + -- Process the feed source. + self:processFeedSource( + url, + tonumber(limit), + unsupported_feeds_urls, + download_full_article, + include_images, + feed_message, + enable_filter, + filter_element) + else + logger.warn("NewsDownloader: invalid feed config entry.", feed) + end + end + + if #unsupported_feeds_urls <= 0 then + -- When no errors are present, we get a happy message. + feed_message = _("Downloading news finished.") + else + -- When some errors are present, we get a sour message that includes + -- information about the source of the error. + local unsupported_urls = "" + for key, value in pairs(unsupported_feeds_urls) do + -- Create the error message. + unsupported_urls = unsupported_urls .. " " .. value[1] .. " " .. value[2] + -- Not sure what this does. + if key ~= #unsupported_feeds_urls then + unsupported_urls = BD.url(unsupported_urls) .. ", " + end + end + -- Tell the user there were problems. + feed_message = _("Downloading news finished with errors.") + -- Display a dialogue that requires the user to acknowledge + -- that errors occured. + UI:confirm( + T(_([[ +Could not process some feeds. +Unsupported format in: %1. Please +review your feed configuration file.]]) + , unsupported_urls), + _("Continue"), + "" + ) + end + -- Clear the info widgets before displaying the next ui widget. + UI:clear() + -- Check to see if this method was called from the menu. If it was, + -- we will have gotten a touchmenu_instance. This will context gives the user + -- two options about what to do next, which are handled by this block. + if touchmenu_instance then + -- Ask the user if they want to go to their downloads folder + -- or if they'd rather remain at the menu. + feed_message = feed_message .. _("Go to download folder?") + local should_go_to_downloads = UI:confirm( + feed_message, + _("Close"), + _("Go to downloads") + ) + if should_go_to_downloads then + -- Go to downloads folder. + UI:clear() + self:openDownloadsFolder() + touchmenu_instance:closeMenu() + NetworkMgr:afterWifiAction() + return + else + -- Return to the menu. 
+ NetworkMgr:afterWifiAction() + return + end end - -- If we made it this far, then the feed config is valid - -- and the next step is to process its contents - return feed_config + return end -function NewsDownloader:syncAllFeedsWithUI(touchmenu_instance, callback) +function NewsDownloader:loadConfigAndProcessFeedsWithUI(touchmenu_instance) local Trapper = require("ui/trapper") Trapper:wrap(function() - local UI = require("ui/trapper") - -- Get the config - local config = self:loadConfig() - local sync_errors = {} - -- Get the HTML for the feeds - local feedSource = FeedSource:new{} - -- Get the initialized feeds list - local initialized_feeds = feedSource:getInitializedFeeds( - config, - function(progress_message) - -- This callback relays updates to the UI - UI:info(progress_message) - end, - function(error_message) - table.insert( - sync_errors, - error_message - ) - end - ) - -- In this block, each feed item will be its own - -- epub complete with title and chapters - local epubs_to_make = {} - local epubs_successfully_created = {} - local feed_history = {} - - for feed_index, feed in pairs(initialized_feeds) do - -- Go through each feed and make new entry - local items_content = feedSource:getItemsContent( - feed, - function(progress_message) - UI:info(progress_message) - end, - function(error_message) - table.insert( - sync_errors, - error_message - ) - end - ) - - local volumize = feed.config.volumize ~= false - local chapters = {} - local feed_title = feedSource:getFeedTitleWithDate(feed) - local feed_id = feed.config[1] -- The url. - local sub_dir = feedSource:getFeedTitle(feed.document.title) - local item_history = {} - - for content_index, content in pairs(items_content) do - -- Check to see if we've already downloaded this item. - local history_for_feed = self.history:child(feed_id) + self:loadConfigAndProcessFeeds(touchmenu_instance) + end) +end - if history_for_feed:has(content.md5) then - logger.dbg("NewsDownloader: ", "Item already downloaded") - UI:info(_("Skipping downloaded item")) - else - local abs_path = feedSource:getEpubOutputDir( - self.download_dir, - sub_dir, - content.item_title - ) - - -- Not sure the slug returned is what we want. - -- Should be something like 2022_09_20-ArticleTitle - table.insert( - chapters, - { - title = content.item_title, - slug = content.item_slug, - md5 = content.md5, - html = content.html, - images = content.images - } - ) - - if not volumize then - -- We're not volumizing, so each chapter - -- will be its own epub. - table.insert( - epubs_to_make, - { - title = content.item_title, - chapters = chapters, - abs_path = abs_path, - id = feed_id, - } - ) - -- Reset the chapters list. - chapters = {} - end - - table.insert( - item_history, - content.md5 - ) - end - end - -- We're volumizing, so all of the chapters we collected - -- get added to a single epub. - if volumize and #chapters > 0 then - local abs_path = feedSource:getEpubOutputDir( - self.download_dir, - sub_dir, - feed_title - ) +function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element) + local ok, response = pcall(function() + return DownloadBackend:getResponseAsString(url) + end) + local feeds + -- Check to see if a response is available to deserialize. + if ok then + feeds = self:deserializeXMLString(response) + end + -- If the response is not available (for a reason that we don't know), + -- add the URL to the unsupported feeds list. 
+ if not ok or not feeds then + local error_message + if not ok then + error_message = _("(Reason: Failed to download content)") + else + error_message = _("(Reason: Error during feed deserialization)") + end + table.insert( + unsupported_feeds_urls, + { + url, + error_message + } + ) + return + end + -- Check to see if the feed uses RSS. + local is_rss = feeds.rss + and feeds.rss.channel + and feeds.rss.channel.title + and feeds.rss.channel.item + and feeds.rss.channel.item[1] + and feeds.rss.channel.item[1].title + and feeds.rss.channel.item[1].link + -- Check to see if the feed uses Atom. + local is_atom = feeds.feed + and feeds.feed.title + and feeds.feed.entry[1] + and feeds.feed.entry[1].title + and feeds.feed.entry[1].link + -- Process the feeds accordingly. + if is_atom then + ok = pcall(function() + return self:processFeed( + FEED_TYPE_ATOM, + feeds, + limit, + download_full_article, + include_images, + message, + enable_filter, + filter_element + ) + end) + elseif is_rss then + ok = pcall(function() + return self:processFeed( + FEED_TYPE_RSS, + feeds, + limit, + download_full_article, + include_images, + message, + enable_filter, + filter_element + ) + end) + end + -- If the feed can't be processed, or it is neither + -- Atom or RSS, then add it to the unsupported feeds list + -- and return an error message. + if not ok or (not is_rss and not is_atom) then + local error_message + if not ok then + error_message = _("(Reason: Failed to download content)") + elseif not is_rss then + error_message = _("(Reason: Couldn't process RSS)") + elseif not is_atom then + error_message = _("(Reason: Couldn't process Atom)") + end + table.insert( + unsupported_feeds_urls, + { + url, + error_message + } + ) + end +end - table.insert( - epubs_to_make, - { - title = feed_title, - chapters = chapters, - abs_path = abs_path, - id = feed_id, - } - ) - end +function NewsDownloader:deserializeXMLString(xml_str) + -- uses LuaXML https://github.com/manoelcampos/LuaXML + -- The MIT License (MIT) + -- Copyright (c) 2016 Manoel Campos da Silva Filho + -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML + local treehdl = require("lib/handler") + local libxml = require("lib/xml") + -- Instantiate the object that parses the XML file as a Lua table. + local xmlhandler = treehdl.simpleTreeHandler() + -- Instantiate the object that parses the XML to a Lua table. + local ok = pcall(function() + libxml.xmlParser(xmlhandler):parse(xml_str) + end) + if not ok then return end + return xmlhandler.root +end - feed_history[feed_id] = item_history - end +function NewsDownloader:processFeed(feed_type, feeds, limit, download_full_article, include_images, message, enable_filter, filter_element) + local feed_title + local feed_item + local total_items + -- Setup the above vars based on feed type. + if feed_type == FEED_TYPE_RSS then + feed_title = util.htmlEntitiesToUtf8(feeds.rss.channel.title) + feed_item = feeds.rss.channel.item + total_items = (limit == 0) + and #feeds.rss.channel.item + or limit + else + feed_title = getFeedTitle(feeds.feed.title) + feed_item = feeds.feed.entry + total_items = (limit == 0) + and #feeds.feed.entry + or limit + end + -- Get the path to the output directory. + local feed_output_dir = ("%s%s/"):format( + self.download_dir, + util.getSafeFilename(util.htmlEntitiesToUtf8(feed_title))) + -- Create the output directory if it doesn't exist. 
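The feed_output_dir computed above runs the feed title through util.getSafeFilename(); roughly, with hypothetical values:

```lua
local util = require("util")
local download_dir = "/mnt/onboard/news/" -- comes from the plugin settings
local feed_title = util.htmlEntitiesToUtf8("Tech &amp; Science: Daily")
local feed_output_dir = ("%s%s/"):format(download_dir, util.getSafeFilename(feed_title))
-- getSafeFilename() replaces characters that are not valid in file names,
-- so the ":" above would not survive into the directory name.
```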
+ if not lfs.attributes(feed_output_dir, "mode") then + lfs.mkdir(feed_output_dir) + end + -- Download the feed + for index, feed in pairs(feed_item) do + -- If limit has been met, stop downloading feed. + if limit ~= 0 and index - 1 == limit then + break + end + -- Create a message to display during processing. + local article_message = T( + _("%1\n\nFetching article %2/%3:"), + message, + index, + total_items + ) + -- Get the feed description. + local feed_description + if feed_type == FEED_TYPE_RSS then + feed_description = feed.description + else + feed_description = feed.summary + end + -- Download the article. + if download_full_article then + self:downloadFeed( + feed, + feed_output_dir, + include_images, + article_message, + enable_filter, + filter_element + ) + else + self:createFromDescription( + feed, + feed_description, + feed_output_dir, + include_images, + article_message + ) + end + end +end - -- Make each EPUB. - for epub_index, epub in pairs(epubs_to_make) do - local ok = feedSource:createEpub( - epub.title, - epub.chapters, - epub.abs_path, - function(progress_message) - UI:info(progress_message) - end, - function(error_message) - table.insert( - sync_errors, - error_message - ) - end - ) - if ok then - -- Save the hashes to the setting for this feed. - local hashes_to_save = feed_history[epub.id] - local history_for_feed = self.history:child(epub.id) - - for index, hash in ipairs(hashes_to_save) do - if history_for_feed:hasNot(hash) then - history_for_feed:saveSetting(hash, true) - end - end - -- Add the epub title to the successfully created table. - table.insert( - epubs_successfully_created, - epub.title - ) - else - table.insert( - sync_errors, - T( - _('Error building EPUB %1'), - epub.title - ) - ) - end - end +local function parseDate(dateTime) + -- Uses lua-feedparser https://github.com/slact/lua-feedparser + -- feedparser is available under the (new) BSD license. + -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser + local date = dateparser.parse(dateTime) + return os.date("%y-%m-%d_%H-%M_", date) +end - logger.dbg(epubs_to_make) +-- This appears to be used by Atom feeds in processFeed. +local function getTitleWithDate(feed) + local title = util.getSafeFilename(getFeedTitle(feed.title)) + if feed.updated then + title = parseDate(feed.updated) .. title + elseif feed.pubDate then + title = parseDate(feed.pubDate) .. title + elseif feed.published then + title = parseDate(feed.published) .. title + end + return title +end - self.history:flush() +function NewsDownloader:downloadFeed(feed, feed_output_dir, include_images, message, enable_filter, filter_element) + local title_with_date = getTitleWithDate(feed) + local news_file_path = ("%s%s%s"):format(feed_output_dir, + title_with_date, + self.file_extension) - -- Relay any errors - for index, error_message in pairs(sync_errors) do - UI:confirm( - error_message, - _("Continue"), - "" - ) - end + local file_mode = lfs.attributes(news_file_path, "mode") + if file_mode == "file" then + logger.dbg("NewsDownloader:", news_file_path, "already exists. Skipping") + else + logger.dbg("NewsDownloader: News file will be stored to :", news_file_path) + local article_message = T(_("%1\n%2"), message, title_with_date) + local link = getFeedLink(feed.link) + local html = DownloadBackend:loadPage(link) + DownloadBackend:createEpub(news_file_path, html, link, include_images, article_message, enable_filter, filter_element) + end +end - local message = (#epubs_successfully_created == 0) and - _("Sync complete. 
+function NewsDownloader:createFromDescription(feed, content, feed_output_dir, include_images, message)
+    local title_with_date = getTitleWithDate(feed)
+    local news_file_path = ("%s%s%s"):format(feed_output_dir,
+                                             title_with_date,
+                                             self.file_extension)
+    local file_mode = lfs.attributes(news_file_path, "mode")
+    if file_mode == "file" then
+        logger.dbg("NewsDownloader:", news_file_path, "already exists. Skipping")
+    else
+        logger.dbg("NewsDownloader: News file will be stored to:", news_file_path)
+        local article_message = T(_("%1\n%2"), message, title_with_date)
+        local footer = _("This is just a description of the feed. To download the full article instead, go to the News Downloader settings and change 'download_full_article' to 'true'.")
+        local html = string.format([[<!DOCTYPE html>
+<html>
+<head><meta charset='utf-8'><title>%s</title></head>
+<body><header><h2>%s</h2></header><article>%s</article>
+<br><footer><small>%s</small></footer>
+</body>
+</html>]], feed.title, feed.title, content, footer)
+        local link = getFeedLink(feed.link)
+        DownloadBackend:createEpub(news_file_path, html, link, include_images, article_message)
+    end
+end
 
 function NewsDownloader:removeNewsButKeepFeedConfig()
@@ -436,7 +591,7 @@ function NewsDownloader:removeNewsButKeepFeedConfig()
         end
     end
     UIManager:show(InfoMessage:new{
-        text = _("All downloaded news feed items deleted. To download these again in the future, reset the feed history.")
+        text = _("All downloaded news feed items deleted.")
     })
 end
 
@@ -457,10 +612,11 @@ function NewsDownloader:setCustomDownloadDirectory()
 end
 
 function NewsDownloader:viewFeedList()
+    local UI = require("ui/trapper")
+    UI:info(_("Loading news feed list…"))
     -- Protected call to see if feed config path returns a file that can be opened.
     local ok, feed_config = pcall(dofile, self.feed_config_path)
     if not ok or not feed_config then
-        local UI = require("ui/trapper")
        local change_feed_config = UI:confirm(
             _("Could not open feed list. Feeds configuration file is invalid."),
             _("Close"),
             _("Edit")
         )
         if change_feed_config then
             self:changeFeedConfig()
         end
         return
     end
+    UI:clear()
+    -- Check whether the config file contains any feed items.
+    if #feed_config <= 0 then
+        logger.err("NewsDownloader: empty feed list.", self.feed_config_path)
+        -- TODO: offer to add a feed here, or open the list UI with an
+        -- "add new feed" entry, instead of showing an empty list.
+    end
 
     local view_content = FeedView:getList(
         feed_config,
@@ -482,25 +647,8 @@ function NewsDownloader:viewFeedList()
         function(id, edit_key, value)
             self:editFeedAttribute(id, edit_key, value)
         end,
-        function(id, action)
-            if action == FeedView.ACTION_DELETE_FEED then
-                self:deleteFeed(id)
-            elseif action == FeedView.ACTION_RESET_HISTORY then
-                local Trapper = require("ui/trapper")
-                Trapper:wrap(function()
-                    local should_reset = Trapper:confirm(
-                        _("Are you sure you want to reset the feed history? Proceeding will cause items to be re-downloaded next time you sync."),
-                        _("Cancel"),
-                        _("Reset")
-                    )
-                    if should_reset then
-                        self:resetFeedHistory(id)
-                        Trapper:reset()
-                    else
-                        Trapper:reset()
-                    end
-                end)
-            end
+        function(id)
+            self:deleteFeed(id)
         end
     )
     -- Add an "Add new feed" button with callback
@@ -556,15 +704,10 @@
 end
 
 function NewsDownloader:editFeedAttribute(id, key, value)
     local kv = self.kv
-    -- This block determines what kind of UI to produce, or action to run,
-    -- based on the key value. Some values need an input dialog, others need
-    -- a Yes/No dialog.
-    if key == FeedView.RESET_HISTORY then
-        -- Show a "are you sure" box.
-        -- Reset the history
-        self.history:removeTableItem(value, 1)
-        self.history:flush()
-    elseif key == FeedView.URL
+    -- Feed attributes hold two kinds of values: strings (including
+    -- numbers) and booleans. This block picks the dialog that matches
+    -- the attribute being edited.
+    if key == FeedView.URL
         or key == FeedView.LIMIT
         or key == FeedView.FILTER_ELEMENT
     then
@@ -625,8 +768,6 @@ function NewsDownloader:editFeedAttribute(id, key, value)
         text = _("Include images?")
     elseif key == FeedView.ENABLE_FILTER then
         text = _("Enable CSS filter?")
-    elseif key == FeedView.VOLUMIZE then
-        text = _("Volumize feed?")
     end
 
     local multi_box
@@ -670,7 +811,6 @@ function NewsDownloader:updateFeedConfig(id, key, value)
     end
 
     local ok, feed_config = pcall(dofile, self.feed_config_path)
-
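+    -- (dofile both loads and executes the config file, so the pcall
+    -- also guards against syntax errors in a hand-edited config.)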
     if not ok or not feed_config then
         UI:info(T(_("Invalid configuration file. Detailed error message:\n%1"), feed_config))
         return
     end
@@ -679,6 +819,7 @@ function NewsDownloader:updateFeedConfig(id, key, value)
     if #feed_config <= 0 then
         logger.dbg("NewsDownloader: empty feed list.", self.feed_config_path)
     end
+
     -- Check to see if the id is larger than the number of feeds. If it is,
     -- then we know this is a new add. Insert the base array.
     if id > #feed_config then
@@ -712,17 +853,65 @@ function NewsDownloader:updateFeedConfig(id, key, value)
             )
         end
         elseif key == FeedView.LIMIT then
+            -- Plain assignment creates the key if it doesn't exist yet,
+            -- so new and pre-existing attributes are handled alike.
             feed.limit = value
         elseif key == FeedView.DOWNLOAD_FULL_ARTICLE then
             feed.download_full_article = value
         elseif key == FeedView.INCLUDE_IMAGES then
             feed.include_images = value
         elseif key == FeedView.ENABLE_FILTER then
             feed.enable_filter = value
         elseif key == FeedView.FILTER_ELEMENT then
             feed.filter_element = value
-        elseif key == FeedView.VOLUMIZE then
-            feed.volumize = value
         end
     end
     -- Now we insert the updated (or newly created) feed into the
@@ -741,31 +930,12 @@ function NewsDownloader:updateFeedConfig(id, key, value)
         new_config[id],
         function(cb_id, cb_edit_key, cb_value)
             self:editFeedAttribute(cb_id, cb_edit_key, cb_value)
-        end,
-        function(feed_id, action)
-            if action == FeedView.ACTION_DELETE_FEED then
-                self:deleteFeed(feed_id)
-            elseif action == FeedView.ACTION_RESET_HISTORY then
-                local Trapper = require("ui/trapper")
-                Trapper:wrap(function()
-                    local should_reset = Trapper:confirm(
-                        _("Are you sure you want to reset the feed history? Proceeding will cause items to be re-downloaded next time you sync."),
-                        _("Cancel"),
-                        _("Reset")
-                    )
-                    if should_reset then
-                        self:resetFeedHistory(id)
-                        Trapper:reset()
-                    else
-                        Trapper:reset()
-                    end
-                end)
-            end
         end
     )
     self:viewFeedItem(
         feed_item_vc
     )
 end
 
 function NewsDownloader:deleteFeed(id)
@@ -773,7 +943,6 @@ function NewsDownloader:deleteFeed(id)
     logger.dbg("Newsdownloader: attempting to delete feed")
     -- Check to see if we can get the config file.
     local ok, feed_config = pcall(dofile, self.feed_config_path)
-
     if not ok or not feed_config then
         UI:info(T(_("Invalid configuration file. Detailed error message:\n%1"), feed_config))
         return
@@ -783,7 +952,6 @@ function NewsDownloader:deleteFeed(id)
     -- and key (i.e. the key that triggered this function).
     -- If we find the matching feed we skip it, which deletes it.
     local new_config = {}
-
     for idx, feed in ipairs(feed_config) do
         -- Keep every feed except the one being deleted.
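+        -- (Feed ids are 1-based positions in the config array, so a
+        -- plain numeric comparison is enough to find the entry.)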
         if idx ~= id then
@@ -795,7 +963,6 @@ function NewsDownloader:deleteFeed(id)
     end
     -- Save the config
     local Trapper = require("ui/trapper")
-
     Trapper:wrap(function()
         logger.dbg("NewsDownloader: config to save", new_config)
         self:saveConfig(new_config)
@@ -804,14 +971,6 @@ function NewsDownloader:deleteFeed(id)
     self:viewFeedList()
 end
 
-function NewsDownloader:resetFeedHistory(url)
-    logger.dbg("Newsdownloader: attempting to reset feed history")
-    self.history:saveSetting(url, {})
-    self.history:flush()
-    -- Refresh the view
-    self:viewFeedList()
-end
-
 function NewsDownloader:saveConfig(config)
     local UI = require("ui/trapper")
     UI:info(_("Saving news feed list…"))
@@ -827,9 +986,6 @@ function NewsDownloader:saveConfig(config)
     UI:reset()
 end
 
--- This function opens an input dialog that lets the user
--- manually change their feed config. This function is called
--- when there is an error with the parsing.
 function NewsDownloader:changeFeedConfig()
     local feed_config_file = io.open(self.feed_config_path, "rb")
     local config = feed_config_file:read("*all")
@@ -872,7 +1028,6 @@ function NewsDownloader:changeFeedConfig()
     UIManager:show(config_editor)
     config_editor:onShowKeyboard()
 end
-
 function NewsDownloader:openDownloadsFolder()
     local FileManager = require("apps/filemanager/filemanager")
     if self.ui.document then
@@ -897,4 +1052,38 @@ function NewsDownloader:onCloseDocument()
     end
 end
 
+--
+-- KeyValuePage can't handle a table that contains sub-tables.
+-- This function flattens the view-content array, hoisting each
+-- nested entry into the plain key/value list KeyValuePage expects.
+--
+function NewsDownloader:flattenArray(base_array, source_array)
+    for _, value in ipairs(source_array) do
+        if value[2] == nil then
+            -- An entry without a value marks a separator line.
+            table.insert(
+                base_array,
+                "---"
+            )
+        else
+            if value["callback"] then
+                table.insert(
+                    base_array,
+                    {
+                        value[1], value[2], callback = value["callback"]
+                    }
+                )
+            else
+                table.insert(
+                    base_array,
+                    {
+                        value[1], value[2]
+                    }
+                )
+            end
+        end
+    end
+    return base_array
+end
+
 return NewsDownloader