local BD = require("ui/bidi")
local DownloadBackend = require("epubdownloadbackend")
local NewsHelpers = require("http_utilities")
local dateparser = require("lib.dateparser")
local logger = require("logger")
-- Fix: `lfs` is used below (getEpubOutputDir/createEpub) but was never brought
-- into scope; KOReader convention is to require it explicitly.
local lfs = require("libs/libkoreader-lfs")
local md5 = require("ffi/sha2").md5
local util = require("util")
local _ = require("gettext")
local N_ = _.ngettext
local FFIUtil = require("ffi/util")
local T = FFIUtil.template

-- Builds EPUB files out of RSS/Atom news feeds.
local FeedSource = {
    file_extension = ".epub",
}

--- Standard prototype constructor.
-- @tparam table|nil o optional pre-populated instance table
-- @treturn table a FeedSource instance
function FeedSource:new(o)
    o = o or {}
    self.__index = self
    setmetatable(o, self)
    return o
end

--- Fetches and initializes every feed in feed_list.
-- @tparam table feed_list array of feed configs; each config's [1] is the URL
-- @tparam function progress_callback receives a UI status message per feed
-- @tparam function error_callback called once with a combined message listing
--         every feed that could not be initialized
-- @treturn table array of { config = <feed config>, document = <parsed feed> }
function FeedSource:getInitializedFeeds(feed_list, progress_callback, error_callback)
    local initialized_feeds = {}
    local unsupported_feeds_urls = {}
    for idx, feed in ipairs(feed_list) do
        local url = feed[1]
        -- Show a UI update.
        -- Fix: the original passed the URL as %2, producing e.g.
        -- "Setting up feed 1 of http://…"; the message expects the total count.
        progress_callback(T(
            _("Setting up feed %1 of %2."),
            idx,
            #feed_list
        ))
        -- Initialize the feed.
        local ok, response = pcall(function()
            return self:initializeDocument(
                self:fetchDocumentByUrl(url)
            )
        end)
        if ok and response then
            -- Initialization worked: keep the feed.
            table.insert(initialized_feeds, {
                config = feed,
                document = response,
            })
        else
            -- Remember the URL plus the error message for reporting below.
            -- BD.url() wraps the URL for correct bidirectional-text display.
            table.insert(unsupported_feeds_urls, {
                BD.url(url) .. ": " .. tostring(response)
            })
        end
    end
    if #unsupported_feeds_urls > 0 then
        -- Combine all failures into a single message for the error callback.
        -- Fix: the original overwrote `unsupported_urls` on every pass, so only
        -- the last failure was ever reported; append instead, separated by
        -- blank lines.
        local unsupported_urls = ""
        for key, value in ipairs(unsupported_feeds_urls) do
            unsupported_urls = unsupported_urls .. value[1]
            if key ~= #unsupported_feeds_urls then
                unsupported_urls = unsupported_urls .. "\n\n"
            end
        end
        error_callback(
            T(N_("Could not initialize a feed:\n\n%2\n\nPlease review your feed configuration.",
                "Could not initialize %1 feeds:\n\n%2\n\nPlease review your feed configurations.",
                #unsupported_feeds_urls),
                #unsupported_feeds_urls,
                unsupported_urls)
        )
    end
    return initialized_feeds
end

-- This function contacts the feed website and attempts to get
-- the RSS/Atom document with a list of the latest items.
-- @tparam string url the feed URL
-- @treturn table deserialized XML document
-- @raise when the download or the deserialization fails
function FeedSource:fetchDocumentByUrl(url)
    local document
    -- Get the XML document representing the feed.
    local ok, response = pcall(function()
        local success, content = NewsHelpers:getUrlContent(url)
        if success then
            return content
        else
            error("Failed to download content for url: " .. url, 0)
        end
    end)
    if ok then
        -- Deserialize the XML document into something Lua can use.
        document = NewsHelpers:deserializeXMLString(response)
    end
    -- Fix: the original tested `ok or document`, which returned nil when the
    -- download succeeded but deserialization failed, and which made the
    -- deserialization error branch unreachable.
    if ok and document then
        return document
    elseif not ok then
        error("(Reason: Failed to download feed document)", 0)
    else
        error("(Reason: Error during feed document deserialization)", 0)
    end
end

-- Supply FeedSource:initializeDocument() with the XML document returned by the
-- feed, and it will initialize the document by extracting the feed title,
-- feed items, and items count.
--- Extracts the title, item list and item count from a deserialized feed
-- document, storing them on the document itself.
-- @tparam table document deserialized XML (from fetchDocumentByUrl)
-- @treturn table the same document, with .title, .items, .total_items set
-- @raise when the document is neither parsable RSS nor Atom
function FeedSource:initializeDocument(document)
    local feed_title
    local feed_items
    local total_items
    local ok = pcall(function()
        return self:getFeedType(
            document,
            function()
                -- RSS callback.
                feed_title = util.htmlEntitiesToUtf8(document.rss.channel.title)
                feed_items = document.rss.channel.item
                total_items = #document.rss.channel.item
            end,
            function()
                -- Atom callback.
                feed_title = FeedSource:getFeedTitle(document.feed.title)
                feed_items = document.feed.entry
                total_items = #document.feed.entry
            end
        )
    end)
    if ok then
        document.title = feed_title
        document.items = feed_items
        document.total_items = total_items
        return document
    else
        error(_("Could not initialize feed document"), 0)
    end
end

--- Downloads the HTML content for each item in an initialized feed.
-- @tparam table feed { config = ..., document = ... } as produced by
--         getInitializedFeeds()
-- @tparam function progress_callback UI status update per item
-- @tparam function error_callback called for each item that could not be fetched
-- @treturn table|nil array of initialized items, or nil if none succeeded
function FeedSource:getItemsContent(feed, progress_callback, error_callback)
    local limit = tonumber(feed.config.limit)
    -- A limit of 0 means "no limit": download every item.
    local total_items = (limit == 0) and feed.document.total_items or limit
    local initialized_feed_items = {}
    -- Download each item in the feed (original had typo "ite0m").
    for index, item in ipairs(feed.document.items) do
        -- If the limit has been met, stop downloading the feed.
        if limit ~= 0 and index > limit then
            break
        end
        -- Display feedback to the user.
        progress_callback(T(
            _("%3\n Downloading item %1 of %2"),
            index,
            total_items,
            feed.document.title
        ))
        -- Download the article's HTML.
        local ok, response = pcall(function()
            return self:initializeItemHtml(
                feed,
                self:getItemHtml(
                    item,
                    feed.config.download_full_article
                )
            )
        end)
        -- Add the result to our table, or send a result to the error callback.
        if ok then
            table.insert(initialized_feed_items, {
                html = response.html,
                images = response.images,
                item_slug = FeedSource:getItemTitleWithDate(item),
                item_title = item.title,
                md5 = md5(item.title),
                feed_title = feed.document.title,
            })
        else
            error_callback(
                T(_("Could not get content for: %1"), feed.document.title)
            )
        end
    end
    if #initialized_feed_items > 0 then
        return initialized_feed_items
    else
        return nil
    end
end

--- Runs a downloaded item's HTML through the download backend, extracting
-- images and (optionally) filtered HTML per the feed configuration.
-- @treturn table { html = <string>, images = <table> }
function FeedSource:initializeItemHtml(feed, html)
    local url = feed.config[1]
    local include_images = feed.config.include_images ~= false
    -- Evaluates to the configured element, or `true` when the key is absent.
    local filter_element = feed.config.filter_element
        or feed.config.filter_element == nil
    local enable_filter = feed.config.enable_filter ~= false
    local item_images, item_html = DownloadBackend:getImagesAndHtml(
        html,
        url,
        include_images,
        enable_filter,
        filter_element
    )
    return {
        html = item_html,
        images = item_images,
    }
end

--- Determines whether a document is RSS or Atom and invokes the matching
-- callback; raises an error when it is neither.
-- @tparam table document deserialized feed document
-- @tparam function rss_cb called when the document parses as RSS
-- @tparam function atom_cb called when the document parses as Atom
function FeedSource:getFeedType(document, rss_cb, atom_cb)
    -- Check to see if the feed uses RSS.
    local is_rss = document.rss
        and document.rss.channel
        and document.rss.channel.title
        and document.rss.channel.item
        and document.rss.channel.item[1]
        and document.rss.channel.item[1].title
        and document.rss.channel.item[1].link
    -- Check to see if the feed uses Atom.
    -- Fix: guard `document.feed.entry` before indexing entry[1]; the original
    -- crashed on Atom documents that contain no entries.
    local is_atom = document.feed
        and document.feed.title
        and document.feed.entry
        and document.feed.entry[1]
        and document.feed.entry[1].title
        and document.feed.entry[1].link
    -- Dispatch based on the detected feed type.
    if is_atom then
        return atom_cb()
    elseif is_rss then
        return rss_cb()
    end
    -- Neither matched: report which format the document most resembled.
    -- (In the original the Atom message was unreachable, because `not is_rss`
    -- was always true at this point.)
    if document.feed then
        error(_("(Reason: Couldn't process Atom)"))
    else
        error(_("(Reason: Couldn't process RSS)"))
    end
end

--- Returns the HTML for a feed item: either the full linked article, or a
-- page built from the item's own description/summary plus an advisory footer.
function FeedSource:getItemHtml(item, download_full_article)
    if download_full_article then
        return NewsHelpers:loadPage(
            FeedSource:getFeedLink(item.link)
        )
    end
    local feed_description = item.description or item.summary
    local footer = _("This is just a description of the feed. To download the full article instead, go to the News Downloader settings and change 'download_full_article' to 'true'.")
    -- Fix: the original template had three placeholders for four arguments,
    -- so the footer was silently dropped; add the fourth placeholder.
    -- NOTE(review): any HTML markup this template once carried is not visible
    -- here — confirm against the upstream template.
    return string.format([[ %s

%s

%s

%s

]], item.title, item.title, feed_description, footer)
end

-- @todo: move this elsewhere
--- Builds the absolute output path for a feed's EPUB, creating the feed's
-- subdirectory when necessary.
-- @treturn string absolute path ending in self.file_extension
function FeedSource:getEpubOutputDir(download_dir, sub_dir, epub_title)
    local feed_output_dir = ("%s%s/"):format(
        download_dir,
        util.getSafeFilename(util.htmlEntitiesToUtf8(sub_dir))
    )
    -- Create the output directory if it doesn't exist.
    if not lfs.attributes(feed_output_dir, "mode") then
        lfs.mkdir(feed_output_dir)
    end
    local file_name = FeedSource:getFeedTitle(epub_title)
    return ("%s%s%s"):format(
        feed_output_dir,
        file_name,
        self.file_extension
    )
end

--- Assembles chapters and their images into an EPUB on disk.
-- Skips the build when the target file already exists.
-- @tparam string title EPUB title
-- @tparam table chapters array of chapter tables (each with an .images array)
-- @tparam string abs_output_path where the EPUB should be written
-- @tparam function progress_callback UI status updates during the build
-- @tparam function error_callback currently unused; kept for interface
--         compatibility with callers
-- @treturn boolean true when the EPUB exists on disk afterwards
-- @raise when chapters is empty
function FeedSource:createEpub(title, chapters, abs_output_path, progress_callback, error_callback)
    local file_exists = lfs.attributes(abs_output_path, "mode")
    if file_exists then
        logger.dbg("NewsDownloader: Skipping. EPUB file already exists", abs_output_path)
        return true
    end
    if #chapters == 0 then
        error(_("Error: chapters contains 0 items"), 0)
    end
    -- Flatten the per-chapter image lists for the manifest.
    local images = {}
    for _index, chapter in ipairs(chapters) do
        for _jndex, image in ipairs(chapter.images) do
            table.insert(images, image)
        end
    end
    local epub = DownloadBackend:new{}
    progress_callback(T(_("Building EPUB %1"), title))
    epub:setTitle(title)
    epub:addToc(chapters)
    epub:addManifest(chapters, images)
    progress_callback(T(_("Building EPUB %1: %2"), title, _("Adding contents")))
    epub:addContents(chapters)
    progress_callback(T(_("Building EPUB %1: %2"), title, _("Adding images")))
    epub:addImages(images)
    progress_callback(T(_("Building EPUB %1: %2"), title, _("Writing EPUB to disk")))
    local ok = pcall(function()
        return epub:build(abs_output_path)
    end)
    -- Success only if the build didn't raise AND the file is really on disk.
    if ok and lfs.attributes(abs_output_path, "mode") then
        return true
    end
    return false
end

--- Parses a feed timestamp string and formats it as a sortable filename prefix.
-- Uses lua-feedparser https://github.com/slact/lua-feedparser
-- feedparser is available under the (new) BSD license.
-- see: koreader/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
local function parseDate(dateTime)
    local date = dateparser.parse(dateTime)
    return os.date("%y-%m-%d_%H-%M_", date)
end

--- Creates a filesystem-safe feed title prefixed with the current date/time.
function FeedSource:getFeedTitleWithDate(feed)
    local title = util.getSafeFilename(FeedSource:getFeedTitle(feed.document.title))
    return os.date("%y-%m-%d_%H-%M_") .. title
end

-- Creates a title with date from a feed item.
-- Prefers the item's own timestamp (updated/pubDate/published) when present;
-- otherwise the title is returned without a date prefix.
function FeedSource:getItemTitleWithDate(item)
    local title = util.getSafeFilename(FeedSource:getFeedTitle(item.title))
    if item.updated then
        title = parseDate(item.updated) .. title
    elseif item.pubDate then
        title = parseDate(item.pubDate) .. title
    elseif item.published then
        title = parseDate(item.published) .. title
    end
    return title
end

-- If a title is plain text it'll just be feed.title (a string).
-- If a title element carries attributes then we get a table
-- where [1] is the title string and the attributes are also available.
function FeedSource:getFeedTitle(possible_title)
    if type(possible_title) == "string" then
        return util.htmlEntitiesToUtf8(possible_title)
    elseif possible_title[1] and type(possible_title[1]) == "string" then
        return util.htmlEntitiesToUtf8(possible_title[1])
    end
end

-- There can be multiple links.
-- For now we just assume the first link is probably the right one.
--- @todo Write unit tests.
-- Some feeds that can be used for unit test.
-- http://fransdejonge.com/feed/ for multiple links.
-- https://github.com/koreader/koreader/commits/master.atom for single link with attributes.
function FeedSource:getFeedLink(possible_link)
    local E = {}
    if type(possible_link) == "string" then
        return possible_link
    elseif (possible_link._attr or E).href then
        return possible_link._attr.href
    elseif ((possible_link[1] or E)._attr or E).href then
        return possible_link[1]._attr.href
    end
end

return FeedSource