local DataStorage = require("datastorage")
local ReadHistory = require("readhistory")
local FFIUtil = require("ffi/util")
local InfoMessage = require("ui/widget/infomessage")
local LuaSettings = require("frontend/luasettings")
local UIManager = require("ui/uimanager")
local NetworkMgr = require("ui/network/manager")
local WidgetContainer = require("ui/widget/container/widgetcontainer")
local ffi = require("ffi")
local http = require("socket.http")
local https = require("ssl.https")
local lfs = require("libs/libkoreader-lfs")
local logger = require("logger")
local ltn12 = require("ltn12")
local socket_url = require("socket.url")
local util = require("util")
local _ = require("gettext")
local T = FFIUtil.template

local NewsDownloader = WidgetContainer:new{}

local initialized = false
local wifi_enabled_before_action = true
local feed_config_file = "feed_config.lua"
local news_downloader_config_file = "news_downloader_settings.lua"
local config_key_custom_dl_dir = "custom_dl_dir"
local file_extension = ".html"
local news_download_dir_name = "news"
local news_download_dir_path, feed_config_path

-- If a title looks like <title>blabla</title> it'll just be feed.title.
-- If a title looks like <title attr="alb">blabla</title> then we get a table
-- where [1] is the title string and the attributes are also available.
local function getFeedTitle(possible_title)
    if type(possible_title) == "string" then
        return possible_title
    elseif possible_title[1] and type(possible_title[1]) == "string" then
        return possible_title[1]
    end
end

-- There can be multiple links.
-- For now we just assume the first link is probably the right one.
-- @todo write unit tests
-- Some feeds that can be used for unit tests:
-- http://fransdejonge.com/feed/ for multiple links
-- https://github.com/koreader/koreader/commits/master.atom for a single link with attributes
local function getFeedLink(possible_link)
    local E = {}
    if type(possible_link) == "string" then
        return possible_link
    elseif (possible_link._attr or E).href then
        return possible_link._attr.href
    elseif ((possible_link[1] or E)._attr or E).href then
        return possible_link[1]._attr.href
    end
end

-- TODO: implement as NetworkMgr:afterWifiAction with configuration options
function NewsDownloader:afterWifiAction()
    if not wifi_enabled_before_action then
        NetworkMgr:promptWifiOff()
    end
end

function NewsDownloader:init()
    self.ui.menu:registerToMainMenu(self)
end

function NewsDownloader:addToMainMenu(menu_items)
    self:lazyInitialization()
    menu_items.news_downloader = {
        text = _("News (RSS/Atom) downloader"),
        sub_item_table = {
            {
                text = _("Download news"),
                callback = function()
                    if not NetworkMgr:isOnline() then
                        wifi_enabled_before_action = false
                        NetworkMgr:beforeWifiAction(self.loadConfigAndProcessFeeds)
                    else
                        self:loadConfigAndProcessFeeds()
                    end
                end,
            },
            {
                text = _("Go to news folder"),
                callback = function()
                    local FileManager = require("apps/filemanager/filemanager")
                    if FileManager.instance then
                        FileManager.instance:reinit(news_download_dir_path)
                    else
                        FileManager:showFiles(news_download_dir_path)
                    end
                end,
            },
            {
                text = _("Remove news"),
                callback = function() self:removeNewsButKeepFeedConfig() end,
            },
            {
                text = _("Set custom download directory"),
                callback = function() self:setCustomDownloadDirectory() end,
            },
            {
                text = _("Settings"),
                sub_item_table = {
                    {
                        text = _("Change feeds configuration"),
                        callback = function()
                            UIManager:show(InfoMessage:new{
                                text = T(_("To change feed (Atom/RSS) sources, please manually edit the configuration file:\n%1\n\nIt is very simple and contains comments as well as a sample configuration."),
                                         feed_config_path)
                            })
                        end,
                    },
                },
            },
            {
                text = _("Help"),
                callback = function()
                    UIManager:show(InfoMessage:new{
                        text = T(_("The news downloader retrieves RSS and Atom news entries and stores them in:\n%1\n\nEach entry is a separate HTML file that can be browsed with the KOReader file manager.\nThe item download limit can be configured in Settings."),
                                 news_download_dir_path)
                    })
                end,
            },
        },
    }
end
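
-- Resolves the news download directory (the user-configured one if set,
-- otherwise <data dir>/news/), creates it if needed, and copies the plugin's
-- default feed_config.lua into it on first use.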
function NewsDownloader:lazyInitialization()
    if not initialized then
        logger.dbg("NewsDownloader: obtaining news folder")
        local news_downloader_settings = LuaSettings:open(("%s/%s"):format(DataStorage:getSettingsDir(), news_downloader_config_file))
        if news_downloader_settings:has(config_key_custom_dl_dir) then
            news_download_dir_path = news_downloader_settings:readSetting(config_key_custom_dl_dir)
        else
            news_download_dir_path = ("%s/%s/"):format(DataStorage:getFullDataDir(), news_download_dir_name)
        end
        if not lfs.attributes(news_download_dir_path, "mode") then
            logger.dbg("NewsDownloader: Creating initial directory")
            lfs.mkdir(news_download_dir_path)
        end
        feed_config_path = news_download_dir_path .. feed_config_file
        if not lfs.attributes(feed_config_path, "mode") then
            logger.dbg("NewsDownloader: Creating initial feed config.")
            FFIUtil.copyFile(FFIUtil.joinPath(self.path, feed_config_file),
                             feed_config_path)
        end
        initialized = true
    end
end
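
-- Loads feed_config.lua from the news folder and processes every configured
-- feed. Each entry is a table whose first value is the feed URL, for example
-- (illustrative URL only):
--   { "http://example.com/feed.xml", limit = 5, download_full_article = true },
-- `limit` caps the number of downloaded items (0 means no limit) and
-- `download_full_article` (default: true) fetches the linked article instead
-- of only the feed's own description.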
function NewsDownloader:loadConfigAndProcessFeeds()
    local info = InfoMessage:new{ text = _("Loading news feed config…") }
    UIManager:show(info)
    logger.dbg("force repaint due to upcoming blocking calls")
    UIManager:forceRePaint()
    UIManager:close(info)

    local ok, feed_config = pcall(dofile, feed_config_path)
    if not ok or not feed_config then
        logger.error("NewsDownloader: Feed config not found.")
        return
    end
    if #feed_config <= 0 then
        logger.error("NewsDownloader: empty feed list.", feed_config_path)
        return
    end

    local unsupported_feeds_urls = {}
    local total_feed_entries = #feed_config
    for idx, feed in ipairs(feed_config) do
        local url = feed[1]
        local limit = feed.limit
        local download_full_article = feed.download_full_article == nil or feed.download_full_article
        if url and limit then
            info = InfoMessage:new{ text = T(_("Processing %1/%2:\n%3"), idx, total_feed_entries, url) }
            UIManager:show(info)
            -- processFeedSource is a blocking call, so manually force a UI refresh beforehand
            UIManager:forceRePaint()
            NewsDownloader:processFeedSource(url, tonumber(limit), unsupported_feeds_urls, download_full_article)
            UIManager:close(info)
        else
            logger.warn("NewsDownloader: invalid feed config entry", feed)
        end
    end

    if #unsupported_feeds_urls <= 0 then
        UIManager:show(InfoMessage:new{
            text = _("Downloading news finished."),
            timeout = 1,
        })
    else
        local unsupported_urls = ""
        for k, url in pairs(unsupported_feeds_urls) do
            unsupported_urls = unsupported_urls .. url
            if k ~= #unsupported_feeds_urls then
                unsupported_urls = unsupported_urls .. ", "
            end
        end
        UIManager:show(InfoMessage:new{
            text = T(_("Downloading news finished. Could not process some feeds. Unsupported format in: %1"), unsupported_urls)
        })
    end
    NewsDownloader:afterWifiAction()
end
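
-- Fetches one feed URL, parses the XML, and dispatches to the Atom or RSS
-- handler; feeds that cannot be parsed or recognized are collected in
-- unsupported_feeds_urls.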
function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, download_full_article)
    local resp_lines = {}
    local parsed = socket_url.parse(url)
    local httpRequest = parsed.scheme == "http" and http.request or https.request
    httpRequest({ url = url, sink = ltn12.sink.table(resp_lines), })
    local feeds = self:deserializeXMLString(table.concat(resp_lines))

    if not feeds then
        table.insert(unsupported_feeds_urls, url)
        return
    end

    local is_rss = feeds.rss and feeds.rss.channel and feeds.rss.channel.title and feeds.rss.channel.item and feeds.rss.channel.item[1] and feeds.rss.channel.item[1].title and feeds.rss.channel.item[1].link
    local is_atom = feeds.feed and feeds.feed.title and feeds.feed.entry[1] and feeds.feed.entry[1].title and feeds.feed.entry[1].link

    if is_atom then
        self:processAtom(feeds, limit, download_full_article)
    elseif is_rss then
        self:processRSS(feeds, limit, download_full_article)
    else
        table.insert(unsupported_feeds_urls, url)
        return
    end
end

function NewsDownloader:deserializeXMLString(xml_str)
    -- uses LuaXML https://github.com/manoelcampos/LuaXML
    -- The MIT License (MIT)
    -- Copyright (c) 2016 Manoel Campos da Silva Filho
    local treehdl = require("lib/handler")
    local libxml = require("lib/xml")
    -- Instantiate the handler that exposes the parsed XML as a Lua table
    local xmlhandler = treehdl.simpleTreeHandler()
    -- Parse the XML string into that table
    local ok = pcall(function()
        libxml.xmlParser(xmlhandler):parse(xml_str)
    end)
    if not ok then return end
    return xmlhandler.root
end
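
-- Writes one HTML file per Atom entry into a per-feed folder, either
-- downloading the linked article in full or building a small HTML page from
-- the entry itself.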
function NewsDownloader:processAtom(feeds, limit, download_full_article)
    local feed_output_dir = string.format("%s%s/",
                                          news_download_dir_path,
                                          util.replaceInvalidChars(getFeedTitle(feeds.feed.title)))
    if not lfs.attributes(feed_output_dir, "mode") then
        lfs.mkdir(feed_output_dir)
    end

    for index, feed in pairs(feeds.feed.entry) do
        if limit ~= 0 and index - 1 == limit then
            break
        end
        if download_full_article then
            self:downloadFeed(feed, feed_output_dir)
        else
            self:createFromDescription(feed, feed.context, feed_output_dir)
        end
    end
end
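
-- Same as processAtom, but for RSS channels (items live under rss.channel.item
-- and the short version uses the item's description).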
function NewsDownloader:processRSS(feeds, limit, download_full_article)
    local feed_output_dir = ("%s%s/"):format(
        news_download_dir_path, util.replaceInvalidChars(feeds.rss.channel.title))
    if not lfs.attributes(feed_output_dir, "mode") then
        lfs.mkdir(feed_output_dir)
    end

    for index, feed in pairs(feeds.rss.channel.item) do
        if limit ~= 0 and index - 1 == limit then
            break
        end
        if download_full_article then
            self:downloadFeed(feed, feed_output_dir)
        else
            self:createFromDescription(feed, feed.description, feed_output_dir)
        end
    end
end
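
-- Downloads the full article behind a feed entry's link into an HTML file
-- named after the entry title.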
function NewsDownloader:downloadFeed(feed, feed_output_dir)
    local link = getFeedLink(feed.link)
    local news_dl_path = ("%s%s%s"):format(feed_output_dir,
                                           util.replaceInvalidChars(getFeedTitle(feed.title)),
                                           file_extension)
    logger.dbg("NewsDownloader: News file will be stored to:", news_dl_path)
    local parsed = socket_url.parse(link)
    local httpRequest = parsed.scheme == "http" and http.request or https.request
    httpRequest({ url = link, sink = ltn12.sink.file(io.open(news_dl_path, "w")), })
end
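
-- Builds a minimal HTML file from the entry's own text (passed in as
-- `context`) instead of downloading the linked article.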
function NewsDownloader:createFromDescription(feed, context, feed_output_dir)
    local news_file_path = ("%s%s%s"):format(feed_output_dir,
                                             util.replaceInvalidChars(getFeedTitle(feed.title)),
                                             file_extension)
    logger.dbg("NewsDownloader: News file will be created:", news_file_path)
    local file = io.open(news_file_path, "w")
    local footer = _("This is just a description of the feed. To download the full article, go to the News Downloader settings and change 'download_full_article' to 'true'.")
    local html = string.format([[<!DOCTYPE html>
<html>
<head><meta charset='UTF-8'><title>%s</title></head>
<body><header><h2>%s</h2></header><article>%s</article>
<br><footer><small>%s</small></footer>
</body>
</html>]], feed.title, feed.title, context, footer)
    file:write(html)
    file:close()
end
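
-- Removes downloaded news files and per-feed folders, but keeps feed_config.lua.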
function NewsDownloader:removeNewsButKeepFeedConfig()
    logger.dbg("NewsDownloader: Removing news from:", news_download_dir_path)
    for entry in lfs.dir(news_download_dir_path) do
        if entry ~= "." and entry ~= ".." and entry ~= feed_config_file then
            local entry_path = news_download_dir_path .. "/" .. entry
            local entry_mode = lfs.attributes(entry_path, "mode")
            if entry_mode == "file" then
                ffi.C.remove(entry_path)
            elseif entry_mode == "directory" then
                FFIUtil.purgeDir(entry_path)
            end
        end
    end
    UIManager:show(InfoMessage:new{
        text = _("All news removed.")
    })
end
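
-- Lets the user pick a custom download directory, copies the current feed
-- config there, and re-runs lazyInitialization against the new location.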
function NewsDownloader:setCustomDownloadDirectory()
    UIManager:show(InfoMessage:new{
        text = _("To select a folder, press and hold it for 1 second.")
    })
    require("ui/downloadmgr"):new{
        title = _("Choose download directory"),
        onConfirm = function(path)
            logger.dbg("NewsDownloader: set download directory to:", path)
            local news_downloader_settings = LuaSettings:open(("%s/%s"):format(DataStorage:getSettingsDir(), news_downloader_config_file))
            news_downloader_settings:saveSetting(config_key_custom_dl_dir, ("%s/"):format(path))
            news_downloader_settings:flush()

            logger.dbg("NewsDownloader: Copying the previous feed_config_file to the new download folder from:", feed_config_path)
            FFIUtil.copyFile(feed_config_path, ("%s/%s"):format(path, feed_config_file))

            initialized = false
            self:lazyInitialization()
        end,
    }:chooseDir()
end
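
-- Documents opened from the news folder are removed from the reading history
-- when they are closed.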
function NewsDownloader:onCloseDocument()
    local document_full_path = self.ui.document.file
    if document_full_path and news_download_dir_path == string.sub(document_full_path, 1, string.len(news_download_dir_path)) then
        logger.dbg("NewsDownloader: document_full_path:", document_full_path)
        logger.dbg("NewsDownloader: news_download_dir_path:", news_download_dir_path)
        logger.dbg("NewsDownloader: removing NewsDownloader file from history.")
        ReadHistory:removeItemByPath(document_full_path)
    end
end

return NewsDownloader