koreader/plugins/newsdownloader.koplugin/http_utilities.lua
Scarlett 8a04dc9852
NewsDownloader: new option to allow EPUB volumization (#8263)
When this feature is enabled on a feed and that feed is synced,
all new feed entries are collected into a single new EPUB file.
This is achieved by implementing a feed history feature
(downloaded feeds are recorded as MD5 hashes in a LuaSettings
file), and by introducing additional methods in epubdownloader.lua
that allow multiple HTML documents to be added to a single EPUB file.
2022-01-21 19:23:21 +01:00
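
The feed-history mechanism described above can be sketched roughly as follows, assuming KOReader's LuaSettings module and an MD5 helper such as require("ffi/sha2").md5; the FeedHistory table, the history file path, and the method names are illustrative assumptions, not the plugin's actual API.

-- A minimal sketch of the feed-history idea, not the plugin's actual implementation.
local LuaSettings = require("luasettings")
local md5 = require("ffi/sha2").md5 -- assumption: this digest helper is available in the build

local FeedHistory = {}
FeedHistory.__index = FeedHistory

-- Open (or create) the history store backing one feed.
function FeedHistory.open(history_file)
    return setmetatable({ settings = LuaSettings:open(history_file) }, FeedHistory)
end

-- True if an entry with this URL was already downloaded.
function FeedHistory:seen(entry_url)
    return self.settings:readSetting(md5(entry_url)) ~= nil
end

-- Record an entry so later syncs skip it.
function FeedHistory:mark(entry_url)
    self.settings:saveSetting(md5(entry_url), true)
    self.settings:flush()
end

On each sync, entries whose hash is already present are skipped, and only the remaining ones are appended to the EPUB volume being built.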

127 lines · 4.9 KiB · Lua

local logger = require("logger")
local http = require("socket.http")
local socketutil = require("socketutil")
local socket_url = require("socket.url")
local socket = require("socket")
local ltn12 = require("ltn12")
local NewsHelpers = {}

local max_redirects = 5 -- prevent infinite redirects
-- Fetch the content of a URL, following HTTP redirects and honoring optional timeouts.
function NewsHelpers:getUrlContent(url, timeout, maxtime, redirectCount)
    logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")")
    if not redirectCount then
        redirectCount = 0
    elseif redirectCount == max_redirects then
        -- NOTE: error()'s second argument is a stack level, not part of the
        -- message, so the count must be concatenated into the string.
        error("EpubDownloadBackend: reached max redirects: " .. redirectCount)
    end
    if not timeout then timeout = 10 end
    logger.dbg("timeout:", timeout)
    local sink = {}
    local parsed = socket_url.parse(url)
    socketutil:set_timeout(timeout, maxtime or 30)
    local request = {
        url = url,
        method = "GET",
        -- When a total deadline is set, use socketutil's time-aware sink;
        -- otherwise accumulate the body with a plain ltn12 table sink.
        sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink),
    }
    logger.dbg("request:", request)
    local code, headers, status = socket.skip(1, http.request(request))
    socketutil:reset_timeout()
    logger.dbg("After http.request")
    local content = table.concat(sink) -- empty, or content accumulated so far
    logger.dbg("type(code):", type(code))
    logger.dbg("code:", code)
    logger.dbg("headers:", headers)
    logger.dbg("status:", status)
    logger.dbg("#content:", #content)
    if code == socketutil.TIMEOUT_CODE or
       code == socketutil.SSL_HANDSHAKE_CODE or
       code == socketutil.SINK_TIMEOUT_CODE
    then
        logger.warn("request interrupted:", code)
        return false, code
    end
    if headers == nil then
        logger.warn("No HTTP headers:", code, status)
        return false, "Network or remote server unavailable"
    end
    if not code or string.sub(code, 1, 1) ~= "2" then -- all 200..299 HTTP codes are OK
        if code and code > 299 and code < 400 and headers and headers.location then -- handle 301, 302...
            local redirected_url = headers.location
            local parsed_redirect_location = socket_url.parse(redirected_url)
            if not parsed_redirect_location.host then
                -- Relative redirect: inherit host and scheme from the original URL.
                parsed_redirect_location.host = parsed.host
                parsed_redirect_location.scheme = parsed.scheme
                redirected_url = socket_url.build(parsed_redirect_location)
            end
            logger.dbg("getUrlContent: Redirecting to url:", redirected_url)
            return self:getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
        else
            logger.warn("HTTP status not okay:", code, status)
            return false, status
        end
    end
    if headers and headers["content-length"] then
        -- Check we really got the announced content size
        local content_length = tonumber(headers["content-length"])
        if #content ~= content_length then
            return false, "Incomplete content received"
        end
    end
    logger.dbg("Returning content ok")
    return true, content
end
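
-- Example usage (a sketch; the URL is a placeholder, not from this plugin):
--   local ok, result = NewsHelpers:getUrlContent("https://example.com/feed.xml", 10, 60)
--   if ok then
--       logger.dbg("fetched", #result, "bytes")
--   else
--       logger.warn("fetch failed:", result) -- result is an error code, status or message
--   end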
function NewsHelpers:loadPage(url)
    logger.dbg("Load page:", url)
    local success, content
    --[[ if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget()
        local Trapper = require("ui/trapper")
        local timeout, maxtime = 30, 60
        -- We use dismissableRunInSubprocess with complex return values:
        completed, success, content = Trapper:dismissableRunInSubprocess(function()
            return NewsHelpers:getUrlContent(url, timeout, maxtime)
        end, self.trap_widget)
        if not completed then
            error(self.dismissed_error_code) -- "Interrupted by user"
        end
    else ]]--
    local timeout, maxtime = 10, 60
    success, content = NewsHelpers:getUrlContent(url, timeout, maxtime)
    -- end
    -- On failure, content holds an error code or message, which may not be a string.
    logger.dbg("success:", success, "type(content):", type(content), "content:",
        type(content) == "string" and content:sub(1, 500) or content, "...")
    if not success then
        error(content)
    end
    return content
end
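
-- Example usage (a sketch): loadPage() raises on failure, so callers would
-- typically wrap it in pcall (the URL is a placeholder):
--   local ok, content_or_err = pcall(NewsHelpers.loadPage, NewsHelpers, "https://example.com/feed.xml")
--   if not ok then logger.warn("download failed:", content_or_err) end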
-- Parse an XML string into a Lua table, or nil if parsing fails.
function NewsHelpers:deserializeXMLString(xml_str)
    -- Uses LuaXML https://github.com/manoelcampos/LuaXML
    -- The MIT License (MIT)
    -- Copyright (c) 2016 Manoel Campos da Silva Filho
    -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML
    local treehdl = require("lib/handler")
    local libxml = require("lib/xml")
    -- Handler that accumulates the parsed XML into a simple Lua table.
    local xmlhandler = treehdl.simpleTreeHandler()
    -- Parse the XML string, feeding the handler; bail out on malformed input.
    local ok = pcall(function()
        libxml.xmlParser(xmlhandler):parse(xml_str)
    end)
    if not ok then return end
    return xmlhandler.root
end
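
-- Example usage (a sketch; the "rss"/"channel" keys depend on the feed's actual XML):
--   local feed = NewsHelpers:deserializeXMLString(NewsHelpers:loadPage("https://example.com/feed.xml"))
--   if feed and feed.rss and feed.rss.channel then
--       logger.dbg("feed title:", feed.rss.channel.title)
--   end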
return NewsHelpers