mirror of
https://github.com/koreader/koreader
synced 2024-11-11 19:11:14 +00:00
127 lines
4.9 KiB
Lua
127 lines
4.9 KiB
Lua
|
local logger = require("logger")
|
||
|
local http = require("socket.http")
|
||
|
local socketutil = require("socketutil")
|
||
|
local socket_url = require("socket.url")
|
||
|
local socket = require("socket")
|
||
|
local ltn12 = require("ltn12")
|
||
|
|
||
|
-- Module table; the helper functions below are attached to it as methods
-- and it is the value returned by this file.
local NewsHelpers = {}

-- Upper bound on the redirect depth followed by getUrlContent, to prevent
-- infinite redirect loops.
local max_redirects = 5
|
||
|
|
||
|
--- Get URL content with a GET request, following 3xx redirects manually.
--
-- @param url            URL to fetch.
-- @param timeout        first argument given to socketutil:set_timeout
--                       (defaults to 10 when not provided).
-- @param maxtime        second argument given to socketutil:set_timeout
--                       (30 is used when not provided); when provided, the
--                       response is collected through socketutil.table_sink
--                       instead of ltn12.sink.table.
-- @param redirectCount  number of redirects already followed; used by the
--                       recursive call, callers normally omit it.
-- @return true, content                 on success (any 2xx status).
-- @return false, error code or message  on failure.
-- Raises an error once max_redirects redirects have been followed.
function NewsHelpers:getUrlContent(url, timeout, maxtime, redirectCount)
    logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")")
    redirectCount = redirectCount or 0
    if redirectCount >= max_redirects then
        -- error()'s second parameter is the stack level, not a message part:
        -- the count has to be concatenated to show up in the message.
        error("EpubDownloadBackend: reached max redirects: " .. tostring(redirectCount))
    end

    timeout = timeout or 10
    logger.dbg("timeout:", timeout)

    local sink = {}
    socketutil:set_timeout(timeout, maxtime or 30)
    local request = {
        url = url,
        method = "GET",
        sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink),
    }
    logger.dbg("request:", request)
    -- socket.skip(1, ...) drops the first value returned by http.request,
    -- leaving the code (or error message), the headers and the status line.
    local code, headers, status = socket.skip(1, http.request(request))
    socketutil:reset_timeout()
    logger.dbg("After http.request")
    local content = table.concat(sink) -- empty or content accumulated till now
    logger.dbg("type(code):", type(code))
    logger.dbg("code:", code)
    logger.dbg("headers:", headers)
    logger.dbg("status:", status)
    logger.dbg("#content:", #content)

    if code == socketutil.TIMEOUT_CODE or
       code == socketutil.SSL_HANDSHAKE_CODE or
       code == socketutil.SINK_TIMEOUT_CODE
    then
        logger.warn("request interrupted:", code)
        return false, code
    end
    if headers == nil then
        logger.warn("No HTTP headers:", code, status)
        return false, "Network or remote server unavailable"
    end

    -- All 200..299 HTTP codes are OK. Checking the type first keeps the
    -- numeric comparisons below from raising on an unexpected value.
    local is_http_code = type(code) == "number"
    if not is_http_code or code < 200 or code > 299 then
        if is_http_code and code > 299 and code < 400 and headers.location then -- handle 301, 302...
            -- Resolve the Location header against the URL we just requested,
            -- so relative locations keep the original scheme, host and port.
            local redirected_url = socket_url.absolute(url, headers.location)
            logger.dbg("getUrlContent: Redirecting to url: ", redirected_url)
            return self:getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
        end
        logger.warn("HTTP status not okay:", code, status)
        return false, status
    end

    local announced_length = headers["content-length"]
    if announced_length then
        -- Check we really got the announced content size; a header value
        -- that is not a number is ignored rather than treated as a mismatch.
        local content_length = tonumber(announced_length)
        if content_length and #content ~= content_length then
            return false, "Incomplete content received"
        end
    end
    logger.dbg("Returning content ok")
    return true, content
end
|
||
|
|
||
|
--- Download a page and return its content, raising on failure.
--
-- Calls NewsHelpers:getUrlContent with timeout = 10 and maxtime = 60.
--
-- @param url  URL of the page to load.
-- @return the page content when the download succeeded.
-- Raises an error carrying the failure value returned by getUrlContent
-- when the download did not succeed.
function NewsHelpers:loadPage(url)
    logger.dbg("Load page: ", url)
    local success, content
    -- Disabled alternative: run the download in a dismissable subprocess.
    --[[    if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget()
        local Trapper = require("ui/trapper")
        local timeout, maxtime = 30, 60
        -- We use dismissableRunInSubprocess with complex return values:
        completed, success, content = Trapper:dismissableRunInSubprocess(function()
            return NewsHelpers:getUrlContent(url, timeout, maxtime)
        end, self.trap_widget)
        if not completed then
            error(self.dismissed_error_code) -- "Interrupted by user"
        end
    else]]--
    local timeout, maxtime = 10, 60
    success, content = NewsHelpers:getUrlContent(url, timeout, maxtime)
    -- end

    -- Only slice the value for the log when it is a string, so that an
    -- unexpected failure value cannot raise here and hide the real error.
    local preview = content
    if type(content) == "string" then
        preview = content:sub(1, 500)
    end
    logger.dbg("success:", success, "type(content):", type(content), "content:", preview, "...")

    if not success then
        error(content)
    end
    return content
end
|
||
|
|
||
|
--- Parse an XML string into a Lua table.
--
-- @param xml_str  XML document as a string.
-- @return the root table built by the tree handler, or nil when parsing
--         raised an error (the error is logged as a warning).
function NewsHelpers:deserializeXMLString(xml_str)
    -- uses LuaXML https://github.com/manoelcampos/LuaXML
    -- The MIT License (MIT)
    -- Copyright (c) 2016 Manoel Campos da Silva Filho
    -- see: koreader/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML
    local treehdl = require("lib/handler")
    local libxml = require("lib/xml")
    -- Instantiate the handler that collects the parsed XML as a Lua table.
    local xmlhandler = treehdl.simpleTreeHandler()
    -- Run the parser in protected mode so malformed input does not
    -- propagate an error to the caller.
    local ok, err = pcall(function()
        libxml.xmlParser(xmlhandler):parse(xml_str)
    end)
    if not ok then
        -- Keep the nil return, but do not drop the reason silently.
        logger.warn("deserializeXMLString: XML parsing failed:", err)
        return
    end
    return xmlhandler.root
end
|
||
|
|
||
|
return NewsHelpers
|