Roygbyte 2 weeks ago committed by GitHub
commit 036ecca580
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,6 @@
local _ = require("gettext")
-- Plugin metadata table: the plugin's internal name plus its display name
-- and description, both passed through gettext (`_`).
return {
name = "downloadtoepub",
fullname = _("Download to EPUB"),
description = _([[Download URLs to an EPUB.]]),
}

@ -0,0 +1,103 @@
local DataStorage = require("datastorage")
local LuaSettings = require("frontend/luasettings")
local logger = require("logger")
-- History of downloads, persisted through a LuaSettings file.
local History = {
-- Name of the settings file; History:init() opens it inside the settings directory.
history_file = "downloadtoepub_history.lua",
-- LuaSettings handle, assigned by History:init().
lua_settings = nil,
}
-- Settings key under which the list of history entries is stored.
History.STACK = "stack"
-- Upper bound on the number of entries History:add() keeps.
History.MAX_ITEMS = 100
--- Create a History object and open its backing settings file.
-- @param o optional table that becomes the instance
-- @return the initialised instance
function History:new(o)
    local instance = o or {}
    setmetatable(instance, self)
    self.__index = self
    instance:init()
    return instance
end

--- Open the settings file (settings directory + history_file) used for storage.
function History:init()
    local settings_path = string.format("%s/%s", DataStorage:getSettingsDir(), self.history_file)
    self.lua_settings = LuaSettings:open(settings_path)
end
--- Record a download at the top of the history.
-- The history keeps a single entry per URL (the newest one) and at most
-- History.MAX_ITEMS entries overall. The result is saved and flushed.
-- @param url URL that was downloaded
-- @param download_path path the download was written to
function History:add(url, download_path)
    -- The new entry goes first. The stored list is already newest-first
    -- (this method is the only writer of new entries), so no sort is needed.
    -- Sorting by timestamp was fragile: table.sort is not stable, so two
    -- entries for one URL created within the same second could come out in
    -- either order and the stale one could survive de-duplication.
    local new_stack = {
        {
            url = url,
            download_path = download_path,
            -- os.time() is already seconds since the epoch. Feeding it
            -- os.date("!*t") would reinterpret the UTC fields as local time
            -- and shift the value by the UTC offset.
            timestamp = os.time(),
        },
    }
    local seen = {}
    if url ~= nil then
        seen[url] = true
    end
    for _, entry in ipairs(self:get()) do
        if #new_stack >= History.MAX_ITEMS then
            break
        end
        local entry_url = entry.url
        -- Keep only the first (newest) occurrence of each URL; entries
        -- without a URL cannot be identified and are dropped.
        if entry_url ~= nil and not seen[entry_url] then
            seen[entry_url] = true
            table.insert(new_stack, entry)
        end
    end
    self.lua_settings:saveSetting(History.STACK, new_stack)
    self.lua_settings:flush()
end
--- Remove every entry for the given URL from the history.
-- The filtered list is saved and flushed.
-- @param url URL whose entries should be dropped
function History:remove(url)
    local kept = {}
    for _, entry in ipairs(self:get()) do
        if entry.url ~= url then
            table.insert(kept, entry)
        end
    end
    self.lua_settings:saveSetting(History.STACK, kept)
    self.lua_settings:flush()
end
--- Return the stored list of history entries (an empty table when none is stored).
function History:get()
    return self.lua_settings:readSetting(History.STACK) or {}
end

--- Look up the first entry whose url or download_path equals `v`.
-- @param v URL or download path to search for
-- @return the matching entry, or nil
function History:find(v)
    for _, entry in ipairs(self:get()) do
        if entry.url == v or entry.download_path == v then
            return entry
        end
    end
    return nil
end
-- Placeholder: currently does nothing (add/remove/clear flush on their own).
function History:save()
end
-- Replace the stored history with an empty list and flush it.
function History:clear()
self.lua_settings:saveSetting(History.STACK, {})
self.lua_settings:flush()
end
return History

@ -0,0 +1,68 @@
local _ = require("gettext")
local History = require("epubhistory")
-- Helpers that turn the download history into menu entries.
local HistoryView = {}

--- Create a HistoryView instance.
-- @param o optional table that becomes the instance
function HistoryView:new(o)
    local view = o or {}
    setmetatable(view, self)
    self.__index = self
    return view
end
--- Build a button description for the most recent history entry.
-- @param load_puzzle_cb function called with the history entry when the button is used
-- @return a table with `text` and `callback`, or nil when the history is empty
function HistoryView:getLastDownloadButton(load_puzzle_cb)
    local history = History:new{}
    local history_item = history:get()[1]
    if not history_item then
        return nil
    end
    return {
        -- Translate the template first, then fill in the URL. Translating
        -- the already-formatted string could never match a catalogue entry.
        text = _("Last download: \"%s\""):format(history_item['url']),
        callback = function()
            load_puzzle_cb(history_item)
        end
    }
end
--[[--
Build the "History" entry for a plugin menu.

The entry's sub-menu holds one item per history entry (labelled with its URL)
followed by a "Clear history" item.

@param open_epub_cb function called with the history entry when an item is chosen
@param clear_history_cb not used by this function; "Clear history" clears the
    store directly. NOTE(review): confirm whether callers expect it to be invoked.
@return a list containing the single "History" menu item, or nil when the history is empty
]]
function HistoryView:getMenuItems(open_epub_cb, clear_history_cb)
    local history = History:new{}
    -- Read the stored list once instead of once per use.
    local entries = history:get()
    if #entries == 0 then
        return nil
    end
    local sub_menu_items = {}
    for index, item in ipairs(entries) do
        sub_menu_items[index] = {
            text = item['url'],
            callback = function()
                open_epub_cb(item)
            end
        }
    end
    -- Add a clear history button
    table.insert(sub_menu_items, {
        text = _("Clear history"),
        keep_menu_open = false,
        callback = function()
            history:clear()
        end
    })
    return {
        {
            text = _("History"),
            sub_item_table = sub_menu_items
        }
    }
end
return HistoryView

@ -0,0 +1,129 @@
local xml2lua = require("libs/xml2lua/xml2lua")
local Item = require("libs/gazette/epub/package/item")
local Manifest = require("libs/gazette/epub/package/manifest")
local Spine = require("libs/gazette/epub/package/spine")
-- EPUB package: metadata plus the manifest and spine.
-- manifest, spine and modified are filled in by Package:new(); title and
-- author are set through setTitle()/setAuthor().
local Package = {
title = nil,
author = nil,
language = "en",
modified = nil,
manifest = nil,
spine = nil,
}
--- Create a package with a fresh manifest and spine.
-- `manifest`, `spine`, `modified` and `title` are always (re)initialised,
-- even when `o` already carries them.
-- @param o optional table that becomes the instance
-- @return the package
function Package:new(o)
    o = o or {}
    self.__index = self
    setmetatable(o, self)
    o.manifest = Manifest:new{}
    o.spine = Spine:new{}
    -- The leading "!" makes os.date format UTC, which is what the trailing
    -- "Z" claims; without it local time was written with a UTC marker.
    o.modified = os.date("!%Y-%m-%dT%H:%M:%SZ")
    o:setTitle("Default title")
    return o
end
-- Subclasses are created with the same constructor.
Package.extend = Package.new
-- Set the title; getPackageXml() passes it to the package template.
function Package:setTitle(title)
self.title = title
end
-- Set the author; getPackageXml() passes it to the package template.
function Package:setAuthor(author)
self.author = author
end
--- Add an item to the manifest and, if the manifest accepted it, to the
-- spine and the navigation list as well.
-- @param item the item to add
function Package:addItem(item)
    local added = self.manifest:addItem(item)
    if not added or item == nil then
        return
    end
    self.spine:addItem(item)
    self:addItemToNav(item)
end
--- Append an item to the navigation document's item list.
-- Skipped for a nil item, for the nav document itself, and for items whose
-- `add_to_nav` is explicitly false.
-- @param item the item to list
-- @return true when the item was appended, false otherwise
function Package:addItemToNav(item)
    if not item or
        -- Items carry the nav marker in `properties`; the previous check
        -- read a nonexistent `property` field and could never match.
        item.properties == Item.PROPERTY.NAV or
        item.add_to_nav == false
    then
        return false
    end
    local nav = self:getNav()
    if not nav then
        -- No nav document in the manifest: nothing to append to.
        return false
    end
    -- Nav doesn't check whether the content is already listed, since the
    -- same content may legitimately be linked more than once.
    table.insert(nav.items, item)
    return true
end
--- Return the manifest item flagged as the nav document.
-- @return the nav item, or nil when the manifest holds none
function Package:getNav()
    local function is_nav(candidate)
        return candidate.properties == Item.PROPERTY.NAV
    end
    local location = self.manifest:findItemLocation(is_nav)
    return self.manifest.items[location]
end
--- Replace the manifest's nav document with `item`.
-- @param item the new nav item
-- @return true when a nav entry was found and replaced, false otherwise
function Package:updateNav(item)
    local nav_index = self.manifest:findItemLocation(function(existing_item)
        return existing_item.properties == Item.PROPERTY.NAV
    end)
    if not nav_index then
        -- findItemLocation returns false when nothing matches; writing to
        -- items[false] would add a stray key and falsely report success.
        return false
    end
    self.manifest.items[nav_index] = item
    return true
end
-- Return the manifest's item list (the table itself, not a copy).
function Package:getManifestItems()
return self.manifest.items
end
-- Placeholder: currently does nothing.
function Package:addToNav()
end
--- Render the package document by filling the package.xml template.
-- @return the package XML string, or false plus an error when the template,
--   manifest or spine could not be produced
function Package:getPackageXml()
    local template, template_err = xml2lua.loadFile("plugins/downloadtoepub.koplugin/libs/gazette/epub/templates/package.xml")
    if not template then
        return false, template_err
    end
    local manifest, manifest_err = self.manifest:build()
    if not manifest then
        return false, manifest_err
    end
    local spine, spine_err = self.spine:build()
    if not spine then
        return false, spine_err
    end
    -- Unset metadata (the author is optional) is rendered as an empty
    -- string rather than handing nil to string.format.
    return string.format(
        template,
        self.title or "",
        self.author or "",
        self.language or "",
        self.modified or "",
        manifest,
        spine
    )
end
-- Epub is a Package with list-based helpers.
local Epub = Package:extend{
}

--- Create an Epub.
-- @param o optional table that becomes the instance; it used to be discarded,
--   it is now handed to Package:new (which still resets manifest, spine,
--   modified and title)
-- @return the epub
function Epub:new(o)
    o = Package:new(o)
    self.__index = self
    setmetatable(o, self)
    return o
end
--- Drain an iterator function and add every table it yields.
-- Values that are neither tables nor nil are skipped; nil ends the loop.
-- @param iterator function returning the next item, or nil when exhausted
function Epub:addFromList(iterator)
    local item = iterator()
    while item ~= nil do
        if type(item) == "table" then
            self:addItem(item)
        end
        item = iterator()
    end
end
return Epub

@ -0,0 +1,89 @@
local EpubError = require("libs/gazette/epuberror")
local md5 = require("ffi/sha2").md5
-- Base class for entries of the EPUB manifest.
local Item = {
-- Manifest id; assigned by generateId().
id = nil,
-- Used as the href in the manifest and as the md5 input of generateId().
path = nil,
-- Returned by getContent() when it is a string.
content = nil,
media_type = nil,
-- Optional value of the manifest "properties" attribute (see Item.PROPERTY).
properties = nil,
add_to_nav = nil
}
-- Known values for `properties`.
Item.PROPERTY = {
NAV = "nav"
}
Item.TYPE = "default"
--- Create an item.
-- @param o optional table that becomes the instance
function Item:new(o)
    local item = o or {}
    self.__index = self
    setmetatable(item, self)
    return item
end
-- Subclasses are created with the same constructor.
Item.extend = Item.new

--- Derive the item id from the md5 of its path.
function Item:generateId()
    local digest = md5(self.path)
    -- IDs can't start with a number, hence the letter prefix.
    self.id = "a" .. digest
end
--- Render this item's `<item>` element for the manifest.
-- Regenerates the id from the path first.
-- @return the XML string, or false plus an error when the path or media
--   type is missing
function Item:getManifestPart()
    -- Both fields are required. The previous check read a nonexistent
    -- `mimetype` field and joined the tests with `and`, so an item without
    -- a media type was written out as media-type="nil".
    if not self.path or
        not self.media_type
    then
        return false, EpubError:provideFromItem(self)
    end
    self:generateId()
    if self.properties
    then
        return string.format(
            [[<item id="%s" href="%s" media-type="%s" properties="%s"/>]],
            self.id,
            self.path,
            self.media_type,
            self.properties
        )
    end
    return string.format(
        [[<item id="%s" href="%s" media-type="%s"/>]],
        self.id,
        self.path,
        self.media_type
    )
end
--- Render this item's `<itemref>` line for the spine.
-- @return the XML string, terminated by a newline
function Item:getSpinePart()
    local line_end = "\n"
    local itemref = string.format([[<itemref idref="%s" />%s]], self.id, line_end)
    return itemref
end

--- Render this item's list entry for the nav document.
-- TODO: this arguably belongs in Nav (or a nav factory) rather than Item.
-- @return the `<li>` string linking path to title, terminated by a newline
function Item:getNavPart()
    local line_end = "\n"
    local entry = string.format([[<li><a href="%s">%s</a></li>%s]], self.path, self.title, line_end)
    return entry
end
--- Return the item's content.
-- @return the content when it is a string, false otherwise
function Item:getContent()
    if type(self.content) ~= "string" then
        return false
    end
    return self.content
end
return Item

@ -0,0 +1,55 @@
local Item = require("libs/gazette/epub/package/item")
local EpubError = require("libs/gazette/epuberror")
local util = require("util")
-- Manifest item for an image; never listed in the nav document.
local Image = Item:extend {
format = nil,
add_to_nav = false,
}
-- File-name suffix -> media type for the image formats that can be packaged.
Image.SUPPORTED_FORMATS = {
jpeg = "image/jpeg",
jpg = "image/jpeg",
png = "image/png",
gif = "image/gif",
svg = "image/svg+xml"
}
--- Create an image item.
-- @param o table with at least `path`; its suffix decides the media type
-- @return the item, or false plus an error when the path is missing or its
--   format is unsupported
function Image:new(o)
    o = o or {}
    self.__index = self
    setmetatable(o, self)
    if not o.path
    then
        return false, EpubError.ITEM_MISSING_PATH
    end
    local media_type = o:isFormatSupported(o.path)
    if not media_type
    then
        return false, EpubError.IMAGE_UNSUPPORTED_FORMAT
    end
    o.media_type = media_type
    o:generateId()
    return o
end
-- Placeholder: currently does nothing.
function Image:fetchContent(data_source)
end

--- Map a path's file-name suffix to its image media type.
-- The suffix is compared case-insensitively, so "photo.JPG" is accepted the
-- same way as "photo.jpg".
-- @param path file name or path of the image
-- @return the media type string, or false when the format is unsupported
function Image:isFormatSupported(path)
    local extension = util.getFileNameSuffix(path)
    if type(extension) ~= "string" then
        return false
    end
    return Image.SUPPORTED_FORMATS[string.lower(extension)] or false
end
return Image

@ -0,0 +1,56 @@
local Item = require("libs/gazette/epub/package/item")
local xml2lua = require("libs/xml2lua/xml2lua")
local _ = require("gettext")
-- Navigation document (table of contents) item.
-- Both fields are assigned in Nav:new(); `items` is the list rendered by getContent().
local Nav = Item:extend{
title = nil,
items = nil,
}
--- Create the navigation document item.
-- Title, path, properties, media type and the (empty) item list are always
-- set here, overriding anything passed in `o`.
-- @param o optional table that becomes the instance
function Nav:new(o)
    o = o or {}
    self.__index = self
    setmetatable(o, self)
    o.title = _("Table of Contents")
    o.path = "nav.xhtml"
    o.properties = Item.PROPERTY.NAV
    o.media_type = "application/xhtml+xml"
    -- A stray trailing comma used to turn the next call into a discarded
    -- extra value of this assignment; they are separate statements.
    o.items = {}
    o:generateId()
    return o
end
-- Set the title passed to the nav template by getContent().
function Nav:setTitle(title)
self.title = title
end
-- Append an item to the list rendered by getContent().
function Nav:addItem(item)
-- insert item, yes, but reference it by its id...
table.insert(self.items, item)
end
--- Render the nav document by filling the nav.xhtml template with the title
-- and one list entry per item.
-- @return the rendered XHTML string
function Nav:getContent()
    -- TODO: Add error catching/display
    local template = xml2lua.loadFile("plugins/downloadtoepub.koplugin/libs/gazette/epub/templates/nav.xhtml")
    local pieces = { "\n" }
    for index = 1, #self.items do
        local part = self.items[index]:getNavPart()
        if part then
            pieces[#pieces + 1] = part
        end
    end
    local items_list = table.concat(pieces)
    return (string.format(template, self.title, items_list))
end
return Nav

@ -0,0 +1,32 @@
local EpubError = require("libs/gazette/epuberror")
local Item = require("libs/gazette/epub/package/item")
local util = require("util")
-- Manifest item for an (X)HTML document; listed in the nav by default.
local XHtmlItem = Item:extend {
    title = "Untitled Document",
    add_to_nav = true
}

-- File-name suffixes treated as XHTML content.
XHtmlItem.SUPPORTED_FORMATS = {
    xhtml = true,
    html = true
}

--- Create an XHTML item; the path is URL-encoded and the id derived from it.
-- @param o table with at least `path`
-- @return the item, or false plus an error when the path is missing
function XHtmlItem:new(o)
    local item = o or {}
    self.__index = self
    setmetatable(item, self)
    if not item.path then
        return false, EpubError.ITEM_MISSING_PATH
    end
    item.path = util.urlEncode(item.path)
    item.media_type = "application/xhtml+xml"
    item:generateId()
    return item
end
return XHtmlItem

@ -0,0 +1,77 @@
local EpubError = require("libs/gazette/epuberror")
local xml2lua = require("libs/xml2lua/xml2lua")
local Nav = require("libs/gazette/epub/package/item/nav")
-- EPUB manifest: the list of items that make up the publication.
local Manifest = {
-- Item list; created by Manifest:new(), which also adds the nav item.
items = nil,
nav = nil,
}
--- Create a manifest that already contains a navigation item.
-- @param o optional table that becomes the instance
function Manifest:new(o)
    local manifest = o or {}
    self.__index = self
    setmetatable(manifest, self)
    manifest.items = {}
    manifest:addItem(Nav:new{})
    return manifest
end

--- Add an item unless it is nil or an item with the same id is present.
-- @param item the item to add
-- @return true on success, otherwise false plus an error
function Manifest:addItem(item)
    if item == nil then
        return false, EpubError.MANIFEST_ITEM_NIL
    end
    if self:isItemIncluded(item) then
        return false, EpubError.MANIFEST_ITEM_ALREADY_EXISTS
    end
    table.insert(self.items, item)
    return true
end
--- Locate an item with the same id.
-- @return its index in `items`, or false
function Manifest:isItemIncluded(item)
    local function same_id(existing_item)
        return existing_item.id == item.id
    end
    return self:findItemLocation(same_id)
end

--- Locate the first item whose `properties` equals the given value.
-- @return its index in `items`, or false
function Manifest:findItemLocationByProperties(properties)
    local function same_properties(existing_item)
        local existing = existing_item.properties
        if not existing then
            return false
        end
        return existing == properties
    end
    return self:findItemLocation(same_properties)
end

--- Locate the first item for which `predicate(item)` returns exactly true.
-- @param predicate function receiving an item
-- @return the item's index in `items`, or false when none matches
function Manifest:findItemLocation(predicate)
    for position, candidate in ipairs(self.items) do
        local matched = predicate(candidate)
        if matched == true then
            return position
        end
    end
    return false
end
--- Render the manifest entries, one per line, with a leading newline.
-- @return the XML string, or false plus an error as soon as one item fails
--   to produce its part
function Manifest:build()
    local lines = { "\n" }
    for _, item in ipairs(self.items) do
        local part = item:getManifestPart()
        if not part then
            return false, EpubError.MANIFEST_BUILD_ERROR
        end
        lines[#lines + 1] = part .. "\n"
    end
    return table.concat(lines)
end
return Manifest

@ -0,0 +1,32 @@
-- EPUB spine: the items in reading order.
local Spine = {
    items = nil,
}

--- Create an empty spine.
-- @param o optional table that becomes the instance
function Spine:new(o)
    o = o or {}
    self.__index = self
    setmetatable(o, self)
    o.items = {}
    return o
end

--- Append an item to the reading order.
function Spine:addItem(item)
    table.insert(self.items, item)
end

--- Render the spine by concatenating every item's spine part.
-- @return the XML string, or false plus an error when an item yields no part
function Spine:build()
    local parts = {}
    for _, item in ipairs(self.items) do
        local part = item:getSpinePart()
        if not part
        then
            -- This file never required EpubError, so the failure path used
            -- to raise on a nil global. It is loaded here, on demand.
            local EpubError = require("libs/gazette/epuberror")
            return false, EpubError.SPINE_BUILD_ERROR
        end
        parts[#parts + 1] = part
    end
    return table.concat(parts)
end
return Spine

@ -0,0 +1,6 @@
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OPS/package.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>

@ -0,0 +1,12 @@
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en">
<head>
<title>%s</title>
</head>
<body>
<nav epub:type="toc">
<ol>
%s
</ol>
</nav>
</body>
</html>

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="q">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title id="title">%s</dc:title>
<dc:creator>%s</dc:creator>
<dc:language>%s</dc:language>
<dc:identifier id="q">NOID</dc:identifier>
<meta property="dcterms:modified">%s</meta>
</metadata>
<manifest>
%s
</manifest>
<spine>
%s
</spine>
</package>

@ -0,0 +1,104 @@
local EpubError = require("libs/gazette/epuberror")
local ZipWriter = require("ffi/zipwriter")
local xml2lua = require("libs/xml2lua/xml2lua")
-- ZipWriter specialisation that lays out an EPUB archive.
-- `path` is the final destination, `temp_path` the file written while building.
local Epub32Writer = ZipWriter:new {
    path = nil,
    temp_path = nil,
}

--- Create a writer.
-- @param o optional table that becomes the instance
function Epub32Writer:new(o)
    local writer = o or {}
    self.__index = self
    setmetatable(writer, self)
    return writer
end
--- Write the epub to a temporary archive, then move it to `self.path`.
-- @param epub object providing getPackageXml() and getManifestItems()
-- @return true on success, otherwise false plus an error
function Epub32Writer:build(epub)
    local ok = self:openTempPath()
    if not ok
    then
        return false, EpubError.EPUBWRITER_INVALID_PATH
    end
    self:addMimetype()
    self:addContainer()
    self:addPackage(epub:getPackageXml())
    self:addItems(epub:getManifestItems())
    self:close()
    -- os.rename reports failure through its return values. Ignoring them
    -- made build() claim success while the EPUB never reached its destination.
    local renamed, rename_err = os.rename(self.temp_path, self.path)
    if not renamed
    then
        os.remove(self.temp_path)
        return false, rename_err
    end
    return true
end
--- Set the destination path after checking that it can be opened.
-- @param path destination of the finished EPUB
-- @return true on success, otherwise false plus an error
function Epub32Writer:setPath(path)
    local available, err = self:isOutputAvailable(path)
    if not available then
        return false, err
    end
    self.path = path
    return true
end
-- Add the "mimetype" entry to the archive.
function Epub32Writer:addMimetype()
self:add("mimetype", "application/epub+zip")
end
-- Add META-INF/container.xml, loaded from the templates directory via getPart().
function Epub32Writer:addContainer()
local container = Epub32Writer:getPart("container.xml")
self:add("META-INF/container.xml", container)
end
-- Add the package document (the given string) as OPS/package.opf.
function Epub32Writer:addPackage(packagio)
self:add("OPS/package.opf", packagio)
end
--- Add every item that has content to the archive under "OPS/".
-- Items whose getContent() returns a falsy value are skipped.
-- @param items list of manifest items
function Epub32Writer:addItems(items)
    for index = 1, #items do
        local entry = items[index]
        local payload = entry:getContent()
        if payload then
            self:add("OPS/" .. entry.path, payload)
        end
    end
end
--- Open the temporary archive (`self.path` .. ".tmp") for writing.
-- @return true on success, otherwise false plus an error
function Epub32Writer:openTempPath()
    -- Without a destination (setPath not called or failed) the concatenation
    -- below would raise; report the invalid path instead.
    if type(self.path) ~= "string" or self.path == ""
    then
        return false, EpubError.EPUBWRITER_INVALID_PATH
    end
    self.temp_path = self.path .. ".tmp"
    if not self:open(self.temp_path)
    then
        return false, EpubError.EPUBWRITER_INVALID_PATH
    end
    return true
end
--- Probe whether `path` can be opened for writing.
-- The probe opens the path, closes it and then removes the file at `path`.
-- @return true when the path could be opened, otherwise false plus an error
function Epub32Writer:isOutputAvailable(path)
    if not self:open(path) then
        return false, EpubError.EPUBWRITER_INVALID_PATH
    end
    self:close()
    os.remove(path)
    return true
end

--- Load a file from the EPUB templates directory.
-- @param filename name of the template file
-- @return whatever xml2lua.loadFile produced, or false plus an error
function Epub32Writer:getPart(filename)
    local template_dir = "plugins/downloadtoepub.koplugin/libs/gazette/epub/templates/"
    local file, err = xml2lua.loadFile(template_dir .. filename)
    if not file then
        return false, err
    end
    return file
end
return Epub32Writer

@ -0,0 +1,34 @@
local Epub32Writer = require("libs/gazette/epub32writer")
-- Drives a writer through the steps needed to produce an EPUB file.
local EpubBuildDirector = {
    writer = nil,
    epub = nil,
}

--- Create a build director.
-- Every call returns its own instance. The writer used to be stored on the
-- shared module table, so a second director silently replaced the writer
-- of the first.
-- @param writer optional writer; an Epub32Writer is created when omitted
-- @return the director, or false plus an error when no writer could be created
function EpubBuildDirector:new(writer)
    if not writer then
        local default_writer, err = Epub32Writer:new{}
        if not default_writer then
            return false, err
        end
        writer = default_writer
    end
    self.__index = self
    local director = setmetatable({}, self)
    director.writer = writer
    return director
end

--- Set the output path on the writer.
-- @return the writer's result (true, or false plus an error)
function EpubBuildDirector:setDestination(path)
    return self.writer:setPath(path)
end

--- Build the epub with the writer.
-- @return the writer's output path on success, otherwise false plus an error
function EpubBuildDirector:construct(epub)
    local ok, err = self.writer:build(epub)
    if ok then
        return self.writer.path
    else
        return false, err
    end
end
return EpubBuildDirector

@ -0,0 +1,37 @@
local _ = require("gettext")
local T = require("ffi/util").template
-- Translated error messages returned (as the second value) by the EPUB code.
local EpubError = {
    EPUB_INVALID_CONTENTS = _("Contents invalid"),
    EPUBWRITER_INVALID_PATH = _("The path couldn't be opened."),
    ITEMFACTORY_UNSUPPORTED_TYPE = _("Item type is not supported."),
    ITEMFACTORY_NONEXISTENT_CONSTRUCTOR = _("Item type is supported but ItemFactory doesn't have a constructor for it."),
    -- Used to be an empty message, which tells the user nothing.
    RESOURCE_WEBPAGE_INVALID_URL = _("The URL is invalid."),
    -- Misspelled key kept for existing callers; prefer ITEM_MISSING_ID.
    ITEM_MISSNG_ID = _("Item missing id"),
    ITEM_MISSING_ID = _("Item missing id"),
    ITEM_MISSING_MEDIA_TYPE = _("Item missing media type"),
    ITEM_MISSING_PATH = _("Item missing path"),
    ITEM_NONSPECIFIC_ERROR = _("Something's wrong with your item. That's all I know"),
    IMAGE_UNSUPPORTED_FORMAT = _("Image format is not supported."),
    MANIFEST_BUILD_ERROR = _("Could not build manifest part for item."),
    MANIFEST_ITEM_ALREADY_EXISTS = _("Item already exists in manifest"),
    MANIFEST_ITEM_NIL = _("Can't add a nil item to the manifest."),
    SPINE_BUILD_ERROR = _("Could not build spine part for item."),
}
-- Placeholder: currently does nothing.
function EpubError:provideFromEpubWriter(epubwriter)
end

--- Pick the message describing what is wrong with an item.
-- A missing media type is reported before a missing path.
-- @param item the item to inspect
-- @return one of the ITEM_* messages
function EpubError:provideFromItem(item)
    if not item.media_type then
        return EpubError.ITEM_MISSING_MEDIA_TYPE
    end
    if not item.path then
        return EpubError.ITEM_MISSING_PATH
    end
    return EpubError.ITEM_NONSPECIFIC_ERROR
end
return EpubError

@ -0,0 +1,78 @@
local EpubError = require("libs/gazette/epuberror")
local XHtmlItem = require("libs/gazette/epub/package/item/xhtmlitem")
local Image = require("libs/gazette/epub/package/item/image")
local util = require("util")
-- Builds manifest items from file paths and content.
local ItemFactory = {
}
-- Item type name -> table of file-name suffixes handled by that type.
ItemFactory.ITEM_TYPES = {
xhtml = XHtmlItem.SUPPORTED_FORMATS,
image = Image.SUPPORTED_FORMATS
}
-- Item type name -> function(path, content) constructing the item.
ItemFactory.ITEM_CONSTRUCTORS = {
xhtml = function(path, content)
return XHtmlItem:new{
path = path,
content = content,
}
end,
image = function(path, content)
return Image:new{
path = path,
content = content
}
end
}
--- Build a manifest item from a resource.
-- The resource's filename becomes the item path and getData() its content;
-- for XHTML items the resource title, when present, becomes the item title.
-- @param resource object with `filename`, optional `title`, and getData()
-- @return the item, or false plus an error when no item could be built
function ItemFactory:makeItemFromResource(resource)
    local title = resource.title
    local path = resource.filename
    local content = resource:getData()
    local item, type_or_err = self:makeItem(path, content)
    if not item
    then
        -- On failure makeItem's second value is either an error message or,
        -- when the constructor itself failed, the matched type name. The
        -- error used to be dropped here, leaving callers with a bare false.
        if ItemFactory.ITEM_CONSTRUCTORS[type_or_err]
        then
            return false, EpubError.ITEM_NONSPECIFIC_ERROR
        end
        return false, type_or_err
    end
    if type_or_err == "xhtml" and
        title
    then
        item.title = title
    end
    return item
end
--- Build an item for `path`, choosing the item type from the lower-cased
-- file-name suffix.
-- @param path file name/path of the item
-- @param content the item's content
-- @return the constructor's first result plus the matched type name, or
--   false plus an error when the type is unsupported or has no constructor
function ItemFactory:makeItem(path, content)
    local lowered_path = string.lower(path)
    local suffix = util.getFileNameSuffix(lowered_path)
    local matched_type = ItemFactory:getItemTypeFromFileNameSuffix(suffix)
    if not matched_type then
        return false, EpubError.ITEMFACTORY_UNSUPPORTED_TYPE
    end
    local construct = ItemFactory.ITEM_CONSTRUCTORS[matched_type]
    if not construct then
        return false, EpubError.ITEMFACTORY_NONEXISTENT_CONSTRUCTOR
    end
    local item = construct(path, content)
    return item, matched_type
end

--- Find the item type whose supported formats include `suffix`.
-- @param suffix file-name suffix
-- @return the item type name, or nil when no type handles the suffix
function ItemFactory:getItemTypeFromFileNameSuffix(suffix)
    for item_type, supported_formats in pairs(ItemFactory.ITEM_TYPES) do
        if supported_formats[suffix] then
            return item_type
        end
    end
    return nil
end
return ItemFactory

@ -0,0 +1,80 @@
local Resource = require("libs/gazette/resources/resource")
local Element = require("libs/gazette/resources/htmldocument/element")
local util = require("util")
-- Resource wrapping one HTML document.
-- HtmlDocument:new() fills `html` (fetched from `url` when absent), `title`
-- (found in the HTML when absent) and `filename` (derived when absent).
local HtmlDocument = Resource:extend{
url = nil,
html = nil,
filename = nil,
title = nil,
}
--- Create an HTML document resource.
-- Needs `url` or `html`; the HTML is fetched from the URL when not supplied.
-- The title falls back to the one found in the HTML, and the filename is
-- derived from the URL (or the title) with ".html" appended.
-- @param o table with `url` and/or `html`, optional `title` and `filename`
-- @return the document, or false (plus an error when fetching failed)
function HtmlDocument:new(o)
    o = o or {}
    self.__index = self
    setmetatable(o, self)
    if not (o.url or o.html) then
        return false
    end
    if not o.html then
        local content, err = o:fetchUrlContent(o.url)
        if err then
            return false, err
        end
        o.html = content
    end
    if not o.title then
        o.title = o:findTitle()
    end
    if not o.filename then
        local _, filename = util.splitFilePathName(o.url or o.title)
        -- Some URLs end in a suffix (".html"), some don't, so the suffix is
        -- stripped and ".html" is appended unconditionally.
        local pure_filename = util.splitFileNameSuffix(filename)
        o.filename = util.getSafeFilename(pure_filename) .. ".html"
    end
    return o
end
-- Return the document's HTML.
function HtmlDocument:getData()
return self.html
end
-- Return Element objects for the "img" tags found by extractElements().
function HtmlDocument:findImageElements()
return self:extractElements("img")
end
--- Extract the text of the first `<title>` element.
-- The capture is lazy, so it stops at the first closing tag: a greedy match
-- ran to the LAST `</title>` in the document (inline SVG titles, for
-- instance). Attributes on the opening tag are tolerated.
-- @return the title text, or nil when there is no non-empty title
function HtmlDocument:findTitle()
    local title = string.match(self.html, "<title[^>]*>(.-)</title>")
    if title == "" then
        return nil
    end
    return title
end
--- Collect the opening tags of every `tag` element as Element objects.
-- @param tag element name, e.g. "img"
-- @return list of Element objects
function HtmlDocument:extractElements(tag)
    -- The pattern is assembled from two pieces because the leading "%s"
    -- (Lua-pattern whitespace) must not be consumed by string.format.
    local pattern = "(<%s" .. string.format("*%s [^>]*>)", tag)
    local found = {}
    for element_html in string.gmatch(self.html, pattern) do
        found[#found + 1] = Element:new(element_html)
    end
    return found
end

--- Run `callback` over the opening tag of every `tag` element and substitute
-- its result into the document (string.gsub semantics).
-- @param tag element name, e.g. "img"
-- @param callback replacement passed to string.gsub
function HtmlDocument:modifyElements(tag, callback)
    local pattern = "(<%s" .. string.format("*%s [^>]*>)", tag)
    local updated = string.gsub(self.html, pattern, callback)
    self.html = updated
end
return HtmlDocument

@ -0,0 +1,29 @@
-- A single HTML tag, kept as its source text.
local Element = {
    html = nil
}

--- Wrap the HTML text of a tag.
-- @param html source text of the tag
-- @return the element
function Element:new(html)
    -- `o` must be local: it used to leak into the global table, where every
    -- construction overwrote the same shared `o`.
    local o = {}
    self.__index = self
    setmetatable(o, self)
    o.html = html
    return o
end

--- Return the value of the `src` attribute (see attributeValue).
function Element:src()
    return self:attributeValue("src")
end

--- Return the double-quoted value of an attribute.
-- @param attribute attribute name
-- @return the value, or false plus a message when it is absent or empty
function Element:attributeValue(attribute)
    local attribute_to_match = string.format([[%s="([^"]*)"]], attribute)
    local value = self.html:match(attribute_to_match)
    if not value or value == ""
    then
        return false, string.format("Error: no %s value in this element", attribute)
    end
    return value
end
return Element

@ -0,0 +1,19 @@
-- Format strings for generated documents.
local Template = {}
-- XHTML page skeleton with three %s placeholders, in order: the <title>
-- text, the <h1> heading and the content of <main>.
Template.HTML = [[<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%s</title>
</head>
<body>
<header>
<h1>%s</h1>
</header>
<main>
%s
</main>
<footer>
</footer>
</body>
</html>]]
return Template

@ -0,0 +1,51 @@
local util = require("util")
local Resource = require("libs/gazette/resources/resource")
-- Resource wrapping one downloaded image.
local Image = Resource:extend{
    filename = nil,
    url = nil,
    payload = nil
}

--- Create an image resource.
-- Requires `url`; the payload is fetched from it when not supplied, and the
-- filename is derived from the URL when not supplied.
-- @param o table with `url`, optional `payload` and `filename`
-- @return the resource, or false (plus an error when fetching failed)
function Image:new(o)
    local image = o or {}
    self.__index = self
    setmetatable(image, self)
    if not image.url then
        return false
    end
    if not image.payload then
        local payload, err = image:fetchUrlContent(image.url)
        if err then
            return false, err
        end
        image.payload = payload
    end
    if not image.filename then
        image.filename = image:filenameFromUrl(image.url)
    end
    return image
end
--- Return the image payload.
function Image:getData()
    return self.payload
end

--- Derive a safe file name from the last path component of a URL.
-- @param url URL of the image
-- @return the sanitised file name
function Image:filenameFromUrl(url)
    local _, filename = util.splitFilePathName(url)
    -- Parentheses keep this to a single return value.
    return (util.getSafeFilename(filename))
end
return Image
-- string.match(o.url, "((data:image/[a-z]+;base64,)(%w+))")

@ -0,0 +1,44 @@
local HttpError = require("libs/http/httperror")
local RequestFactory = require("libs/http/requestfactory")
-- Base class for downloadable resources.
local Resource = {
    data = nil,
    filename = nil
}

--- Create a resource.
-- @param o optional table that becomes the instance
function Resource:new(o)
    local resource = o or {}
    self.__index = self
    setmetatable(resource, self)
    return resource
end
-- Subclasses are created with the same constructor.
Resource.extend = Resource.new

--- Return the resource's data.
function Resource:getData()
    return self.data
end
--- Fetch the content behind a URL with a GET request.
-- @param url URL to fetch
-- @return the response content, or false plus an error when the request
--   could not be created, failed, carried no content, or was not OK
function Resource:fetchUrlContent(url)
    local request, request_err = RequestFactory:makeGetRequest(url, {})
    if not request then
        return false, request_err
    end
    local response, send_err = request:send()
    local failed = send_err or not response.content or not response:isOk()
    if failed then
        return false, HttpError:provideFromResponse(response)
    end
    return response.content
end
return Resource

@ -0,0 +1,109 @@
local Resource = require("libs/gazette/resources/resource")
local HtmlDocument = require("libs/gazette/resources/htmldocument")
local Image = require("libs/gazette/resources/image")
local ItemFactory = require("libs/gazette/factories/itemfactory")
local RequestFactory = require("libs/http/requestfactory")
local util = require("util")
local socket_url = require("socket.url")
-- A web page together with the resources (document and images) it needs.
-- WebPage:new() fills base_url, items and resources.
local WebPage = Resource:extend {
url = nil,
-- Parsed form of `url`; used to resolve image sources in downloadImages().
base_url = nil,
title = nil,
-- Manifest items built by createItems().
items = nil,
-- Resources collected by createResources().
resources = nil,
}
--- Create a web page.
-- Requires `url`; the HTML is fetched from it unless `html` is supplied.
-- @param o table with `url`, optional `html` and `title`
-- @return the page, or false (plus an error when fetching failed)
function WebPage:new(o)
    local page = o or {}
    self.__index = self
    setmetatable(page, self)
    if not page.url then
        return false
    end
    if not page.html then
        local content, err = page:fetchUrlContent(page.url)
        if err then
            return false, err
        end
        page.html = content
    end
    page.base_url = socket_url.parse(page.url)
    page.resources = {}
    page.items = {}
    return page
end
-- Collect the page's resources, then turn them into manifest items.
function WebPage:build()
self:createResources()
self:createItems()
end
--- Collect the page's resources: the HTML document plus its images.
-- Downloaded images replace their `<img>` tags in the document with tags
-- pointing at the local file name; other tags are left untouched.
-- @return true on success, or false plus an error when the HTML document
--   could not be created
function WebPage:createResources()
    local html_document, err = HtmlDocument:new{
        url = self.url,
        html = self.html,
        title = self.title
    }
    if not html_document
    then
        -- HtmlDocument:new returns false on failure; it used to be stored as
        -- a resource and then indexed, which raised.
        return false, err
    end
    table.insert(self.resources, html_document)
    local images = self:downloadImages(
        html_document:findImageElements()
    )
    html_document:modifyElements("img", function(element)
        -- `images` is keyed by the tag's source text.
        local image = images[element]
        if not image
        then
            return element
        end
        return string.format([[<img src="%s"/>]], image.filename)
    end)
    for _, image in pairs(images) do
        table.insert(self.resources, image)
    end
    return true
end
--- Turn every collected resource into a manifest item.
-- Resources that yield no item are skipped. Checking only the error value
-- let `false` slip into the item list whenever the factory reported a
-- failure without a second return value.
function WebPage:createItems()
    for _, resource in ipairs(self.resources) do
        local item, err = ItemFactory:makeItemFromResource(resource)
        if item and not err
        then
            table.insert(self.items, item)
        end
    end
end
--- Download the images referenced by the given elements.
-- Sources are resolved against the page URL. Elements without a `src`, and
-- images that fail to download, are skipped. An element whose source text
-- was already handled is not downloaded again.
-- @param image_elements list of Element objects
-- @return table mapping each element's source text to its Image resource
function WebPage:downloadImages(image_elements)
    local image_items = {}
    for _, element in ipairs(image_elements) do
        local src = element:src()
        if src and not image_items[element.html]
        then
            local url = socket_url.absolute(self.base_url, src)
            local image = Image:new{
                url = url
            }
            if image
            then
                image_items[element.html] = image
            end
        end
    end
    return image_items
end
return WebPage

@ -0,0 +1,17 @@
-- Produces iterator functions over a web page's items.
local ResourceIterator = {}

--- Build an iterator over `webpage.items`.
-- The item count is captured when the iterator is created.
-- @param webpage object with an `items` list
-- @return a function returning the next item on each call and nothing once
--   the captured count is exceeded
function ResourceIterator:new(webpage)
    local total = #webpage.items
    local position = 0
    return function()
        position = position + 1
        if position > total then
            return
        end
        return webpage.items[position]
    end
end
return ResourceIterator

@ -0,0 +1,34 @@
local _ = require("gettext")
local T = require("ffi/util").template
-- Translated error messages for HTTP requests and responses.
local HttpError = {
RESPONSE_NONSPECIFIC_ERROR = _("There was an error. That's all I know."),
REQUEST_UNSUPPORTED_SCHEME = _("Scheme not supported."),
-- %1 is filled with the response code by provideFromResponse().
REQUEST_INCOMPLETE = _("Request couldn't complete. Code %1."),
REQUEST_PAGE_NOT_FOUND = _("Page not found."),
RESPONSE_HAS_NO_CONTENT = _("No content found in response."),
}
--- Create a table that inherits from HttpError.
-- @param o optional table that becomes the instance
function HttpError:extend(o)
    local derived = o or {}
    setmetatable(derived, self)
    self.__index = self
    return derived
end

--- Pick the message that best describes a failed response.
-- Checked in order: request not completed, page not found (code 404 or
-- unknown host), missing content; otherwise a generic message.
-- @param response the response to inspect
-- @return the error message
function HttpError:provideFromResponse(response)
    if not response:hasCompleted() then
        return T(HttpError.REQUEST_INCOMPLETE, response.code)
    end
    if response.code == 404 or not response:isHostKnown() then
        return HttpError.REQUEST_PAGE_NOT_FOUND
    end
    if not response:hasContent() then
        return HttpError.RESPONSE_HAS_NO_CONTENT
    end
    return HttpError.RESPONSE_NONSPECIFIC_ERROR
end
return HttpError

@ -0,0 +1,59 @@
local http = require("socket.http")
local socketutil = require("socketutil")
local socket = require("socket")
local ltn12 = require("ltn12")
local logger = require("logger")
local ResponseFactory = require("libs/http/responsefactory")
local DEFAULT_TIMEOUT = 30
local DEFAULT_MAXTIME = 30
local DEFAULT_REDIRECTS = 5
-- HTTP request prototype; instances fall back to these defaults.
local Request = {
url = nil,
method = nil,
maxtime = DEFAULT_MAXTIME,
timeout = DEFAULT_TIMEOUT,
redirects = DEFAULT_REDIRECTS,
-- Replaced by a fresh table on every send().
sink = {},
}
-- HTTP method names; replaces the nil `method` placeholder above.
Request.method = {
get = "GET",
post = "POST",
}
-- Supported URL schemes, keyed by lower-case scheme name.
Request.scheme = {
http = "HTTP",
https = "HTTPS"
}
-- Default timing/redirect settings.
Request.default = {
timeout = DEFAULT_TIMEOUT,
maxtime = DEFAULT_MAXTIME,
redirects = DEFAULT_REDIRECTS,
}
--- Create a request.
-- @param o optional table that becomes the instance
function Request:new(o)
    local request = o or {}
    self.__index = self
    setmetatable(request, self)
    return request
end

--- Perform the request and wrap the outcome in a response object.
-- The body is collected into a fresh sink table. Timeouts are set before
-- the call and reset afterwards.
-- @return the response built by ResponseFactory
function Request:send()
    self.sink = {}
    socketutil:set_timeout(self.timeout, self.maxtime)
    local body_sink
    if self.maxtime then
        body_sink = socketutil.table_sink(self.sink)
    else
        body_sink = ltn12.sink.table(self.sink)
    end
    local options = {
        url = self.url,
        method = self.method,
        sink = body_sink
    }
    -- The first result of http.request is dropped by socket.skip.
    local code, headers, status = socket.skip(1, http.request(options))
    local content = table.concat(self.sink)
    socketutil:reset_timeout()
    return ResponseFactory:make(code, headers, status, content)
end
return Request

@ -0,0 +1,26 @@
local Request = require("libs/http/request")
local HttpError = require("libs/http/httperror")
local socket_url = require("socket.url")
-- Builds Request objects after validating the URL.
local RequestFactory = {
}

--- Create a GET request for `url`.
-- @param url URL to request; its scheme must be one of Request.scheme
--   (compared case-insensitively)
-- @param config optional table with `timeout` and `maxtime` overrides
-- @return the request, or false plus an error when the URL cannot be parsed
--   or its scheme is unsupported
function RequestFactory:makeGetRequest(url, config)
    config = config or {}
    -- Parsing can fail (no table returned) and the URL may lack a scheme;
    -- both used to raise when the result was indexed.
    local parsed_url = type(url) == "string" and socket_url.parse(url) or nil
    local scheme = parsed_url and parsed_url["scheme"]
    if type(scheme) ~= "string" or
        not Request.scheme[string.lower(scheme)]
    then
        return false, HttpError.REQUEST_UNSUPPORTED_SCHEME
    end
    return Request:new{
        url = url,
        timeout = config.timeout,
        maxtime = config.maxtime,
        method = Request.method.get
    }
end
return RequestFactory

@ -0,0 +1,150 @@
local socketutil = require("socketutil")
local socket_url = require("socket.url")
-- HTTP response prototype.
local Response = {
-- Status code; the host-lookup error string is compared against it in
-- isHostKnown().
code = nil,
headers = nil,
status = nil,
-- Set from the "location" header by setUrlFromHeaders().
url = nil,
-- Body; replaced by a decoded tree in Response:new() for XML responses.
content = nil,
}
--- Create a response and normalise it.
-- Takes the URL from the headers when present, maps an unknown host to code
-- 404, and decodes the content of XML responses.
-- @param o optional table (code, headers, status, content) that becomes the instance
function Response:new(o)
    local response = o or {}
    self.__index = self
    setmetatable(response, self)
    if response:hasHeaders() then
        response:setUrlFromHeaders()
    end
    if not response:isHostKnown() then
        response.code = 404
    end
    if response:isXml() and response:hasContent() then
        response.content = response:decodeXml(response.content)
    end
    return response
end
-- Subclasses are created with the same constructor.
Response.extend = Response.new
--- True when the request completed and the response has usable headers.
function Response:canBeConsumed()
    local consumable = self:hasCompleted() and self:hasHeaders()
    return consumable and true or false
end

--- True when the code is a number in the 300-399 range.
function Response:hasRedirected()
    local code = self.code
    return type(code) == "number" and code > 299 and code < 400
end

--- True when the code is the number 200.
function Response:isOk()
    return type(self.code) == "number" and self.code == 200
end
--- False when there is no code or the code is one of socketutil's timeout /
-- SSL-handshake markers; true otherwise.
function Response:hasCompleted()
    local interrupted = not self.code
        or self.code == socketutil.TIMEOUT_CODE
        or self.code == socketutil.SSL_HANDSHAKE_CODE
        or self.code == socketutil.SINK_TIMEOUT_CODE
    return not interrupted
end

--- True when headers exist and include a "content-type" entry.
function Response:hasHeaders()
    if self.headers == nil then
        return false
    end
    return self.headers["content-type"] and true or false
end
--- True when the response carries content.
-- Content is either the raw body (a string) or, for XML responses, the
-- decoded tree (a table) that Response:new() stores in its place.
-- The previous type test, `not type(x) == "string"`, parsed as
-- `(not type(x)) == "string"` and was therefore always false.
-- Comparing against the content-length header would be ideal, but not every
-- response supplies it.
-- @return boolean
function Response:hasContent()
    local content_type = type(self.content)
    return content_type == "string" or content_type == "table"
end
--- False when the code is the resolver's "host not known" message.
function Response:isHostKnown()
    return self.code ~= "host or service not provided, or not known"
end

--- True when the content-type header mentions "xml".
function Response:isXml()
    if not self:hasHeaders() then
        return false
    end
    return string.match(self.headers["content-type"], "(.*)xml(.*)") ~= nil
end
--- Store the "location" header, parsed and rebuilt, as the response URL.
-- Does nothing when the header is absent.
function Response:setUrlFromHeaders()
    local location = self.headers.location
    if not location then
        return
    end
    self.url = socket_url.build(socket_url.parse(location))
end
--- Parse an XML string into a tree using xml2lua's tree handler.
-- @param xml_to_decode XML source text
-- @return the handler's root table, or nil when parsing raised an error
--   (the response is then treated as having no content)
function Response:decodeXml(xml_to_decode)
    local xml2lua = require("../libs/xml2lua/xml2lua")
    local handler = require("../libs/xml2lua/xmlhandler.tree"):new()
    local parser = xml2lua.parser(handler)
    local parsed = pcall(function()
        parser:parse(xml_to_decode)
    end)
    if parsed then
        return handler.root
    end
    return nil
end
return Response

@ -0,0 +1,16 @@
local Response = require("libs/http/response")
-- Builds Response objects from the raw results of an HTTP request.
local ResponseFactory = {}

--- Wrap request results in a Response.
-- @param code status code (or error string)
-- @param headers response headers
-- @param status status line
-- @param content response body
-- @return the Response
function ResponseFactory:make(code, headers, status, content)
    local fields = {
        code = code,
        headers = headers,
        status = status,
        content = content
    }
    return Response:new(fields)
end
return ResponseFactory

@ -0,0 +1,434 @@
--- @module Class providing the actual XML parser.
-- Available options are:
-- * stripWS
-- Strip non-significant whitespace (leading/trailing)
-- and do not generate events for empty text elements
--
-- * expandEntities
-- Expand entities (standard entities + single char
-- numeric entities only currently - could be extended
-- at runtime if suitable DTD parser added elements
-- to table (see obj._ENTITIES). May also be possible
-- to expand multibyte entities for UTF-8 only
--
-- * errorHandler
-- Custom error handler function
--
-- NOTE: Boolean options must be set to 'nil' not '0'
---Converts a decimal character code to the corresponding single-byte char.
--Codes outside 0..255 are returned unchanged as a numeric reference
--in the format &#code;
--@param code the decimal value (as captured text) to convert
local function decimalToHtmlChar(code)
    local value = tonumber(code)
    local fits_in_byte = value >= 0 and value < 256
    if not fits_in_byte then
        return "&#"..code..";"
    end
    return string.char(value)
end
---Converts a hexadecimal character code to the corresponding single-byte char.
--Codes outside 0..255 are returned unchanged as a numeric reference
--in the format &#xCode;
--@param code the hexadecimal value (as captured text) to convert
local function hexadecimalToHtmlChar(code)
    local value = tonumber(code, 16)
    local fits_in_byte = value >= 0 and value < 256
    if not fits_in_byte then
        return "&#x"..code..";"
    end
    return string.char(value)
end
-- Parser prototype: Lua patterns used to recognise XML constructs, the
-- error message table and the entity replacement table.
local XmlParser = {
-- Private attributes/functions
_XML = '^([^<]*)<(%/?)([^>]-)(%/?)>',
_ATTR1 = '([%w-:_]+)%s*=%s*"(.-)"',
_ATTR2 = '([%w-:_]+)%s*=%s*\'(.-)\'',
_CDATA = '<%!%[CDATA%[(.-)%]%]>',
_PI = '<%?(.-)%?>',
_COMMENT = '<!%-%-(.-)%-%->',
_TAG = '^(.-)%s.*',
_LEADINGWS = '^%s+',
_TRAILINGWS = '%s+$',
_WS = '^%s*$',
_DTD1 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*(%b[])%s*>',
_DTD2 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*(%b[])%s*>',
--_DTD3 = '<!DOCTYPE%s+(.-)%s*(%b[])%s*>',
_DTD3 = '<!DOCTYPE%s.->',
_DTD4 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*>',
_DTD5 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*>',
--Matches an attribute with non-closing double quotes (The equal sign is matched non-greedly by using =+?)
-- NOTE(review): Lua patterns have no lazy "+?" quantifier; the "?" here is
-- presumably matched as a literal character - confirm the intent.
_ATTRERR1 = '=+?%s*"[^"]*$',
--Matches an attribute with non-closing single quotes (The equal sign is matched non-greedly by using =+?)
_ATTRERR2 = '=+?%s*\'[^\']*$',
--Matches a closing tag such as </person> or the end of an opening tag such as <person>
_TAGEXT = '(%/?)>',
-- Messages handed to the error handler.
_errstr = {
xmlErr = "Error Parsing XML",
declErr = "Error Parsing XMLDecl",
declStartErr = "XMLDecl not at start of document",
declAttrErr = "Invalid XMLDecl attributes",
piErr = "Error Parsing Processing Instruction",
commentErr = "Error Parsing Comment",
cdataErr = "Error Parsing CDATA",
dtdErr = "Error Parsing DTD",
endTagErr = "End Tag Attributes Invalid",
unmatchedTagErr = "Unbalanced Tag",
incompleteXmlErr = "Incomplete XML Document",
},
-- Pattern -> replacement (string or function) used when expanding entities.
_ENTITIES = {
["&lt;"] = "<",
["&gt;"] = ">",
["&amp;"] = "&",
["&quot;"] = '"',
["&apos;"] = "'",
["&#(%d+);"] = decimalToHtmlChar,
["&#x(%x+);"] = hexadecimalToHtmlChar,
},
}
--- Instantiates a XmlParser object.
--@param _handler Handler module used to convert the XML string
-- to another format. See the available handlers in the handler directory.
-- A handler instance is usually obtained with, for instance:
-- local handler = require("xmlhandler/tree").
--@param _options Options for this XmlParser instance.
--@see XmlParser.options
function XmlParser.new(_handler, _options)
    local parser = setmetatable({
        handler = _handler,
        options = _options,
        _stack = {}
    }, XmlParser)
    -- As in the original, __index is stored on the instance itself.
    parser.__index = XmlParser
    return parser
end
---Checks if a function/field exists in a table or anywhere along its
--chain of metatables.
--@param table the table to check if it has a given function
--@param elementName the name of the function/field to check if exists
--@return true if the function/field exists, false otherwise
local function fexists(table, elementName)
    local candidate = table
    -- Walk from the table itself to its metatable, then to the metatable's
    -- metatable, and so on until the field shows up or the chain ends.
    while candidate ~= nil do
        if candidate[elementName] ~= nil then
            return true
        end
        candidate = getmetatable(candidate)
    end
    return false
end
--- Reports a parsing error through the user supplied error handler.
-- Nothing happens when self.options.errorHandler is not set.
--@param errMsg the error message
--@param pos position in the XML string the error refers to
local function err(self, errMsg, pos)
    local handler = self.options.errorHandler
    if handler then
        handler(errMsg, pos)
    end
end
--- Removes leading and trailing whitespace from a string.
-- The string is returned untouched unless self.options.stripWS is set.
--@param s the string to trim
--@return the (possibly trimmed) string
local function stripWS(self, s)
    if not self.options.stripWS then
        return s
    end
    local trimmed = string.gsub(s, '^%s+', '')
    trimmed = string.gsub(trimmed, '%s+$', '')
    return trimmed
end
--- Expands the entity references found in a string.
-- Does nothing unless self.options.expandEntities is set.
--
-- The text is scanned once, left to right, and each `&...;` candidate is
-- checked against the patterns in self._ENTITIES. Replaced text is never
-- scanned again, so the result no longer depends on the (unspecified) order
-- in which pairs() visits the entity table: "&amp;lt;" always becomes "&lt;"
-- and is never decoded a second time into "<".
--
-- Assumes every key of self._ENTITIES is a Lua pattern describing one
-- complete `&...;` reference, as the entries visible in this file are.
--@param s the string to process
--@return the string with the known entity references expanded
local function parseEntities(self, s)
    if not self.options.expandEntities then
        return s
    end
    local entities = self._ENTITIES
    local function expand(reference)
        for pattern, replacement in pairs(entities) do
            -- Anchor the pattern so it has to describe the whole reference.
            local expanded, count = string.gsub(reference, "^" .. pattern .. "$", replacement)
            if count > 0 then
                return expanded
            end
        end
        -- Unknown reference: returning nil makes gsub keep the original text.
        return nil
    end
    s = string.gsub(s, "&[^&;]+;", expand)
    return s
end
--- Parses a string representing a tag.
--@param s String containing tag text
--@return a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the attributes of the tag
-- (attrs is nil when the tag has no attributes)
local function parseTag(self, s)
    local tag = {
        -- Parentheses keep only the first gsub result (the substituted string).
        name = (string.gsub(s, self._TAG, '%1')),
        attrs = {}
    }
    -- Tracked in a local flag rather than in a sentinel key inside attrs, so
    -- that a real attribute named "_" is kept instead of being wiped out.
    local hasAttrs = false
    local parseFunction = function (k, v)
        tag.attrs[k] = parseEntities(self, v)
        hasAttrs = true
    end
    -- Double-quoted attributes first, then single-quoted ones.
    string.gsub(s, self._ATTR1, parseFunction)
    string.gsub(s, self._ATTR2, parseFunction)
    if not hasAttrs then
        tag.attrs = nil
    end
    return tag
end
--- Parses the XML declaration (<?xml ... ?>) and notifies the handler.
-- Errors are reported when the declaration cannot be matched, when it is not
-- located at position 1 of the document, or when it carries attributes but
-- none of them is `version`.
--@param xml the XML string being parsed
--@param f parser state table; match, endMatch and text are updated
--@return the {name, attrs} table describing the declaration
local function parseXmlDeclaration(self, xml, f)
    local first, last, body = string.find(xml, self._PI, f.pos)
    f.match, f.endMatch, f.text = first, last, body
    if first == nil then
        err(self, self._errstr.declErr, f.pos)
    end
    if first ~= 1 then
        -- Must be at start of doc if present
        err(self, self._errstr.declStartErr, f.pos)
    end
    local tag = parseTag(self, body)
    -- TODO: Check if attributes are valid
    local attrs = tag.attrs
    if attrs and attrs.version == nil then
        -- version is mandatory
        err(self, self._errstr.declAttrErr, f.pos)
    end
    local handler = self.handler
    if fexists(handler, 'decl') then
        handler:decl(tag, first, last)
    end
    return tag
end
--- Parses a processing instruction (<?target ... ?>) and notifies the handler.
-- The instruction is only broken down into a tag when the handler defines
-- `pi`; otherwise an empty table is returned.
--@param xml the XML string being parsed
--@param f parser state table; match, endMatch and text are updated
--@return the {name, attrs} table of the instruction, or an empty table
local function parseXmlProcessingInstruction(self, xml, f)
    f.match, f.endMatch, f.text = string.find(xml, self._PI, f.pos)
    if not f.match then
        err(self, self._errstr.piErr, f.pos)
    end
    if not fexists(self.handler, 'pi') then
        return {}
    end
    -- Parse PI attributes & text
    local tag = parseTag(self, f.text)
    -- Everything following the target name is handed over as attrs._text.
    local remainder = string.sub(f.text, string.len(tag.name) + 1)
    if remainder ~= "" then
        tag.attrs = tag.attrs or {}
        tag.attrs._text = remainder
    end
    self.handler:pi(tag, f.match, f.endMatch)
    return tag
end
--- Parses a comment (<!-- ... -->) and notifies the handler.
-- The comment text is trimmed and entity-expanded according to the parser
-- options before being passed on.
--@param xml the XML string being parsed
--@param f parser state table; match, endMatch and text are updated
local function parseComment(self, xml, f)
    f.match, f.endMatch, f.text = string.find(xml, self._COMMENT, f.pos)
    if not f.match then
        err(self, self._errstr.commentErr, f.pos)
    end
    if fexists(self.handler, 'comment') then
        f.text = parseEntities(self, stripWS(self, f.text))
        -- The second argument is an unused placeholder. It used to be the
        -- global `next` function by mistake; nil matches the text and CDATA
        -- callbacks.
        self.handler:comment(f.text, nil, f.match, f.endMatch)
    end
end
--- Tries the DOCTYPE patterns, in priority order, against the XML string.
--@param xml the XML string being parsed
--@param pos position to start searching from
--@return start and end position of the match plus a table with the captured
-- pieces (_root, _type, _name, _uri, _internal), or nil when nothing matches
local function _parseDtd(self, xml, pos)
    -- match,endMatch,root,type,name,uri,internal
    local dtdPatterns = {self._DTD1, self._DTD2, self._DTD3, self._DTD4, self._DTD5}
    -- ipairs guarantees the patterns are tried in the order listed above;
    -- pairs() leaves the traversal order unspecified.
    for _, dtd in ipairs(dtdPatterns) do
        local m, e, r, t, n, u, i = string.find(xml, dtd, pos)
        if m then
            return m, e, {_root=r, _type=t, _name=n, _uri=u, _internal=i}
        end
    end
    return nil
end
--- Parses a DOCTYPE declaration and notifies the handler.
-- The handler receives a tag table {name = "DOCTYPE", value = <text between
-- "<!DOCTYPE " and the closing ">">}.
--@param xml the XML string being parsed
--@param f parser state table; match and endMatch are updated
local function parseDtd(self, xml, f)
    -- Only the match positions are needed here. The captured pieces returned
    -- as third value are dropped on purpose; they used to be assigned to an
    -- undeclared `_`, which leaked a global variable.
    f.match, f.endMatch = _parseDtd(self, xml, f.pos)
    if not f.match then
        err(self, self._errstr.dtdErr, f.pos)
    end
    if fexists(self.handler, 'dtd') then
        -- f.match + 10 skips over "<!DOCTYPE" and the character following it.
        local tag = {name="DOCTYPE", value=string.sub(xml, f.match+10, f.endMatch-1)}
        self.handler:dtd(tag, f.match, f.endMatch)
    end
end
--- Parses a CDATA section (<![CDATA[ ... ]]>) and notifies the handler.
-- The section content is passed on verbatim.
--@param xml the XML string being parsed
--@param f parser state table; match, endMatch and text are updated
local function parseCdata(self, xml, f)
    local first, last, content = string.find(xml, self._CDATA, f.pos)
    f.match, f.endMatch, f.text = first, last, content
    if not first then
        err(self, self._errstr.cdataErr, f.pos)
    end
    local handler = self.handler
    if fexists(handler, 'cdata') then
        handler:cdata(content, nil, first, last)
    end
end
--- Parse a Normal tag
-- Need check for embedded '>' in attribute value and extend
-- match recursively if necessary eg. <tag attr="123>456">
--
-- Start tags are pushed on self._stack and end tags pop it, regardless of
-- which callbacks the handler implements, so the balance check stays correct
-- for handlers that define no `endtag`.
--@param xml the XML string being parsed
--@param f parser state table (tagstr, endt1, endt2, match, endMatch, pos)
--@return the {name, attrs} table of the tag
local function parseNormalTag(self, xml, f)
    --Check for errors
    while true do
        --If there isn't an attribute without closing quotes (single or double quotes)
        --then breaks to follow the normal processing of the tag.
        --Otherwise, try to find where the quotes close.
        f.errStart, f.errEnd = string.find(f.tagstr, self._ATTRERR1)
        if f.errEnd == nil then
            f.errStart, f.errEnd = string.find(f.tagstr, self._ATTRERR2)
            if f.errEnd == nil then
                break
            end
        end
        f.extStart, f.extEnd, f.endt2 = string.find(xml, self._TAGEXT, f.endMatch+1)
        if f.extEnd == nil then
            -- The quoted value never closes and there is no further '>' to
            -- extend the tag to. Report it instead of failing on nil
            -- arithmetic, then carry on with what was collected so far.
            err(self, self._errstr.xmlErr, f.pos)
            break
        end
        f.tagstr = f.tagstr .. string.sub(xml, f.endMatch, f.extEnd-1)
        f.endMatch = f.extEnd
    end
    -- Extract tag name and attrs
    local tag = parseTag(self, f.tagstr)
    if (f.endt1=="/") then
        if tag.attrs then
            -- Shouldn't have any attributes in endtag
            err(self, string.format("%s (/%s)", self._errstr.endTagErr, tag.name), f.pos)
        end
        if table.remove(self._stack) ~= tag.name then
            err(self, string.format("%s (/%s)", self._errstr.unmatchedTagErr, tag.name), f.pos)
        end
        if fexists(self.handler, 'endtag') then
            self.handler:endtag(tag, f.match, f.endMatch)
        end
    else
        table.insert(self._stack, tag.name)
        if fexists(self.handler, 'starttag') then
            self.handler:starttag(tag, f.match, f.endMatch)
        end
        -- Self-Closing Tag
        if (f.endt2=="/") then
            table.remove(self._stack)
            if fexists(self.handler, 'endtag') then
                self.handler:endtag(tag, f.match, f.endMatch)
            end
        end
    end
    return tag
end
--- Looks at the beginning of the current tag text and delegates to the
-- parsing routine for that kind of tag (XML declaration, processing
-- instruction, comment, DOCTYPE, CDATA or a normal tag).
--@param xml the XML string being parsed
--@param f parser state table; f.tagstr holds the text of the tag
local function parseTagType(self, xml, f)
    local tagstr = f.tagstr
    local parse
    if string.find(string.sub(tagstr, 1, 5), "?xml%s") then
        parse = parseXmlDeclaration
    elseif string.sub(tagstr, 1, 1) == "?" then
        parse = parseXmlProcessingInstruction
    elseif string.sub(tagstr, 1, 3) == "!--" then
        parse = parseComment
    elseif string.sub(tagstr, 1, 8) == "!DOCTYPE" then
        parse = parseDtd
    elseif string.sub(tagstr, 1, 8) == "![CDATA[" then
        parse = parseCdata
    else
        parse = parseNormalTag
    end
    parse(self, xml, f)
end
--- Get next tag (first pass - fix exceptions below).
-- Fills f.match, f.endMatch, f.text, f.endt1, f.tagstr and f.endt2 from the
-- next tag found at or after f.pos. When no tag is found, an error is
-- reported unless only whitespace is left and every tag has been closed.
--@param xml the XML string being parsed
--@param f parser state table
--@return true if the next tag could be got, false otherwise
local function getNextTag(self, xml, f)
    f.match, f.endMatch, f.text, f.endt1, f.tagstr, f.endt2 = string.find(xml, self._XML, f.pos)
    if not f.match then
        local onlyWhitespaceLeft = string.find(xml, self._WS, f.pos)
        if onlyWhitespaceLeft and #self._stack == 0 then
            -- No more text and document complete
            return false
        end
        if onlyWhitespaceLeft then
            -- No more text, but some tags were never closed
            err(self, self._errstr.incompleteXmlErr, f.pos)
        else
            -- Unparsable text
            err(self, self._errstr.xmlErr, f.pos)
        end
    end
    -- Only reached without a match when the error handler did not raise:
    -- fall back to harmless defaults.
    if not f.text then f.text = '' end
    if not f.tagstr then f.tagstr = '' end
    if not f.match then f.match = 0 end
    return f.endMatch ~= nil
end
--- Main function which starts the XML parsing process.
-- Walks the XML string tag by tag; for each tag the text preceding it is
-- reported to the handler's `text` callback (if any and if not empty) and the
-- tag itself is dispatched according to its type.
--@param xml the XML string to parse
--@param parseAttributes indicates if tag attributes should be parsed or not.
--       If omitted, the default value is true.
function XmlParser:parse(xml, parseAttributes)
    if type(self) ~= "table" or getmetatable(self) ~= XmlParser then
        error("You must call xmlparser:parse(parameters) instead of xmlparser.parse(parameters)")
    end
    if parseAttributes == nil then
        parseAttributes = true
    end
    self.handler.parseAttributes = parseAttributes
    --Stores string.find results and parameters
    --and other auxiliar variables
    local f = {
        --string.find return
        match = 0,
        endMatch = 0,
        -- text, end1, tagstr, end2,
        --string.find parameters and auxiliar variables
        pos = 1,
        -- startText, endText,
        -- errStart, errEnd, extStart, extEnd,
    }
    while f.match do
        if not getNextTag(self, xml, f) then
            break
        end
        -- Handle leading text
        f.startText = f.match
        f.endText = f.match + string.len(f.text) - 1
        -- From here on f.match points at the tag itself, past the leading text.
        f.match = f.match + string.len(f.text)
        f.text = parseEntities(self, stripWS(self, f.text))
        if f.text ~= "" and fexists(self.handler, 'text') then
            -- Report where the text starts (f.startText). f.match has already
            -- been advanced and would be one past f.endText.
            self.handler:text(f.text, nil, f.startText, f.endText)
        end
        parseTagType(self, xml, f)
        f.pos = f.endMatch + 1
    end
end
-- XmlParser.new installs XmlParser as the metatable of every instance;
-- pointing __index back at it lets instances find parse() and the fields
-- stored on the XmlParser table.
XmlParser.__index = XmlParser
return XmlParser

@ -0,0 +1,248 @@
--- @module Module providing a non-validating XML stream parser in Lua.
--
-- Features:
-- =========
--
-- * Tokenises well-formed XML (relatively robustly)
-- * Flexible handler based event API (see below)
-- * Parses all XML Infoset elements - ie.
-- - Tags
-- - Text
-- - Comments
-- - CDATA
-- - XML Decl
-- - Processing Instructions
-- - DOCTYPE declarations
-- * Provides limited well-formedness checking
-- (checks for basic syntax & balanced tags only)
-- * Flexible whitespace handling (selectable)
-- * Entity Handling (selectable)
--
-- Limitations:
-- ============
--
-- * Non-validating
-- * No charset handling
-- * No namespace support
-- * Shallow well-formedness checking only (fails
-- to detect most semantic errors)
--
-- API:
-- ====
--
-- The parser provides a partially object-oriented API with
-- functionality split into tokeniser and handler components.
--
-- The handler instance is passed to the tokeniser and receives
-- callbacks for each XML element processed (if a suitable handler
-- function is defined). The API is conceptually similar to the
-- SAX API but implemented differently.
--
-- XML data is passed to the parser instance through the 'parse'
-- method (Note: must be passed a single string currently)
--
-- License:
-- ========
--
-- This code is freely distributable under the terms of the [MIT license](LICENSE).
--
--
--@author Paul Chakravarti (paulc@passtheaardvark.com)
--@author Manoel Campos da Silva Filho
-- Module table returned at the end of the file; _VERSION holds the library
-- version string.
local xml2lua = {_VERSION = "1.5-2"}
local XmlParser = require("libs/xml2lua/XmlParser")
---Recursively prints a table in an easy-to-read format.
-- Every key is printed on its own line, indented by two spaces per level.
-- Nested tables are printed below their key, one level deeper.
--@param tb The table to be printed (nothing is printed when nil)
--@param level the indentation level to start with (defaults to 1)
local function printableInternal(tb, level)
    if tb == nil then
        return
    end
    level = level or 1
    local spaces = string.rep(' ', level*2)
    for k, v in pairs(tb) do
        -- tostring() is required: concatenating a boolean key or value
        -- directly raises an error.
        if type(v) == "table" then
            print(spaces .. tostring(k))
            printableInternal(v, level+1)
        else
            print(spaces .. tostring(k) .. '=' .. tostring(v))
        end
    end
end
---Instantiates a XmlParser object to parse a XML string
--@param handler Handler module to be used to convert the XML string
--to another formats. See the available handlers at the handler directory.
-- Usually you get an instance to a handler module using, for instance:
-- local handler = require("xmlhandler/tree").
--@return a XmlParser object used to parse the XML
--@see XmlParser
function xml2lua.parser(handler)
    -- With the colon syntax the module itself arrives as first argument.
    if handler == xml2lua then
        error("You must call xml2lua.parser(handler) instead of xml2lua:parser(handler)")
    end
    local options = {
        --Indicates if whitespaces should be striped or not
        stripWS = 1,
        --Indicates if entity references should be expanded or not
        expandEntities = 1,
        --Turns every parsing problem into a Lua error
        errorHandler = function(errMsg, pos)
            error(string.format("%s [char=%d]\n", errMsg or "Parse Error", pos))
        end
    }
    return XmlParser.new(handler, options)
end
---Recursively prints a table in an easy-to-read format.
-- Thin public wrapper around printableInternal, starting at its default
-- indentation level.
--@param tb The table to be printed. Nothing is printed when it is nil.
function xml2lua.printable(tb)
printableInternal(tb)
end
---Generates a string representation of a table.
--Convenience function for printHandler (Does not support recursive tables).
--Entries are rendered as key=value, separated by commas and wrapped in
--braces; nested tables are rendered the same way.
--@param t Table to be parsed
--@return a string representation of the table, or t itself when it is not a table
function xml2lua.toString(t)
    if type(t) ~= 'table' then
        return t
    end
    local entries = {}
    for key, value in pairs(t) do
        if type(value) == 'table' then
            value = xml2lua.toString(value)
        end
        entries[#entries + 1] = string.format("%s=%s", key, value)
    end
    return '{' .. table.concat(entries, ',') .. '}'
end
--- Loads an XML file from a specified path.
-- Raises an error carrying the message from io.open when the file cannot
-- be opened.
-- @param xmlFilePath the path for the XML file to load
-- @return the XML loaded file content
function xml2lua.loadFile(xmlFilePath)
    local handle, openError = io.open(xmlFilePath, "r")
    if not handle then
        error(openError)
    end
    --Gets the entire file content and stores into a string
    local content = handle:read("*a")
    handle:close()
    return content
end
---Turns the attributes of an XML tag into the text to be inserted into
--the opening tag: a sequence of ` name="value"` pairs, each one preceded
--by a space. Values are written as they are, without any escaping.
--
--@param attrTable table holding the attributes (the _attr field of a node); may be nil
--@return a XML String representation of the tag attributes
local function attrToXml(attrTable)
    local pieces = {}
    for name, value in pairs(attrTable or {}) do
        pieces[#pieces + 1] = " " .. name .. "=" .. '"' .. value .. '"'
    end
    return table.concat(pieces)
end
---Gets the first key of a given table, that is, the first one pairs() visits.
--@param tb the table to inspect
--@return the first key, nil for an empty table, or tb itself when it is not a table
local function getFirstKey(tb)
    if type(tb) ~= "table" then
        return tb
    end
    for key in pairs(tb) do
        return key
    end
    return nil
end
--- Parses a given entry in a lua table
-- and inserts it as a XML string into a destination table.
-- Entries in such a destination table will be concatenated to generated
-- the final XML string from the origin table.
-- The entry itself is left as it was: its _attr field is only hidden while
-- the children are rendered and is put back afterwards.
-- @param xmltb the destination table where the XML string from the parsed key will be inserted
-- @param tagName the name of the table field that will be used as XML tag name
-- @param fieldValue a field from the lua table to be recursively parsed to XML or a primitive value that will be enclosed in a tag name
-- @param level a int value used to include indentation in the generated XML from the table key
local function parseTableKeyToXml(xmltb, tagName, fieldValue, level)
    local spaces = string.rep(' ', level*2)
    local strValue, attrsStr = "", ""
    if type(fieldValue) == "table" then
        local attrs = fieldValue._attr
        attrsStr = attrToXml(attrs)
        -- Hide _attr so that the recursive call does not render it as a child tag.
        fieldValue._attr = nil
        --If after removing the _attr field there is just one element inside it,
        --the tag was enclosing a single primitive value instead of other inner tags.
        if #fieldValue == 1 then
            strValue = spaces .. tostring(fieldValue[1])
        else
            strValue = xml2lua.toXml(fieldValue, tagName, level+1)
        end
        -- Give the caller's table its attributes back, so converting the same
        -- table a second time produces the same XML.
        fieldValue._attr = attrs
        strValue = '\n'..strValue..'\n'..spaces
    else
        strValue = tostring(fieldValue)
    end
    table.insert(xmltb, spaces..'<'..tagName.. attrsStr ..'>'..strValue..'</'..tagName..'>')
end
---Converts a Lua table to a XML String representation.
--@param tb Table to be converted to XML
--@param tableName Name of the table variable given to this function,
--                 to be used as the root tag. If a value is not provided
--                 no root tag will be created.
--@param level Only used internally, when the function is called recursively to print indentation
--
--@return a String representing the table content in XML
function xml2lua.toXml(tb, tableName, level)
    level = level or 1
    tableName = tableName or ''
    -- The root tag is only written by the outermost call, and only when named.
    local wrapInRoot = tableName ~= '' and level == 1
    local xmltb = {}
    if wrapInRoot then
        xmltb[1] = '<'..tableName..'>'
    end
    for key, value in pairs(tb) do
        -- A numeric key means tb is an array and value is one of its elements:
        -- the name of the array is used as the tag name of each element.
        local isArrayElement = type(key) == 'number'
        if type(value) == 'table' and not isArrayElement then
            -- NOTE(review): the level is raised for every table stored under
            -- a named key and never lowered again, so later siblings end up
            -- indented deeper. Kept as is to preserve the generated output.
            level = level + 1
        end
        local tagName = key
        if isArrayElement then
            tagName = tableName
        end
        parseTableKeyToXml(xmltb, tagName, value, level)
    end
    if wrapInRoot then
        xmltb[#xmltb + 1] = '</'..tableName..'>\n'
    end
    return table.concat(xmltb, '\n')
end
-- Export the module table.
return xml2lua

@ -0,0 +1,155 @@
--- Builds the initial state of a dom handler: every optional node type
-- enabled, an empty ROOT node as the current node and an empty element stack.
--@return a fresh state table
local function init()
    local state = {}
    state.options = { commentNode = 1, piNode = 1, dtdNode = 1, declNode = 1 }
    state.current = { _children = {}, _type = "ROOT" }
    state._stack = {}
    return state
end
--- @module Handler to generate a DOM-like node tree structure with
-- a single ROOT node parent - each node is a table comprising
-- the fields below.
--
-- node = { _name = <Element Name>,
-- _type = ROOT|ELEMENT|TEXT|COMMENT|PI|DECL|DTD,
-- _attr = { Node attributes - see callback API },
-- _parent = <Parent Node>
-- _children = { List of child nodes - ROOT/NODE only }
-- }
-- where:
-- - PI = XML Processing Instruction tag.
-- - DECL = XML declaration tag
--
-- The dom structure is capable of representing any valid XML document
--
-- Options
-- =======
-- options.(comment|pi|dtd|decl)Node = bool
-- - Include/exclude given node types
--
-- License:
-- ========
--
-- This code is freely distributable under the terms of the [MIT license](LICENSE).
--
--@author Paul Chakravarti (paulc@passtheaardvark.com)
--@author Manoel Campos da Silva Filho
-- The module table is itself a ready-to-use handler state; dom:new() creates
-- further, independent ones.
local dom = init()
---Instantiates a new handler object.
--Each instance can handle a single XML.
--By using such a constructor, you can parse
--multiple XML files in the same application.
--@return the handler instance
function dom:new()
    local instance = init()
    setmetatable(instance, self)
    instance.__index = self
    return instance
end
---Parses a start tag.
-- Creates an ELEMENT node, appends it to the children of the current node
-- and makes it the current node. The first element ever seen is also stored
-- in self.root.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
function dom:starttag(tag)
    local node = { _type = 'ELEMENT',
                   _name = tag.name,
                   _attr = tag.attrs,
                   _children = {}
                 }
    if self.root == nil then
        self.root = node
    end
    -- Remember the document-level container (the ROOT node created by init)
    -- the first time an element is opened at top level, so that endtag can go
    -- back to it. rawget: never pick up the value of another handler through
    -- the metatable.
    if #self._stack == 0 and rawget(self, "_document") == nil then
        self._document = self.current
    end
    table.insert(self._stack, node)
    table.insert(self.current._children, node)
    self.current = node
end
---Parses an end tag.
-- Pops the element from the stack and makes its parent the current node.
-- When the top-level element is closed, the current node goes back to the
-- document-level ROOT container, so that a comment or processing instruction
-- following the top-level element still has a node to be attached to.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
-- @param s position where the tag starts (used in the error message)
function dom:endtag(tag, s)
    --Table representing the element being closed
    local prev = self._stack[#self._stack]
    if prev == nil or tag.name ~= prev._name then
        error("XML Error - Unmatched Tag ["..tostring(s)..":"..tostring(tag.name).."]\n")
    end
    table.remove(self._stack)
    self.current = self._stack[#self._stack] or rawget(self, "_document")
end
---Parses a tag content.
-- Appends a TEXT node to the children of the current node.
-- @param text text to process
function dom:text(text)
    local siblings = self.current._children
    siblings[#siblings + 1] = { _type = "TEXT", _text = text }
end
---Parses a comment tag.
-- Appends a COMMENT node to the children of the current node, unless
-- comment nodes are disabled through options.commentNode.
-- @param text comment text
function dom:comment(text)
    if not self.options.commentNode then
        return
    end
    local siblings = self.current._children
    siblings[#siblings + 1] = { _type = "COMMENT", _text = text }
end
--- Parses a XML processing instruction (PI) tag.
-- Appends a PI node to the children of the current node, unless PI nodes
-- are disabled through options.piNode.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
function dom:pi(tag)
    if not self.options.piNode then
        return
    end
    local siblings = self.current._children
    siblings[#siblings + 1] = { _type = "PI", _name = tag.name, _attr = tag.attrs }
end
---Parse the XML declaration line (the line that indicates the XML version).
-- Appends a DECL node to the children of the current node, unless
-- declaration nodes are disabled through options.declNode.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
function dom:decl(tag)
    if not self.options.declNode then
        return
    end
    local siblings = self.current._children
    siblings[#siblings + 1] = { _type = "DECL", _name = tag.name, _attr = tag.attrs }
end
---Parses a DTD tag.
-- Appends a DTD node to the children of the current node, unless DTD nodes
-- are disabled through options.dtdNode. Only tag.name and tag.attrs are
-- copied into the node.
-- NOTE(review): the tokeniser shown in this change set builds DOCTYPE tags as
-- {name, value} without attrs, so the DOCTYPE text itself does not seem to
-- reach the node - confirm whether that is intended.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
function dom:dtd(tag)
    if not self.options.dtdNode then
        return
    end
    local siblings = self.current._children
    siblings[#siblings + 1] = { _type = "DTD", _name = tag.name, _attr = tag.attrs }
end
---Parses CDATA tag content.
-- CDATA sections are stored exactly like plain text: both callbacks share
-- the same function and produce TEXT nodes.
dom.cdata = dom.text
-- dom:new() installs this table as metatable of the instances; __index makes
-- them find the callbacks defined above.
dom.__index = dom
return dom

@ -0,0 +1,108 @@
---@module Handler to generate a simple event trace which
--outputs messages to the terminal during the XML
--parsing, usually for debugging purposes.
--
-- License:
-- ========
--
-- This code is freely distributable under the terms of the [MIT license](LICENSE).
--
--@author Paul Chakravarti (paulc@passtheaardvark.com)
--@author Manoel Campos da Silva Filho
-- Handler table. Within this file the name shadows Lua's global print
-- function, which is why the callbacks below write through io.write.
local print = {}
---Parses a start tag.
-- Writes the tag name followed by one line per attribute.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
-- @param s position where the tag starts
-- @param e position where the tag ends
function print:starttag(tag, s, e)
    local header = "Start : "..tag.name.."\n"
    io.write(header)
    local attrs = tag.attrs
    if not attrs then
        return
    end
    for attrName, attrValue in pairs(attrs) do
        local line = string.format(" + %s='%s'\n", attrName, attrValue)
        io.write(line)
    end
end
---Parses an end tag.
-- Writes the name of the tag being closed.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
-- @param s position where the tag starts
-- @param e position where the tag ends
function print:endtag(tag, s, e)
    local line = "End : "..tag.name.."\n"
    io.write(line)
end
---Parses a tag content.
-- Writes the text found between tags.
-- NOTE(review): the tokeniser shown in this change set calls the text
-- callback as (text, nil, startPos, endPos), so `s` and `e` may not hold
-- the positions described below - confirm against the caller.
-- @param text text to process
-- @param s position where the tag starts
-- @param e position where the tag ends
function print:text(text, s, e)
    local line = "Text : "..text.."\n"
    io.write(line)
end
---Parses CDATA tag content.
-- Writes the content of the CDATA section.
-- @param text CDATA content to be processed
-- @param s position where the tag starts
-- @param e position where the tag ends
function print:cdata(text, s, e)
    local line = "CDATA : "..text.."\n"
    io.write(line)
end
---Parses a comment tag.
-- Writes the text of the comment.
-- @param text comment text
-- @param s position where the tag starts
-- @param e position where the tag ends
function print:comment(text, s, e)
    local line = "Comment : "..text.."\n"
    io.write(line)
end
---Parses a DTD tag.
-- Writes the tag name followed by one line per attribute.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
-- @param s position where the tag starts
-- @param e position where the tag ends
function print:dtd(tag, s, e)
    local header = "DTD : "..tag.name.."\n"
    io.write(header)
    local attrs = tag.attrs
    if not attrs then
        return
    end
    for attrName, attrValue in pairs(attrs) do
        local line = string.format(" + %s='%s'\n", attrName, attrValue)
        io.write(line)
    end
end
--- Parse a XML processing instructions (PI) tag.
-- Writes the target name followed by one line per attribute.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
-- @param s position where the tag starts
-- @param e position where the tag ends
function print:pi(tag, s, e)
    local header = "PI : "..tag.name.."\n"
    io.write(header)
    local attrs = tag.attrs
    if not attrs then
        return
    end
    for attrName, attrValue in pairs(attrs) do
        local line = string.format(" + %s='%s'\n", attrName, attrValue)
        io.write(line)
    end
end
---Parse the XML declaration line (the line that indicates the XML version).
-- Writes the declaration name followed by one line per attribute.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
-- @param s position where the tag starts
-- @param e position where the tag ends
function print:decl(tag, s, e)
    local header = "XML Decl : "..tag.name.."\n"
    io.write(header)
    local attrs = tag.attrs
    if not attrs then
        return
    end
    for attrName, attrValue in pairs(attrs) do
        local line = string.format(" + %s='%s'\n", attrName, attrValue)
        io.write(line)
    end
end
-- Export the handler table (not Lua's print function, see the top of the file).
return print

@ -0,0 +1,170 @@
--- Builds the initial state of a tree handler: an empty root table, an
-- empty noreduce option and a stack whose only entry is that root.
--@return a fresh state table
local function init()
    local root = {}
    return {
        root = root,
        options = {noreduce = {}},
        _stack = {root}
    }
end
--- @module XML Tree Handler.
-- Generates a lua table from an XML content string.
-- It is a simplified handler which attempts
-- to generate a more 'natural' table based structure which
-- supports many common XML formats.
--
-- The XML tree structure is mapped directly into a recursive
-- table structure with node names as keys and child elements
-- as either a table of values or directly as a string value
-- for text. Where there is only a single child element this
-- is inserted as a named key - if there are multiple
-- elements these are inserted as a vector (in some cases it
-- may be preferable to always insert elements as a vector
-- which can be specified on a per element basis in the
-- options). Attributes are inserted as a child element with
-- a key of '_attr'.
--
-- Only Tag/Text & CDATA elements are processed - all others
-- are ignored.
--
-- This format has some limitations - primarily
--
-- * Mixed-Content behaves unpredictably - the relationship
-- between text elements and embedded tags is lost and
-- multiple levels of mixed content does not work
-- * If a leaf element has both a text element and attributes
-- then the text must be accessed through a vector (to
-- provide a container for the attribute)
--
-- In general however this format is relatively useful.
--
-- It is much easier to understand by running some test
-- data through 'testxml.lua -simpletree' than to read this)
--
-- Options
-- =======
-- options.noreduce = { <tag> = bool,.. }
-- - Nodes not to reduce children vector even if only
-- one child
--
-- License:
-- ========
--
-- This code is freely distributable under the terms of the [MIT license](LICENSE).
--
--@author Paul Chakravarti (paulc@passtheaardvark.com)
--@author Manoel Campos da Silva Filho
-- The module table is itself a ready-to-use handler state; tree:new() creates
-- further, independent ones.
local tree = init()
---Instantiates a new handler object.
--Each instance can handle a single XML.
--By using such a constructor, you can parse
--multiple XML files in the same application.
--@return the handler instance
function tree:new()
    local instance = init()
    setmetatable(instance, self)
    instance.__index = self
    return instance
end
--- Recursively removes redundant vectors for nodes
-- with single child elements.
-- A node holding exactly one positional entry and no _attr is replaced, in
-- its parent, by that entry - unless its key is listed in options.noreduce.
--@param node the table to process
--@param key the key under which node is stored in parent
--@param parent the table containing node; nil for the root of the tree,
--       which has nowhere to be replaced and is therefore never collapsed
function tree:reduce(node, key, parent)
    for k, v in pairs(node) do
        if type(v) == 'table' then
            self:reduce(v, k, node)
        end
    end
    -- The parent check protects the top-level call reduce(root, nil, nil):
    -- a root holding exactly one positional entry used to fail on parent[key].
    if parent ~= nil and #node == 1 and not self.options.noreduce[key] and
        node._attr == nil then
        parent[key] = node[1]
    end
end
--- If an object is not an array,
-- creates an array holding that object as the 1st element.
--
-- It's a workaround for duplicated XML tags outside an inner tag. Check issue #55 for details.
-- When a tag that already exists on the parsing stack is found again and the
-- existing one is represented as a single element, that element is wrapped
-- in an array so the new occurrence can be appended to it.
-- For instance, if we have a tag x = {attr1=1, attr2=2}
-- and another x tag is found, the previous entry will be changed to an array
-- x = {{attr1=1, attr2=2}}. This way, the duplicated tag will be
-- inserted into this array as x = {{attr1=1, attr2=2}, {attr1=3, attr2=4}}
-- https://github.com/manoelcampos/xml2lua/issues/55
--
-- @param obj the object to try to convert to an array
-- @return the same object if it's already an array or a new array with the object
-- as the 1st element.
local function convertObjectToArray(obj)
    --A length other than 0 means the object already has positional entries
    if #obj ~= 0 then
        return obj
    end
    return { obj }
end
---Parses a start tag.
-- Creates the table for the new element, stores it in its parent under the
-- tag name (as the next entry of the array kept there) and pushes it on the
-- stack. Attributes are only stored when self.parseAttributes is true.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
function tree:starttag(tag)
    local node = {}
    if self.parseAttributes == true then
        node._attr = tag.attrs
    end
    --Table in the stack representing the tag that contains the new one
    local parent = self._stack[#self._stack]
    local name = tag.name
    local existing = parent[name]
    if not existing then
        parent[name] = { node }
    else
        local siblings = convertObjectToArray(existing)
        siblings[#siblings + 1] = node
        parent[name] = siblings
    end
    self._stack[#self._stack + 1] = node
end
---Parses an end tag.
-- Checks that the tag is known to the containing element and pops the stack.
-- When the element being closed is a direct child of the root, the whole
-- tree is reduced (see tree:reduce).
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the atributtes of the tag
-- @param s position where the tag starts (used in the error message)
function tree:endtag(tag, s)
    --Table in the stack representing the containing tag of the current tag
    local depth = #self._stack
    local parent = self._stack[depth - 1]
    if not parent[tag.name] then
        error("XML Error - Unmatched Tag ["..s..":"..tag.name.."]\n")
    end
    if parent == self.root then
        -- Once parsing complete, recursively reduce tree
        self:reduce(parent, nil, nil)
    end
    table.remove(self._stack)
end
---Parses a tag content.
-- Appends the text as a positional entry of the element on top of the stack.
-- @param text text to process
function tree:text(text)
    local owner = self._stack[#self._stack]
    owner[#owner + 1] = text
end
---Parses CDATA tag content.
-- CDATA sections are stored exactly like plain text: both callbacks share
-- the same function.
tree.cdata = tree.text
-- tree:new() installs this table as metatable of the instances; __index makes
-- them find the callbacks defined above.
tree.__index = tree
return tree

@ -0,0 +1,350 @@
--[[--
Download URLs as EPUBs
@module koplugin.DownloadToEPUB
--]]--
local BD = require("ui/bidi")
local Blitbuffer = require("ffi/blitbuffer")
local ConfirmBox = require("ui/widget/confirmbox")
local DataStorage = require("datastorage")
local Device = require("device")
local Dispatcher = require("dispatcher")
local Event = require("ui/event")
local FFIUtil = require("ffi/util")
local FileManager = require("apps/filemanager/filemanager")
local InfoMessage = require("ui/widget/infomessage")
local LuaSettings = require("frontend/luasettings")
local MultiConfirmBox = require("ui/widget/multiconfirmbox")
local NetworkMgr = require("ui/network/manager")
local UIManager = require("ui/uimanager")
local WidgetContainer = require("ui/widget/container/widgetcontainer")
local VerticalGroup = require("ui/widget/verticalgroup")
local Screen = Device.screen
local Size = require("ui/size")
local filemanagerutil = require("apps/filemanager/filemanagerutil")
local logger = require("logger")
local util = require("frontend/util")
local T = FFIUtil.template
local _ = require("gettext")
-- Gazette Modules
local EpubBuildDirector = require("libs/gazette/epubbuilddirector")
local WebPage = require("libs/gazette/resources/webpage")
local ResourceAdapter = require("libs/gazette/resources/webpageadapter")
local Epub = require("libs/gazette/epub/epub")
local History = require("epubhistory")
local HistoryView = require("epubhistoryview")
-- Plugin widget. download_directory defaults to an "EPUB Downloads" folder
-- inside the full data directory; DownloadToEpub:init may replace it with a
-- directory taken from the plugin settings.
local DownloadToEpub = WidgetContainer:new{
name = "Download to EPUB",
download_directory = ("%s/%s/"):format(DataStorage:getFullDataDir(), "EPUB Downloads")
}
-- Builder used by downloadEpubWithUi, which passes the output_directory in
-- when creating it. NOTE(review): its methods are not visible in this part
-- of the file.
local EpubBuilder = {
output_directory = nil,
}
--- Sets the plugin up: loads the settings, restores the download directory,
-- makes sure that directory exists and registers the plugin in the main menu
-- and (when available) in the external link dialog.
function DownloadToEpub:init()
    self.settings = self:readSettings()
    -- saveSettings() stores the directory inside the "downloadtoepub" entry
    -- (presumably LuaSettings keeps entries in `data` under their key, as
    -- readSettings() assumes too), so that is where it has to be read from.
    -- The top-level key, which was the only place looked at before, is kept
    -- as a fallback.
    local saved = self.settings.data.downloadtoepub
    local saved_directory = (saved and saved.download_directory)
        or self.settings.data.download_directory
    if saved_directory then
        self.download_directory = saved_directory
    end
    self:createDownloadDirectoryIfNotExists()
    self.ui.menu:registerToMainMenu(self)
    if self.ui and self.ui.link then
        self.ui.link:addToExternalLinkDialog("30_downloadtoepub", function(this, link_url)
            return {
                text = _("Download to EPUB"),
                callback = function()
                    UIManager:close(this.external_link_dialog)
                    this.ui:handleEvent(Event:new("DownloadEpubFromUrl", link_url))
                end,
                show_in_dialog_func = function()
                    return true
                end
            }
        end)
    end
end
--- Adds the plugin entry to the main menu.
-- The submenu always offers "Go to EPUB downloads", "Settings" and "About".
-- A "last download" entry and the first history entry provided by
-- HistoryView are inserted after "Go to EPUB downloads" when available.
-- @param menu_items table of main menu entries, filled in place
function DownloadToEpub:addToMainMenu(menu_items)
    local sub_item_table = {
        {
            text = _("Go to EPUB downloads"),
            callback = function()
                self:goToDownloadDirectory()
            end,
        },
        {
            text = _("Settings"),
            sub_item_table = {
                {
                    text_func = function()
                        local path = filemanagerutil.abbreviate(self.download_directory)
                        return T(_("Set download directory (%1)"), BD.dirpath(path))
                    end,
                    keep_menu_open = true,
                    callback = function() self:setDownloadDirectory() end,
                },
            }
        },
        {
            text = _("About"),
            keep_menu_open = true,
            callback = function()
                UIManager:show(InfoMessage:new{
                    -- Goes through gettext like every other user-facing text.
                    text = _("DownloadToEpub lets you download external links as EPUBs to your device.")
                })
            end,
        },
    }
    menu_items.downloadtoepub = {
        text = _("Download to EPUB"),
        sorting_hint = "tools",
        sub_item_table = sub_item_table,
    }
    -- Both kinds of history entries open the EPUB they refer to.
    local function open_history_item(history_item)
        self:maybeOpenEpub(history_item['download_path'])
    end
    local history_view = HistoryView:new{}
    local last_download_item = history_view:getLastDownloadButton(open_history_item)
    local history_menu_items = history_view:getMenuItems(open_history_item)
    if last_download_item then
        table.insert(sub_item_table, 2, last_download_item)
    end
    -- Only the first entry is used. Skip it when it is missing: inserting nil
    -- would leave a hole in the menu table.
    local history_entry = history_menu_items and history_menu_items[1]
    if history_entry then
        table.insert(sub_item_table, 3, history_entry)
    end
end
--- Opens an EPUB in the reader when the file still exists; otherwise
-- offers to download it again.
-- @param file_path path of the EPUB to open
function DownloadToEpub:maybeOpenEpub(file_path)
    if not util.pathExists(file_path) then
        logger.dbg("DownloadToEpub: Couldn't open " .. file_path .. ". It's been moved or deleted.")
        self:showRedownloadPrompt(file_path)
        return
    end
    logger.dbg("DownloadToEpub: Opening " .. file_path)
    local Event = require("ui/event")
    UIManager:broadcastEvent(Event:new("SetupShowReader"))
    local ReaderUI = require("apps/reader/readerui")
    ReaderUI:showReader(file_path)
end
--- Opens the plugin settings file (downloadtoepub.lua inside the settings
-- directory) and makes sure it has a "downloadtoepub" entry.
-- @return the LuaSettings object
function DownloadToEpub:readSettings()
    -- A path separator is needed between directory and file name (the history
    -- module of this plugin builds its path the same way). Without it the
    -- file ended up next to the settings directory under a mangled name.
    local settings_file = ("%s/%s"):format(DataStorage:getSettingsDir(), "downloadtoepub.lua")
    local settings = LuaSettings:open(settings_file)
    if not settings.data.downloadtoepub then
        settings.data.downloadtoepub = {}
    end
    return settings
end
--- Stores the current download directory in the "downloadtoepub" settings
-- entry and writes the settings out.
function DownloadToEpub:saveSettings()
    self.settings:saveSetting("downloadtoepub", {
        download_directory = self.download_directory,
    })
    self.settings:flush()
end
--- Lets the user pick the download directory; the choice is remembered
-- and saved to the settings.
function DownloadToEpub:setDownloadDirectory()
    local downloadmgr = require("ui/downloadmgr")
    local function remember_directory(path)
        self.download_directory = path
        self:saveSettings()
    end
    local chooser = downloadmgr:new{
        onConfirm = remember_directory
    }
    chooser:chooseDir()
end
-- Show the download directory in the file manager, closing the current
-- document first when one is open.
function DownloadToEpub:goToDownloadDirectory()
    local FileManager = require("apps/filemanager/filemanager")

    if self.ui.document then
        self.ui:onClose()
    end

    -- Reuse the existing file manager instance when there is one; otherwise
    -- start a file manager on the directory.
    local file_manager = FileManager.instance
    if not file_manager then
        FileManager:showFiles(self.download_directory)
    else
        file_manager:reinit(self.download_directory)
    end
end
-- Create self.download_directory with lfs.mkdir when util.pathExists reports
-- that it is missing. Does nothing when the path already exists.
function DownloadToEpub:createDownloadDirectoryIfNotExists()
    local directory = self.download_directory
    if util.pathExists(directory) then
        return
    end
    logger.dbg("DownloadToEpub: Creating path (" .. directory .. ")")
    lfs.mkdir(directory)
end
-- Ask the user whether link_url should be downloaded as an EPUB. On
-- confirmation the download is started; when it succeeds the history is
-- updated and the user is offered to open the new file, and when it fails an
-- error message is shown instead.
function DownloadToEpub:onDownloadEpubFromUrl(link_url)
    -- Declared before assignment so the callback below can refer to it.
    local prompt
    prompt = ConfirmBox:new{
        text = T(_("Download to EPUB? \n\nLink: %1"), link_url),
        ok_text = _("Yes"),
        ok_callback = function()
            UIManager:close(prompt)
            self:downloadEpubWithUi(link_url, function(file_path, err)
                -- Treat a missing path as a failure too, so the success
                -- branch never concatenates a non-string file_path.
                if err or not file_path then
                    -- The error is an argument of T (it fills %1), not of
                    -- the gettext call. tostring keeps a non-string error
                    -- from breaking the substitution.
                    UIManager:show(InfoMessage:new{
                        text = T(_("Error downloading EPUB: %1"), tostring(err)),
                    })
                    return
                end

                -- History:new already calls init(), so no second init here.
                local history = History:new{}
                logger.dbg("DownloadToEpub: Maybe deleting from history " .. link_url)
                history:remove(link_url) -- link might have already been downloaded. If so, remove the history item.
                logger.dbg("DownloadToEpub: Adding to history " .. link_url .. " " .. file_path)
                history:add(link_url, file_path)
                logger.dbg("DownloadToEpub: Finished downloading epub to " .. file_path)
                self:showReadPrompt(file_path)
            end)
        end,
    }
    UIManager:show(prompt)
end
-- Flash a "Downloading..." message, then, through NetworkMgr:runWhenOnline,
-- build an EPUB from link_url into self.download_directory.
-- callback is called with (file_path, err) exactly as returned by
-- EpubBuilder:buildFromUrl.
function DownloadToEpub:downloadEpubWithUi(link_url, callback)
    -- Show the notice, force a repaint, then close it straight away.
    local notice = InfoMessage:new{ text = ("Downloading... " .. link_url) }
    UIManager:show(notice)
    UIManager:forceRePaint()
    UIManager:close(notice)

    local function build_and_report()
        local builder = EpubBuilder:new{
            output_directory = self.download_directory,
        }
        local file_path, err = builder:buildFromUrl(link_url)
        callback(file_path, err)
    end

    NetworkMgr:runWhenOnline(build_and_report)
end
-- Tell the user that the EPUB at file_path could not be opened.
-- When History:find returns an entry for file_path, the user may redownload
-- from the entry's URL or delete the entry from history; otherwise only an
-- informational message is shown.
-- TODO (from the original author): supply this with a directory?
function DownloadToEpub:showRedownloadPrompt(file_path)
    -- Declared before assignment so the choice callbacks can refer to it.
    local prompt
    -- History:new already calls init(); a second init() would only re-open
    -- the history file.
    local history = History:new{}
    local history_item = history:find(file_path)
    if history_item then
        prompt = MultiConfirmBox:new{
            text = T(_("Couldn't open EPUB! \n\nFile has been moved since download (%1)\n\nInitially downloaded from (%2)\n\nWhat would you like to do?"),
                file_path,
                history_item.url),
            choice1_text = _("Redownload EPUB"),
            choice1_callback = function()
                logger.dbg("DownloadToEpub: Redownloading " .. history_item.url)
                UIManager:close(prompt)
                self:onDownloadEpubFromUrl(history_item.url)
            end,
            choice2_text = _("Delete from history"),
            choice2_callback = function()
                logger.dbg("DownloadToEpub: Deleting from history " .. history_item.url)
                history:remove(history_item.url)
                UIManager:close(prompt)
            end,
        }
    else
        prompt = InfoMessage:new{
            text = _("Couldn't open EPUB! EPUB has been deleted or moved since being downloaded."),
            show_icon = false,
            timeout = 10,
        }
    end
    UIManager:show(prompt)
end
-- Ask whether the freshly downloaded EPUB at file_path should be opened now,
-- and open it in the reader when the user confirms.
function DownloadToEpub:showReadPrompt(file_path)
    -- Forward-declare the local: with `local prompt = ConfirmBox:new{...}`
    -- the name is not yet in scope inside the initializer, so the callback
    -- would have closed a global `prompt` instead of this dialog.
    local prompt
    prompt = ConfirmBox:new{
        text = _("EPUB downloaded. Would you like to read it now?"),
        ok_text = _("Open EPUB"),
        ok_callback = function()
            logger.dbg("DownloadToEpub: Opening " .. file_path)
            local Event = require("ui/event")
            UIManager:broadcastEvent(Event:new("SetupShowReader"))
            UIManager:close(prompt)
            local ReaderUI = require("apps/reader/readerui")
            ReaderUI:showReader(file_path)
        end,
    }
    UIManager:show(prompt)
end
-- Constructor: turn the table o (or a new empty table) into an object whose
-- metatable is the receiver, with the receiver used for field lookups.
function EpubBuilder:new(o)
    local instance = o or {}
    self.__index = self
    return setmetatable(instance, self)
end
-- Download the page at url and write it as an EPUB into
-- self.output_directory.
-- Returns the value of build_director:construct (the path to the EPUB) on
-- success, or false plus an error value when fetching the page, preparing
-- the build director, or writing the file fails.
function EpubBuilder:buildFromUrl(url)
    logger.dbg("DownloadToEpub: Begin download of " .. url .. " outputting to " .. self.output_directory)

    -- Show a status message, force a repaint, then close the message again.
    local function show_progress(text)
        local info = InfoMessage:new{ text = text }
        UIManager:show(info)
        UIManager:forceRePaint()
        UIManager:close(info)
    end

    show_progress(_("Getting webpage…"))
    local webpage, webpage_err = self:createWebpage(url)
    if not webpage then
        -- tostring: the error value is not guaranteed to be a string.
        logger.dbg("DownloadToEpub: " .. tostring(webpage_err))
        return false, webpage_err
    end

    show_progress(_("Building EPUB…"))
    local epub = Epub:new{}
    epub:addFromList(ResourceAdapter:new(webpage))
    epub:setTitle(webpage.title)
    epub:setAuthor("DownloadToEpub")

    -- Make sure exactly one "/" separates directory and file name. A
    -- directory without a trailing slash would otherwise be fused with the
    -- file name.
    local output_directory = self.output_directory
    if output_directory ~= "" and output_directory:sub(-1) ~= "/" then
        output_directory = output_directory .. "/"
    end
    local epub_path = ("%s%s.epub"):format(output_directory, util.getSafeFilename(epub.title))

    local build_director, director_err = self:createBuildDirector(epub_path)
    if not build_director then
        logger.dbg("DownloadToEpub: " .. tostring(director_err))
        return false, director_err
    end

    show_progress(_("Writing to device…"))
    logger.dbg("DownloadToEpub: Writing EPUB to " .. epub_path)
    local path_to_epub, construct_err = build_director:construct(epub)
    if not path_to_epub then
        logger.dbg("DownloadToEpub: " .. tostring(construct_err))
        return false, construct_err
    end
    return path_to_epub
end
-- Create a WebPage for url and build it.
-- Returns the built page, or false plus the error reported by WebPage:new.
function EpubBuilder:createWebpage(url)
    local page, failure = WebPage:new({ url = url })
    if not failure then
        page:build()
        return page
    end
    return false, failure
end
-- Create an EpubBuildDirector and point it at epub_path.
-- Returns the director, or false plus an error when either creating it or
-- setting its destination fails.
function EpubBuilder:createBuildDirector(epub_path)
    local director, create_err = EpubBuildDirector:new()
    if not director then
        return false, create_err
    end

    local ok, destination_err = director:setDestination(epub_path)
    if ok then
        return director
    end
    return false, destination_err
end
-- Return the DownloadToEpub table as this file's result.
return DownloadToEpub
Loading…
Cancel
Save