-- koreader/frontend/util.lua

--[[--
This module contains miscellaneous helper functions for the KOReader frontend.
]]

local BaseUtil = require("ffi/util")
local dbg = require("dbg")
local _ = require("gettext")
local T = BaseUtil.template

local lshift = bit.lshift
local rshift = bit.rshift
local band = bit.band
local bor = bit.bor

local util = {}

--- Strips punctuation marks from a string.
-- ASCII punctuation is only removed at the start and at the end of the text.
-- 3-byte UTF-8 sequences starting with \226\128 to \226\131 (U+2000 - U+20FF,
-- which contains the General Punctuation block) are removed anywhere in the text.
---- @string text the string to be stripped
---- @treturn string stripped text (nothing when text is nil or false)
function util.stripPunctuation(text)
    if not text then return end
    local stripped = text:gsub("\226[\128-\131][\128-\191]", '')
    stripped = stripped:gsub("^%p+", ''):gsub("%p+$", '')
    -- gsub() also returns a substitution count: going through a local keeps
    -- it from leaking to the caller as a second return value.
    return stripped
end

--[[--
Splits a string by a pattern

Lua doesn't have a string.split() function and most of the time
you don't really need it because string.gmatch() is enough.
However string.gmatch() has one significant disadvantage for me:
You can't split a string while matching both the delimited
strings and the delimiters themselves without tracking positions
and substrings. The gsplit function below takes care of
this problem.

Author: Peter Odding

License: MIT/X11

Source: <a href="http://snippets.luacode.org/snippets/String_splitting_130">http://snippets.luacode.org/snippets/String_splitting_130</a>
]]
----@string str the string to split
----@param pattern Lua pattern matching the delimiters (defaults to '%s+')
----@bool capture if true, the matched delimiters are returned too
----@bool capture_empty_entity if true, an empty string is returned for a delimiter
---- that starts right where the previous piece ended (a trailing one is still ignored)
----@treturn function iterator returning one piece per call
function util.gsplit(str, pattern, capture, capture_empty_entity)
    pattern = pattern and tostring(pattern) or '%s+'
    if (''):find(pattern) then
        error('pattern matches empty string!', 2)
    end
    local function emitPieces()
        local cursor = 1
        while true do
            local match_start, match_end = str:find(pattern, cursor)
            if not (match_start and match_end) then
                -- no delimiter left: what remains (if anything) is the last piece
                if cursor <= #str then
                    coroutine.yield(str:sub(cursor))
                end
                return
            end
            local has_text = cursor < match_start
            if has_text or (capture_empty_entity and cursor == match_start) then
                coroutine.yield(str:sub(cursor, match_start - 1))
            end
            if capture then
                coroutine.yield(str:sub(match_start, match_end))
            end
            cursor = match_end + 1
            if cursor > #str then
                return
            end
        end
    end
    return coroutine.wrap(emitPieces)
end

--[[--
Converts seconds to a clock string.

Source: <a href="https://gist.github.com/jesseadams/791673">https://gist.github.com/jesseadams/791673</a>
]]
---- @int seconds number of seconds (anything tonumber() accepts)
---- @bool withoutSeconds if true 00:00, if false 00:00:00
---- @treturn string clock string in the form of 00:00 or 00:00:00
function util.secondsToClock(seconds, withoutSeconds)
    seconds = tonumber(seconds)
    -- Non-numeric input (tonumber() gave nil), zero and NaN (the only value
    -- that differs from itself) are all shown as a zero clock.
    if not seconds or seconds == 0 or seconds ~= seconds then
        if withoutSeconds then
            return "00:00"
        else
            return "00:00:00"
        end
    else
        -- with no seconds field, minutes go through optmath's round();
        -- otherwise they are truncated
        local round = withoutSeconds and require("optmath").round or math.floor
        local hours = string.format("%02.f", math.floor(seconds / 3600))
        -- (hours and mins are strings, Lua converts them back for the arithmetic)
        local mins = string.format("%02.f", round(seconds / 60 - (hours * 60)))
        if mins == "60" then
            -- rounding filled up the hour: carry it over
            mins = string.format("%02.f", 0)
            hours = string.format("%02.f", hours + 1)
        end
        if withoutSeconds then
            return hours .. ":" .. mins
        end
        local secs = string.format("%02.f", math.floor(seconds - hours * 3600 - mins * 60))
        return hours .. ":" .. mins .. ":" .. secs
    end
end

--- Converts seconds to a period of time string.
---- @int seconds number of seconds (non-numeric or NaN input is handled as 0)
---- @bool withoutSeconds if true 1h30 (no seconds shown), if false 1h30'10''
---- @bool hmsFormat if true format 1h30m10s
---- @treturn string duration string such as 1h30, 1h30'10'' or 1h30m10s
function util.secondsToHClock(seconds, withoutSeconds, hmsFormat)
    seconds = tonumber(seconds)
    -- nil (not a number) and NaN would break the comparisons and the
    -- arithmetic below: show them as a zero duration
    if not seconds or seconds ~= seconds then
        seconds = 0
    end
    if seconds == 0 then
        if withoutSeconds then
            if hmsFormat then
                return T(_("%1m"), "0")
            else
                return "0'"
            end
        else
            if hmsFormat then
                return T(_("%1s"), "0")
            else
                return "0''"
            end
        end
    elseif seconds < 60 then
        -- less than a minute: without seconds, round to 0 or 1 minute
        if withoutSeconds and seconds < 30 then
            if hmsFormat then
                return T(_("%1m"), "0")
            else
                return "0'"
            end
        elseif withoutSeconds and seconds >= 30 then
            if hmsFormat then
                return T(_("%1m"), "1")
            else
                return "1'"
            end
        else
            -- Truncate, as done for longer durations below: letting "%02.f"
            -- round would show 59.5 and above as 60 seconds.
            local secs = string.format("%02.f", math.floor(seconds))
            if hmsFormat then
                return T(_("%1m%2s"), "0", secs)
            else
                return "0'" .. secs .. "''"
            end
        end
    else
        -- with no seconds field, minutes go through optmath's round();
        -- otherwise they are truncated
        local round = withoutSeconds and require("optmath").round or math.floor
        local hours = string.format("%.f", math.floor(seconds / 3600))
        -- (hours and mins are strings, Lua converts them back for the arithmetic)
        local mins = string.format("%02.f", round(seconds / 60 - (hours * 60)))
        if mins == "60" then
            -- rounding filled up the hour: carry it over
            mins = string.format("%02.f", 0)
            hours = string.format("%.f", hours + 1)
        end
        if withoutSeconds then
            if hours == "0" then
                -- minutes only, without zero padding
                mins = string.format("%.f", round(seconds / 60))
                if hmsFormat then
                    return T(_("%1m"), mins)
                else
                    return mins .. "'"
                end
            end
            -- @translators This is the 'h' for hour, like in 1h30. This is a duration.
            return T(_("%1h%2"), hours, mins)
        end
        local secs = string.format("%02.f", math.floor(seconds - hours * 3600 - mins * 60))
        if hours == "0" then
            -- no hours: minutes without zero padding (secs was computed above
            -- from the padded value, which is the same number)
            mins = string.format("%.f", round(seconds / 60))
            if hmsFormat then
                -- @translators This is the 'm' for minute and the 's' for second, like in 1m30s. This is a duration.
                return T(_("%1m%2s"), mins, secs)
            else
                return mins .. "'" .. secs .. "''"
            end
        end
        if hmsFormat then
            if secs == "00" then
                -- @translators This is the 'h' for hour and the 'm' for minute, like in 1h30m. This is a duration.
                return T(_("%1h%2m"), hours, mins)
            else
                -- @translators This is the 'h' for hour, the 'm' for minute and the 's' for second, like in 1h30m30s. This is a duration.
                return T(_("%1h%2m%3s"), hours, mins, secs)
            end

        else
            if secs == "00" then
                return T(_("%1h%2'"), hours, mins)
            else
                return T(_("%1h%2'%3''"), hours, mins, secs)
            end
        end
    end
end


--[[--
Compares values in two different tables.

Source: <https://stackoverflow.com/a/32660766/2470572>
]]
---- @param o1 first value (usually a Lua table)
---- @param o2 second value (usually a Lua table)
---- @bool ignore_mt if true, an __eq metamethod on o1 is never used
---- @treturn boolean true if both are equal, or are tables with equal content
function util.tableEquals(o1, o2, ignore_mt)
    if o1 == o2 then return true end
    -- two different values can only be equivalent when both are tables
    if type(o1) ~= type(o2) or type(o1) ~= 'table' then
        return false
    end

    if not ignore_mt then
        local meta = getmetatable(o1)
        if meta and meta.__eq then
            -- let the metamethod decide
            return o1 == o2
        end
    end

    -- every key of o1 must be in o2 with an equivalent value...
    local seen_keys = {}
    for key, left in pairs(o1) do
        local right = o2[key]
        if right == nil then
            return false
        end
        if util.tableEquals(left, right, ignore_mt) == false then
            return false
        end
        seen_keys[key] = true
    end

    -- ...and o2 must not have any key that o1 lacks
    for key in pairs(o2) do
        if not seen_keys[key] then
            return false
        end
    end
    return true
end

--[[--
Makes a deep copy of a table.

Source: <https://stackoverflow.com/a/16077650/2470572>
]]
---- @param o value to copy (usually a Lua table)
---- @param seen (used by the recursion) maps already copied tables to their copy
---- @treturn Lua table
function util.tableDeepCopy(o, seen)
  if o == nil then return nil end
  seen = seen or {}

  -- a table met before (shared or cyclic reference) reuses its copy
  local known = seen[o]
  if known then return known end

  -- number, string, boolean, etc. are returned as is
  if type(o) ~= "table" then return o end

  local copy = {}
  seen[o] = copy -- registered before recursing, so cycles end here
  for key, value in next, o, nil do
    copy[util.tableDeepCopy(key, seen)] = util.tableDeepCopy(value, seen)
  end
  -- the metatable is deep copied as well
  return setmetatable(copy, util.tableDeepCopy(getmetatable(o), seen))
end

--- Counts the keys of a table, whatever their type.
---- @param t Lua table
---- @treturn int number of keys in table t
function util.tableSize(t)
    local size = 0
    for _key in pairs(t) do
        size = size + 1
    end
    return size
end

--- Appends the elements of array t2 at the end of array t1, in order.
-- t1 is modified in place, nothing is returned.
---- @param t1 Lua table
---- @param t2 Lua table
function util.arrayAppend(t1, t2)
    local insert = table.insert
    for _index, value in ipairs(t2) do
        insert(t1, value)
    end
end

-- Reverses the order of the elements of array t, in place.
---- @param t Lua table
function util.arrayReverse(t)
    local last = #t
    -- swap each element of the first half with its mirror in the second half
    for first = 1, math.floor(last / 2) do
        local mirror = last - first + 1
        t[first], t[mirror] = t[mirror], t[first]
    end
end

-- Copies every key/value pair of t2 into t1 (shallow copy, in place).
-- A key already present in t1 gets the value from t2.
-- Probably not safe with nested tables (c.f., https://stackoverflow.com/q/1283388)
---- @param t1 Lua table
---- @param t2 Lua table
function util.tableMerge(t1, t2)
    for key, value in pairs(t2) do
        t1[key] = value
    end
end

--[[--
Gets last index of character in string (i.e., strrchr)

Returns the index within this string of the last occurrence of the specified character
or -1 if the character does not occur.

The character is inserted as is into a Lua pattern, so pattern magic characters
must be escaped (e.g., use "%." to find a dot).
]]
---- @string string
---- @string ch
---- @treturn int last occurrence or -1 if not found
function util.lastIndexOf(string, ch)
    -- the greedy ".*" makes ch match as far to the right as possible, and the
    -- empty capture "()" gives the position right after that match
    local after_match = string:match(".*" .. ch .. "()")
    if after_match ~= nil then
        return after_match - 1
    end
    return -1
end

--- Reverses a UTF-8 string, keeping the bytes of multibyte characters in order.
-- Taken from <https://github.com/blitmap/lua-utf8-simple#utf8reverses>
-- @string text string to reverse
-- @treturn string reversed string
function util.utf8Reverse(text)
    -- Reverse the bytes of each multibyte character first, so that the
    -- byte-wise reversal of the whole string puts them back in order.
    local function flipMultibyte(char)
        if #char > 1 then
            return char:reverse()
        end
        return false -- single byte: gsub keeps it as is
    end
    local prepared = text:gsub('[%z\1-\127\194-\244][\128-\191]*', flipMultibyte)
    return prepared:reverse()
end

--- Splits string into a list of UTF-8 characters.
-- A UTF-16 surrogate pair encoded as two 3-byte sequences (WTF-8) is merged
-- into the single 4-byte UTF-8 character it stands for.
---- @string text the string to be split.
---- @treturn table list of UTF-8 chars (empty if text is nil)
function util.splitToChars(text)
    local tab = {}
    if text ~= nil then
        local prevcharcode, charcode = 0
        -- Supports WTF-8 : https://en.wikipedia.org/wiki/UTF-8#WTF-8
        -- a superset of UTF-8, that includes UTF-16 surrogates
        -- in UTF-8 bytes (forbidden in well-formed UTF-8).
        -- We may get that from bad producers or converters.
        -- (luajson, used to decode Wikipedia API json, will not correctly decode
        -- this sample: <span lang=\"got\">\ud800\udf45</span> : single Unicode
        -- char https://www.compart.com/en/unicode/U+10345 and will give us
        -- "\xed\xa0\x80\xed\xbd\x85" as UTF8, instead of the correct "\xf0\x90\x8d\x85")
        -- From http://www.unicode.org/faq/utf_bom.html#utf16-1
        --   Surrogates are code points from two special ranges of
        --   Unicode values, reserved for use as the leading, and
        --   trailing values of paired code units in UTF-16. Leading,
        --   also called high, surrogates are from D800 to DBFF, and
        --   trailing, or low, surrogates are from DC00 to DFFF. They
        --   are called surrogates, since they do not represent
        --   characters directly, but only as a pair.
        local hi_surrogate       -- codepoint of a high surrogate waiting for its low one
        local hi_surrogate_uchar -- its original bytes, added as is if it stays unpaired
        for uchar in string.gmatch(text, "([%z\1-\127\194-\244][\128-\191]*)") do
            charcode = BaseUtil.utf8charcode(uchar)
            -- (not sure why we need this prevcharcode check; we could get
            -- charcode=nil with invalid UTF-8, but should we then really
            -- ignore the following charcode ?)
            if prevcharcode then -- utf8
                if charcode and charcode >= 0xD800 and charcode <= 0xDBFF then
                    if hi_surrogate then -- previous unconsumed one, add it even if invalid
                        table.insert(tab, hi_surrogate_uchar)
                    end
                    hi_surrogate = charcode
                    hi_surrogate_uchar = uchar -- will be added if not followed by low surrogate
                elseif hi_surrogate and charcode and charcode >= 0xDC00 and charcode <= 0xDFFF then
                    -- low surrogate following a high surrogate, good, let's make them a single char
                    charcode = lshift((hi_surrogate - 0xD800), 10) + (charcode - 0xDC00) + 0x10000
                    table.insert(tab, util.unicodeCodepointToUtf8(charcode))
                    hi_surrogate = nil
                else
                    if hi_surrogate then -- previous unconsumed one, add it even if invalid
                        table.insert(tab, hi_surrogate_uchar)
                    end
                    hi_surrogate = nil
                    table.insert(tab, uchar)
                end
            end
            prevcharcode = charcode
        end
        -- A high surrogate met as the very last char has nothing left to pair
        -- with: add it as is, like any other unpaired one, instead of losing it.
        if hi_surrogate then
            table.insert(tab, hi_surrogate_uchar)
        end
    end
    return tab
end

--- Tests whether c is a CJK character.
-- That is: a 3-byte sequence whose first byte is in \228-\234 and whose
-- second byte is in \128-\191, with nothing before or after it.
---- @string c
---- @treturn boolean true if CJK
function util.isCJKChar(c)
    local cjk_part = string.match(c, "[\228-\234][\128-\191].")
    return cjk_part == c
end

--- Tests whether str contains at least one CJK character.
-- (Same byte pattern as in util.isCJKChar, but looked for anywhere in str.)
---- @string str
---- @treturn boolean true if CJK
function util.hasCJKChar(str)
    local found_at = string.find(str, "[\228-\234][\128-\191].")
    return found_at ~= nil
end

--- Splits text into a list of words, spaces and punctuation marks.
-- Runs of spaces/punctuation are kept as items of the list. A word that
-- contains CJK characters is split further on its multibyte characters.
---- @string text text to split
---- @treturn table list of words, spaces and punctuation marks
function util.splitToWords(text)
    local words = {}
    for token in util.gsplit(text, "[%s%p]+", true) do
        if not util.hasCJKChar(token) then
            table.insert(words, token)
        else
            -- each multibyte char is a delimiter, and delimiters are captured
            for piece in util.gsplit(token, "[\228-\234\192-\255][\128-\191]+", true) do
                table.insert(words, piece)
            end
        end
    end
    return words
end

-- We don't want to split on a space if it is followed by some
-- specific punctuation marks : e.g. "word :" or "word )"
-- (In French, there is a non-breaking space before a colon, and it better
-- not be wrapped there.)
-- Each string below is a set of characters, searched with a plain-text
-- find() by util.isSplittable().
local non_splittable_space_tailers = ":;,.!?)]}$%=-+*/|<>»”"
-- Same if a space has some specific other punctuation mark before it
local non_splittable_space_leaders = "([{$=-+*/|<>«“"


-- Similar rules exist for CJK text. Taken from :
-- https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages
-- The per-language lists are concatenated into a single string, searched
-- with a plain-text find() by util.isSplittable().

-- Characters not permitted at start of line: no split before them
local cjk_non_splittable_tailers = table.concat( {
    -- Simplified Chinese
    "!%),.:;?]}¢°·’\"†‡›℃∶、。〃〆〕〗〞﹚﹜！＂％＇），．：；？！］｝～",
    -- Traditional Chinese
    "!),.:;?]}¢·–—’\"•、。〆〞〕〉》」︰︱︲︳﹐﹑﹒﹓﹔﹕﹖﹘﹚﹜！），．：；？︶︸︺︼︾﹀﹂﹗］｜｝､",
    -- Japanese
    ")]｝〕〉》」』】〙〗〟’\"｠»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。.",
    -- Korean
    "!%),.:;?]}¢°’\"†‡℃〆〈《「『〕！％），．：；？］｝",
})

-- Characters not permitted at end of line: no split after them
local cjk_non_splittable_leaders = table.concat( {
    -- Simplified Chinese
    "$(£¥·‘\"〈《「『【〔〖〝﹙﹛＄（．［｛￡￥",
    -- Traditional Chinese
    "([{£¥‘\"‵〈《「『〔〝︴﹙﹛（｛︵︷︹︻︽︿﹁﹃﹏",
    -- Japanese
    "([｛〔〈《「『【〘〖〝‘\"｟«",
    -- Korean
    "$([{£¥‘\"々〇〉》」〔＄（［｛｠￥￦#",
})

-- Characters that util.isSplittable() never splits on
local cjk_non_splittable = table.concat( {
    -- Japanese
    "—…‥〳〴〵",
})

--- Test whether a string can be separated by this char for multi-line rendering.
-- Only a CJK char or a single space may be splittable. The optional next
-- and previous chars can veto the split: a next char that should not start
-- a line, or a previous char that should not end one.
---- @string c
---- @string next_c
---- @string prev_c
---- @treturn boolean true if splittable, false if not
function util.isSplittable(c, next_c, prev_c)
    -- pick the sets of neighbours that forbid a split around this kind of char
    local tailers, leaders
    if util.isCJKChar(c) then
        -- a CJKChar is a word in itself, and so is splittable, except a few of them
        if cjk_non_splittable:find(c, 1, true) then
            return false
        end
        tailers, leaders = cjk_non_splittable_tailers, cjk_non_splittable_leaders
    elseif c == " " then
        -- we only split on a space (so a punctuation mark sticks to prev word)
        tailers, leaders = non_splittable_space_tailers, non_splittable_space_leaders
    else
        -- anything else is not splittable
        return false
    end

    if next_c and tailers:find(next_c, 1, true) then
        -- followed by a char that is better kept with us
        return false
    end
    if prev_c and leaders:find(prev_c, 1, true) then
        -- preceded by a char that is better kept with us
        return false
    end
    return true
end

--- Gets filesystem type of a path.
--
-- Looks in <code>/proc/mounts</code> for a mount point that occurs in the path.
-- The first matching mount point other than <code>/</code> wins; the type of
-- <code>/</code> is only returned when no other mount point matches.
---- @string path an absolute path
---- @treturn string filesystem type (nil if unknown or if /proc/mounts can't be read)
function util.getFilesystemType(path)
    local mounts = io.open("/proc/mounts", "r")
    if not mounts then return nil end
    local fs_type
    for line in mounts:lines() do
        -- fields are space separated: device, mount point, fs type, ...
        local mount = {}
        for param in line:gmatch("%S+") do table.insert(mount, param) end
        local mount_point = mount[2]
        -- Plain-text search: a mount point is not a Lua pattern, and magic
        -- characters like '-' or '.' in it would otherwise be misinterpreted.
        -- (mount_point is nil for a line with less than two fields.)
        if mount_point and string.find(path, mount_point, 1, true) then
            fs_type = mount[3]
            if mount_point ~= '/' then
                break
            end
        end
    end
    mounts:close()
    return fs_type
end

--- Checks if directory is empty.
---- @string path
---- @treturn bool true if empty, false if not, nothing if the directory can't be listed
function util.isEmptyDir(path)
    local lfs = require("libs/libkoreader-lfs")
    -- lfs.dir will crash rather than return nil if directory doesn't exist O_o
    local ok, iter, dir_obj = pcall(lfs.dir, path)
    if not ok then return end
    for entry in iter, dir_obj do
        -- the '.' and '..' entries don't count as content
        local is_real_entry = entry ~= '.' and entry ~= '..'
        if is_real_entry then
            return false
        end
    end
    return true
end

--- Checks if the given path exists, be it a file, a directory or anything else.
---- @string path
---- @treturn bool
function util.pathExists(path)
    local lfs = require("libs/libkoreader-lfs")
    -- lfs.attributes gives nil when the path can't be found
    local mode = lfs.attributes(path, "mode")
    return mode ~= nil
end

--- As `mkdir -p`.
-- Unlike [lfs.mkdir](https://keplerproject.github.io/luafilesystem/manual.html#mkdir)(),
-- does not error if the directory already exists, and creates intermediate directories as needed.
-- @string path the directory to create
-- @treturn bool true on success; nil, err_message on error
function util.makePath(path)
    path = path:gsub("/+$", "")
    -- An empty path is what is left of "/", or of the parent of a single
    -- relative path component: there is nothing to create. Without this
    -- check, the recursion below would go on forever with "".
    if path == "" then return true end
    if util.pathExists(path) then return true end

    -- make sure the parent directory exists first
    -- (the parentheses keep only the directory part of the split)
    local success, err = util.makePath((util.splitFilePathName(path)))
    if not success then
        return nil, err.." (creating "..path..")"
    end

    local lfs = require("libs/libkoreader-lfs")
    return lfs.mkdir(path)
end

--- Replaces characters that are invalid in filenames.
--
-- Replaces the characters <code>\/:*?"<>|</code> with an <code>_</code>.
-- These characters are problematic on Windows filesystems. On Linux only
-- <code>/</code> poses a problem.
---- @string str filename
---- @treturn string sanitized filename (nothing if str is nil or false)
local function replaceAllInvalidChars(str)
    if str then
        -- No separator is needed between the members of a character class:
        -- a comma in there would get commas replaced too.
        local sanitized = str:gsub('[\\/:*?"<>|]', '_')
        -- (going through a local drops the count that gsub also returns)
        return sanitized
    end
end

--- Replaces every slash with an underscore.
---- @string str
---- @treturn string str without slashes, followed by the number of
---- replacements made (nothing if str is nil or false)
local function replaceSlashChar(str)
    if not str then return end
    return str:gsub('%/','_')
end

--[[--
Replaces characters that are invalid in filenames.

Replaces the characters `\/:*?"<>|` with an `_` unless an optional path is provided. These characters are problematic on Windows filesystems. On Linux only the `/` poses a problem.

If an optional path is provided, @{util.getFilesystemType}() will be used to determine whether stricter VFAT restrictions should be applied.
]]
---- @string str the wished filename
---- @string path (optional) path where the file will be stored
---- @int limit (optional) max length in bytes of the name without suffix, defaults to 240
---- @int limit_ext (optional) max length of what is considered a suffix, defaults to 10
---- @treturn string safe filename
function util.getSafeFilename(str, path, limit, limit_ext)
    -- VFAT supports a maximum of 255 UCS-2 characters, although it's probably treated as UTF-16 by Windows
    -- default to a slightly lower limit just in case
    limit = limit or 240
    limit_ext = limit_ext or 10

    -- replace all the problematic chars, unless we are given a path that
    -- is not on a filesystem known to need that
    local sanitize = replaceAllInvalidChars
    if path then
        local file_system = util.getFilesystemType(path)
        local needs_strict = file_system == "vfat" or file_system == "fuse.fsp"
        if not needs_strict then
            sanitize = replaceSlashChar
        end
    end

    local filename, suffix = util.splitFileNameSuffix(str)
    if suffix:len() > limit_ext then
        -- probably not an actual file extension, or at least not one we'd be
        -- dealing with, so strip the whole string
        filename, suffix = str, nil
    end

    filename = util.htmlToPlainTextIfHtml(filename)
    -- the limit might result in broken UTF-8, which we don't want in the result
    filename = util.fixUtf8(filename:sub(1, limit), "")

    local safe_filename = sanitize(filename)
    if suffix and suffix ~= "" then
        safe_filename = safe_filename .. "." .. sanitize(suffix)
    end
    return safe_filename
end

--- Splits a file into its directory path and file name.
--- If the given path has a trailing /, returns the entire path as the directory
--- path and "" as the file name.
--- The directory path keeps its trailing /, and is "" when file has no /.
---- @string file
---- @treturn string path, filename
function util.splitFilePathName(file)
    if file == nil or file == "" then return "", "" end
    -- greedy ".*/": the directory part goes up to and including the last slash
    local directory, name = string.match(file, "^(.*/)(.*)$")
    if directory == nil then return "", file end
    -- exactly two values are returned (two gsub() calls in a return list
    -- would let the substitution count of the last one through)
    return directory, name
end

--- Splits a file name into its pure file name and suffix
--- The split is done on the last dot, which is not part of either result.
--- The suffix is "" when file has no dot.
---- @string file
---- @treturn string pure file name, suffix
function util.splitFileNameSuffix(file)
    if file == nil or file == "" then return "", "" end
    -- greedy "(.*)%.": the name part goes up to the last dot
    local name, suffix = string.match(file, "^(.*)%.(.*)$")
    if name == nil then return file, "" end
    -- exactly two values are returned (two gsub() calls in a return list
    -- would let the substitution count of the last one through)
    return name, suffix
end

--- Gets file extension (what follows the last dot, "" if there's none)
---- @string filename
---- @treturn string extension
function util.getFileNameSuffix(file)
    -- keep only the second value of the split (the parentheses drop any other)
    return (select(2, util.splitFileNameSuffix(file)))
end

--- Returns true if the file is a script we allow running
--- Basically a helper method to check a specific list of file extensions
--- ("sh" and "py", whatever their case).
---- @string filename
---- @treturn boolean
function util.isAllowedScript(file)
    local extension = string.lower(util.getFileNameSuffix(file))
    return extension == "sh" or extension == "py"
end

--- Companion helper function that returns the script's language,
--- based on the file extension.
---- @string filename
---- @treturn string (lowercase) (or nil if !isAllowedScript)
function util.getScriptType(file)
    local extension = string.lower(util.getFileNameSuffix(file))
    if extension == "py" then
        return "python"
    end
    if extension == "sh" then
        return "shell"
    end
    -- any other extension: nothing is returned
end

--- Gets human friendly size as string, in B, KB, MB or GB (by steps of 1024)
---- @int size (bytes)
---- @bool right_align (by padding with spaces on the left)
---- @treturn string (nothing if size is not a number)
function util.getFriendlySize(size, right_align)
    size = tonumber(size)
    if not size then return end

    local frac_format, deci_format = "%.1f", "%d"
    if right_align then
        frac_format, deci_format = "%6.1f", "%6d"
    end

    local kilo = 1024
    local mega = 1024*1024
    local giga = 1024*1024*1024
    -- a size is shown in a unit only if strictly above one of that unit
    if size > giga then
        -- @translators This is an abbreviation for the gigabyte, a unit of computer memory or data storage capacity.
        return T(_("%1 GB"), string.format(frac_format, size/1024/1024/1024))
    elseif size > mega then
        -- @translators This is an abbreviation for the megabyte, a unit of computer memory or data storage capacity.
        return T(_("%1 MB"), string.format(frac_format, size/1024/1024))
    elseif size > kilo then
        -- @translators This is an abbreviation for the kilobyte, a unit of computer memory or data storage capacity.
        return T(_("%1 KB"), string.format(frac_format, size/1024))
    end
    -- @translators This is an abbreviation for the byte, a unit of computer memory or data storage capacity.
    return T(_("%1 B"), string.format(deci_format, size))
end

--- Gets formatted size as string (1273334 => "1,273,334")
---- @int size (bytes)
---- @treturn string
function util.getFormattedSize(size)
    -- work on the reversed digits, so that groups of 3 start from the units
    local reversed = tostring(size):reverse()
    local grouped = reversed:gsub("(%d%d%d)", "%1,")
    -- back in reading order, drop the comma a full leading group has left
    local formatted = grouped:reverse():gsub("^,", "")
    return formatted
end

--[[--
Replaces invalid UTF-8 characters with a replacement string.

Based on <http://notebook.kulchenko.com/programming/fixing-malformed-utf8-in-lua>.
c.f.,    FixUTF8 @ <https://github.com/pkulchenko/ZeroBraneStudio/blob/master/src/util.lua>.

@string str the string to be checked for invalid characters
@string replacement the string to replace invalid characters with
@treturn string valid UTF-8
]]
function util.fixUtf8(str, replacement)
    local pos, len = 1, #str
    while pos <= len do
        -- byte length of the well-formed sequence starting at pos, if any
        local valid_len
        if str:find("^[%z\1-\127]", pos) then
            valid_len = 1
        elseif str:find("^[\194-\223][\128-\191]", pos) then
            valid_len = 2
        elseif str:find(       "^\224[\160-\191][\128-\191]", pos)
            or str:find("^[\225-\236][\128-\191][\128-\191]", pos)
            or str:find(       "^\237[\128-\159][\128-\191]", pos)
            or str:find("^[\238-\239][\128-\191][\128-\191]", pos) then
            valid_len = 3
        elseif str:find(       "^\240[\144-\191][\128-\191][\128-\191]", pos)
            or str:find("^[\241-\243][\128-\191][\128-\191][\128-\191]", pos)
            or str:find(       "^\244[\128-\143][\128-\191][\128-\191]", pos) then
            valid_len = 4
        end

        if valid_len then
            pos = pos + valid_len
        else
            -- swap the single offending byte for the replacement, and go on
            -- right after it (the string length has changed accordingly)
            str = str:sub(1, pos - 1) .. replacement .. str:sub(pos + 1)
            pos = pos + #replacement
            len = len + #replacement - 1
        end
    end
    return str
end

--- Splits input string with the splitter into a table. This function ignores the last empty entity.
-- (The splitter is handled as a Lua pattern by util.gsplit, and is not
-- included in the result.)
--
--- @string str the string to be split
--- @string splitter
--- @bool capture_empty_entity
--- @treturn an array-like table
function util.splitToArray(str, splitter, capture_empty_entity)
    local pieces = {}
    for piece in util.gsplit(str, splitter, false, capture_empty_entity) do
        pieces[#pieces + 1] = piece
    end
    return pieces
end

--- Convert a Unicode codepoint (number) to UTF-8 char
--- c.f., <https://stackoverflow.com/a/4609989>
---     & <https://stackoverflow.com/a/38492214>
--- See utf8charcode in ffi/util for a decoder.
--- Surrogates (U+D800 - U+DFFF) and codepoints above U+10FFFF are
--- converted to U+FFFD REPLACEMENT CHARACTER.
--
--- @int c Unicode codepoint
--- @treturn string UTF-8 char
function util.unicodeCodepointToUtf8(c)
    -- UTF-8 bytes (EF BF BD) of U+FFFD REPLACEMENT CHARACTER
    local replacement_char = "\239\191\189"
    if c < 0x80 then
        -- 1 byte: 0xxxxxxx
        return string.char(c)
    elseif c < 0x800 then
        -- 2 bytes: 110xxxxx 10xxxxxx
        return string.char(
                bor(0xC0, rshift(c, 6)),
                bor(0x80, band(c, 0x3F))
        )
    elseif c < 0x10000 then
        if c >= 0xD800 and c <= 0xDFFF then
            return replacement_char -- Surrogates -> U+FFFD REPLACEMENT CHARACTER
        end
        -- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        return string.char(
                bor(0xE0, rshift(c, 12)),
                bor(0x80, band(rshift(c, 6), 0x3F)),
                bor(0x80, band(c, 0x3F))
        )
    elseif c < 0x110000 then
        -- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return string.char(
                bor(0xF0, rshift(c, 18)),
                bor(0x80, band(rshift(c, 12), 0x3F)),
                bor(0x80, band(rshift(c, 6), 0x3F)),
                bor(0x80, band(c, 0x3F))
        )
    else
        return replacement_char -- Invalid -> U+FFFD REPLACEMENT CHARACTER
    end
end

-- List of {Lua pattern, replacement} pairs, used by util.htmlEntitiesToUtf8()
-- as the arguments of successive gsub() calls.
-- we need to use an array of arrays to keep them ordered as written
local HTML_ENTITIES_TO_UTF8 = {
    {"&lt;", "<"},
    {"&gt;", ">"},
    {"&quot;", '"'},
    {"&apos;", "'"},
    {"&nbsp;", "\xC2\xA0"}, -- UTF-8 bytes of U+00A0 NO-BREAK SPACE
    -- numeric entities: the captured number is the codepoint (decimal, then hexadecimal)
    {"&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end},
    {"&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x, 16)) end},
    {"&amp;", "&"}, -- must be last
}
--[[--
Replace HTML entities with their UTF-8 encoded equivalent in text.

Supports only basic ones and those with numbers (no support for named entities like `&eacute;`).

@string text text with HTML entities
@treturn string UTF-8 text
]]
function util.htmlEntitiesToUtf8(text)
    -- apply the replacements one after the other, in the order of the list
    for i = 1, #HTML_ENTITIES_TO_UTF8 do
        local entity = HTML_ENTITIES_TO_UTF8[i]
        text = text:gsub(entity[1], entity[2])
    end
    return text
end

--[[--
Convert simple HTML to plain text.

Line break and paragraph tags become new lines, other tags are removed, and HTML entities are decoded.

This may fail on complex HTML (with styles, scripts, comments), but should be fine enough with simple HTML as found in EPUB's `<dc:description>`.

@string text HTML text
@treturn string plain text
]]
function util.htmlToPlainText(text)
    -- Tags replaced with \n (along with the spaces around them)
    local line_break_tags = {
        "%s*<%s*br%s*/?>%s*", -- <br> and <br/>
        "%s*<%s*p%s*>%s*", -- <p>
        "%s*</%s*p%s*>%s*", -- </p>
        "%s*<%s*p%s*/>%s*", -- standalone <p/>
    }
    for _index, tag_pattern in ipairs(line_break_tags) do
        text = text:gsub(tag_pattern, "\n")
    end
    -- Remove all HTML tags
    text = text:gsub("<[^>]*>", "")
    -- Convert HTML entities
    text = util.htmlEntitiesToUtf8(text)
    -- Trim spaces and new lines at start and end
    text = text:gsub("^[\n%s]*", ""):gsub("[\n%s]*$", "")
    return text
end

--- Convert HTML to plain text if text seems to be HTML
-- Detection of HTML is simple and may raise false positives
-- or negatives, but seems quite good at guessing content type
-- of text found in EPUB's <dc:description>.
--
--- @string text the string with possibly some HTML
--- @treturn string cleaned text
function util.htmlToPlainTextIfHtml(text)
    -- Quick way to check if text is some HTML: look for html tags
    -- (the 2nd value returned by gsub is the number of matches)
    local nb_tags = select(2, text:gsub("<%w+.->", ""))
    local is_html = nb_tags > 0
    if not is_html then
        -- no <tag> found
        -- but we may meet some text badly/twice encoded html containing "&lt;br&gt;"
        local nb_encoded_tags = select(2, text:gsub("&lt;%a+&gt;", ""))
        if nb_encoded_tags > 0 then
            is_html = true
            -- decode one of the two encodes
            text = util.htmlEntitiesToUtf8(text)
        end
    end

    local cleaned
    if is_html then
        cleaned = util.htmlToPlainText(text)
    else
        -- if text ends with ]]>, it probably comes from <![CDATA[ .. ]]> that
        -- crengine has extracted correctly, but let the ending tag in, so
        -- let's remove it
        cleaned = text:gsub("]]>%s*$", "")
    end
    return cleaned
end

--- Encode the HTML entities in a string
-- The characters & < > " ' and / are replaced with an entity.
--- @string text the string to escape
--- @treturn string escaped text
-- Taken from https://github.com/kernelsauce/turbo/blob/e4a35c2e3fb63f07464f8f8e17252bea3a029685/turbo/escape.lua#L58-L70
function util.htmlEscape(text)
    -- Only the characters with an entry in the table need to be matched
    -- (a match with no entry is kept as is by gsub anyway).
    local escaped = text:gsub("[\">/<'&]", {
        ["&"] = "&amp;",
        ["<"] = "&lt;",
        [">"] = "&gt;",
        ['"'] = "&quot;",
        ["'"] = "&#39;",
        ["/"] = "&#47;",
    })
    -- (going through a local drops the count that gsub also returns)
    return escaped
end

--- Escape list for shell usage
-- Each argument is put between single quotes, with any single quote in it
-- written as '\'' (close the quotes, escaped quote, reopen the quotes).
--- @table args the list of arguments to escape
--- @treturn string the escaped and concatenated arguments
function util.shell_escape(args)
    local quoted = {}
    for index, raw in ipairs(args) do
        local escaped = raw:gsub("'", "'\\''")
        quoted[index] = "'" .. escaped .. "'"
    end
    return table.concat(quoted, " ")
end

--- Clear the array elements from a table without reassignment.
-- Only the integer keys from 0 to #t are removed: any other key is kept.
--- @table t the table to be cleared
function util.clearTable(t)
    for index = #t, 0, -1 do
        t[index] = nil
    end
end

--- Encode URL also known as percent-encoding see https://en.wikipedia.org/wiki/Percent-encoding
-- Line feeds are first changed to CR LF. Then, each byte other than
-- alphanumerics and - . _ ~ ! * ' ( ) is replaced with % and its
-- 2 digits uppercase hexadecimal value.
--- @string text the string to encode
--- @treturn encode string (nothing if text is nil)
--- Taken from https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
function util.urlEncode(url)
    if url == nil then
        return
    end
    local with_crlf = url:gsub("\n", "\r\n")
    local encoded = with_crlf:gsub("([^%w%-%.%_%~%!%*%'%(%)])", function(c)
        return string.format("%%%02X", string.byte(c))
    end)
    return encoded
end

--- Decode a percent-encoded string (reverse process to util.urlEncode())
-- Every % followed by two hexadecimal digits is replaced by the byte with
-- that value; anything else is passed through unchanged. Returns nothing
-- when given nil.
--- @string url the string to decode
--- @treturn string the decoded string
--- Taken from https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
function util.urlDecode(url)
    if url == nil then
        return
    end
    local decoded = url:gsub("%%(%x%x)", function(hex)
        return string.char(tonumber(hex, 16))
    end)
    return decoded
end

--- Check the Lua syntax of a string.
-- The text is compiled with loadstring() but never executed.
--- @string lua_text lua code text
--- @treturn string with parsing error, nil if syntax ok
function util.checkLuaSyntax(lua_text)
    local chunk, message = loadstring(lua_text)
    if not chunk then
        -- Make the chunk prefix human friendly, i.e. turn
        --   [string "blah blah..."]:3: '=' expected near '123'
        -- into
        --   Line 3: '=' expected near '123'
        local readable = message:gsub("%[string \".-%\"]:", "Line ")
        return readable
    end
    return nil
end

--- Unpack an archive.
-- Extract the contents of an archive, detecting its format by
-- filename extension. Inspired by luarocks archive_unpack()
-- Only tarballs (.tar.bz2, .tar.gz, .tar.lz, .tgz) are recognized; they are
-- extracted by running the `./tar` binary through the shell.
-- @param archive string: Filename of archive.
-- @param extract_to string: Destination directory.
-- @return boolean or (boolean, string): true on success, false and an error message on failure.
function util.unpackArchive(archive, extract_to)
    dbg.dassert(type(archive) == "string")

    local BD = require("ui/bidi")
    local is_tarball = archive:match("%.tar%.bz2$") or archive:match("%.tar%.gz$")
        or archive:match("%.tar%.lz$") or archive:match("%.tgz$")
    if not is_tarball then
        return false, T(_("Couldn't extract archive:\n\n%1\n\nUnrecognized filename extension."), BD.filepath(archive))
    end

    -- Quote the paths for the shell: string.format's %q produces Lua-style
    -- double quoting, inside which the shell still expands $ and backticks.
    local command = "./tar xf " .. util.shell_escape({ archive })
        .. " -C " .. util.shell_escape({ extract_to })
    local status = os.execute(command)
    -- os.execute returns the numeric exit status on Lua 5.1 / plain LuaJIT
    -- (where 0 is truthy, so a bare `not status` never detects failure) and
    -- true/nil on 5.2-compatible builds; accept both success conventions.
    if not (status == 0 or status == true) then
        -- The path must be an argument of T(), not of gettext, for %1 to be filled in.
        return false, T(_("Extracting archive failed:\n\n%1"), BD.filepath(archive))
    end
    return true
end

-- Simple startsWith / endsWith string helpers
-- c.f., http://lua-users.org/wiki/StringRecipes
-- Tells whether str begins with start (plain comparison, no patterns).
-- An empty start always matches.
-- @param str string: source string
-- @param start string: string to match
-- @return boolean: true on success
function util.stringStartsWith(str, start)
    local prefix_len = #start
    local head = str:sub(1, prefix_len)
    return head == start
end

-- Tells whether str finishes with ending (plain comparison, no patterns).
-- An empty ending always matches.
-- @param str string: source string
-- @param ending string: string to match
-- @return boolean: true on success
function util.stringEndsWith(str, ending)
    if ending == "" then
        return true
    end
    local tail = str:sub(-#ending)
    return tail == ending
end

-- Export the module table; this is the value that require() hands back to callers.
return util
-												doc: add documentation build infrastructure

											
										
										
											2016-02-04 18:24:39 +00:00
+								--[[--
-												Doc: miscellaneous improvements.

											
										
										
											2016-12-13 16:06:02 +00:00
+								This module contains miscellaneous helper functions for the KOReader frontend.
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								]]
-												doc: add documentation build infrastructure

											
										
										
											2016-02-04 18:24:39 +00:00
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								local BaseUtil = require("ffi/util")
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								local dbg = require("dbg")
 								local _ = require("gettext")
 								local T = BaseUtil.template
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								local lshift = bit.lshift
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								local rshift = bit.rshift
 								local band = bit.band
 								local bor = bit.bor
-												strip punctuations around word before searching
This should fix #1337.

											
										
										
											2015-02-01 09:40:34 +00:00
+								local util = {}
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								--- Strips all punctuation marks and spaces from a string.
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								---- @string text the string to be stripped
 								---- @treturn string stripped text
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								function util.stripPunctuation(text)
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								    if not text then return end
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								    -- strip ASCII punctuation marks around text
 								    -- and strip any generic punctuation marks (U+2000 - U+206F) in the text
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								    return text:gsub("\226[\128-\131][\128-\191]", ''):gsub("^%p+", ''):gsub("%p+$", '')
-												strip punctuations around word before searching
This should fix #1337.

											
										
										
											2015-02-01 09:40:34 +00:00
+								end
-												util: add some LDoc descriptions

											
										
										
											2017-02-25 17:52:34 +00:00
+								--[[--
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								Splits a string by a pattern
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								Lua doesn't have a string.split() function and most of the time
 								you don't really need it because string.gmatch() is enough.
 								However string.gmatch() has one significant disadvantage for me:
 								You can't split a string while matching both the delimited
 								strings and the delimiters themselves without tracking positions
 								and substrings. The gsplit function below takes care of
 								this problem.
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								Author: Peter Odding
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								License: MIT/X11
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
 								Source: <a href="http://snippets.luacode.org/snippets/String_splitting_130">http://snippets.luacode.org/snippets/String_splitting_130</a>
-												util: add some LDoc descriptions

											
										
										
											2017-02-25 17:52:34 +00:00
+								]]
 								----@string str string to split
 								----@param pattern the pattern to split against
 								----@bool capture
-												Merge various information into systemstat (#2764)

* Merge various information to systemstat
											
										
										
											2017-04-14 19:12:28 +00:00
+								----@bool capture_empty_entity
 								function util.gsplit(str, pattern, capture, capture_empty_entity)
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								    pattern = pattern and tostring(pattern) or '%s+'
 								    if (''):find(pattern) then
 								        error('pattern matches empty string!', 2)
 								    end
 								    return coroutine.wrap(function()
 								        local index = 1
 								        repeat
 								            local first, last = str:find(pattern, index)
 								            if first and last then
-												Merge various information into systemstat (#2764)

* Merge various information to systemstat
											
										
										
											2017-04-14 19:12:28 +00:00
+								                if index < first or (index == first and capture_empty_entity) then
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								                    coroutine.yield(str:sub(index, first - 1))
 								                end
 								                if capture then
 								                    coroutine.yield(str:sub(first, last))
 								                end
 								                index = last + 1
 								            else
 								                if index <= #str then
 								                    coroutine.yield(str:sub(index))
 								                end
 								                break
 								            end
 								        until index > #str
 								    end)
 								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--[[--
 								Converts seconds to a clock string.
 								Source: <a href="https://gist.github.com/jesseadams/791673">https://gist.github.com/jesseadams/791673</a>
 								]]
-												util: add some LDoc descriptions

											
										
										
											2017-02-25 17:52:34 +00:00
+								---- @int seconds number of seconds
 								---- @bool withoutSeconds if true 00:00, if false 00:00:00
 								---- @treturn string clock string in the form of 00:00 or 00:00:00
-												minor: no need to override function arg with local

											
										
										
											2016-01-03 09:08:26 +00:00
+								function util.secondsToClock(seconds, withoutSeconds)
 								    seconds = tonumber(seconds)
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								    if seconds == 0 or seconds ~= seconds then
 								        if withoutSeconds then
-												[fix] util.secondsToClock 00:60 should be 01:00 (#3371)


											
										
										
											2017-10-18 15:27:27 +00:00
+								            return "00:00"
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								        else
-												[fix] util.secondsToClock 00:60 should be 01:00 (#3371)


											
										
										
											2017-10-18 15:27:27 +00:00
+								            return "00:00:00"
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								        end
 								    else
-												[chore] Rework util spec, rework util.secondsToClock: round seconds to minutes in 00:00 mode + spec

Most of the tests in util_spec were the wrong way around.
It's `assert(expected, given)`.

											
										
										
											2017-10-11 14:38:20 +00:00
+								        local round = withoutSeconds and require("optmath").round or math.floor
-												[fix] util.secondsToClock 00:60 should be 01:00 (#3371)


											
										
										
											2017-10-18 15:27:27 +00:00
+								        local hours = string.format("%02.f", math.floor(seconds / 3600))
 								        local mins = string.format("%02.f", round(seconds / 60 - (hours * 60)))
 								        if mins == "60" then
 								            mins = string.format("%02.f", 0)
 								            hours = string.format("%02.f", hours + 1)
 								        end
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								        if withoutSeconds then
 								            return hours .. ":" .. mins
 								        end
-												[fix] util.secondsToClock 00:60 should be 01:00 (#3371)


											
										
										
											2017-10-18 15:27:27 +00:00
+								        local secs = string.format("%02.f", math.floor(seconds - hours * 3600 - mins * 60))
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								        return hours .. ":" .. mins .. ":" .. secs
 								    end
 								end
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--- Converts seconds to a period of time string.
-												More footer options, default to use icons as prefixes (#5203)

- show icons or letters as prefix of items
- various footer separators
- progress percentage format with decimal digits
- time in 12/24 format
- two duration formats (1:30, 1h30')
- move some options into Settings submenu
											
										
										
											2019-08-16 21:22:58 +00:00
+								---- @int seconds number of seconds
 								---- @bool withoutSeconds if true 1h30', if false 1h30'10''
 								---- @bool hmsFormat, if true format 1h30m10s
 								---- @treturn string clock string in the form of 1h30' or 1h30'10''
 								function util.secondsToHClock(seconds, withoutSeconds, hmsFormat)
 								    seconds = tonumber(seconds)
 								    if seconds == 0 then
 								        if withoutSeconds then
 								            if hmsFormat then
 								                return T(_("%1m"), "0")
 								            else
 								                return "0'"
 								            end
 								        else
 								            if hmsFormat then
 								                return T(_("%1s"), "0")
 								            else
 								                return "0''"
 								            end
 								        end
 								    elseif seconds < 60 then
 								        if withoutSeconds and seconds < 30 then
 								            if hmsFormat then
 								                return T(_("%1m"), "0")
 								            else
 								                return "0'"
 								            end
 								        elseif withoutSeconds and seconds >= 30 then
 								            if hmsFormat then
 								                return T(_("%1m"), "1")
 								            else
 								                return "1'"
 								            end
 								        else
 								            if hmsFormat then
 								                return T(_("%1m%2s"), "0", string.format("%02.f", seconds))
 								            else
 								                return "0'" .. string.format("%02.f", seconds) .. "''"
 								            end
 								        end
 								    else
 								        local round = withoutSeconds and require("optmath").round or math.floor
 								        local hours = string.format("%.f", math.floor(seconds / 3600))
 								        local mins = string.format("%02.f", round(seconds / 60 - (hours * 60)))
 								        if mins == "60" then
 								            mins = string.format("%02.f", 0)
 								            hours = string.format("%.f", hours + 1)
 								        end
 								        if withoutSeconds then
 								            if hours == "0" then
 								                mins = string.format("%.f", round(seconds / 60))
-												fix util.secondsToHClock when hmsFormat is true (#5640)


											
										
										
											2019-11-26 12:28:11 +00:00
+								                if hmsFormat then
 								                    return T(_("%1m"), mins)
 								                else
 								                    return mins .. "'"
 								                end
-												More footer options, default to use icons as prefixes (#5203)

- show icons or letters as prefix of items
- various footer separators
- progress percentage format with decimal digits
- time in 12/24 format
- two duration formats (1:30, 1h30')
- move some options into Settings submenu
											
										
										
											2019-08-16 21:22:58 +00:00
+								            end
-												[i18n] Add translator notes (#5250)

Thanks to <https://github.com/koreader/koreader/pull/5237> we can now  extract the knowledge currently embedded in Transifex and put it directly in our source. This positively affects <https://github.com/koreader/koreader/issues/3754>.

Translation instructions and knowledge that comes out of localization-related questions should be preserved in the source, because Transifex is too ephemeral. For example, the links from <https://github.com/koreader/koreader/pull/2290> are no longer accessible. Even when they are, it's quite useful to have this information around while dealing with the code as well, and I also hope it'll be informative to contributors who seldom visit Transifex.

This commit also makes a few minor changes to obviate the need for comments where possible.
											
										
										
											2019-08-24 07:25:38 +00:00
+								            -- @translators This is the 'h' for hour, like in 1h30. This is a duration.
-												More footer options, default to use icons as prefixes (#5203)

- show icons or letters as prefix of items
- various footer separators
- progress percentage format with decimal digits
- time in 12/24 format
- two duration formats (1:30, 1h30')
- move some options into Settings submenu
											
										
										
											2019-08-16 21:22:58 +00:00
+								            return T(_("%1h%2"), hours, mins)
 								        end
 								        local secs = string.format("%02.f", math.floor(seconds - hours * 3600 - mins * 60))
 								        if hours == "0" then
 								            mins = string.format("%.f", round(seconds / 60))
 								            if hmsFormat then
-												[i18n] Add translator notes (#5250)

Thanks to <https://github.com/koreader/koreader/pull/5237> we can now  extract the knowledge currently embedded in Transifex and put it directly in our source. This positively affects <https://github.com/koreader/koreader/issues/3754>.

Translation instructions and knowledge that comes out of localization-related questions should be preserved in the source, because Transifex is too ephemeral. For example, the links from <https://github.com/koreader/koreader/pull/2290> are no longer accessible. Even when they are, it's quite useful to have this information around while dealing with the code as well, and I also hope it'll be informative to contributors who seldom visit Transifex.

This commit also makes a few minor changes to obviate the need for comments where possible.
											
										
										
											2019-08-24 07:25:38 +00:00
+								                -- @translators This is the 'm' for minute and the 's' for second, like in 1m30s. This is a duration.
-												More footer options, default to use icons as prefixes (#5203)

- show icons or letters as prefix of items
- various footer separators
- progress percentage format with decimal digits
- time in 12/24 format
- two duration formats (1:30, 1h30')
- move some options into Settings submenu
											
										
										
											2019-08-16 21:22:58 +00:00
+								                return T(_("%1m%2s"), mins, secs)
 								            else
 								                return mins .. "'" .. secs .. "''"
 								            end
 								        end
 								        if hmsFormat then
 								            if secs == "00" then
-												[i18n] Add translator notes (#5250)

Thanks to <https://github.com/koreader/koreader/pull/5237> we can now  extract the knowledge currently embedded in Transifex and put it directly in our source. This positively affects <https://github.com/koreader/koreader/issues/3754>.

Translation instructions and knowledge that comes out of localization-related questions should be preserved in the source, because Transifex is too ephemeral. For example, the links from <https://github.com/koreader/koreader/pull/2290> are no longer accessible. Even when they are, it's quite useful to have this information around while dealing with the code as well, and I also hope it'll be informative to contributors who seldom visit Transifex.

This commit also makes a few minor changes to obviate the need for comments where possible.
											
										
										
											2019-08-24 07:25:38 +00:00
+								                -- @translators This is the 'h' for hour and the 'm' for minute, like in 1h30m. This is a duration.
-												More footer options, default to use icons as prefixes (#5203)

- show icons or letters as prefix of items
- various footer separators
- progress percentage format with decimal digits
- time in 12/24 format
- two duration formats (1:30, 1h30')
- move some options into Settings submenu
											
										
										
											2019-08-16 21:22:58 +00:00
+								                return T(_("%1h%2m"), hours, mins)
 								            else
-												[i18n] Add translator notes (#5250)

Thanks to <https://github.com/koreader/koreader/pull/5237> we can now  extract the knowledge currently embedded in Transifex and put it directly in our source. This positively affects <https://github.com/koreader/koreader/issues/3754>.

Translation instructions and knowledge that comes out of localization-related questions should be preserved in the source, because Transifex is too ephemeral. For example, the links from <https://github.com/koreader/koreader/pull/2290> are no longer accessible. Even when they are, it's quite useful to have this information around while dealing with the code as well, and I also hope it'll be informative to contributors who seldom visit Transifex.

This commit also makes a few minor changes to obviate the need for comments where possible.
											
										
										
											2019-08-24 07:25:38 +00:00
+								                -- @translators This is the 'h' for hour, the 'm' for minute and the 's' for second, like in 1h30m30s. This is a duration.
-												More footer options, default to use icons as prefixes (#5203)

- show icons or letters as prefix of items
- various footer separators
- progress percentage format with decimal digits
- time in 12/24 format
- two duration formats (1:30, 1h30')
- move some options into Settings submenu
											
										
										
											2019-08-16 21:22:58 +00:00
+								                return T(_("%1h%2m%3s"), hours, mins, secs)
 								            end
 								        else
 								            if secs == "00" then
 								                return T(_("%1h%2'"), hours, mins)
 								            else
 								                return T(_("%1h%2'%3''"), hours, mins, secs)
 								            end
 								        end
 								    end
 								end
-												[feat] Add ReaderBack (#3821)

This implements a reasonable facsimile of going back on Android.

The back button first goes back in a history of visited pages.
When there's no history left, it closes the app.

Fixes #3816.
											
										
										
											2018-03-31 19:19:31 +00:00
+								--[[--
 								Compares values in two different tables.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								Source: <https://stackoverflow.com/a/32660766/2470572>
-												[feat] Add ReaderBack (#3821)

This implements a reasonable facsimile of going back on Android.

The back button first goes back in a history of visited pages.
When there's no history left, it closes the app.

Fixes #3816.
											
										
										
											2018-03-31 19:19:31 +00:00
+								]]
 								---- @param o1 Lua table
 								---- @param o2 Lua table
 								---- @bool ignore_mt
 								---- @treturn boolean
 								function util.tableEquals(o1, o2, ignore_mt)
 								    if o1 == o2 then return true end
 								    local o1Type = type(o1)
 								    local o2Type = type(o2)
 								    if o1Type ~= o2Type then return false end
 								    if o1Type ~= 'table' then return false end
 								    if not ignore_mt then
 								        local mt1 = getmetatable(o1)
 								        if mt1 and mt1.__eq then
 								            --compare using built in method
 								            return o1 == o2
 								        end
 								    end
 								    local keySet = {}
 								    for key1, value1 in pairs(o1) do
 								        local value2 = o2[key1]
 								        if value2 == nil or util.tableEquals(value1, value2, ignore_mt) == false then
 								            return false
 								        end
 								        keySet[key1] = true
 								    end
 								    for key2, _ in pairs(o2) do
 								        if not keySet[key2] then return false end
 								    end
 								    return true
 								end
-												[fix] GestureDetector: deep copies of events for multiswipes when rotated (#4728)

Fixes #4724.
											
										
										
											2019-03-04 18:01:01 +00:00
+								--[[--
 								Makes a deep copy of a table.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								Source: <https://stackoverflow.com/a/16077650/2470572>
-												[fix] GestureDetector: deep copies of events for multiswipes when rotated (#4728)

Fixes #4724.
											
										
										
											2019-03-04 18:01:01 +00:00
+								]]
 								---- @param o Lua table
 								---- @treturn Lua table
 								function util.tableDeepCopy(o, seen)
 								  seen = seen or {}
 								  if o == nil then return nil end
 								  if seen[o] then return seen[o] end
 								  local no
 								  if type(o) == "table" then
 								    no = {}
 								    seen[o] = no
 								    for k, v in next, o, nil do
 								      no[util.tableDeepCopy(k, seen)] = util.tableDeepCopy(v, seen)
 								    end
 								    setmetatable(no, util.tableDeepCopy(getmetatable(o), seen))
 								  else -- number, string, boolean, etc
 								    no = o
 								  end
 								  return no
 								end
-												doc: add documentation build infrastructure

											
										
										
											2016-02-04 18:24:39 +00:00
+								--- Returns number of keys in a table.
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								---- @param t Lua table
 								---- @treturn int number of keys in table t
 								function util.tableSize(t)
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								    local count = 0
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								    for _ in pairs(t) do count = count + 1 end
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								    return count
 								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Append all elements from t2 into t1.
 								---- @param t1 Lua table
 								---- @param t2 Lua table
-												feat(util): add array.Append helper

											
										
										
											2016-01-31 22:23:44 +00:00
+								function util.arrayAppend(t1, t2)
-												#1710 FR: Add support of statistics plugin for pdf

											
										
										
											2016-02-12 14:55:02 +00:00
+								    for _, v in ipairs(t2) do
-												feat(util): add array.Append helper

											
										
										
											2016-01-31 22:23:44 +00:00
+								        table.insert(t1, v)
 								    end
 								end
-												[RTL UI] update low-level widgets to handle mirroring

These updated low-level widgets will handle 90%
of the needed UI mirroring.

											
										
										
											2019-12-06 21:55:37 +00:00
+								-- Reverse array elements in-place in table t
 								---- @param t Lua table
 								function util.arrayReverse(t)
 								    local i, j = 1, #t
 								    while i < j do
 								        t[i], t[j] = t[j], t[i]
 								        i = i + 1
 								        j = j - 1
 								    end
 								end
-												Some BookStatus QoL tweaks (#5100)

* Make the cover thumbnail respect the cover's AR in the widget
* Add a "Mark as read/unread" button in the FM's longpress menu.
* Make sure the cover_info cache is wiped if necessary (sidecar purge/BookInfo cache clear).

											
										
										
											2019-06-28 02:46:16 +00:00
+								-- Merge t2 into t1, overwriting existing elements if they already exist
 								-- This is a shallow merge: values are assigned as-is, so it is
 								-- probably not safe with nested tables (c.f., https://stackoverflow.com/q/1283388)
 								---- @param t1 Lua table that receives the entries (modified in place)
 								---- @param t2 Lua table whose entries are copied
 								function util.tableMerge(t1, t2)
 								    for key, value in pairs(t2) do
 								        t1[key] = value
 								    end
 								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--[[--
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								Gets last index of character in string (i.e., strrchr)
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
 								Returns the index within this string of the last occurrence of the specified character
 								or -1 if the character does not occur.
 								To find . you need to escape it.
 								]]
 								---- @string string
 								---- @string ch
 								---- @treturn int last occurrence or -1 if not found
-												#1710 FR: Add support of statistics plugin for pdf

											
										
										
											2016-02-12 14:55:02 +00:00
+								function util.lastIndexOf(string, ch)
 								    local i = string:match(".*" .. ch .. "()")
 								    if i == nil then return -1 else return i - 1 end
 								end
-												Fix folder path truncation in filemanager with utf8 chars (#3599)



											
										
										
											2018-01-13 23:05:05 +00:00
+								--- Reverse the individual greater-than-single-byte characters
 								-- @string string to reverse
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								-- Taken from <https://github.com/blitmap/lua-utf8-simple#utf8reverses>
-												Fix folder path truncation in filemanager with utf8 chars (#3599)



											
										
										
											2018-01-13 23:05:05 +00:00
+								function util.utf8Reverse(text)
 								    text = text:gsub('[%z\1-\127\194-\244][\128-\191]*', function (c) return #c > 1 and c:reverse() end)
 								    return text:reverse()
 								end
-												add cursor functionality

											
										
										
											2016-04-21 14:13:10 +00:00
-												Doc: miscellaneous improvements.

											
										
										
											2016-12-13 16:06:02 +00:00
+								--- Splits string into a list of UTF-8 characters.
 								---- @string text the string to be split.
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								---- @treturn table list of UTF-8 chars
-												fix function util.splitToChars in frontend/util.lua

											
										
										
											2016-05-22 15:59:28 +00:00
+								function util.splitToChars(text)
 								    local tab = {}
 								    if text ~= nil then
 								        local prevcharcode, charcode = 0
-												util.splitToChars(): supports text encoded in WTF-8

https://en.wikipedia.org/wiki/UTF-8#WTF-8
WTF-8 is a superset of UTF-8, that includes UTF-16 surrogates
in UTF-8 bytes (forbidden in well-formed UTF-8).
We may get UTF-8 with these from bad producers or converters.

We can get such chars in the text we get from Wikipedia API once
their (fully valid) JSON has been decoded by our lpeg-based JSON
decoder (which is a defect, hard to fix). (Our other pure-Lua json
decoder has no problem and does that correctly).
We might also find these WTF-8 in some dictionaries, so let's
support them.

											
										
										
											2019-01-15 17:36:33 +00:00
+								        -- Supports WTF-8 : https://en.wikipedia.org/wiki/UTF-8#WTF-8
 								        -- a superset of UTF-8, that includes UTF-16 surrogates
 								        -- in UTF-8 bytes (forbidden in well-formed UTF-8).
 								        -- We may get that from bad producers or converters.
 								        -- (luajson, used to decode Wikipedia API json, will not correctly decode
 								        -- this sample: <span lang=\"got\">\ud800\udf45</span> : single Unicode
 								        -- char https://www.compart.com/en/unicode/U+10345 and will give us
 								        -- "\xed\xa0\x80\xed\xbd\x85" as UTF8, instead of the correct "\xf0\x90\x8d\x85")
 								        -- From http://www.unicode.org/faq/utf_bom.html#utf16-1
 								        --   Surrogates are code points from two special ranges of
 								        --   Unicode values, reserved for use as the leading, and
 								        --   trailing values of paired code units in UTF-16. Leading,
 								        --   also called high, surrogates are from D800 to DBFF, and
 								        --   trailing, or low, surrogates are from DC00 to DFFF. They
 								        --   are called surrogates, since they do not represent
 								        --   characters directly, but only as a pair.
 								        local hi_surrogate
 								        local hi_surrogate_uchar
-												[chore] util: deprecated string.gfind to string.gmatch
											
										
										
											2017-08-14 20:30:42 +00:00
+								        for uchar in string.gmatch(text, "([%z\1-\127\194-\244][\128-\191]*)") do
-												fix function util.splitToChars in frontend/util.lua

											
										
										
											2016-05-22 15:59:28 +00:00
+								            charcode = BaseUtil.utf8charcode(uchar)
-												util.splitToChars(): supports text encoded in WTF-8

https://en.wikipedia.org/wiki/UTF-8#WTF-8
WTF-8 is a superset of UTF-8, that includes UTF-16 surrogates
in UTF-8 bytes (forbidden in well-formed UTF-8).
We may get UTF-8 with these from bad producers or converters.

We can get such chars in the text we get from Wikipedia API once
their (fully valid) JSON has been decoded by our lpeg-based JSON
decoder (which is a defect, hard to fix). (Our other pure-Lua json
decoder has no problem and does that correctly).
We might also find these WTF-8 in some dictionaries, so let's
support them.

											
										
										
											2019-01-15 17:36:33 +00:00
+								            -- (not sure why we need this prevcharcode check; we could get
 								            -- charcode=nil with invalid UTF-8, but should we then really
 								            -- ignore the following charcode ?)
-												fix function util.splitToChars in frontend/util.lua

											
										
										
											2016-05-22 15:59:28 +00:00
+								            if prevcharcode then -- utf8
-												util.splitToChars(): supports text encoded in WTF-8

https://en.wikipedia.org/wiki/UTF-8#WTF-8
WTF-8 is a superset of UTF-8, that includes UTF-16 surrogates
in UTF-8 bytes (forbidden in well-formed UTF-8).
We may get UTF-8 with these from bad producers or converters.

We can get such chars in the text we get from Wikipedia API once
their (fully valid) JSON has been decoded by our lpeg-based JSON
decoder (which is a defect, hard to fix). (Our other pure-Lua json
decoder has no problem and does that correctly).
We might also find these WTF-8 in some dictionaries, so let's
support them.

											
										
										
											2019-01-15 17:36:33 +00:00
+								                if charcode and charcode >= 0xD800 and charcode <= 0xDBFF then
 								                    if hi_surrogate then -- previous unconsumed one, add it even if invalid
 								                        table.insert(tab, hi_surrogate_uchar)
 								                    end
 								                    hi_surrogate = charcode
 								                    hi_surrogate_uchar = uchar -- will be added if not followed by low surrogate
 								                elseif hi_surrogate and charcode and charcode >= 0xDC00 and charcode <= 0xDFFF then
 								                    -- low surrogate following a high surrogate, good, let's make them a single char
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								                    charcode = lshift((hi_surrogate - 0xD800), 10) + (charcode - 0xDC00) + 0x10000
-												util.splitToChars(): supports text encoded in WTF-8

https://en.wikipedia.org/wiki/UTF-8#WTF-8
WTF-8 is a superset of UTF-8, that includes UTF-16 surrogates
in UTF-8 bytes (forbidden in well-formed UTF-8).
We may get UTF-8 with these from bad producers or converters.

We can get such chars in the text we get from Wikipedia API once
their (fully valid) JSON has been decoded by our lpeg-based JSON
decoder (which is a defect, hard to fix). (Our other pure-Lua json
decoder has no problem and does that correctly).
We might also find these WTF-8 in some dictionaries, so let's
support them.

											
										
										
											2019-01-15 17:36:33 +00:00
+								                    table.insert(tab, util.unicodeCodepointToUtf8(charcode))
 								                    hi_surrogate = nil
 								                else
 								                    if hi_surrogate then -- previous unconsumed one, add it even if invalid
 								                        table.insert(tab, hi_surrogate_uchar)
 								                    end
 								                    hi_surrogate = nil
 								                    table.insert(tab, uchar)
 								                end
-												fix function util.splitToChars in frontend/util.lua

											
										
										
											2016-05-22 15:59:28 +00:00
+								            end
 								            prevcharcode = charcode
-												add cursor functionality

											
										
										
											2016-04-21 14:13:10 +00:00
+								        end
 								    end
-												fix function util.splitToChars in frontend/util.lua

											
										
										
											2016-05-22 15:59:28 +00:00
+								    return tab
-												add cursor functionality

											
										
										
											2016-04-21 14:13:10 +00:00
+								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Tests whether c is a CJK character
 								---- @string c
 								---- @treturn boolean true if CJK
-												PR #2356 breaks CJK character splitting

											
										
										
											2016-11-26 00:46:56 +00:00
+								function util.isCJKChar(c)
 								    return string.match(c, "[\228-\234][\128-\191].") == c
 								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Tests whether str contains CJK characters
 								---- @string str
 								---- @treturn boolean true if CJK
-												PR #2356 breaks CJK character splitting

											
										
										
											2016-11-26 00:46:56 +00:00
+								function util.hasCJKChar(str)
 								    return string.match(str, "[\228-\234][\128-\191].") ~= nil
 								end
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								--- Split texts into a list of words, spaces and punctuation marks.
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								---- @string text text to split
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								---- @treturn table list of words, spaces and punctuation marks
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								function util.splitToWords(text)
 								    local wlist = {}
-												split ancient Greek words with spacing character
This should fix #1705.

											
										
										
											2016-06-28 15:50:21 +00:00
+								    for word in util.gsplit(text, "[%s%p]+", true) do
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								        -- if space split word contains CJK characters
-												PR #2356 breaks CJK character splitting

											
										
										
											2016-11-26 00:46:56 +00:00
+								        if util.hasCJKChar(word) then
-												split ancient Greek words with spacing character
This should fix #1705.

											
										
										
											2016-06-28 15:50:21 +00:00
+								            -- split with CJK characters
 								            for char in util.gsplit(word, "[\228-\234\192-\255][\128-\191]+", true) do
 								                table.insert(wlist, char)
 								            end
 								        else
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								            table.insert(wlist, word)
 								        end
 								    end
 								    return wlist
 								end
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								-- We don't want to split on a space if it is followed by some
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								-- specific punctuation marks : e.g. "word :" or "word )"
 								-- (In French, there is a non-breaking space before a colon, and it better
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								-- not be wrapped there.)
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								local non_splittable_space_tailers = ":;,.!?)]}$%=-+*/|<>»”"
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								-- Same if a space has some specific other punctuation mark before it
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								local non_splittable_space_leaders = "([{$=-+*/|<>«“"
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
-												textboxwidget: enhanced CJK text wrapping

(changes only in util.isSplitable(), but used by textboxwidget)
Implemented line breaking rules from :
https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages

											
										
										
											2016-12-15 07:58:58 +00:00
 								-- Similar rules exist for CJK text. Taken from :
 								-- https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								local cjk_non_splittable_tailers = table.concat( {
-												textboxwidget: enhanced CJK text wrapping

(changes only in util.isSplitable(), but used by textboxwidget)
Implemented line breaking rules from :
https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages

											
										
										
											2016-12-15 07:58:58 +00:00
+								    -- Simplified Chinese
 								    "!%),.:;?]}¢°·’\"†‡›℃∶、。〃〆〕〗〞﹚﹜！＂％＇），．：；？！］｝～",
 								    -- Traditional Chinese
 								    "!),.:;?]}¢·–—’\"•、。〆〞〕〉》」︰︱︲︳﹐﹑﹒﹓﹔﹕﹖﹘﹚﹜！），．：；？︶︸︺︼︾﹀﹂﹗］｜｝､",
 								    -- Japanese
 								    ")]｝〕〉》」』】〙〗〟’\"｠»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。.",
 								    -- Korean
 								    "!%),.:;?]}¢°’\"†‡℃〆〈《「『〕！％），．：；？］｝",
 								})
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								local cjk_non_splittable_leaders = table.concat( {
-												textboxwidget: enhanced CJK text wrapping

(changes only in util.isSplitable(), but used by textboxwidget)
Implemented line breaking rules from :
https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages

											
										
										
											2016-12-15 07:58:58 +00:00
+								    -- Simplified Chinese
 								    "$(£¥·‘\"〈《「『【〔〖〝﹙﹛＄（．［｛￡￥",
 								    -- Traditional Chinese
 								    "([{£¥‘\"‵〈《「『〔〝︴﹙﹛（｛︵︷︹︻︽︿﹁﹃﹏",
 								    -- Japanese
 								    "([｛〔〈《「『【〘〖〝‘\"｟«",
 								    -- Korean
 								    "$([{£¥‘\"々〇〉》」〔＄（［｛｠￥￦#",
 								})
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								local cjk_non_splittable = table.concat( {
-												textboxwidget: enhanced CJK text wrapping

(changes only in util.isSplitable(), but used by textboxwidget)
Implemented line breaking rules from :
https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages

											
										
										
											2016-12-15 07:58:58 +00:00
+								    -- Japanese
 								    "—…‥〳〴〵",
 								})
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Test whether a string can be separated by this char for multi-line rendering.
-												textboxwidget: even better text wrapping

util.isSplitable() accepts now also the previous char to help
decide if a space can be used to split a line.
TextBoxWidget:_splitCharWidthList() : simplified logic

											
										
										
											2016-12-12 22:41:16 +00:00
+								-- Optional next or prev chars may be provided to help make the decision
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								---- @string c
 								---- @string next_c
 								---- @string prev_c
 								---- @treturn boolean true if splittable, false if not
 								function util.isSplittable(c, next_c, prev_c)
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								    if util.isCJKChar(c) then
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								        -- a CJKChar is a word in itself, and so is splittable
 								        if cjk_non_splittable:find(c, 1, true) then
-												textboxwidget: enhanced CJK text wrapping

(changes only in util.isSplitable(), but used by textboxwidget)
Implemented line breaking rules from :
https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages

											
										
										
											2016-12-15 07:58:58 +00:00
+								            -- except a few of them
 								            return false
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								        elseif next_c and cjk_non_splittable_tailers:find(next_c, 1, true) then
-												textboxwidget: enhanced CJK text wrapping

(changes only in util.isSplitable(), but used by textboxwidget)
Implemented line breaking rules from :
https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages

											
										
										
											2016-12-15 07:58:58 +00:00
+								            -- but followed by a char that is not permitted at start of line
 								            return false
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								        elseif prev_c and cjk_non_splittable_leaders:find(prev_c, 1, true) then
-												textboxwidget: enhanced CJK text wrapping

(changes only in util.isSplitable(), but used by textboxwidget)
Implemented line breaking rules from :
https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages

											
										
										
											2016-12-15 07:58:58 +00:00
+								            -- but preceded by a char that is not permitted at end of line
 								            return false
 								        else
 								            -- we can split on this CJKchar
 								            return true
 								        end
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								    elseif c == " " then
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								        -- we only split on a space (so a punctuation mark sticks to prev word)
-												textboxwidget: even better text wrapping

util.isSplitable() accepts now also the previous char to help
decide if a space can be used to split a line.
TextBoxWidget:_splitCharWidthList() : simplified logic

											
										
										
											2016-12-12 22:41:16 +00:00
+								        -- if next_c or prev_c is provided, we can make a better decision
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								        if next_c and non_splittable_space_tailers:find(next_c, 1, true) then
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								            -- this space is followed by some punctuation mark that is better kept with us
-												textboxwidget: even better text wrapping

util.isSplitable() accepts now also the previous char to help
decide if a space can be used to split a line.
TextBoxWidget:_splitCharWidthList() : simplified logic

											
										
										
											2016-12-12 22:41:16 +00:00
+								            return false
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								        elseif prev_c and non_splittable_space_leaders:find(prev_c, 1, true) then
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								            -- this space is lead by some punctuation mark that is better kept with us
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								            return false
 								        else
 								            -- we can split on this space
 								            return true
 								        end
 								    end
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								    -- otherwise, not splittable
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								    return false
-												add cursor functionality

											
										
										
											2016-04-21 14:13:10 +00:00
+								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Gets filesystem type of a path.
 								--
 								-- Checks if the path occurs in <code>/proc/mounts</code>
 								---- @string path an absolute path
 								---- @treturn string filesystem type
-												Sanitize filename for vfat, fix #2433 (#2464)


											
										
										
											2017-01-10 00:05:15 +00:00
+								function util.getFilesystemType(path)
 								    local mounts = io.open("/proc/mounts", "r")
 								    if not mounts then return nil end
 								    local type
 								    while true do
 								        local line
 								        local mount = {}
 								        line = mounts:read()
 								        if line == nil then
 								            break
 								        end
 								        for param in line:gmatch("%S+") do table.insert(mount, param) end
 								        if string.match(path, mount[2]) then
 								            type = mount[3]
 								            if mount[2] ~= '/' then
 								                break
 								            end
 								        end
 								    end
 								    mounts:close()
 								    return type
 								end
-												ReaderDictionary: add info message about installing dictionaries

Fixes #2816.

											
										
										
											2017-04-26 06:12:25 +00:00
+								--- Checks if directory is empty.
 								---- @string path
 								---- @treturn bool
 								function util.isEmptyDir(path)
-												util.lua: fix lfs require in isEmptyDir()

Pointed out by @poire-z https://github.com/koreader/koreader/commit/ed7937f65d0891f54491be8f77f9864edaaad460#commitcomment-21956632
											
										
										
											2017-04-29 06:57:50 +00:00
+								    local lfs = require("libs/libkoreader-lfs")
-												[fix] util.isEmptyDir() crashes on non-existent dir

lfs.dir will crash rather than return nil if directory doesn't exist

Proper fix for 9f5e44670183d046054f87b4820bf5d1b35b3b12 which is nothing but a workaround. However, I do think creating more of those data dirs automatically is more user-friendly because otherwise Android users will have to look it up or guess.

											
										
										
											2017-08-12 13:01:59 +00:00
+								    -- lfs.dir will crash rather than return nil if directory doesn't exist O_o
 								    local ok, iter, dir_obj = pcall(lfs.dir, path)
 								    if not ok then return end
 								    for filename in iter, dir_obj do
-												ReaderDictionary: add info message about installing dictionaries

Fixes #2816.

											
										
										
											2017-04-26 06:12:25 +00:00
+								        if filename ~= '.' and filename ~= '..' then
 								            return false
 								        end
 								    end
 								    return true
 								end
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
+								--- Checks if the given path exists. Doesn't care if it's a file or directory.
 								---- @string path
 								---- @treturn bool
 								function util.pathExists(path)
 								    -- Only the "mode" attribute is requested; whether it names a file or a
 								    -- directory is irrelevant, a nil result is what signals a missing path.
 								    local mode = require("libs/libkoreader-lfs").attributes(path, "mode")
 								    return mode ~= nil
 								end
 								--- As `mkdir -p`.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								-- Unlike [lfs.mkdir](https://keplerproject.github.io/luafilesystem/manual.html#mkdir)(),
 								-- does not error if the directory already exists, and creates intermediate directories as needed.
 								-- @string path the directory to create
 								-- @treturn bool true on success; nil, err_message on error
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
function util.makePath(path)
    -- Trailing slashes are dropped so "a/b/" and "a/b" are treated alike
    -- (only gsub's first return value is kept by the assignment).
    path = path:gsub("/+$", "")
    -- Base case: an empty path is what is left of "/" after stripping, and it is
    -- also the parent util.splitFilePathName() reports for a bare relative name
    -- such as "foo". There is nothing to create in either case, and recursing on
    -- "" would never terminate.
    if path == "" then return true end
    if util.pathExists(path) then return true end
    -- Create the parent first; the extra parentheses keep only the directory part.
    local success, err = util.makePath((util.splitFilePathName(path)))
    if not success then
        -- Append the path being handled at each level so the failing chain is visible.
        return nil, err.." (creating "..path..")"
    end
    local lfs = require("libs/libkoreader-lfs")
    -- Hands back lfs.mkdir()'s own results: true on success; nil, err_message on error.
    return lfs.mkdir(path)
end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Replaces characters that are invalid in filenames.
 								--
 								-- Replaces the characters <code>\/:*?"<>|</code> with an <code>_</code>.
 								-- These characters are problematic on Windows filesystems. On Linux only
 								-- <code>/</code> poses a problem.
 								---- @string str filename
 								---- @treturn string sanitized filename
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
-- Substitutes "_" for every character of `str` found in the set below.
-- Gives back string.gsub()'s results (new string, substitution count), or
-- nothing at all when `str` is nil/false.
local function replaceAllInvalidChars(str)
    if not str then return end
    -- NOTE(review): the commas inside the brackets are literal members of the
    -- set, so "," is replaced as well -- confirm whether that is intended.
    return str:gsub('[\\,%/,:,%*,%?,%",%<,%>,%|]', '_')
end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Replaces slash with an underscore.
 								---- @string str
 								---- @treturn string
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
-- Substitutes "_" for every "/" in `str`.
-- Gives back string.gsub()'s results (new string, substitution count), or
-- nothing at all when `str` is nil/false.
local function replaceSlashChar(str)
    if not str then return end
    return str:gsub('%/', '_')
end
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--[[--
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								Replaces characters that are invalid in filenames.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
 								Replaces the characters `\/:*?"<>|` with an `_` unless an optional path is provided. These characters are problematic on Windows filesystems. On Linux only the `/` poses a problem.
 								If an optional path is provided, @{util.getFilesystemType}() will be used to determine whether stricter VFAT restrictions should be applied.
 								]]
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								---- @string str
 								---- @string path
 								---- @int limit
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								---- @treturn string safe filename
-												[fix] util.getSafeFilename() maximum extension length (#5067)

Strip HTML and do some semi-intelligent detection of faux extensions (i.e., more than 10 characters probably isn't one).

Fixes #5049.
											
										
										
											2019-06-10 15:06:13 +00:00
function util.getSafeFilename(str, path, limit, limit_ext)
    -- VFAT supports a maximum of 255 UCS-2 characters, although it's probably treated as UTF-16 by Windows
    -- default to a slightly lower limit just in case
    limit = limit or 240
    -- longest suffix (in bytes) still treated as a file extension
    limit_ext = limit_ext or 10

    -- Without a path the strictest replacement is used; with one, only the
    -- filesystem types listed here keep it, the others just lose their slashes.
    local sanitize = replaceAllInvalidChars
    if path then
        local fs_type = util.getFilesystemType(path)
        if not (fs_type == "vfat" or fs_type == "fuse.fsp") then
            sanitize = replaceSlashChar
        end
    end

    local filename, suffix = util.splitFileNameSuffix(str)
    if #suffix > limit_ext then
        -- probably not an actual file extension, or at least not one we'd be
        -- dealing with, so treat the whole string as the name
        filename, suffix = str, nil
    end

    filename = util.htmlToPlainTextIfHtml(filename)
    -- the byte-wise cut might result in broken UTF-8, which we don't want in
    -- the result: whatever is invalid afterwards is dropped
    filename = util.fixUtf8(filename:sub(1, limit), "")

    -- the assignment keeps only the sanitized string, not gsub's count
    local safe_filename = sanitize(filename)
    if suffix and suffix ~= "" then
        safe_filename = safe_filename .. "." .. sanitize(suffix)
    end
    return safe_filename
end
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
+								--- Splits a file into its directory path and file name.
 								--- If the given path has a trailing /, returns the entire path as the directory
 								--- path and "" as the file name.
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								---- @string file
 								---- @treturn string path, filename
-												evernote: ReadHistory integration and text file output (#2498)



											
										
										
											2017-01-21 09:32:42 +00:00
function util.splitFilePathName(file)
    if file == nil or file == "" then return "", "" end
    -- The greedy first capture runs up to and including the LAST slash, the
    -- second one takes whatever follows it (possibly nothing).
    local directory, name = string.match(file, "^(.*/)(.*)$")
    if directory == nil then
        -- no slash anywhere: everything is the file name
        return "", file
    end
    -- Exactly two values are returned on every path; a trailing string.gsub()
    -- here would also leak its substitution count as a third one.
    return directory, name
end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Splits a file name into its pure file name and suffix
 								---- @string file
 								---- @treturn string path, extension
-												evernote: ReadHistory integration and text file output (#2498)



											
										
										
											2017-01-21 09:32:42 +00:00
function util.splitFileNameSuffix(file)
    if file == nil or file == "" then return "", "" end
    -- The greedy first capture runs up to (not including) the LAST dot, the
    -- second one takes whatever follows it (possibly nothing).
    local stem, suffix = string.match(file, "^(.*)%.(.*)$")
    if stem == nil then
        -- no dot anywhere: there is no suffix
        return file, ""
    end
    -- Exactly two values are returned on every path; a trailing string.gsub()
    -- here would also leak its substitution count as a third one.
    return stem, suffix
end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Gets file extension
 								---- @string filename
 								---- @treturn string extension
-												More sorting orders in file manager (#2535)


											
										
										
											2017-02-12 02:55:31 +00:00
function util.getFileNameSuffix(file)
    -- select(2, ...) skips the base name; the outer parentheses make sure
    -- exactly one value (the suffix) is returned.
    return (select(2, util.splitFileNameSuffix(file)))
end
-												Allow running shell scripts from the FileManager/Favorites (#5804)

* Allow running Shell/Python scripts from the FM

* Show an InfoMessage before/after running the script

Since we're blocking the UI ;).

* Allow running scripts from the favorites menu, too.
											
										
										
											2020-02-03 19:08:18 +00:00
+								--- Returns true if the file is a script we allow running
 								--- Basically a helper method to check a specific list of file extensions.
 								---- @string filename
 								---- @treturn boolean
 								function util.isAllowedScript(file)
 								    -- the comparison is case-insensitive: the suffix is lowercased first
 								    local ext = string.lower(util.getFileNameSuffix(file))
 								    return ext == "sh" or ext == "py"
 								end
 								--- Companion helper function that returns the script's language,
 								--- based on the file extension.
 								---- @string filename
 								---- @treturn string (lowercase) (or nil if !isAllowedScript)
 								function util.getScriptType(file)
 								    -- lowercased suffix -> language name
 								    local languages = { sh = "shell", py = "python" }
 								    local language = languages[string.lower(util.getFileNameSuffix(file))]
 								    -- any other suffix falls through and returns nothing
 								    if language then
 								        return language
 								    end
 								end
-												Added util.getFriendlySize() (#3381)

* Added util.getFriendlySize()

* Allow for GB

											
										
										
											2017-10-20 15:48:32 +00:00
+								--- Gets human friendly size as string
 								---- @int size (bytes)
-												util.getFriendlySize(): add option to right align

Left align by default, but allow right alignment by
padding left with spaces.

											
										
										
											2019-12-17 12:00:35 +00:00
+								---- @bool right_align (by padding with spaces on the left)
-												Added util.getFriendlySize() (#3381)

* Added util.getFriendlySize()

* Allow for GB

											
										
										
											2017-10-20 15:48:32 +00:00
+								---- @treturn string
-												util.getFriendlySize(): add option to right align

Left align by default, but allow right alignment by
padding left with spaces.

											
										
										
											2019-12-17 12:00:35 +00:00
function util.getFriendlySize(size, right_align)
    -- The fixed-width variants pad with spaces on the left, which is what
    -- right-aligns the number.
    local frac_format, deci_format = "%.1f", "%d"
    if right_align then
        frac_format, deci_format = "%6.1f", "%6d"
    end
    size = tonumber(size)
    -- tonumber() gives nil for anything it cannot convert; nothing is returned then
    if size == nil then return end
    -- The thresholds are strict: a size of exactly 1024 is still reported in bytes,
    -- exactly 1024*1024 in KB, and so on.
    if size > 1024*1024*1024 then
        -- @translators This is an abbreviation for the gigabyte, a unit of computer memory or data storage capacity.
        return T(_("%1 GB"), string.format(frac_format, size/1024/1024/1024))
    elseif size > 1024*1024 then
        -- @translators This is an abbreviation for the megabyte, a unit of computer memory or data storage capacity.
        return T(_("%1 MB"), string.format(frac_format, size/1024/1024))
    elseif size > 1024 then
        -- @translators This is an abbreviation for the kilobyte, a unit of computer memory or data storage capacity.
        return T(_("%1 KB"), string.format(frac_format, size/1024))
    end
    -- @translators This is an abbreviation for the byte, a unit of computer memory or data storage capacity.
    return T(_("%1 B"), string.format(deci_format, size))
end
-												Added util.getFormattedSize() (#3383)


											
										
										
											2017-10-20 17:29:52 +00:00
+								--- Gets formatted size as string (1273334 => "1,273,334")
 								---- @int size (bytes)
 								---- @treturn string
 								function util.getFormattedSize(size)
 								    local s = tostring(size)
 								    -- Only the leading run of integer digits gets grouped: a minus sign must not
 								    -- end up next to a separator ("-,123,456"), and whatever follows the integer
 								    -- part (fraction, exponent) is passed through untouched.
 								    local sign, digits, rest = s:match("^(%-?)(%d+)(.*)$")
 								    if not digits then
 								        -- not a plain decimal number (e.g. "nil", "inf"): nothing to group
 								        return s
 								    end
 								    -- Group from the right: reverse, put a comma after every three digits, reverse
 								    -- back, then drop the comma that ends up in front when the digit count is a
 								    -- multiple of three.
 								    local grouped = digits:reverse():gsub("(%d%d%d)", "%1,")
 								    grouped = grouped:reverse():gsub("^,", "")
 								    return sign .. grouped .. rest
 								end
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--[[--
 								Replaces invalid UTF-8 characters with a replacement string.
 								Based on <http://notebook.kulchenko.com/programming/fixing-malformed-utf8-in-lua>.
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								c.f.,    FixUTF8 @ <https://github.com/pkulchenko/ZeroBraneStudio/blob/master/src/util.lua>.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								@string str the string to be checked for invalid characters
 								@string replacement the string to replace invalid characters with
 								@treturn string valid UTF-8
 								]]
-												Added util.fixUtf8 (#2704)

* Remove invalid UTF-8 chars from OPDS
* add unit tests
											
										
										
											2017-04-02 14:17:49 +00:00
-- Byte sequences accepted by util.fixUtf8, as { pattern anchored at the scan
-- position, length in bytes }. The lead-byte ranges do not overlap, so at most
-- one entry can match at any given position.
local UTF8_SEQUENCES = {
    { "^[%z\1-\127]", 1 },
    { "^[\194-\223][\128-\191]", 2 },
    { "^\224[\160-\191][\128-\191]", 3 },
    { "^[\225-\236][\128-\191][\128-\191]", 3 },
    { "^\237[\128-\159][\128-\191]", 3 },
    { "^[\238-\239][\128-\191][\128-\191]", 3 },
    { "^\240[\144-\191][\128-\191][\128-\191]", 4 },
    { "^[\241-\243][\128-\191][\128-\191][\128-\191]", 4 },
    { "^\244[\128-\143][\128-\191][\128-\191]", 4 },
}

function util.fixUtf8(str, replacement)
    local pos, len = 1, #str
    while pos <= len do
        -- length of the accepted sequence starting at pos, if there is one
        local width
        for i = 1, #UTF8_SEQUENCES do
            if str:find(UTF8_SEQUENCES[i][1], pos) then
                width = UTF8_SEQUENCES[i][2]
                break
            end
        end
        if width then
            pos = pos + width
        else
            -- Swap the single offending byte for the replacement, then resume
            -- right after it (the replacement itself is never re-scanned).
            str = str:sub(1, pos - 1) .. replacement .. str:sub(pos + 1)
            pos = pos + #replacement
            len = len + #replacement - 1
        end
    end
    return str
end
-												Merge various information into systemstat (#2764)

* Merge various information to systemstat
											
										
										
											2017-04-14 19:12:28 +00:00
+								--- Splits input string with the splitter into a table. This function ignores the last empty entity.
 								--
 								--- @string str the string to be split
 								--- @string splitter
 								--- @bool capture_empty_entity
 								--- @treturn an array-like table
 								function util.splitToArray(str, splitter, capture_empty_entity)
 								    local pieces, count = {}, 0
 								    -- the third argument (capture) is always false: delimiters are never collected
 								    for piece in util.gsplit(str, splitter, false, capture_empty_entity) do
 								        count = count + 1
 								        pieces[count] = piece
 								    end
 								    return pieces
 								end
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								--- Convert a Unicode codepoint (number) to UTF-8 char
 								--- c.f., <https://stackoverflow.com/a/4609989>
 								---     & <https://stackoverflow.com/a/38492214>
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								--- See utf8charcode in ffi/util for a decoder.
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								--
 								--- @int c Unicode codepoint
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								--- @treturn string UTF-8 char
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								function util.unicodeCodepointToUtf8(c)
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								    if c < 0x80 then
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								        return string.char(c)
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								    elseif c < 0x800 then
 								        return string.char(
 								                bor(0xC0, rshift(c, 6)),
 								                bor(0x80, band(c, 0x3F))
 								        )
 								    elseif c < 0x10000 then
 								        if c >= 0xD800 and c <= 0xDFFF then
 								            return '<EFBFBD>' -- Surrogates -> U+FFFD REPLACEMENT CHARACTER
 								        end
 								        return string.char(
 								                bor(0xE0, rshift(c, 12)),
 								                bor(0x80, band(rshift(c, 6), 0x3F)),
 								                bor(0x80, band(c, 0x3F))
 								        )
 								    elseif c < 0x110000 then
 								        return string.char(
 								                bor(0xF0, rshift(c, 18)),
 								                bor(0x80, band(rshift(c, 12), 0x3F)),
 								                bor(0x80, band(rshift(c, 6), 0x3F)),
 								                bor(0x80, band(c, 0x3F))
 								        )
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    else
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								        return '<EFBFBD>' -- Invalid -> U+FFFD REPLACEMENT CHARACTER
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    end
 								end
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								-- we need to use an array of arrays to keep them ordered as written
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								local HTML_ENTITIES_TO_UTF8 = {
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								    {"&lt;", "<"},
 								    {"&gt;", ">"},
 								    {"&quot;", '"'},
 								    {"&apos;", "'"},
 								    {"&nbsp;", "\xC2\xA0"},
 								    {"&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end},
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								    {"&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x, 16)) end},
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								    {"&amp;", "&"}, -- must be last
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								}
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--[[--
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								Replace HTML entities with their UTF-8 encoded equivalent in text.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
 								Supports only basic ones and those with numbers (no support for named entities like `&eacute;`).
 								@string text text with HTML entities
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								@treturn string UTF-8 text
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								]]
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								function util.htmlEntitiesToUtf8(text)
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								    for _, t in ipairs(HTML_ENTITIES_TO_UTF8) do
 								        text = text:gsub(t[1], t[2])
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    end
 								    return text
 								end
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--[[--
 								Convert simple HTML to plain text.
 								This may fail on complex HTML (with styles, scripts, comments), but should be fine enough with simple HTML as found in EPUB's `<dc:description>`.
 								@string text HTML text
 								@treturn string plain text
 								]]
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								function util.htmlToPlainText(text)
 								    -- Replace <br> and <p> with \n
 								    text = text:gsub("%s*<%s*br%s*/?>%s*", "\n") -- <br> and <br/>
 								    text = text:gsub("%s*<%s*p%s*>%s*", "\n") -- <p>
 								    text = text:gsub("%s*</%s*p%s*>%s*", "\n") -- </p>
 								    text = text:gsub("%s*<%s*p%s*/>%s*", "\n") -- standalone <p/>
 								    -- Remove all HTML tags
 								    text = text:gsub("<[^>]*>", "")
 								    -- Convert HTML entities
 								    text = util.htmlEntitiesToUtf8(text)
 								    -- Trim spaces and new lines at start and end
 								    text = text:gsub("^[\n%s]*", "")
 								    text = text:gsub("[\n%s]*$", "")
 								    return text
 								end
 								--- Convert HTML to plain text if text seems to be HTML
 								-- Detection of HTML is simple and may raise false positives
 								-- or negatives, but seems quite good at guessing content type
 								-- of text found in EPUB's <dc:description>.
 								--
 								--- @string text the string with possibly some HTML
 								--- @treturn string cleaned text
 								function util.htmlToPlainTextIfHtml(text)
 								    -- gsub() is used here only for its second result: the number of matches.
 								    local _, tag_count = text:gsub("<%w+.->", "")
 								    local looks_like_html = tag_count > 0
 								    if not looks_like_html then
 								        -- No <tag> found, but the text may be badly/twice encoded HTML
 								        -- containing things like "&lt;br&gt;"
 								        local encoded_tag_count
 								        _, encoded_tag_count = text:gsub("&lt;%a+&gt;", "")
 								        if encoded_tag_count > 0 then
 								            looks_like_html = true
 								            -- undo one of the two encoding passes
 								            text = util.htmlEntitiesToUtf8(text)
 								        end
 								    end
 								    local cleaned
 								    if looks_like_html then
 								        cleaned = util.htmlToPlainText(text)
 								    else
 								        -- Text ending with ]]> probably comes from a <![CDATA[ .. ]]> section
 								        -- that crengine extracted correctly except for the closing marker:
 								        -- remove that marker.
 								        cleaned = text:gsub("]]>%s*$", "")
 								    end
 								    return cleaned
 								end
-												HTML dictionary support (#3573)

* Adds a generic HTML widget modeled after the text widget, and HTML dictionary support. HTML dictionaries can have their own CSS (for X.ifo it must be X.css). The base CSS just resets the margin and sets the font.

Note that the widget doesn't handle links, that wasn't needed for the dictionary.

Closes <https://github.com/koreader/koreader/issues/1776>.

* Show tag stripped HTML if the dictionary entry isn't valid HTML

* Simulate the normal <br/> behavior

* Bump base

											
										
										
											2018-01-07 19:24:15 +00:00
+								--- Encode the HTML entities in a string
-												HTML dictionary link support (#3603)


											
										
										
											2018-01-15 22:51:43 +00:00
+								--- @string text the string to escape
-												HTML dictionary support (#3573)

* Adds a generic HTML widget modeled after the text widget, and HTML dictionary support. HTML dictionaries can have their own CSS (for X.ifo it must be X.css). The base CSS just resets the margin and sets the font.

Note that the widget doesn't handle links, that wasn't needed for the dictionary.

Closes <https://github.com/koreader/koreader/issues/1776>.

* Show tag stripped HTML if the dictionary entry isn't valid HTML

* Simulate the normal <br/> behavior

* Bump base

											
										
										
											2018-01-07 19:24:15 +00:00
+								-- Taken from https://github.com/kernelsauce/turbo/blob/e4a35c2e3fb63f07464f8f8e17252bea3a029685/turbo/escape.lua#L58-L70
 								function util.htmlEscape(text)
 								    -- Replacement for each escaped character. The pattern below also matches
 								    -- "{" and "}", which have no entry here: gsub() keeps such matches as-is.
 								    local entity_for = {
 								        ['"'] = "&quot;",
 								        ["'"] = "&#39;",
 								        ["&"] = "&amp;",
 								        ["/"] = "&#47;",
 								        ["<"] = "&lt;",
 								        [">"] = "&gt;",
 								    }
 								    -- Both gsub() results are returned: the escaped string and the match count.
 								    return text:gsub("[}{\">/<'&]", entity_for)
 								end
-												HTML dictionary link support (#3603)


											
										
										
											2018-01-15 22:51:43 +00:00
+								--- Escape list for shell usage
 								--- @table args the list of arguments to escape
 								--- @treturn string the escaped and concatenated arguments
 								function util.shell_escape(args)
 								    local quoted = {}
 								    for idx, raw in ipairs(args) do
 								        -- Inside single quotes the only character needing care is the single
 								        -- quote itself: close the quotes, emit an escaped quote, reopen them.
 								        local body = raw:gsub("'", "'\\''")
 								        quoted[idx] = "'" .. body .. "'"
 								    end
 								    return table.concat(quoted, " ")
 								end
-												Do not execute background runner if device is suspended (#3608)


											
										
										
											2018-01-17 08:17:53 +00:00
+								--- Clear all the elements from a table without reassignment.
 								--- @table t the table to be cleared
 								function util.clearTable(t)
 								    -- Clears integer keys #t down to 0 (index 0 included); other keys are untouched.
 								    for idx = #t, 0, -1 do
 								        t[idx] = nil
 								    end
 								end
-												CloudStorage: Allow use reserved characters in FTP username and FTP password  (#3924)

Depends on RFC 3986 compliant util.urlEncode() and adds unit tests for the new functions.
											
										
										
											2018-05-04 15:06:58 +00:00
+								--- Encode URL also known as percent-encoding see https://en.wikipedia.org/wiki/Percent-encoding
 								--- @string url the string to encode
 								--- @treturn string encoded string
 								--- Taken from https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
 								function util.urlEncode(url)
 								    if url == nil then
 								        return
 								    end
 								    -- Line feeds are turned into CR LF before being percent-encoded
 								    local with_crlf = url:gsub("\n", "\r\n")
 								    -- Every character outside the allowed set becomes %XX (uppercase hex of its byte)
 								    local encoded = with_crlf:gsub("([^%w%-%.%_%~%!%*%'%(%)])", function(ch)
 								        return string.format("%%%02X", string.byte(ch))
 								    end)
 								    return encoded
 								end
 								--- Decode URL (reverse process to util.urlEncode())
 								--- @string url the string to decode
 								--- @treturn string decoded string
 								--- Taken from https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
 								function util.urlDecode(url)
 								    if url == nil then
 								        return
 								    end
 								    -- Each "%" followed by two hex digits becomes the byte with that value
 								    local decoded = url:gsub("%%(%x%x)", function(hex_pair)
 								        local byte_value = tonumber(hex_pair, 16)
 								        return string.char(byte_value)
 								    end)
 								    return decoded
 								end
-												Text editor plugin, InputDialog enhancements (#4135)

This plugin mostly sets up a "Text editor>" submenu, that allows
browsing files, creating a new file, and managing a history of
previously opened file for easier re-opening.
It restore previous scroll and cursor positions on re-opening.
Additional "Check lua" syntax button is added when editing
a .lua file, and prevent saving if errors.
The text editing is mainly provided by the enhanced InputDialog.

InputDialog: added a few more options, the main one being
'save_callback', which will add a Save and Close buttons
and manage saving/discarding/exiting.
If "fullscreen" and "add_nav_bar", will add a show/hide keyboard
button to it.
Moved the preset buttons setup code in their own InputDialog
methods for clarity of the main init code.
Buttons are now enabled/disabled depending on context for feedback
(eg: Save is disabled as long as text has not been modified).

Added util.checkLuaSyntax(lua_string), might be useful elsewhere.
											
										
										
											2018-08-06 19:16:30 +00:00
+								--- Check lua syntax of string
 								--- @string lua_text lua code text
 								--- @treturn string with parsing error, nil if syntax ok
 								function util.checkLuaSyntax(lua_text)
 								    -- loadstring() only compiles the chunk, it does not run it
 								    local chunk, message = loadstring(lua_text)
 								    if not chunk then
 								        -- Replace: [string "blah blah..."]:3: '=' expected near '123'
 								        -- with: Line 3: '=' expected near '123'
 								        local readable = message:gsub("%[string \".-%\"]:", "Line ")
 								        return readable
 								    end
 								    return nil
 								end
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								--- Unpack an archive.
 								-- Extract the contents of an archive, detecting its format by
 								-- filename extension. Inspired by luarocks archive_unpack()
 								-- @param archive string: Filename of archive.
 								-- @param extract_to string: Destination directory.
 								-- @return boolean or (boolean, string): true on success, false and an error message on failure.
 								function util.unpackArchive(archive, extract_to)
 								    -- Extracts `archive` into the directory `extract_to` by running the
 								    -- "./tar" binary (path relative to the current working directory).
 								    -- Only tarballs are handled: .tar.bz2, .tar.gz, .tar.lz and .tgz.
 								    -- Returns true on success, or false plus a translated error message.
 								    dbg.dassert(type(archive) == "string")
 								    dbg.dassert(type(extract_to) == "string")
 								    local BD = require("ui/bidi")

 								    local is_tarball = archive:match("%.tar%.bz2$") or archive:match("%.tar%.gz$")
 								        or archive:match("%.tar%.lz$") or archive:match("%.tgz$")
 								    if not is_tarball then
 								        return false, T(_("Couldn't extract archive:\n\n%1\n\nUnrecognized filename extension."), BD.filepath(archive))
 								    end

 								    -- Single-quote both paths for the shell: Lua's %q only produces a Lua
 								    -- string literal, inside which the shell would still expand $ and `.
 								    local command = ("./tar xf %s -C %s"):format(
 								        util.shell_escape({ archive }),
 								        util.shell_escape({ extract_to })
 								    )
 								    -- os.execute() returns a numeric exit status in Lua 5.1 (0 on success)
 								    -- but true/nil with Lua 5.2 semantics: accept both forms of success.
 								    local status = os.execute(command)
 								    if not (status == true or status == 0) then
 								        -- The path is an argument of T(), which substitutes %1, not of _()
 								        return false, T(_("Extracting archive failed:\n\n%1"), BD.filepath(archive))
 								    end
 								    return true
 								end
-												Allow toggling CRe's new dithering & scaling (#4922)

Smooth scaling toggle is per document, in the gear tab.
Dithering is in the Dev top menu ;).
											
										
										
											2019-04-18 21:26:53 +00:00
+								-- Simple startsWith / endsWith string helpers
 								-- c.f., http://lua-users.org/wiki/StringRecipes
 								-- @param str string: source string
 								-- @param start string: string to match
 								-- @return boolean: true on success
 								function util.stringStartsWith(str, start)
 								   -- Compare the leading #start bytes of str with start
 								   local head = str:sub(1, #start)
 								   return head == start
 								end
 								-- @param str string: source string
 								-- @param ending string: string to match
 								-- @return boolean: true on success
 								function util.stringEndsWith(str, ending)
 								   -- An empty ending always matches; str:sub(-0) would return the whole string
 								   if ending == "" then
 								      return true
 								   end
 								   local tail = str:sub(-#ending)
 								   return tail == ending
 								end
-												strip punctuations around word before searching
This should fix #1337.

											
										
										
											2015-02-01 09:40:34 +00:00
+								return util