-- koreader/frontend/util.lua

--[[--
This module contains miscellaneous helper functions for the KOReader frontend.
]]

local BaseUtil = require("ffi/util")
local Utf8Proc = require("ffi/utf8proc")
local lfs = require("libs/libkoreader-lfs")
local md5 = require("ffi/sha2").md5
local _ = require("gettext")
local C_ = _.pgettext
local T = BaseUtil.template

local lshift = bit.lshift
local rshift = bit.rshift
local band = bit.band
local bor = bit.bor

local util = {}

---- Strips all punctuation marks and spaces from a string.
---- @string text the string to be stripped
---- @treturn string stripped text
function util.stripPunctuation(text)
    if not text then return end
    -- strip ASCII punctuation marks around text
    -- and strip any generic punctuation marks (U+2000 - U+206F) in the text
    return text:gsub("\226[\128-\131][\128-\191]", ''):gsub("^%p+", ''):gsub("%p+$", '')
end

-- Various whitespace trimming helpers, from http://lua-users.org/wiki/CommonFunctions & http://lua-users.org/wiki/StringTrim
---- Remove leading whitespace from string.
---- @string s the string to be trimmed
---- @treturn string trimmed text
function util.ltrim(s)
    return (s:gsub("^%s*", ""))
end

---- Remove trailing whitespace from string.
---- @string s the string to be trimmed
---- @treturn string trimmed text
function util.rtrim(s)
    local n = #s
    while n > 0 and s:find("^%s", n) do
        n = n - 1
    end
    return s:sub(1, n)
end

---- Remove leading & trailing whitespace from string.
---- @string s the string to be trimmed
---- @treturn string trimmed text
function util.trim(s)
   local from = s:match"^%s*()"
   return from > #s and "" or s:match(".*%S", from)
end

--[[
-- Trim leading & trailing character `c` from string `s`
function util.trim_char(s, c)
    local from = s:match"^"..c.."*()"
    return from > #s and "" or s:match(".*[^"..c.."]", from)
end

-- Trim trailing character `c` from string `s`
function util.rtrim_char(s, c)
    local n = #s
    while n > 0 and s:find("^"..c, n) do
        n = n - 1
    end
    return s:sub(1, n)
end
--]]

--[[--
Splits a string by a pattern

Lua doesn't have a string.split() function and most of the time
you don't really need it because string.gmatch() is enough.
However string.gmatch() has one significant disadvantage for me:
You can't split a string while matching both the delimited
strings and the delimiters themselves without tracking positions
and substrings. The gsplit function below takes care of
this problem.

Author: Peter Odding

License: MIT/X11

Source: <a href="http://snippets.luacode.org/snippets/String_splitting_130">http://snippets.luacode.org/snippets/String_splitting_130</a>
]]
----@string str string to split
----@param pattern the pattern to split against
----@bool capture
----@bool capture_empty_entity
function util.gsplit(str, pattern, capture, capture_empty_entity)
    pattern = pattern and tostring(pattern) or '%s+'
    if (''):find(pattern) then
        error('pattern matches empty string!', 2)
    end
    return coroutine.wrap(function()
        local index = 1
        repeat
            local first, last = str:find(pattern, index)
            if first and last then
                if index < first or (index == first and capture_empty_entity) then
                    coroutine.yield(str:sub(index, first - 1))
                end
                if capture then
                    coroutine.yield(str:sub(first, last))
                end
                index = last + 1
            else
                if index <= #str then
                    coroutine.yield(str:sub(index))
                end
                break
            end
        until index > #str
    end)
end

--[[--
Compares values in two different tables.

Source: <https://stackoverflow.com/a/32660766/2470572>
]]
---- @param o1 Lua table
---- @param o2 Lua table
---- @bool ignore_mt
---- @treturn boolean
function util.tableEquals(o1, o2, ignore_mt)
    if o1 == o2 then return true end
    local o1Type = type(o1)
    local o2Type = type(o2)
    if o1Type ~= o2Type then return false end
    if o1Type ~= 'table' then return false end

    if not ignore_mt then
        local mt1 = getmetatable(o1)
        if mt1 and mt1.__eq then
            -- Compare using built in method
            return o1 == o2
        end
    end

    local keySet = {}

    for key1, value1 in pairs(o1) do
        local value2 = o2[key1]
        if value2 == nil or util.tableEquals(value1, value2, ignore_mt) == false then
            return false
        end
        keySet[key1] = true
    end

    for key2, _ in pairs(o2) do
        if not keySet[key2] then return false end
    end
    return true
end

--[[--
Makes a deep copy of a table.

Source: <https://stackoverflow.com/a/16077650/2470572>
]]
---- @param o Lua table
---- @treturn Lua table
function util.tableDeepCopy(o, seen)
  seen = seen or {}
  if o == nil then return nil end
  if seen[o] then return seen[o] end

  local no
  if type(o) == "table" then
    no = {}
    seen[o] = no

    for k, v in next, o, nil do
      no[util.tableDeepCopy(k, seen)] = util.tableDeepCopy(v, seen)
    end
    setmetatable(no, util.tableDeepCopy(getmetatable(o), seen))
  else -- number, string, boolean, etc
    no = o
  end
  return no
end

--- Returns number of keys in a table.
---- @param t Lua table
---- @treturn int number of keys in table t
function util.tableSize(t)
    local count = 0
    for _ in pairs(t) do count = count + 1 end
    return count
end

--- Append all elements from t2 into t1.
---- @param t1 Lua table
---- @param t2 Lua table
function util.arrayAppend(t1, t2)
    for _, v in ipairs(t2) do
        table.insert(t1, v)
    end
end

--[[--
Remove elements from an array, fast.

Swap & pop, like <http://lua-users.org/lists/lua-l/2013-11/msg00027.html> / <https://stackoverflow.com/a/28942022>, but preserving order.
c.f., <https://stackoverflow.com/a/53038524>

@table t Lua array to filter
@func keep_cb Filtering callback. Takes three arguments: table, index, new index. Returns true to *keep* the item. See link above for potential uses of the third argument.

@usage

local foo = { "a", "b", "c", "b", "d", "e" }
local function drop_b(t, i, j)
    -- Discard any item with value "b"
    return t[i] ~= "b"
end
util.arrayRemove(foo, drop_b)
]]
function util.arrayRemove(t, keep_cb)
    local j, n = 1, #t

    for i = 1, n do
        if keep_cb(t, i, j) then
            -- Move i's kept value to j's position, if it's not already there.
            if i ~= j then
                t[j] = t[i]
                t[i] = nil
            end
            -- Increment position of where we'll place the next kept value.
            j = j + 1
        else
            t[i] = nil
        end
    end

    return t
end

--- Reverse array elements in-place in table t
---- @param t Lua table
function util.arrayReverse(t)
    local i, j = 1, #t
    while i < j do
        t[i], t[j] = t[j], t[i]
        i = i + 1
        j = j - 1
    end
end

--- Test whether t contains a value equal to v
--- (or such a value that callback returns true),
--- and if so, return the index.
---- @param t Lua table
---- @param v
---- @func callback(v1, v2)
function util.arrayContains(t, v, cb)
    cb = cb or function(v1, v2) return v1 == v2 end
    for _k, _v in ipairs(t) do
        if cb(_v, v) then
            return _k
        end
    end
    return false
end

--- Test whether array t contains a reference to array n (at any depth at or below m)
---- @param t Lua table (array only)
---- @param n Lua table (array only)
---- @int m Max nesting level
function util.arrayReferences(t, n, m, l)
    if not m then m = 15 end
    if not l then l = 0 end
    if l > m then
        return false
    end

    if type(t) == "table" then
        if t == n then
            return true, l
        end

        for _, v in ipairs(t) do
            local matched, depth = util.arrayReferences(v, n, m, l + 1)
            if matched then
                return matched, depth
            end
        end
    end

    return false
end

-- A set of binary search implementations for plain arrays.
-- Should be easy to tweak for arrays of hashes (c.f., UIManager:schedule),
-- or arrays sorted in descending order (c.f., ReadHistory).
-- refs: https://en.wikipedia.org/wiki/Binary_search_algorithm
--       https://rosettacode.org/wiki/Binary_search
--- Perform a binary search for `value` in a *sorted* (ascending) `array`.
---- @param array Lua table (array only, sorted, ascending, every value must match the type of `value` and support comparison operators)
---- @param value
---- @return int index of value in array, or a (nil, insertion index) tuple if value was not found.
function util.bsearch(array, value)
    local lo = 1
    local hi = #array
    while lo <= hi do
        -- invariants: value > array[i] for all i < lo
        --             value < array[i] for all i > hi
        local mid = bit.rshift(lo + hi, 1)
        if array[mid] > value then
            hi = mid - 1
        elseif array[mid] < value then
            lo = mid + 1
        else
            return mid
        end
    end
    return nil, lo
end

--- Perform a leftmost insertion binary search for `value` in a *sorted* (ascending) `array`.
---- @param array Lua table (array only, sorted, ascending, every value must match the type of `value` and support comparison operators)
---- @param value
---- @return int leftmost insertion index of value in array.
function util.bsearch_left(array, value)
    local lo = 1
    local hi = #array
    while lo <= hi do
        -- invariants: value > array[i] for all i < lo
        --             value <= array[i] for all i > hi
        local mid = bit.rshift(lo + hi, 1)
        if array[mid] >= value then
            hi = mid - 1
        else
            lo = mid + 1
        end
    end
    return lo
end

--- Perform a rightmost insertion binary search for `value` in a *sorted* (ascending) `array`.
---- @param array Lua table (array only, sorted, ascending, every value must match the type of `value` and support comparison operators)
---- @param value
---- @return int rightmost insertion index of value in array.
function util.bsearch_right(array, value)
    local lo = 1
    local hi = #array
    while lo <= hi do
        -- invariants: value >= array[i] for all i < low
        --             value < array[i] for all i > high
        local mid = bit.rshift(lo + hi, 1)
        if array[mid] > value then
            hi = mid - 1
        else
            lo = mid + 1
        end
    end
    return lo
end

-- Merge t2 into t1, overwriting existing elements if they already exist
-- Probably not safe with nested tables (c.f., https://stackoverflow.com/q/1283388)
---- @param t1 Lua table
---- @param t2 Lua table
function util.tableMerge(t1, t2)
    for k, v in pairs(t2) do
        t1[k] = v
    end
end

--[[--
Gets last index of character in string (i.e., strrchr)

Returns the index within this string of the last occurrence of the specified character
or -1 if the character does not occur.

To find . you need to escape it.
]]
---- @string string
---- @string ch
---- @treturn int last occurrence or -1 if not found
function util.lastIndexOf(string, ch)
    local i = string:match(".*" .. ch .. "()")
    if i == nil then return -1 else return i - 1 end
end

--- Pattern which matches a single well-formed UTF-8 character, including
--- theoretical >4-byte extensions.
-- Taken from <https://www.lua.org/manual/5.4/manual.html#pdf-utf8.charpattern>
util.UTF8_CHAR_PATTERN = '[%z\1-\127\194-\253][\128-\191]*'

--- Reverse the individual greater-than-single-byte characters
-- @string string to reverse
-- Taken from <https://github.com/blitmap/lua-utf8-simple#utf8reverses>
function util.utf8Reverse(text)
    text = text:gsub(util.UTF8_CHAR_PATTERN, function (c) return #c > 1 and c:reverse() end)
    return text:reverse()
end

--- Splits string into a list of UTF-8 characters.
---- @string text the string to be split.
---- @treturn table list of UTF-8 chars
function util.splitToChars(text)
    local tab = {}
    if text ~= nil then
        local prevcharcode, charcode = 0
        -- Supports WTF-8 : https://en.wikipedia.org/wiki/UTF-8#WTF-8
        -- a superset of UTF-8, that includes UTF-16 surrogates
        -- in UTF-8 bytes (forbidden in well-formed UTF-8).
        -- We may get that from bad producers or converters.
        -- (luajson, used to decode Wikipedia API json, will not correctly decode
        -- this sample: <span lang=\"got\">\ud800\udf45</span> : single Unicode
        -- char https://www.compart.com/en/unicode/U+10345 and will give us
        -- "\xed\xa0\x80\xed\xbd\x85" as UTF8, instead of the correct "\xf0\x90\x8d\x85")
        -- From http://www.unicode.org/faq/utf_bom.html#utf16-1
        --   Surrogates are code points from two special ranges of
        --   Unicode values, reserved for use as the leading, and
        --   trailing values of paired code units in UTF-16. Leading,
        --   also called high, surrogates are from D800 to DBFF, and
        --   trailing, or low, surrogates are from DC00 to DFFF. They
        --   are called surrogates, since they do not represent
        --   characters directly, but only as a pair.
        local hi_surrogate
        local hi_surrogate_uchar
        for uchar in text:gmatch(util.UTF8_CHAR_PATTERN) do
            charcode = BaseUtil.utf8charcode(uchar)
            -- (not sure why we need this prevcharcode check; we could get
            -- charcode=nil with invalid UTF-8, but should we then really
            -- ignore the following charcode ?)
            if prevcharcode then -- utf8
                if charcode and charcode >= 0xD800 and charcode <= 0xDBFF then
                    if hi_surrogate then -- previous unconsumed one, add it even if invalid
                        table.insert(tab, hi_surrogate_uchar)
                    end
                    hi_surrogate = charcode
                    hi_surrogate_uchar = uchar -- will be added if not followed by low surrogate
                elseif hi_surrogate and charcode and charcode >= 0xDC00 and charcode <= 0xDFFF then
                    -- low surrogate following a high surrogate, good, let's make them a single char
                    charcode = lshift((hi_surrogate - 0xD800), 10) + (charcode - 0xDC00) + 0x10000
                    table.insert(tab, util.unicodeCodepointToUtf8(charcode))
                    hi_surrogate = nil
                else
                    if hi_surrogate then -- previous unconsumed one, add it even if invalid
                        table.insert(tab, hi_surrogate_uchar)
                    end
                    hi_surrogate = nil
                    table.insert(tab, uchar)
                end
            end
            prevcharcode = charcode
        end
    end
    return tab
end

--- Tests whether c is a CJK character
---- @string c
---- @treturn boolean true if CJK
function util.isCJKChar(c)
    -- Smallest CJK codepoint is 0x1100 which requires at least 3 utf8 bytes to
    -- encode (U+07FF is the largest codepoint that can be represented in 2
    -- bytes with utf8). So if the character is shorter than 3 bytes it's
    -- definitely not CJK and no need to decode it.
    if #c < 3 then
        return false
    end
    local code = BaseUtil.utf8charcode(c)
    -- The weird bracketing is intentional -- we use the lowest possible
    -- codepoint as a shortcut so if the codepoint is below U+1100 we
    -- immediately return false.
    return -- BMP (Plane 0)
            code >=  0x1100 and (code <=  0x11FF  or -- Hangul Jamo
           (code >=  0x2E80 and  code <=  0x9FFF) or -- Numerous CJK Blocks (NB: has some gaps)
           (code >=  0xA960 and  code <=  0xA97F) or -- Hangul Jamo Extended-A
           (code >=  0xAC00 and  code <=  0xD7AF) or -- Hangul Syllables
           (code >=  0xD7B0 and  code <=  0xD7FF) or -- Hangul Jame Extended-B
           (code >=  0xF900 and  code <=  0xFAFF) or -- CJK Compatibility Ideographs
           (code >=  0xFE30 and  code <=  0xFE4F) or -- CJK Compatibility Forms
           (code >=  0xFF00 and  code <=  0xFFEF) or -- Halfwidth and Fullwidth Forms
           -- SIP (Plane 2)
           (code >= 0x20000 and  code <= 0x2A6DF) or -- CJK Unified Ideographs Extension B
           (code >= 0x2A700 and  code <= 0x2B73F) or -- CJK Unified Ideographs Extension C
           (code >= 0x2B740 and  code <= 0x2B81F) or -- CJK Unified Ideographs Extension D
           (code >= 0x2B820 and  code <= 0x2CEAF) or -- CJK Unified Ideographs Extension E
           (code >= 0x2CEB0 and  code <= 0x2EBEF) or -- CJK Unified Ideographs Extension F
           (code >= 0x2F800 and  code <= 0x2FA1F) or -- CJK Compatibility Ideographs Supplement
           -- TIP (Plane 3)
           (code >= 0x30000 and  code <= 0x3134F))   -- CJK Unified Ideographs Extension G
end

--- Tests whether str contains CJK characters
---- @string str
---- @treturn boolean true if CJK
function util.hasCJKChar(str)
    for c in str:gmatch(util.UTF8_CHAR_PATTERN) do
        if util.isCJKChar(c) then
            return true
        end
    end
    return false
end

--- Split texts into a list of words, spaces and punctuation marks.
---- @string text text to split
---- @treturn table list of words, spaces and punctuation marks
function util.splitToWords(text)
    local wlist = {}
    for word in util.gsplit(text, "[%s%p]+", true) do
        -- if space split word contains CJK characters
        if util.hasCJKChar(word) then
            -- split all non-ASCII characters separately (FIXME ideally we
            -- would split only the CJK characters, but you cannot define CJK
            -- characters trivially with a byte-only Lua pattern).
            for char in util.gsplit(word, "[\192-\255][\128-\191]+", true) do
                table.insert(wlist, char)
            end
        else
            table.insert(wlist, word)
        end
    end
    return wlist
end

-- We don't want to split on a space if it is followed by some
-- specific punctuation marks : e.g. "word :" or "word )"
-- (In French, there is a non-breaking space before a colon, and it better
-- not be wrapped there.)
local non_splittable_space_tailers = ":;,.!?)]}$%=-+*/|<>»”"
-- Same if a space has some specific other punctuation mark before it
local non_splittable_space_leaders = "([{$=-+*/|<>«“"


-- Similar rules exist for CJK text. Taken from :
-- https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages

local cjk_non_splittable_tailers = table.concat( {
    -- Simplified Chinese
    "!%),.:;?]}¢°·’\"†‡›℃∶、。〃〆〕〗〞﹚﹜！＂％＇），．：；？！］｝～",
    -- Traditional Chinese
    "!),.:;?]}¢·–—’\"•、。〆〞〕〉》」︰︱︲︳﹐﹑﹒﹓﹔﹕﹖﹘﹚﹜！），．：；？︶︸︺︼︾﹀﹂﹗］｜｝､",
    -- Japanese
    ")]｝〕〉》」』】〙〗〟’\"｠»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。.",
    -- Korean
    "!%),.:;?]}¢°’\"†‡℃〆〈《「『〕！％），．：；？］｝",
})

local cjk_non_splittable_leaders = table.concat( {
    -- Simplified Chinese
    "$(£¥·‘\"〈《「『【〔〖〝﹙﹛＄（．［｛￡￥",
    -- Traditional Chinese
    "([{£¥‘\"‵〈《「『〔〝︴﹙﹛（｛︵︷︹︻︽︿﹁﹃﹏",
    -- Japanese
    "([｛〔〈《「『【〘〖〝‘\"｟«",
    -- Korean
    "$([{£¥‘\"々〇〉》」〔＄（［｛｠￥￦#",
})

local cjk_non_splittable = table.concat( {
    -- Japanese
    "—…‥〳〴〵",
})

--- Test whether a string can be separated by this char for multi-line rendering.
-- Optional next or prev chars may be provided to help make the decision
---- @string c
---- @string next_c
---- @string prev_c
---- @treturn boolean true if splittable, false if not
function util.isSplittable(c, next_c, prev_c)
    if util.isCJKChar(c) then
        -- a CJKChar is a word in itself, and so is splittable
        if cjk_non_splittable:find(c, 1, true) then
            -- except a few of them
            return false
        elseif next_c and cjk_non_splittable_tailers:find(next_c, 1, true) then
            -- but followed by a char that is not permitted at start of line
            return false
        elseif prev_c and cjk_non_splittable_leaders:find(prev_c, 1, true) then
            -- but preceded by a char that is not permitted at end of line
            return false
        else
            -- we can split on this CJKchar
            return true
        end
    elseif c == " " then
        -- we only split on a space (so a punctuation mark sticks to prev word)
        -- if next_c or prev_c is provided, we can make a better decision
        if next_c and non_splittable_space_tailers:find(next_c, 1, true) then
            -- this space is followed by some punctuation mark that is better kept with us
            return false
        elseif prev_c and non_splittable_space_leaders:find(prev_c, 1, true) then
            -- this space is lead by some punctuation mark that is better kept with us
            return false
        else
            -- we can split on this space
            return true
        end
    end
    -- otherwise, not splittable
    return false
end

--- Gets filesystem type of a path.
--
-- Checks if the path occurs in <code>/proc/mounts</code>
---- @string path an absolute path
---- @treturn string filesystem type
function util.getFilesystemType(path)
    local mounts = io.open("/proc/mounts", "r")
    if not mounts then return nil end
    local type
    for line in mounts:lines() do
        local mount = {}
        for param in line:gmatch("%S+") do table.insert(mount, param) end
        if string.match(path, mount[2]) then
            type = mount[3]
            if mount[2] ~= '/' then
                break
            end
        end
    end
    mounts:close()
    return type
end

-- For documentation purposes, here's a battle-tested shell version of calcFreeMem,
-- our simplified Lua version follows...
--[[
    if grep -q 'MemAvailable' /proc/meminfo ; then
        # We'll settle for 85% of available memory to leave a bit of breathing room
        tmpfs_size="$(awk '/MemAvailable/ {printf "%d", $2 * 0.85}' /proc/meminfo)"
    elif grep -q 'Inactive(file)' /proc/meminfo ; then
        # Basically try to emulate the kernel's computation, c.f., https://unix.stackexchange.com/q/261247
        # Again, 85% of available memory
        tmpfs_size="$(awk -v low=$(grep low /proc/zoneinfo | awk '{k+=$2}END{printf "%d", k}') \
            '{a[$1]=$2}
            END{
                printf "%d", (a["MemFree:"]+a["Active(file):"]+a["Inactive(file):"]+a["SReclaimable:"]-(12*low))*0.85;
            }' /proc/meminfo)"
    else
        # Ye olde crap workaround of Free + Buffers + Cache...
        # Take it with a grain of salt, and settle for 80% of that...
        tmpfs_size="$(awk \
            '{a[$1]=$2}
            END{
                printf "%d", (a["MemFree:"]+a["Buffers:"]+a["Cached:"])*0.80;
            }' /proc/meminfo)"
    fi
--]]

--- Computes the currently available memory
---- @treturn tuple of ints: memavailable, memtotal (or nil, nil on unsupported platforms).
function util:calcFreeMem()
    local memtotal, memfree, memavailable, buffers, cached

    local meminfo = io.open("/proc/meminfo", "r")
    if meminfo then
        for line in meminfo:lines() do
            if not memtotal then
                memtotal = line:match("^MemTotal:%s-(%d+) kB")
                if memtotal then
                    -- Next!
                    goto continue
                end
            end

            if not memfree then
                memfree = line:match("^MemFree:%s-(%d+) kB")
                if memfree then
                    -- Next!
                    goto continue
                end
            end

            if not memavailable then
                memavailable = line:match("^MemAvailable:%s-(%d+) kB")
                if memavailable then
                    -- Best case scenario, we're done :)
                    break
                end
            end

            if not buffers then
                buffers = line:match("^Buffers:%s-(%d+) kB")
                if buffers then
                    -- Next!
                    goto continue
                end
            end

            if not cached then
                cached = line:match("^Cached:%s-(%d+) kB")
                if cached then
                    -- Ought to be the last entry we care about, we're done
                    break
                end
            end

            ::continue::
        end
        meminfo:close()
    else
        -- Not on Linux?
        return nil, nil
    end

    if memavailable then
        -- Leave a bit of margin, and report 85% of that...
        return math.floor(memavailable * 0.85) * 1024, memtotal * 1024
    else
        -- Crappy Free + Buffers + Cache version, because the zoneinfo approach is a tad hairy...
        -- So, leave an even larger margin, and only report 75% of that...
        return math.floor((memfree + buffers + cached) * 0.75) * 1024, memtotal * 1024
    end
end

--- Recursively scan directory for files inside
-- @string path
-- @func callback(fullpath, name, attr)
function util.findFiles(dir, cb)
    local function scan(current)
        local ok, iter, dir_obj = pcall(lfs.dir, current)
        if not ok then return end
        for f in iter, dir_obj do
            local path = current.."/"..f
            -- lfs can return nil here, as it will follow symlinks!
            local attr = lfs.attributes(path) or {}
            if attr.mode == "directory" then
                if f ~= "." and f ~= ".." then
                    scan(path)
                end
            elseif attr.mode == "file" or attr.mode == "link" then
                cb(path, f, attr)
            end
        end
    end
    scan(dir)
end

--- Checks if directory is empty.
---- @string path
---- @treturn bool
function util.isEmptyDir(path)
    -- lfs.dir will crash rather than return nil if directory doesn't exist O_o
    local ok, iter, dir_obj = pcall(lfs.dir, path)
    if not ok then return end
    for filename in iter, dir_obj do
        if filename ~= '.' and filename ~= '..' then
            return false
        end
    end
    return true
end

--- check if the given path is a file
---- @string path
---- @treturn bool
function util.fileExists(path)
    local file = io.open(path, "r")
    if file ~= nil then
         file:close()
         return true
    end
end

--- Checks if the given path exists. Doesn't care if it's a file or directory.
---- @string path
---- @treturn bool
function util.pathExists(path)
    return lfs.attributes(path, "mode") ~= nil
end

--- Checks if the given directory exists.
function util.directoryExists(path)
  return lfs.attributes(path, "mode") == "directory"
end

--- As `mkdir -p`.
-- Unlike [lfs.mkdir](https://keplerproject.github.io/luafilesystem/manual.html#mkdir)(),
-- does not error if the directory already exists, and creates intermediate directories as needed.
-- @string path the directory to create
-- @treturn bool true on success; nil, err_message on error
function util.makePath(path)
    if lfs.attributes(path, "mode") == "directory" then
        return true
    end

    local components
    if path:sub(1, 1) == "/" then
        -- Leading slash, remember that it's an absolute path
        components = "/"
    else
        -- Relative path
        components = ""
    end

    local success, err
    -- NOTE: mkdir -p handles umask shenanigans for intermediate components, we don't
    for component in path:gmatch("([^/]+)") do
        -- The trailing slash ensures we properly fail via mkdir if the composite path already exists as a file/link
        components = components .. component .. "/"
        if lfs.attributes(components, "mode") == nil then
            success, err = lfs.mkdir(components)
            if not success then
                return nil, err .. " (creating `" .. components .. "` for `" .. path .. "`)"
            end
        end
    end

    return success, err
end

--- Remove as many of the empty directories specified in path, children-first.
-- Does not fail if the directory is already gone.
-- @string path the directory tree to prune
-- @treturn bool true on success; nil, err_message on error
function util.removePath(path)
    local component = path
    repeat
        local attr = lfs.attributes(component, "mode")
        if attr == "directory" then
            local success, err = lfs.rmdir(component)
            if not success then
                -- Most likely because ENOTEMPTY ;)
                return nil, err .. " (removing `" .. component .. "` for `" .. path .. "`)"
            end
        elseif attr ~= nil then
            return nil, "Encountered a component that isn't a directory" .. " (removing `" .. component .. "` for `" .. path .. "`)"
        end

        local parent = BaseUtil.dirname(component)
        component = parent
    until parent == "." or parent == "/"
    return true, nil
end

--- As `rm`
-- @string path of the file to remove
-- @treturn bool true on success; nil, err_message on error
function util.removeFile(file)
    if file and lfs.attributes(file, "mode") == "file" then
        return os.remove(file)
    elseif file then
        return nil, file .. " is not a file"
    else
        return nil, "file is nil"
    end
end

-- Gets total, used and available bytes for the mountpoint that holds a given directory.
-- @string path of the directory
-- @treturn table with total, used and available bytes
function util.diskUsage(dir)
    -- safe way of testing df & awk
    local function doCommand(d)
        local handle = io.popen("df -k " .. d .. " 2>/dev/null | awk '$3 ~ /[0-9]+/ { print $2,$3,$4 }' 2>/dev/null || echo ::ERROR::")
        if not handle then return end
        local output = handle:read("*all")
        handle:close()
        if not output:find "::ERROR::" then
            return output
        end
    end
    local err = { total = nil, used = nil, available = nil }
    if not dir or lfs.attributes(dir, "mode") ~= "directory" then return err end
    local usage = doCommand(dir)
    if not usage then return err end
    local stage, result = {}, {}
    for size in usage:gmatch("%w+") do
        table.insert(stage, size)
    end
    for k, v in pairs({"total", "used", "available"}) do
        if stage[k] ~= nil then
            -- sizes are in kb, return bytes here
            result[v] = stage[k] * 1024
        end
    end
    return result
end


--- Replaces characters that are invalid filenames.
--
-- Replaces the characters <code>\/:*?"<>|</code> with an <code>_</code>.
-- These characters are problematic on Windows filesystems. On Linux only
-- <code>/</code> poses a problem.
---- @string str filename
---- @treturn string sanitized filename
local function replaceAllInvalidChars(str)
    if str then
        return str:gsub('[\\,%/,:,%*,%?,%",%<,%>,%|]','_')
    end
end

--- Replaces slash with an underscore.
---- @string str
---- @treturn string
local function replaceSlashChar(str)
    if str then
        return str:gsub('%/','_')
    end
end

--[[--
Builds a filename that is safe to create on the target filesystem.

Without a path, the characters `\/:*?"<>|` are replaced with an `_`. These characters are problematic on Windows filesystems. On Linux only the `/` poses a problem.

If a path is provided (and we are not running on Android), @{util.getFilesystemType}() is used to decide: unless the filesystem is reported as `vfat` or `fuse.fsp`, only `/` is replaced.

The name part is also run through @{util.htmlToPlainTextIfHtml}, cut to `limit` bytes and stripped of any UTF-8 sequence broken by that cut.

@string str the wanted filename
@string[opt] path where the file will be stored
@int[opt=240] limit maximum length (in bytes) kept for the name part
@int[opt=10] limit_ext a suffix longer than this is not considered a file extension
@treturn string safe filename
]]
function util.getSafeFilename(str, path, limit, limit_ext)
    local basename, extension = util.splitFileNameSuffix(str)

    -- VFAT supports a maximum of 255 UCS-2 characters, although it's probably treated as UTF-16 by Windows
    -- default to a slightly lower limit just in case
    limit = limit or 240
    limit_ext = limit_ext or 10

    -- Strict replacement unless the filesystem is known to be lenient.
    local sanitize = replaceAllInvalidChars
    -- Always assume the worst on Android (#7837)
    if path and not BaseUtil.isAndroid() then
        local fs_type = util.getFilesystemType(path)
        local needs_strict_rules = fs_type == "vfat" or fs_type == "fuse.fsp"
        if not needs_strict_rules then
            sanitize = replaceSlashChar
        end
    end

    if extension:len() > limit_ext then
        -- probably not an actual file extension, or at least not one we'd be
        -- dealing with, so treat the whole string as the name
        basename, extension = str, nil
    end

    basename = util.htmlToPlainTextIfHtml(basename)
    -- cutting at a byte offset might result in broken UTF-8, which we don't want in the result
    basename = util.fixUtf8(basename:sub(1, limit), "")

    local safe_filename = sanitize(basename)
    if extension and extension ~= "" then
        safe_filename = safe_filename .. "." .. sanitize(extension)
    end
    return safe_filename
end

--- Splits a file path into its directory part and its file name.
-- The directory part keeps its trailing "/". A path ending with "/" yields the
-- entire path as directory and "" as file name; a path without any "/" yields
-- "" as directory. nil or "" yields "", "".
-- @string file
-- @treturn string directory
-- @treturn string filename
function util.splitFilePathName(file)
    if file == nil or file == "" then
        return "", ""
    end
    -- Greedy first capture: everything up to and including the last "/".
    local directory, name = string.match(file, "(.*/)(.*)")
    if directory == nil then
        return "", file
    end
    return directory, name
end

--- Splits a file name at its last dot into the part before it and the suffix after it.
-- Without any dot, the whole name is returned with "" as suffix.
-- nil or "" yields "", "".
-- @string file
-- @treturn string name without suffix
-- @treturn string suffix (without the dot)
function util.splitFileNameSuffix(file)
    if file == nil or file == "" then
        return "", ""
    end
    -- Greedy first capture: the split happens at the last dot.
    local stem, suffix = string.match(file, "(.*)%.(.*)")
    if stem == nil then
        return file, ""
    end
    return stem, suffix
end

--- Gets the file extension, i.e. the second result of @{util.splitFileNameSuffix}.
-- @string file filename
-- @treturn string extension ("" when there is none)
function util.getFileNameSuffix(file)
    -- select(2, ...) skips the name part; the parentheses keep a single result.
    return (select(2, util.splitFileNameSuffix(file)))
end

--- Companion helper function that returns the script's language,
-- based on the (case-insensitive) file extension.
-- @string file filename
-- @treturn string "shell" for .sh, "python" for .py; nothing for any other extension
function util.getScriptType(file)
    local languages = {
        sh = "shell",
        py = "python",
    }
    local language = languages[string.lower(util.getFileNameSuffix(file))]
    if language then
        return language
    end
end

--- Gets human friendly size as string, using decimal (power of 1000) units.
-- A unit is only used when size is strictly greater than its threshold,
-- so exactly 1000 bytes is still shown in B.
-- @int size (bytes); anything tonumber() cannot convert makes the function return nothing
-- @bool right_align (by padding with spaces on the left)
-- @treturn string
function util.getFriendlySize(size, right_align)
    local frac_format = right_align and "%6.1f" or "%.1f"
    local deci_format = right_align and "%6d" or "%d"
    size = tonumber(size)
    if not size then return end

    local template, amount
    if size > 1000*1000*1000 then
        template = C_("Data storage size", "%1 GB")
        amount = string.format(frac_format, size/1000/1000/1000)
    elseif size > 1000*1000 then
        template = C_("Data storage size", "%1 MB")
        amount = string.format(frac_format, size/1000/1000)
    elseif size > 1000 then
        template = C_("Data storage size", "%1 kB")
        amount = string.format(frac_format, size/1000)
    else
        template = C_("Data storage size", "%1 B")
        amount = string.format(deci_format, size)
    end
    return T(template, amount)
end

--- Gets formatted size as string (1273334 => "1,273,334")
-- Only the integer digits are grouped: a leading minus sign and anything
-- following the integer digits (fraction, exponent) are kept as they are.
-- A value whose string form does not start with digits is returned as
-- tostring(size).
-- @int size (bytes)
-- @treturn string
function util.getFormattedSize(size)
    local text = tostring(size)
    local sign, digits, rest = text:match("^(%-?)(%d+)(.*)$")
    if not digits then
        return text
    end
    -- Group from the right: reverse, add a comma after each 3 digits,
    -- reverse back, and drop the comma that ends up leading when the digit
    -- count is a multiple of 3.
    local grouped = digits:reverse():gsub("(%d%d%d)", "%1,"):reverse():gsub("^,", "")
    return sign .. grouped .. rest
end

--- Calculates a partial digest of a file.
-- Since only PDF documents could be modified by KOReader by appending data
-- at the end of the files when highlighting, we use a non-even sampling
-- algorithm which samples with larger weight at file head and much smaller
-- weight at file tail, thus reducing the probability that appended data may
-- change the digest value.
-- Up to 12 samples of 1024 bytes are read, at offset lshift(1024, 2*i) for
-- i = -1 .. 10; sampling stops at the first offset where nothing can be read.
-- NOTE(review): with LuaJIT's bit library the shift count is presumably taken
-- modulo 32, which would make the i = -1 sample start at offset 0 - confirm.
-- Note that if PDF file size is around 1024, 4096, 16384, 65536, 262144
-- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, appending data
-- by highlighting in KOReader may change the digest value.
-- @string filepath
-- @return the result of the finalizing md5 call (presumably the hex digest -
-- confirm against ffi/sha2); nothing if filepath is nil or cannot be opened
function util.partialMD5(filepath)
    if not filepath then return end
    local file = io.open(filepath, "rb")
    if not file then return end

    local base_offset, sample_size = 1024, 1024
    local digest = md5()
    local i = -1
    while i <= 10 do
        file:seek("set", lshift(base_offset, 2*i))
        local sample = file:read(sample_size)
        if not sample then
            -- past the end of the file: no further offset can succeed
            break
        end
        digest(sample)
        i = i + 1
    end
    file:close()
    -- calling the updater without data finalizes the digest
    return digest()
end

--- Writes data to a file, replacing any previous content.
-- @string data content to write
-- @string filepath destination file; when nil, nothing is done and nothing is returned
-- @bool[opt] force_flush if true, BaseUtil.fsyncOpenedFile() is called on the handle before it is closed
-- @bool[opt] lua_dofile_ready if true, data is wrapped as "-- <filepath>\nreturn <data>\n"
-- @bool[opt] directory_updated if true, BaseUtil.fsyncDirectory(filepath) is called once the file is closed
-- @treturn[1] bool true on success
-- @treturn[2] nil
-- @treturn[2] string error message when opening, writing or closing the file failed
function util.writeToFile(data, filepath, force_flush, lua_dofile_ready, directory_updated)
    if not filepath then return end
    if lua_dofile_ready then
        local t = { "-- ", filepath, "\nreturn ", data, "\n" }
        data = table.concat(t)
    end
    local file, err = io.open(filepath, "wb")
    if not file then
        return nil, err
    end
    local written, write_err = file:write(data)
    if written and force_flush then
        BaseUtil.fsyncOpenedFile(file)
    end
    -- Always release the handle. Buffered data is flushed on close, so an
    -- error such as a full disk may only be reported here.
    local closed, close_err = file:close()
    if not written then
        return nil, write_err
    end
    if not closed then
        return nil, close_err
    end
    if directory_updated then
        BaseUtil.fsyncDirectory(filepath)
    end
    return true
end

-- Returns the byte length (1 to 4) of the well-formed UTF-8 sequence starting
-- at byte index i of s, or nothing when no well-formed sequence starts there.
local function validUtf8SequenceLength(s, i)
    if s:find("^[%z\1-\127]", i) then
        return 1
    end
    if s:find("^[\194-\223][\128-\191]", i) then
        return 2
    end
    if s:find("^\224[\160-\191][\128-\191]", i)
        or s:find("^[\225-\236][\128-\191][\128-\191]", i)
        or s:find("^\237[\128-\159][\128-\191]", i)
        or s:find("^[\238-\239][\128-\191][\128-\191]", i) then
        return 3
    end
    if s:find("^\240[\144-\191][\128-\191][\128-\191]", i)
        or s:find("^[\241-\243][\128-\191][\128-\191][\128-\191]", i)
        or s:find("^\244[\128-\143][\128-\191][\128-\191]", i) then
        return 4
    end
end

--[[--
Replaces invalid UTF-8 characters with a replacement string.

Each byte that does not start a well-formed UTF-8 sequence is replaced on its own, so a truncated multi-byte character yields one replacement per leftover byte.

Based on <http://notebook.kulchenko.com/programming/fixing-malformed-utf8-in-lua>.
c.f.,    FixUTF8 @ <https://github.com/pkulchenko/ZeroBraneStudio/blob/master/src/util.lua>.

@string str the string to be checked for invalid characters
@string replacement the string to replace invalid characters with
@treturn string valid UTF-8
]]
function util.fixUtf8(str, replacement)
    local cursor = 1
    local total = #str
    while cursor <= total do
        local seq_len = validUtf8SequenceLength(str, cursor)
        if seq_len then
            cursor = cursor + seq_len
        else
            -- swap the offending byte for the replacement, then resume right
            -- after the replacement (which is never re-examined)
            str = str:sub(1, cursor - 1) .. replacement .. str:sub(cursor + 1)
            cursor = cursor + #replacement
            total = total + #replacement - 1
        end
    end
    return str
end

--- Splits input string with the splitter into a table. This function ignores the last empty entity.
-- The pieces are whatever @{util.gsplit} yields for
-- (str, splitter, false, capture_empty_entity), collected in order.
-- @string str the string to be split
-- @string splitter
-- @bool capture_empty_entity
-- @treturn table an array-like table
function util.splitToArray(str, splitter, capture_empty_entity)
    local pieces = {}
    for piece in util.gsplit(str, splitter, false, capture_empty_entity) do
        pieces[#pieces + 1] = piece
    end
    return pieces
end

--- Convert a Unicode codepoint (number) to UTF-8 char
-- c.f., <https://stackoverflow.com/a/4609989>
--     & <https://stackoverflow.com/a/38492214>
-- See utf8charcode in ffi/util for a decoder.
--
-- Surrogates (U+D800..U+DFFF), negative values and values above U+10FFFF
-- are encoded as U+FFFD REPLACEMENT CHARACTER.
-- @int c Unicode codepoint
-- @treturn string UTF-8 char
function util.unicodeCodepointToUtf8(c)
    -- UTF-8 encoding of U+FFFD (bytes EF BF BD), spelled with decimal escapes
    -- so the source stays ASCII-only and cannot be mangled by tooling.
    local replacement_char = "\239\191\189"
    if c < 0 then
        return replacement_char -- Invalid -> U+FFFD REPLACEMENT CHARACTER
    elseif c < 0x80 then
        -- 1 byte: 0xxxxxxx
        return string.char(c)
    elseif c < 0x800 then
        -- 2 bytes: 110xxxxx 10xxxxxx
        return string.char(
                bor(0xC0, rshift(c, 6)),
                bor(0x80, band(c, 0x3F))
        )
    elseif c < 0x10000 then
        if c >= 0xD800 and c <= 0xDFFF then
            return replacement_char -- Surrogates -> U+FFFD REPLACEMENT CHARACTER
        end
        -- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        return string.char(
                bor(0xE0, rshift(c, 12)),
                bor(0x80, band(rshift(c, 6), 0x3F)),
                bor(0x80, band(c, 0x3F))
        )
    elseif c < 0x110000 then
        -- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return string.char(
                bor(0xF0, rshift(c, 18)),
                bor(0x80, band(rshift(c, 12), 0x3F)),
                bor(0x80, band(rshift(c, 6), 0x3F)),
                bor(0x80, band(c, 0x3F))
        )
    else
        return replacement_char -- Invalid -> U+FFFD REPLACEMENT CHARACTER
    end
end

-- Replacement callbacks for numeric character references.
local function decodeDecimalEntity(digits)
    return util.unicodeCodepointToUtf8(tonumber(digits))
end
local function decodeHexEntity(hex_digits)
    return util.unicodeCodepointToUtf8(tonumber(hex_digits, 16))
end

-- Ordered list of { pattern, replacement } pairs: an array of arrays is used
-- so that they are applied in the order written.
local HTML_ENTITIES_TO_UTF8 = {
    {"&lt;", "<"},
    {"&gt;", ">"},
    {"&quot;", '"'},
    {"&lsquo;", '‘'},
    {"&rsquo;", '’'},
    {"&ldquo;", '“'},
    {"&rdquo;", '”'},
    {"&mdash;", '—'},
    {"&apos;", "'"},
    {"&nbsp;", "\u{00A0}"},
    {"&#(%d+);", decodeDecimalEntity},
    {"&#x(%x+);", decodeHexEntity},
    {"&amp;", "&"}, -- must be last
}

--[[--
Replace HTML entities with their UTF-8 encoded equivalent in text.

Supports only basic ones and those with numbers (no support for named entities like `&eacute;`).

@string text text with HTML entities
@treturn string UTF-8 text
]]
function util.htmlEntitiesToUtf8(text)
    for i = 1, #HTML_ENTITIES_TO_UTF8 do
        local entity = HTML_ENTITIES_TO_UTF8[i]
        text = text:gsub(entity[1], entity[2])
    end
    return text
end

-- Ordered { pattern, replacement } rules applied to the markup before
-- entities are decoded.
local HTML_TAG_TO_TEXT_RULES = {
    -- <br> and <br/> become \n
    { "%s*<%s*br%s*/?>%s*", "\n" },
    -- </p> and standalone <p/> become \n
    { "%s*</%s*p%s*>%s*", "\n" },
    { "%s*<%s*p%s*/>%s*", "\n" },
    -- <p> becomes \n\t (\t, unlike any combination of spaces, ensures a
    -- constant indentation when text is justified). This one comes after the
    -- others, so the \t is not removed by their %s.
    { "%s*<%s*p%s*>%s*", "\n\t" },
    -- any remaining HTML tag is removed
    { "<[^>]*>", "" },
}

--[[--
Convert simple HTML to plain text.

This may fail on complex HTML (with styles, scripts, comments), but should be fine enough with simple HTML as found in EPUB's `<dc:description>`.

@string text HTML text
@treturn string plain text
]]
function util.htmlToPlainText(text)
    for i = 1, #HTML_TAG_TO_TEXT_RULES do
        local rule = HTML_TAG_TO_TEXT_RULES[i]
        text = text:gsub(rule[1], rule[2])
    end
    -- Convert HTML entities
    text = util.htmlEntitiesToUtf8(text)
    -- Trim spaces and new lines at start and end, including the \t we added
    -- (this looks fine enough with multiple paragraphs, but feels nicer with
    -- a single paragraph, whether it contains <br>s or not).
    -- The outer parentheses keep only the string result.
    return (text:gsub("^[\n%s]*", ""):gsub("[\n%s]*$", ""))
end

--- Convert HTML to plain text if text seems to be HTML
-- Detection of HTML is simple and may raise false positives
-- or negatives, but seems quite good at guessing content type
-- of text found in EPUB's <dc:description>.
--
-- @string text the string with possibly some HTML
-- @treturn string cleaned text
function util.htmlToPlainTextIfHtml(text)
    -- Quick way to check if text is some HTML: look for html tags
    if text:find("<%w+.->") then
        return (util.htmlToPlainText(text))
    end

    -- no <tag> found, but we may meet some badly/twice encoded html
    -- containing "&lt;br&gt;"
    if text:find("&lt;%a+&gt;") then
        -- decode one of the two encodings before converting
        local decoded = util.htmlEntitiesToUtf8(text)
        return (util.htmlToPlainText(decoded))
    end

    -- Not HTML. If text ends with ]]>, it probably comes from
    -- <![CDATA[ .. ]]> that crengine has extracted correctly, but left the
    -- ending tag in, so let's remove it.
    return (text:gsub("]]>%s*$", ""))
end

-- Characters escaped by util.htmlEscape and their HTML entities.
local HTML_ESCAPE_ENTITIES = {
    ["&"] = "&amp;",
    ["<"] = "&lt;",
    [">"] = "&gt;",
    ['"'] = "&quot;",
    ["'"] = "&#39;",
    ["/"] = "&#47;",
}

--- Encode the HTML entities in a string
-- Escapes the characters & < > " ' and / only.
-- Taken from https://github.com/kernelsauce/turbo/blob/e4a35c2e3fb63f07464f8f8e17252bea3a029685/turbo/escape.lua#L58-L70
-- @string text the string to escape
-- @treturn string the escaped string
function util.htmlEscape(text)
    -- The parentheses drop gsub's second result (the match count), so the
    -- count can not leak into a caller's argument list.
    -- "{" and "}" were part of the original character class but had no entry
    -- in the replacement table (gsub left them as they were), so they are
    -- simply not matched anymore.
    return (text:gsub("[\">/<'&]", HTML_ESCAPE_ENTITIES))
end

--- Prettify a CSS stylesheet
-- Not perfect, but enough to make some ugly CSS readable.
-- By default, each selector and each property is put on its own line.
-- With condensed=true, condense each full declaration on a single line.
--
-- Characters that must survive the reformatting untouched (',' ':' ';' in
-- comments, ',' inside parentheses and inside declarations) are temporarily
-- swapped for control characters and restored at the end.
--
-- @string css_text CSS string
-- @bool[opt=false] condensed true to condense each declaration on a line
-- @treturn string the CSS prettified
function util.prettifyCSS(css_text, condensed)
    if not condensed then
        -- Get rid of \t
        css_text = css_text:gsub("\t", " ")
        css_text = css_text:gsub("\r", "")
        -- Protect ',:;' in comments by replacing them with rare control chars
        css_text = css_text:gsub("/%*.-%*/", function(s)
            s = s:gsub(",", "\v")
            s = s:gsub(":", "\f")
            s = s:gsub(";", "\b")
            return s
        end)
        -- Protect ',' inside () (ie. ":is(td, th)") by replacing them with rare control chars.
        -- The pattern used to be "%b()/", which only matched parentheses directly
        -- followed by a '/': selectors like ":is(td, th)" were never protected and
        -- got split at their inner comma.
        css_text = css_text:gsub("%b()", function(s)
            s = s:gsub(",", "\v")
            return s
        end)
        -- Cleanup declarations (the most nested ones only, which may be
        -- contained in "@supports (...) {...}" or "@media (...) {...}")
        css_text = css_text:gsub(" *{([^{}]*)} *", function(s)
            -- Comments inside declaration may be mixed with properties, on a same line,
            -- before or after them, and we don't know if they apply to what's before or
            -- what's after, except when they are standalone and probably apply to the
            -- next line. So, when not standalone, double indent them (so it looks like
            -- they apply to what's above - but will still look fine if they are about
            -- what's after.
            s = "\n" .. s -- so the next one match on the first line
            s = s:gsub("\n */%*", "\a/*")          -- '/*' with only blank before: mark them with '\a'
            -- NOTE(review): the captured character is not put back by the replacement,
            -- so a non-blank character glued to '/*' is dropped - confirm this is intended.
            s = s:gsub(" *([^\a])/%*", "\n\t/*")   -- unmarked '/*' (content before): marked, more indentation later
            s = s:gsub("\a", "")                   -- remove mark
            s = s:gsub("\t", "\a")                 -- replace mark by one that is not caught by '%s'
            s = s:gsub("%*/%s*", "*/\n")           -- '*/' end of css comment: newline after
            s = s:gsub("%s*;%s*", ";\n")           -- newline after ';'
            s = s:gsub("\n+%s*", "\n    ")         -- remove blank lines, 4 spaces indent on all lines
            s = s:gsub("\a", "    ")               -- expand our \a marks to have these /* more indented
            s = s:gsub("%s*:%s*", ": ")            -- normalize spacing in "keyword: value"
            s = s:gsub("^%s*(.-)%s*$", "\n    %1") -- remove leading and trailing spaces, indent first line
            s = s:gsub("^%s*$", "")                -- but have empty declaration really empty
            -- less indent for these crengine specific tweaks to the followup properties
            s = s:gsub("\n    %-cr%-hint: late", "\n -cr-hint: late")
            s = s:gsub("\n    %-cr%-only%-if", "\n -cr-only-if")
            -- Protect and normalize ',' in declarations (ie. in font-family list, rgb()...)
            s = s:gsub("%s*,%s*", "\v ")
            return " {" .. s .. "\n}"
        end)
        -- Have each selector (separated by ',') on a new line
        css_text = css_text:gsub("%s*,%s*", " ,\n")
        css_text = css_text:gsub("\n *([^\n]+),", "\n%1,") -- remove leading spaces on the first one
        css_text = css_text:gsub("\n *([^\n]+){", "\n%1{") -- remove leading spaces on a standalone one
        -- Make sure { is on the same line with the selector it follows
        css_text = css_text:gsub("%s*\n *{", " {")
        -- Make sure we have a newline after our }
        css_text = css_text:gsub("\n} *([^\n]+)", "\n}\n%1")
        -- Restore all protected chars
        css_text = css_text:gsub("\v", ",")
        css_text = css_text:gsub("\f", ":")
        css_text = css_text:gsub("\b", ";")
    else
        -- Go thru previous method to have something standard to work on
        css_text = util.prettifyCSS(css_text)
        -- And condense that
        css_text = css_text:gsub(" {\n    ", " { ")
        css_text = css_text:gsub(";\n    ", "; ")
        css_text = css_text:gsub("\n}", " }")
        css_text = css_text:gsub(" ,\n", ", ")
    end
    return css_text
end

--- Escape list for shell usage
-- Each argument is wrapped in single quotes, with every embedded single
-- quote rewritten as '\'' ; the results are joined with a space.
-- @table args the list of arguments to escape
-- @treturn string the escaped and concatenated arguments
function util.shell_escape(args)
    local quoted = {}
    for i, arg in ipairs(args) do
        local escaped = arg:gsub("'", "'\\''")
        quoted[i] = "'" .. escaped .. "'"
    end
    return table.concat(quoted, " ")
end

--- Clear all the elements from an array without reassignment.
-- Indices 0 up to #t are set to nil (so an entry stored at index 0 is
-- removed too); other keys are left alone.
-- @table t the array to be cleared
function util.clearTable(t)
    local last = #t
    local i = 0
    while i <= last do
        t[i] = nil
        i = i + 1
    end
end

-- Formats a single character as "%XX" (uppercase hex of its byte value).
local function percentEncodeChar(c)
    return string.format("%%%02X", string.byte(c))
end

--- Encode URL also known as percent-encoding see https://en.wikipedia.org/wiki/Percent-encoding
-- Newlines are first turned into CRLF; then every character that is not
-- alphanumeric (%w) and not one of <code>- . _ ~ ! * ' ( )</code> is
-- percent-encoded.
-- Taken from https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
-- @string url the string to encode
-- @treturn string the encoded string (nothing is returned when url is nil)
function util.urlEncode(url)
    if url == nil then
        return
    end
    local with_crlf = url:gsub("\n", "\r\n")
    local encoded = with_crlf:gsub("([^%w%-%.%_%~%!%*%'%(%)])", percentEncodeChar)
    return encoded
end

--- Decode URL (reverse process to util.urlEncode())
-- Every "%" followed by two hex digits is replaced by the byte it encodes;
-- anything else is left as is.
-- Taken from https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
-- @string url the string to decode
-- @treturn string the decoded string (nothing is returned when url is nil)
function util.urlDecode(url)
    if url == nil then
        return
    end
    local decoded = url:gsub("%%(%x%x)", function(hex_pair)
        return string.char(tonumber(hex_pair, 16))
    end)
    return decoded
end

--- Check lua syntax of string
-- The text is only compiled with loadstring(), never run.
-- @string lua_text lua code text
-- @treturn string with parsing error, nil if syntax ok
function util.checkLuaSyntax(lua_text)
    local chunk, message = loadstring(lua_text)
    if chunk then
        return nil
    end
    if not message then
        return message
    end
    -- Replace: [string "blah blah..."]:3: '=' expected near '123'
    -- with: Line 3: '=' expected near '123'
    local readable = message:gsub("%[string \".-%\"]:", "Line ")
    return readable
end

--- Simple startsWith string helper.
--
-- C.f., <http://lua-users.org/wiki/StringRecipes>.
-- @string str source string
-- @string start string to match
-- @treturn bool true if str begins with start (always the case for an empty start)
function util.stringStartsWith(str, start)
    local head = str:sub(1, #start)
    return head == start
end

--- Simple endsWith string helper.
-- @string str source string
-- @string ending string to match
-- @treturn bool true if str finishes with ending (always the case for an empty ending)
function util.stringEndsWith(str, ending)
    if ending == "" then
        return true
    end
    local tail = str:sub(-#ending)
    return tail == ending
end

--- Search a string in a text.
-- Both are compared character by character, using the lists made by
-- util.splitToChars(). Unless case_sensitive is set, the search string and
-- each examined text character are passed through util.fixUtf8() and
-- Utf8Proc.lowercase() before being compared.
-- @tparam string|table txt Text (or its char list) to search in
-- @string str String to search for
-- @bool case_sensitive
-- @number start_pos Position number in text to start search from
-- @treturn number Position number or 0 if not found
-- @treturn table Text char list
-- @treturn table Search string char list
function util.stringSearch(txt, str, case_sensitive, start_pos)
    if not case_sensitive then
        str = Utf8Proc.lowercase(util.fixUtf8(str, "?"))
    end
    local txt_charlist = type(txt) == "table" and txt or util.splitToChars(txt)
    local str_charlist = util.splitToChars(str)
    local needle_len = #str_charlist
    -- last 0-based offset where the whole search string still fits
    local last_offset = #txt_charlist - needle_len

    local char_pos = 0
    for offset = start_pos - 1, last_offset do
        local matched = true
        for j = 1, needle_len do
            local candidate = txt_charlist[offset + j]
            if not case_sensitive then
                candidate = Utf8Proc.lowercase(util.fixUtf8(candidate, "?"))
            end
            if candidate ~= str_charlist[j] then
                matched = false
                break
            end
        end
        if matched then
            char_pos = offset + 1
            break
        end
    end
    -- Returned charlists are used in TextViewer find,
    -- to avoid double call of util.splitToChars()
    return char_pos, txt_charlist, str_charlist
end

-- Metatable making the tables built by util.wrapMethod callable.
local WrappedFunction_mt = {}

-- Runs the optional before_callback with (target_table, ...), then calls
-- self.func with the very same arguments the wrapper was called with and
-- returns its results. Returns nothing when there is no self.func.
function WrappedFunction_mt.__call(self, ...)
    local before = self.before_callback
    if before then
        before(self.target_table, ...)
    end
    -- looked up only now, after the callback has run
    local func = self.func
    if func then
        return func(...)
    end
end

--- Wrap (or replace) a table method with a custom method, in a revertable way.
-- This allows you extend the features of an existing module by modifying its
-- internal methods, and then revert them back to normal later if necessary.
--
-- The most notable use-case for this is VirtualKeyboard's inputbox method
-- wrapping to allow keyboards to add more complicated state-machines to modify
-- how characters are input.
--
-- The returned table is the same table `target_table[target_field_name]` is
-- set to. In addition to being callable, the new method has three sub-methods:
--
--  * `:revert()` will un-wrap the method and revert it to the original state.
--
--    Note that if a method is wrapped multiple times, reverting it will revert
--    it to the state of the method when util.wrapMethod was called (and if
--    called on the table returned from util.wrapMethod, that is the state when
--    that particular util.wrapMethod was called).
--
--  * `:raw_call(...)` will call the original method with the given arguments
--    and return whatever it returns.
--
--    This makes it more ergonomic to use the wrapped table methods in the case
--    where you've replaced the regular function with your own implementation
--    but you need to call the original functions inside your implementation.
--
--  * `:raw_method_call(...)` will call the original method with the arguments
--    `(target_table, ...)` and return whatever it returns. Note that the
--    target_table used is the one associated with the util.wrapMethod call.
--
--    This is effectively short-hand for `:raw_call(target_table, ...)`.
--
-- This is loosely based on busted/luassert's spies implementation (MIT).
--   <https://github.com/Olivine-Labs/luassert/blob/v1.7.11/src/spy.lua>
--
-- @tparam table target_table The table whose method will be wrapped.
-- @tparam string target_field_name The name of the field to wrap.
-- @tparam nil|func new_func If non-nil, this function will be called instead of the original function after wrapping.
-- @tparam nil|func before_callback If non-nil, this function will be called (with the arguments (target_table, ...)) before the function is called.
-- @treturn table the callable wrapper now stored in target_table[target_field_name]
function util.wrapMethod(target_table, target_field_name, new_func, before_callback)
    local original = target_table[target_field_name]

    local wrapped = {
        target_table = target_table,
        target_field_name = target_field_name,
        old_func = original,

        before_callback = before_callback,
        func = new_func or original,
    }

    -- Puts back what was in the field when this wrapper was made (only once).
    function wrapped.revert(self)
        if self.reverted then return end
        self.target_table[self.target_field_name] = self.old_func
        self.reverted = true
    end

    -- Calls what was in the field when this wrapper was made, if anything.
    function wrapped.raw_call(self, ...)
        local old = self.old_func
        if old then
            return old(...)
        end
    end

    -- Same as raw_call, with target_table passed as first argument.
    function wrapped.raw_method_call(self, ...)
        return self:raw_call(self.target_table, ...)
    end

    setmetatable(wrapped, WrappedFunction_mt)
    target_table[target_field_name] = wrapped
    return wrapped
end

--- Cuts a given "num" down to "points" decimal places.
-- (i.e. `round_decimal(0.000000001, 2)` will yield `0.00`)
-- The scaled value goes through math.floor(), so the result is rounded
-- towards negative infinity, not to the nearest value.
-- @number num
-- @int points number of decimal places to keep
-- @treturn number
function util.round_decimal(num, points)
    local scale = 10 ^ points
    local scaled = num * scale
    return math.floor(scaled) / scale
end

return util
-												doc: add documentation build infrastructure

											
										
										
											2016-02-04 18:24:39 +00:00
+								--[[--
-												Doc: miscellaneous improvements.

											
										
										
											2016-12-13 16:06:02 +00:00
+								This module contains miscellaneous helper functions for the KOReader frontend.
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								]]
-												doc: add documentation build infrastructure

											
										
										
											2016-02-04 18:24:39 +00:00
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								local BaseUtil = require("ffi/util")
-												TextViewer: add Find (#9507)


											
										
										
											2022-09-13 21:09:49 +00:00
+								local Utf8Proc = require("ffi/utf8proc")
-												Misc: Get rid of the legacy defaults.lua globals (#9546)

* This removes support for the following deprecated constants: `DTAP_ZONE_FLIPPING`, `DTAP_ZONE_BOOKMARK`, `DCREREADER_CONFIG_DEFAULT_FONT_GAMMA`
* The "Advanced settings" panel now highlights modified values in bold (think about:config in Firefox ;)).
* LuaData: Isolate global table lookup shenanigans, and fix a few issues in unused-in-prod codepaths.
* CodeStyle: Require module locals for Lua/C modules, too.
* ScreenSaver: Actually garbage collect our widget on close (ScreenSaver itself is not an instantiated object).
* DateTimeWidget: Code cleanups to ensure child widgets can be GC'ed.
											
										
										
											2022-09-27 23:10:50 +00:00
+								local lfs = require("libs/libkoreader-lfs")
-												md5: centralize and deduplicate (#11003)

Document partial md5 hash is calculated by util.partialMD5() and stored in doc_settings as "partial_md5_checksum" on the first document opening.
											
										
										
											2023-10-15 04:47:09 +00:00
+								local md5 = require("ffi/sha2").md5
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								local _ = require("gettext")
-												DoubleSpinWidget, SpinWidget: add units, make usage more consistent (#9046)


											
										
										
											2022-05-23 22:25:50 +00:00
+								local C_ = _.pgettext
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								local T = BaseUtil.template
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								local lshift = bit.lshift
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								local rshift = bit.rshift
 								local band = bit.band
 								local bor = bit.bor
-												strip punctuations around word before searching
This should fix #1337.

											
										
										
											2015-02-01 09:40:34 +00:00
+								local util = {}
-												Show full ToC entry on hold (#6729)

Fix #6728
											
										
										
											2020-09-30 17:56:56 +00:00
+								---- Strips all punctuation marks and spaces from a string.
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								---- @string text the string to be stripped
 								---- @treturn string stripped text
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								function util.stripPunctuation(text)
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								    if not text then return end
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								    -- strip ASCII punctuation marks around text
 								    -- and strip any generic punctuation marks (U+2000 - U+206F) in the text
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								    return text:gsub("\226[\128-\131][\128-\191]", ''):gsub("^%p+", ''):gsub("%p+$", '')
-												strip punctuations around word before searching
This should fix #1337.

											
										
										
											2015-02-01 09:40:34 +00:00
+								end
-												Show full ToC entry on hold (#6729)

Fix #6728
											
										
										
											2020-09-30 17:56:56 +00:00
+								-- Various whitespace trimming helpers, from http://lua-users.org/wiki/CommonFunctions & http://lua-users.org/wiki/StringTrim
 								---- Remove leading whitespace from string.
 								---- @string s the string to be trimmed
 								---- @treturn string trimmed text
 								function util.ltrim(s)
 								    return (s:gsub("^%s*", ""))
 								end
 								---- Remove trailing whitespace from string.
 								---- @string s the string to be trimmed
 								---- @treturn string trimmed text
 								function util.rtrim(s)
 								    local n = #s
 								    while n > 0 and s:find("^%s", n) do
 								        n = n - 1
 								    end
 								    return s:sub(1, n)
 								end
 								---- Remove leading & trailing whitespace from string.
 								---- @string s the string to be trimmed
 								---- @treturn string trimmed text
 								function util.trim(s)
 								   local from = s:match"^%s*()"
 								   return from > #s and "" or s:match(".*%S", from)
 								end
-												WebDav: Simplify path manipulations to fix wonky heuristics (#12038)

This prevents the current directory from appearing in the listing.
											
										
										
											2024-06-20 17:46:03 +00:00
+								--[[
 								-- Trim leading & trailing character `c` from string `s`
 								function util.trim_char(s, c)
 								    local from = s:match"^"..c.."*()"
 								    return from > #s and "" or s:match(".*[^"..c.."]", from)
 								end
 								-- Trim trailing character `c` from string `s`
 								function util.rtrim_char(s, c)
 								    local n = #s
 								    while n > 0 and s:find("^"..c, n) do
 								        n = n - 1
 								    end
 								    return s:sub(1, n)
 								end
 								--]]
-												util: add some LDoc descriptions

											
										
										
											2017-02-25 17:52:34 +00:00
+								--[[--
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								Splits a string by a pattern
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								Lua doesn't have a string.split() function and most of the time
 								you don't really need it because string.gmatch() is enough.
 								However string.gmatch() has one significant disadvantage for me:
 								You can't split a string while matching both the delimited
 								strings and the delimiters themselves without tracking positions
 								and substrings. The gsplit function below takes care of
 								this problem.
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								Author: Peter Odding
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								License: MIT/X11
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
 								Source: <a href="http://snippets.luacode.org/snippets/String_splitting_130">http://snippets.luacode.org/snippets/String_splitting_130</a>
-												util: add some LDoc descriptions

											
										
										
											2017-02-25 17:52:34 +00:00
+								]]
 								----@string str string to split
 								----@param pattern the pattern to split against
 								----@bool capture
-												Merge various information into systemstat (#2764)

* Merge various information to systemstat
											
										
										
											2017-04-14 19:12:28 +00:00
+								----@bool capture_empty_entity
 								function util.gsplit(str, pattern, capture, capture_empty_entity)
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								    pattern = pattern and tostring(pattern) or '%s+'
 								    if (''):find(pattern) then
 								        error('pattern matches empty string!', 2)
 								    end
 								    return coroutine.wrap(function()
 								        local index = 1
 								        repeat
 								            local first, last = str:find(pattern, index)
 								            if first and last then
-												Merge various information into systemstat (#2764)

* Merge various information to systemstat
											
										
										
											2017-04-14 19:12:28 +00:00
+								                if index < first or (index == first and capture_empty_entity) then
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								                    coroutine.yield(str:sub(index, first - 1))
 								                end
 								                if capture then
 								                    coroutine.yield(str:sub(first, last))
 								                end
 								                index = last + 1
 								            else
 								                if index <= #str then
 								                    coroutine.yield(str:sub(index))
 								                end
 								                break
 								            end
 								        until index > #str
 								    end)
 								end
-												[feat] Add ReaderBack (#3821)

This implements a reasonable facsimile of going back on Android.

The back button first goes back in a history of visited pages.
When there's no history left, it closes the app.

Fixes #3816.
											
										
										
											2018-03-31 19:19:31 +00:00
+								--[[--
 								Compares values in two different tables.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								Source: <https://stackoverflow.com/a/32660766/2470572>
-												[feat] Add ReaderBack (#3821)

This implements a reasonable facsimile of going back on Android.

The back button first goes back in a history of visited pages.
When there's no history left, it closes the app.

Fixes #3816.
											
										
										
											2018-03-31 19:19:31 +00:00
+								]]
 								---- @param o1 Lua table
 								---- @param o2 Lua table
 								---- @bool ignore_mt
 								---- @treturn boolean
 								function util.tableEquals(o1, o2, ignore_mt)
 								    if o1 == o2 then return true end
 								    local o1Type = type(o1)
 								    local o2Type = type(o2)
 								    if o1Type ~= o2Type then return false end
 								    if o1Type ~= 'table' then return false end
 								    if not ignore_mt then
 								        local mt1 = getmetatable(o1)
 								        if mt1 and mt1.__eq then
-												Misc: Get rid of the legacy defaults.lua globals (#9546)

* This removes support for the following deprecated constants: `DTAP_ZONE_FLIPPING`, `DTAP_ZONE_BOOKMARK`, `DCREREADER_CONFIG_DEFAULT_FONT_GAMMA`
* The "Advanced settings" panel now highlights modified values in bold (think about:config in Firefox ;)).
* LuaData: Isolate global table lookup shenanigans, and fix a few issues in unused-in-prod codepaths.
* CodeStyle: Require module locals for Lua/C modules, too.
* ScreenSaver: Actually garbage collect our widget on close (ScreenSaver itself is not an instantiated object).
* DateTimeWidget: Code cleanups to ensure child widgets can be GC'ed.
											
										
										
											2022-09-27 23:10:50 +00:00
+								            -- Compare using built in method
-												[feat] Add ReaderBack (#3821)

This implements a reasonable facsimile of going back on Android.

The back button first goes back in a history of visited pages.
When there's no history left, it closes the app.

Fixes #3816.
											
										
										
											2018-03-31 19:19:31 +00:00
+								            return o1 == o2
 								        end
 								    end
 								    local keySet = {}
 								    for key1, value1 in pairs(o1) do
 								        local value2 = o2[key1]
 								        if value2 == nil or util.tableEquals(value1, value2, ignore_mt) == false then
 								            return false
 								        end
 								        keySet[key1] = true
 								    end
 								    for key2, _ in pairs(o2) do
 								        if not keySet[key2] then return false end
 								    end
 								    return true
 								end
-												[fix] GestureDetector: deep copies of events for multiswipes when rotated (#4728)

Fixes #4724.
											
										
										
											2019-03-04 18:01:01 +00:00
+								--[[--
 								Makes a deep copy of a table.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								Source: <https://stackoverflow.com/a/16077650/2470572>
-												[fix] GestureDetector: deep copies of events for multiswipes when rotated (#4728)

Fixes #4724.
											
										
										
											2019-03-04 18:01:01 +00:00
+								]]
 								---- @param o Lua table
 								---- @treturn Lua table
 								function util.tableDeepCopy(o, seen)
 								  -- `seen` maps already-visited source tables to their copies, so that
 								  -- shared and cyclic references are copied once and reused.
 								  seen = seen or {}
 								  if o == nil then return nil end
 								  local known = seen[o]
 								  if known then return known end
 								  -- Anything that is not a table (number, string, boolean, etc) is returned as-is.
 								  if type(o) ~= "table" then return o end
 								  local copy = {}
 								  -- Register the copy before recursing, so that self-references resolve to it.
 								  seen[o] = copy
 								  for k, v in next, o, nil do
 								    -- Keys are deep-copied too, not only values.
 								    copy[util.tableDeepCopy(k, seen)] = util.tableDeepCopy(v, seen)
 								  end
 								  -- The metatable is deep-copied as well, going through the same `seen` map.
 								  setmetatable(copy, util.tableDeepCopy(getmetatable(o), seen))
 								  return copy
 								end
-												doc: add documentation build infrastructure

											
										
										
											2016-02-04 18:24:39 +00:00
+								--- Returns number of keys in a table.
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								---- @param t Lua table
 								---- @treturn int number of keys in table t
 								function util.tableSize(t)
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								    local count = 0
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								    for _ in pairs(t) do count = count + 1 end
-												#1723 Add time to read into the status bar
Add new statuses:
TB - book time to read
TC - chapter time to read

Fix backward compatible in statistics plugin

											
										
										
											2015-11-27 15:13:01 +00:00
+								    return count
 								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Append all elements from t2 into t1.
 								---- @param t1 Lua table
 								---- @param t2 Lua table
-												feat(util): add array.Append helper

											
										
										
											2016-01-31 22:23:44 +00:00
+								function util.arrayAppend(t1, t2)
-												#1710 FR: Add support of statistics plugin for pdf

											
										
										
											2016-02-12 14:55:02 +00:00
+								    for _, v in ipairs(t2) do
-												feat(util): add array.Append helper

											
										
										
											2016-01-31 22:23:44 +00:00
+								        table.insert(t1, v)
 								    end
 								end
-												LuaSettings: Add a method to initialize a setting properly (#7371)

* LuaSettings/DocSettings: Updated readSetting API to allow proper initialization to default.
Use it to initialize tables, e.g., fixing corner-cases in readerFooter that could prevent settings from being saved.
(Fixes an issue reported on Gitter).
* LuaSettings/DocSettings: Add simpler API than the the flip* ones to toggle boolean settings.
* Update LuaSettings/DocSettigns usage throughout the codebase to use the dedicated boolean methods wher appropriate, and clean up some of the more mind-bending uses.
* FileChooser: Implement an extended default exclusion list (fix #2360)
* ScreenSaver: Refactor to avoid the pile of kludges this was threatening to become. Code should be easier to follow and use, and fallbacks now behave as expected (fix #4418).
											
										
										
											2021-03-06 21:44:18 +00:00
+								--[[--
 								Remove elements from an array, fast.
 								Swap & pop, like <http://lua-users.org/lists/lua-l/2013-11/msg00027.html> / <https://stackoverflow.com/a/28942022>, but preserving order.
 								c.f., <https://stackoverflow.com/a/53038524>
 								@table t Lua array to filter
 								@func keep_cb Filtering callback. Takes three arguments: table, index, new index. Returns true to *keep* the item. See link above for potential uses of the third argument.
 								@usage
 								local foo = { "a", "b", "c", "b", "d", "e" }
 								local function drop_b(t, i, j)
 								    -- Discard any item with value "b"
 								    return t[i] ~= "b"
 								end
 								util.arrayRemove(foo, drop_b)
 								]]
 								function util.arrayRemove(t, keep_cb)
 								    -- `dest` is the slot the next kept item will land in.
 								    local dest = 1
 								    -- The length is sampled once, before any slot gets cleared.
 								    for src = 1, #t do
 								        if not keep_cb(t, src, dest) then
 								            -- Rejected by the callback: drop the item.
 								            t[src] = nil
 								        else
 								            if src ~= dest then
 								                -- Slide the kept item down into the free slot, and vacate its old one.
 								                t[dest] = t[src]
 								                t[src] = nil
 								            end
 								            dest = dest + 1
 								        end
 								    end
 								    -- The same table is handed back, filtered in-place.
 								    return t
 								end
-												Paged documents: rework zoom options (#6885)

- Move zoom options from top menu to bottom config
- Add option to manually define zoom (relative to
  page width) and overlap (in percent)
- Add options to zoom to columns or rows, possibly
  with overlap. Add panning direction options when
  page forward in these modes
											
										
										
											2020-11-28 16:18:57 +00:00
+								--- Reverse array elements in-place in table t
-												[RTL UI] update low-level widgets to handle mirroring

These updated low-level widgets will handle 90%
of the needed UI mirroring.

											
										
										
											2019-12-06 21:55:37 +00:00
+								---- @param t Lua table
 								function util.arrayReverse(t)
 								    local size = #t
 								    -- Swap each element of the first half with its mirror in the second half;
 								    -- the middle element of an odd-sized array stays where it is.
 								    for left = 1, math.floor(size / 2) do
 								        local right = size - left + 1
 								        t[left], t[right] = t[right], t[left]
 								    end
 								end
-												Paged documents: rework zoom options (#6885)

- Move zoom options from top menu to bottom config
- Add option to manually define zoom (relative to
  page width) and overlap (in percent)
- Add options to zoom to columns or rows, possibly
  with overlap. Add panning direction options when
  page forward in these modes
											
										
										
											2020-11-28 16:18:57 +00:00
+								--- Test whether t contains a value equal to v
 								--- (or such a value that callback returns true),
 								--- and if so, return the index.
 								---- @param t Lua table
 								---- @param v
-												LuaSettings: Add a method to initialize a setting properly (#7371)

* LuaSettings/DocSettings: Updated readSetting API to allow proper initialization to default.
Use it to initialize tables, e.g., fixing corner-cases in readerFooter that could prevent settings from being saved.
(Fixes an issue reported on Gitter).
* LuaSettings/DocSettings: Add simpler API than the the flip* ones to toggle boolean settings.
* Update LuaSettings/DocSettigns usage throughout the codebase to use the dedicated boolean methods wher appropriate, and clean up some of the more mind-bending uses.
* FileChooser: Implement an extended default exclusion list (fix #2360)
* ScreenSaver: Refactor to avoid the pile of kludges this was threatening to become. Code should be easier to follow and use, and fallbacks now behave as expected (fix #4418).
											
										
										
											2021-03-06 21:44:18 +00:00
+								---- @func callback(v1, v2)
-												Paged documents: rework zoom options (#6885)

- Move zoom options from top menu to bottom config
- Add option to manually define zoom (relative to
  page width) and overlap (in percent)
- Add options to zoom to columns or rows, possibly
  with overlap. Add panning direction options when
  page forward in these modes
											
										
										
											2020-11-28 16:18:57 +00:00
+								function util.arrayContains(t, v, cb)
 								    -- Without a callback, fall back to plain equality.
 								    cb = cb or function(v1, v2) return v1 == v2 end
 								    -- ipairs only walks the array part, and stops at the first nil.
 								    for _k, _v in ipairs(t) do
 								        -- The callback gets the array element first, then the value being looked for.
 								        if cb(_v, v) then
 								            -- First match wins: return its index.
 								            return _k
 								        end
 								    end
 								    -- No match: the result is false, not nil.
 								    return false
 								end
-												Revamp "flash_ui" handling (#7118)

* Wherever possible, do an actual dumb invert on the Screen BB instead of repainting the widget, *then* inverting it (which is what the "invert" flag does).
* Instead of playing with nextTick/tickAfterNext delays, explicitly fence stuff with forceRePaint
* And, in the few cases where said Mk. 7 quirk kicks in, make the fences more marked by using a well-placed WAIT_FOR_UPDATE_COMPLETE

* Fix an issue in Button where show/hide & enable/disable where actually all toggles, which meant that duplicate calls or timing issues would do the wrong thing. (This broke dimming some icons, and mistakenly dropped the background from FM chevrons, for example).
* Speaking of, fix Button's hide/show to actually restore the background properly (there was a stupid typo in the variable name)
* Still in Button, fix the insanity of the double repaint on rounded buttons. Turns out it made sense, after all (and was related to said missing background, and bad interaction with invert & text with no background).
* KeyValuePage suffered from a similar issue with broken highlights (all black) because of missing backgrounds.
* In ConfigDialog, only instanciate IconButtons once (because every tab switch causes a full instantiation; and the initial display implies a full instanciation and an initial tab switch). Otherwise, both instances linger, and catch taps, and as such, double highlights.
* ConfigDialog: Restore the "don't repaint ReaderUI" when switching between similarly sized tabs (re #6131). I never could reproduce that on eInk, and I can't now on the emulator, so I'm assuming @poire-z fixed it during the swap to SVG icons.
* KeyValuePage: Only instanciate Buttons once (again, this is a widget that goes through a full init every page). Again, caused highlight/dimming issues because buttons were stacked.
* Menu: Ditto.
* TouchMenu: Now home of the gnarliest unhilight heuristics, because of the sheer amount of different things that can happen (and/or thanks to stuff not flagged covers_fullscreen properly ;p).

* Bump base
https://github.com/koreader/koreader-base/pull/1280
https://github.com/koreader/koreader-base/pull/1282
https://github.com/koreader/koreader-base/pull/1283
https://github.com/koreader/koreader-base/pull/1284

* Bump android-luajit-launcher
https://github.com/koreader/android-luajit-launcher/pull/284
https://github.com/koreader/android-luajit-launcher/pull/285
https://github.com/koreader/android-luajit-launcher/pull/286
https://github.com/koreader/android-luajit-launcher/pull/287
											
										
										
											2021-01-10 00:51:09 +00:00
+								--- Test whether array t contains a reference to array n (at any depth at or below m)
 								---- @param t Lua table (array only)
 								---- @param n Lua table (array only)
 								---- @int m Max nesting level
 								function util.arrayReferences(t, n, m, l)
 								    -- m: max nesting level (defaults to 15); l: current nesting level (starts at 0).
 								    m = m or 15
 								    l = l or 0
 								    -- Too deep, or not something we can walk into: no match on this branch.
 								    if l > m or type(t) ~= "table" then
 								        return false
 								    end
 								    if t == n then
 								        -- Found it: also report the depth at which it was found.
 								        return true, l
 								    end
 								    -- Only the array part is searched, one nesting level deeper.
 								    for _, child in ipairs(t) do
 								        local found, depth = util.arrayReferences(child, n, m, l + 1)
 								        if found then
 								            return found, depth
 								        end
 								    end
 								    return false
 								end
-												util: Add a set of binary searches implementation (#9614)


											
										
										
											2022-10-10 13:16:16 +00:00
+								-- A set of binary search implementations for plain arrays.
 								-- Should be easy to tweak for arrays of hashes (c.f., UIManager:schedule),
 								-- or arrays sorted in descending order (c.f., ReadHistory).
 								-- refs: https://en.wikipedia.org/wiki/Binary_search_algorithm
 								--       https://rosettacode.org/wiki/Binary_search
 								--- Binary search for `value` in an `array` *sorted* in ascending order.
 								---- @param array Lua table (array only, sorted, ascending, every value must match the type of `value` and support comparison operators)
 								---- @param value
 								---- @return int index of value in array, or a (nil, insertion index) tuple if value was not found.
 								function util.bsearch(array, value)
 								    local lo, hi = 1, #array
 								    while lo <= hi do
 								        -- Loop invariants: array[i] < value for every i < lo,
 								        --                  array[i] > value for every i > hi.
 								        -- Halve the sum of the bounds to get the midpoint.
 								        local mid = bit.rshift(lo + hi, 1)
 								        local probe = array[mid]
 								        if probe > value then
 								            hi = mid - 1
 								        elseif probe < value then
 								            lo = mid + 1
 								        else
 								            return mid
 								        end
 								    end
 								    -- Not found: lo is where value would need to be inserted to keep the array sorted.
 								    return nil, lo
 								end
 								--- Leftmost insertion binary search for `value` in an `array` *sorted* in ascending order.
 								---- @param array Lua table (array only, sorted, ascending, every value must match the type of `value` and support comparison operators)
 								---- @param value
 								---- @return int leftmost insertion index of value in array.
 								function util.bsearch_left(array, value)
 								    local lo, hi = 1, #array
 								    while lo <= hi do
 								        -- Loop invariants: array[i] < value for every i < lo,
 								        --                  array[i] >= value for every i > hi.
 								        local mid = bit.rshift(lo + hi, 1)
 								        local probe = array[mid]
 								        if probe >= value then
 								            -- Equal items are pushed to the right of the result, hence "leftmost".
 								            hi = mid - 1
 								        else
 								            lo = mid + 1
 								        end
 								    end
 								    return lo
 								end
 								--- Rightmost insertion binary search for `value` in an `array` *sorted* in ascending order.
 								---- @param array Lua table (array only, sorted, ascending, every value must match the type of `value` and support comparison operators)
 								---- @param value
 								---- @return int rightmost insertion index of value in array.
 								function util.bsearch_right(array, value)
 								    local lo, hi = 1, #array
 								    while lo <= hi do
 								        -- Loop invariants: array[i] <= value for every i < lo,
 								        --                  array[i] > value for every i > hi.
 								        local mid = bit.rshift(lo + hi, 1)
 								        local probe = array[mid]
 								        if probe > value then
 								            hi = mid - 1
 								        else
 								            -- Equal items are kept to the left of the result, hence "rightmost".
 								            lo = mid + 1
 								        end
 								    end
 								    return lo
 								end
-												Some BookStatus QoL tweaks (#5100)

* Make the cover thumbnail respect the cover's AR in the widget
* Add a "Mark as read/unread" button in the FM's longpress menu.
* Make sure the cover_info cache is wiped if necessary (sidecar purge/BookInfo cache clear).

											
										
										
											2019-06-28 02:46:16 +00:00
+								-- Merge t2 into t1, overwriting existing elements if they already exist
 								-- Probably not safe with nested tables (c.f., https://stackoverflow.com/q/1283388)
 								---- @param t1 Lua table
 								---- @param t2 Lua table
 								function util.tableMerge(t1, t2)
 								    -- Shallow merge into t1: values are assigned as-is, so a nested table
 								    -- ends up shared between t1 and t2 rather than merged recursively.
 								    for key, value in pairs(t2) do
 								        t1[key] = value
 								    end
 								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--[[--
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								Gets last index of character in string (i.e., strrchr)
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
 								Returns the index within this string of the last occurrence of the specified character
 								or -1 if the character does not occur.
 								To find . you need to escape it.
 								]]
 								---- @string string
 								---- @string ch
 								---- @treturn int last occurrence or -1 if not found
-												#1710 FR: Add support of statistics plugin for pdf

											
										
										
											2016-02-12 14:55:02 +00:00
+								function util.lastIndexOf(string, ch)
 								    -- NOTE: the `string` parameter shadows the global string library inside this function.
 								    -- `ch` is spliced into the Lua pattern as-is (hence the need to escape magic characters).
 								    -- The greedy ".*" makes the match end at the last occurrence,
 								    -- and the empty capture "()" yields the position right after it.
 								    local i = string:match(".*" .. ch .. "()")
 								    -- The capture points one past the end of the match, so step back by one.
 								    if i == nil then return -1 else return i - 1 end
 								end
-												util.utf8: improve CJK character detection

Previously the CJK character detection defined only characters in the
range U+4000..U+AFFF as "CJK characters". This excludes an incredibly
large number of CJK characters within the BMP, let alone the whole two
planes dedicated to rarer CJK characters (the SIP and TIP). As a result,
a very large number of Chinese, Japanese, and Korean characters were not
detected as being CJK characters.

While slightly less elegant-looking, it is far more accurate to compute
the codepoint from the utf8 character and then see if it falls within
one of the defined CJK blocks. This is not future-proof against future
CJK ideograph extensions in future Unicode versions, but there is no
real way to accurately predict such changes so this is the best we can
do without accidentally treating characters explicitily defined as being
non-CJK in Unicode as CJK.

While we're at it, copy Lua 5.3's utf8.charpattern constant definition
so that we can more easily write utf8 iterators with string.gmatch (at
least in the interim until there is a rework of utf8 handling in
KOReader and everything is rebuilt on top of utf8proc).

Some unit tests are added for Korean and Japanese text, and the existing
unit tests needed a minor adjustment to handle the fact that
isSplittable now correctly detects CJK punctuation as a character to
compare against the forbidden split rules.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>

											
										
										
											2021-10-23 10:12:38 +00:00
+								--- Pattern which matches a single well-formed UTF-8 character, including
 								--- theoretical >4-byte extensions.
 								-- Taken from <https://www.lua.org/manual/5.4/manual.html#pdf-utf8.charpattern>
 								-- One lead byte (NUL via %z, \1-\127, or \194-\253), followed by any number of continuation bytes (\128-\191).
 								util.UTF8_CHAR_PATTERN = '[%z\1-\127\194-\253][\128-\191]*'
-												Fix folder path truncation in filemanager with utf8 chars (#3599)



											
										
										
											2018-01-13 23:05:05 +00:00
+								--- Reverse the individual greater-than-single-byte characters
 								-- @string text the string to reverse
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								-- Taken from <https://github.com/blitmap/lua-utf8-simple#utf8reverses>
-												Fix folder path truncation in filemanager with utf8 chars (#3599)



											
										
										
											2018-01-13 23:05:05 +00:00
+								function util.utf8Reverse(text)
-												util.utf8: improve CJK character detection

Previously the CJK character detection defined only characters in the
range U+4000..U+AFFF as "CJK characters". This excludes an incredibly
large number of CJK characters within the BMP, let alone the whole two
planes dedicated to rarer CJK characters (the SIP and TIP). As a result,
a very large number of Chinese, Japanese, and Korean characters were not
detected as being CJK characters.

While slightly less elegant-looking, it is far more accurate to compute
the codepoint from the utf8 character and then see if it falls within
one of the defined CJK blocks. This is not future-proof against future
CJK ideograph extensions in future Unicode versions, but there is no
real way to accurately predict such changes so this is the best we can
do without accidentally treating characters explicitily defined as being
non-CJK in Unicode as CJK.

While we're at it, copy Lua 5.3's utf8.charpattern constant definition
so that we can more easily write utf8 iterators with string.gmatch (at
least in the interim until there is a rework of utf8 handling in
KOReader and everything is rebuilt on top of utf8proc).

Some unit tests are added for Korean and Japanese text, and the existing
unit tests needed a minor adjustment to handle the fact that
isSplittable now correctly detects CJK punctuation as a character to
compare against the forbidden split rules.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>

											
										
										
											2021-10-23 10:12:38 +00:00
+								    text = text:gsub(util.UTF8_CHAR_PATTERN, function (c) return #c > 1 and c:reverse() end)
-												Fix folder path truncation in filemanager with utf8 chars (#3599)



											
										
										
											2018-01-13 23:05:05 +00:00
+								    return text:reverse()
 								end
-												add cursor functionality

											
										
										
											2016-04-21 14:13:10 +00:00
-												Doc: miscellaneous improvements.

											
										
										
											2016-12-13 16:06:02 +00:00
--- Splits string into a list of UTF-8 characters.
--
-- Also accepts WTF-8 input (https://en.wikipedia.org/wiki/UTF-8#WTF-8): a
-- high surrogate immediately followed by a low surrogate is merged into the
-- single character they encode. Unpaired surrogates are kept as they are.
---- @string text the string to be split (nil gives an empty list).
---- @treturn table list of UTF-8 chars
function util.splitToChars(text)
    local tab = {}
    if text == nil then
        return tab
    end
    local prevcharcode, charcode = 0
    -- Supports WTF-8 : https://en.wikipedia.org/wiki/UTF-8#WTF-8
    -- a superset of UTF-8, that includes UTF-16 surrogates
    -- in UTF-8 bytes (forbidden in well-formed UTF-8).
    -- We may get that from bad producers or converters.
    -- (luajson, used to decode Wikipedia API json, will not correctly decode
    -- this sample: <span lang=\"got\">\ud800\udf45</span> : single Unicode
    -- char https://www.compart.com/en/unicode/U+10345 and will give us
    -- "\xed\xa0\x80\xed\xbd\x85" as UTF8, instead of the correct "\xf0\x90\x8d\x85")
    -- From http://www.unicode.org/faq/utf_bom.html#utf16-1
    --   Surrogates are code points from two special ranges of
    --   Unicode values, reserved for use as the leading, and
    --   trailing values of paired code units in UTF-16. Leading,
    --   also called high, surrogates are from D800 to DBFF, and
    --   trailing, or low, surrogates are from DC00 to DFFF. They
    --   are called surrogates, since they do not represent
    --   characters directly, but only as a pair.
    local hi_surrogate
    local hi_surrogate_uchar
    for uchar in text:gmatch(util.UTF8_CHAR_PATTERN) do
        charcode = BaseUtil.utf8charcode(uchar)
        -- (not sure why we need this prevcharcode check; we could get
        -- charcode=nil with invalid UTF-8, but should we then really
        -- ignore the following charcode ?)
        if prevcharcode then -- utf8
            if charcode and charcode >= 0xD800 and charcode <= 0xDBFF then
                if hi_surrogate then -- previous unconsumed one, add it even if invalid
                    table.insert(tab, hi_surrogate_uchar)
                end
                hi_surrogate = charcode
                hi_surrogate_uchar = uchar -- will be added if not followed by low surrogate
            elseif hi_surrogate and charcode and charcode >= 0xDC00 and charcode <= 0xDFFF then
                -- low surrogate following a high surrogate, good, let's make them a single char
                charcode = lshift((hi_surrogate - 0xD800), 10) + (charcode - 0xDC00) + 0x10000
                table.insert(tab, util.unicodeCodepointToUtf8(charcode))
                hi_surrogate = nil
            else
                if hi_surrogate then -- previous unconsumed one, add it even if invalid
                    table.insert(tab, hi_surrogate_uchar)
                end
                hi_surrogate = nil
                table.insert(tab, uchar)
            end
        end
        prevcharcode = charcode
    end
    -- A high surrogate that was the last thing seen has nothing left to pair
    -- with: add it as is, like any other unconsumed one, instead of silently
    -- dropping the end of the text.
    if hi_surrogate then
        table.insert(tab, hi_surrogate_uchar)
    end
    return tab
end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
--- Tests whether c is a CJK character
---- @string c a single UTF-8 encoded character
---- @treturn boolean true if CJK (false when c cannot be decoded)
function util.isCJKChar(c)
    -- Smallest CJK codepoint is 0x1100 which requires at least 3 utf8 bytes to
    -- encode (U+07FF is the largest codepoint that can be represented in 2
    -- bytes with utf8). So if the character is shorter than 3 bytes it's
    -- definitely not CJK and no need to decode it.
    if #c < 3 then
        return false
    end
    local code = BaseUtil.utf8charcode(c)
    -- We may get no codepoint at all with invalid UTF-8: that is not a CJK
    -- character, and comparing nil with a number below would raise an error.
    if not code then
        return false
    end
    -- The weird bracketing is intentional -- we use the lowest possible
    -- codepoint as a shortcut so if the codepoint is below U+1100 we
    -- immediately return false.
    return -- BMP (Plane 0)
            code >=  0x1100 and (code <=  0x11FF  or -- Hangul Jamo
           (code >=  0x2E80 and  code <=  0x9FFF) or -- Numerous CJK Blocks (NB: has some gaps)
           (code >=  0xA960 and  code <=  0xA97F) or -- Hangul Jamo Extended-A
           (code >=  0xAC00 and  code <=  0xD7AF) or -- Hangul Syllables
           (code >=  0xD7B0 and  code <=  0xD7FF) or -- Hangul Jamo Extended-B
           (code >=  0xF900 and  code <=  0xFAFF) or -- CJK Compatibility Ideographs
           (code >=  0xFE30 and  code <=  0xFE4F) or -- CJK Compatibility Forms
           (code >=  0xFF00 and  code <=  0xFFEF) or -- Halfwidth and Fullwidth Forms
           -- SIP (Plane 2)
           (code >= 0x20000 and  code <= 0x2A6DF) or -- CJK Unified Ideographs Extension B
           (code >= 0x2A700 and  code <= 0x2B73F) or -- CJK Unified Ideographs Extension C
           (code >= 0x2B740 and  code <= 0x2B81F) or -- CJK Unified Ideographs Extension D
           (code >= 0x2B820 and  code <= 0x2CEAF) or -- CJK Unified Ideographs Extension E
           (code >= 0x2CEB0 and  code <= 0x2EBEF) or -- CJK Unified Ideographs Extension F
           (code >= 0x2F800 and  code <= 0x2FA1F) or -- CJK Compatibility Ideographs Supplement
           -- TIP (Plane 3)
           (code >= 0x30000 and  code <= 0x3134F))   -- CJK Unified Ideographs Extension G
end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
--- Tests whether str contains CJK characters
---- @string str the string to scan
---- @treturn boolean true if at least one character of str is CJK
function util.hasCJKChar(str)
    -- Pull characters one at a time and stop at the first CJK one.
    local next_uchar = str:gmatch(util.UTF8_CHAR_PATTERN)
    local uchar = next_uchar()
    while uchar do
        if util.isCJKChar(uchar) then
            return true
        end
        uchar = next_uchar()
    end
    return false
end
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
--- Split texts into a list of words, spaces and punctuation marks.
---- @string text text to split
---- @treturn table list of words, spaces and punctuation marks
function util.splitToWords(text)
    local pieces = {}
    -- First pass: cut on runs of spaces and punctuation, keeping those runs
    -- in the result (third argument of util.gsplit).
    for chunk in util.gsplit(text, "[%s%p]+", true) do
        if not util.hasCJKChar(chunk) then
            pieces[#pieces + 1] = chunk
        else
            -- The chunk contains CJK: split all non-ASCII characters
            -- separately (FIXME ideally we would split only the CJK
            -- characters, but you cannot define CJK characters trivially
            -- with a byte-only Lua pattern).
            for part in util.gsplit(chunk, "[\192-\255][\128-\191]+", true) do
                pieces[#pieces + 1] = part
            end
        end
    end
    return pieces
end
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
-- A space is not a good place to wrap a line when the character right after
-- it is one of these punctuation marks: e.g. "word :" or "word )".
-- (In French, a colon is preceded by a non-breaking space, and the line
-- should not be wrapped there.)
local non_splittable_space_tailers = ":;,.!?)]}$%=-+*/|<>»”"
-- Likewise when the character right before the space is one of these.
local non_splittable_space_leaders = "([{$=-+*/|<>«“"

-- CJK text has similar rules. Taken from :
-- https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages
-- Each table below is a plain string: the per-language lists joined together.

-- Characters not permitted at the start of a line.
local cjk_non_splittable_tailers =
    -- Simplified Chinese
    "!%),.:;?]}¢°·’\"†‡›℃∶、。〃〆〕〗〞﹚﹜！＂％＇），．：；？！］｝～" ..
    -- Traditional Chinese
    "!),.:;?]}¢·–—’\"•、。〆〞〕〉》」︰︱︲︳﹐﹑﹒﹓﹔﹕﹖﹘﹚﹜！），．：；？︶︸︺︼︾﹀﹂﹗］｜｝､" ..
    -- Japanese
    ")]｝〕〉》」』】〙〗〟’\"｠»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。." ..
    -- Korean
    "!%),.:;?]}¢°’\"†‡℃〆〈《「『〕！％），．：；？］｝"

-- Characters not permitted at the end of a line.
local cjk_non_splittable_leaders =
    -- Simplified Chinese
    "$(£¥·‘\"〈《「『【〔〖〝﹙﹛＄（．［｛￡￥" ..
    -- Traditional Chinese
    "([{£¥‘\"‵〈《「『〔〝︴﹙﹛（｛︵︷︹︻︽︿﹁﹃﹏" ..
    -- Japanese
    "([｛〔〈《「『【〘〖〝‘\"｟«" ..
    -- Korean
    "$([{£¥‘\"々〇〉》」〔＄（［｛｠￥￦#"

-- Characters that are never a split point themselves.
local cjk_non_splittable =
    -- Japanese
    "—…‥〳〴〵"
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
--- Test whether a string can be separated by this char for multi-line rendering.
-- Optional next or prev chars may be provided to help make the decision
---- @string c the candidate split character
---- @string next_c (optional) the character following c
---- @string prev_c (optional) the character preceding c
---- @treturn boolean true if splittable, false if not
function util.isSplittable(c, next_c, prev_c)
    -- Pick the pair of "must stick together" lists that applies to c.
    local forbidden_after, forbidden_before
    if util.isCJKChar(c) then
        -- a CJKChar is a word in itself, and so is splittable,
        -- except a few of them
        if cjk_non_splittable:find(c, 1, true) then
            return false
        end
        forbidden_after = cjk_non_splittable_tailers
        forbidden_before = cjk_non_splittable_leaders
    elseif c == " " then
        -- we only split on a space (so a punctuation mark sticks to prev word)
        forbidden_after = non_splittable_space_tailers
        forbidden_before = non_splittable_space_leaders
    else
        -- otherwise, not splittable
        return false
    end

    -- if next_c or prev_c is provided, we can make a better decision
    if next_c and forbidden_after:find(next_c, 1, true) then
        -- followed by a char that should not end up at start of line
        return false
    end
    if prev_c and forbidden_before:find(prev_c, 1, true) then
        -- preceded by a char that should not end up at end of line
        return false
    end
    -- we can split on this char
    return true
end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Gets filesystem type of a path.
 								--
 								-- Checks if the path occurs in <code>/proc/mounts</code>
 								---- @string path an absolute path
 								---- @treturn string filesystem type
-												Sanitize filename for vfat, fix #2433 (#2464)


											
										
										
											2017-01-10 00:05:15 +00:00
+								function util.getFilesystemType(path)
 								    local mounts = io.open("/proc/mounts", "r")
 								    if not mounts then return nil end
 								    local type
-												Cleanup eye-gouging madness around io.read calls (#7149)

* Don't reinvent the wheel when reading a one-line int or string from sysfs

* Simplify a whole other bunch of read calls
											
										
										
											2021-01-16 03:41:46 +00:00
+								    for line in mounts:lines() do
-												Sanitize filename for vfat, fix #2433 (#2464)


											
										
										
											2017-01-10 00:05:15 +00:00
+								        local mount = {}
 								        for param in line:gmatch("%S+") do table.insert(mount, param) end
 								        if string.match(path, mount[2]) then
 								            type = mount[3]
 								            if mount[2] ~= '/' then
 								                break
 								            end
 								        end
 								    end
 								    mounts:close()
 								    return type
-												fontlist: Cache categorized font info (for mupdf) (#6786)

Info about each face (l10n, name, family, style etc) is
now cached offline, so fonts can be queried ahead of time.

Fixes https://github.com/koreader/koreader/issues/6763
											
										
										
											2020-10-20 04:30:41 +00:00
+								end
-												ImageViewer: Clamp zoom factor to sane values (#9529)

Should avoid egregious values that would potentially alloc insanely large buffers (and likely fail to do so).

In the process, tweak the scale_factor computations when zooming so as to produce slightly less annoying behavior.
											
										
										
											2022-09-19 21:25:18 +00:00
+								-- For documentation purposes, here's a battle-tested shell version of calcFreeMem,
 								-- our simplified Lua version follows...
 								--[[
 								    if grep -q 'MemAvailable' /proc/meminfo ; then
 								        # We'll settle for 85% of available memory to leave a bit of breathing room
 								        tmpfs_size="$(awk '/MemAvailable/ {printf "%d", $2 * 0.85}' /proc/meminfo)"
 								    elif grep -q 'Inactive(file)' /proc/meminfo ; then
 								        # Basically try to emulate the kernel's computation, c.f., https://unix.stackexchange.com/q/261247
 								        # Again, 85% of available memory
 								        tmpfs_size="$(awk -v low=$(grep low /proc/zoneinfo | awk '{k+=$2}END{printf "%d", k}') \
 								            '{a[$1]=$2}
 								            END{
 								                printf "%d", (a["MemFree:"]+a["Active(file):"]+a["Inactive(file):"]+a["SReclaimable:"]-(12*low))*0.85;
 								            }' /proc/meminfo)"
 								    else
 								        # Ye olde crap workaround of Free + Buffers + Cache...
 								        # Take it with a grain of salt, and settle for 80% of that...
 								        tmpfs_size="$(awk \
 								            '{a[$1]=$2}
 								            END{
 								                printf "%d", (a["MemFree:"]+a["Buffers:"]+a["Cached:"])*0.80;
 								            }' /proc/meminfo)"
 								    fi
 								--]]
 								--- Computes the currently available memory.
 								-- Parses <code>/proc/meminfo</code>, preferring the kernel's own MemAvailable estimate and
 								-- falling back to MemFree + Buffers + Cached when that field isn't reported.
 								-- Values are read in kB and returned in bytes, after applying a safety margin.
 								---- @treturn tuple of ints: memavailable, memtotal (or nil, nil on unsupported platforms,
 								-- or when the fields we need could not be parsed).
 								function util:calcFreeMem()
 								    local meminfo = io.open("/proc/meminfo", "r")
 								    if not meminfo then
 								        -- Not on Linux?
 								        return nil, nil
 								    end

 								    local memtotal, memfree, memavailable, buffers, cached
 								    for line in meminfo:lines() do
 								        -- Only purely alphabetic keys followed by a kB value are of interest here,
 								        -- e.g. "MemTotal:       16384 kB". Anything else yields key == nil and is skipped.
 								        local key, value = line:match("^(%a+):%s-(%d+) kB")
 								        if key == "MemTotal" then
 								            memtotal = memtotal or tonumber(value)
 								        elseif key == "MemFree" then
 								            memfree = memfree or tonumber(value)
 								        elseif key == "MemAvailable" then
 								            -- Best case scenario, we're done :)
 								            memavailable = tonumber(value)
 								            break
 								        elseif key == "Buffers" then
 								            buffers = buffers or tonumber(value)
 								        elseif key == "Cached" then
 								            -- Ought to be the last entry we care about, we're done
 								            cached = tonumber(value)
 								            break
 								        end
 								    end
 								    meminfo:close()

 								    if not memtotal then
 								        -- Without a total we cannot honor the return contract; report "unsupported"
 								        -- instead of crashing on nil arithmetic.
 								        return nil, nil
 								    end

 								    if memavailable then
 								        -- Leave a bit of margin, and report 85% of that...
 								        return math.floor(memavailable * 0.85) * 1024, memtotal * 1024
 								    end

 								    if memfree and buffers and cached then
 								        -- Crappy Free + Buffers + Cache version, because the zoneinfo approach is a tad hairy...
 								        -- So, leave an even larger margin, and only report 75% of that...
 								        return math.floor((memfree + buffers + cached) * 0.75) * 1024, memtotal * 1024
 								    end

 								    -- Neither estimate could be computed from what meminfo gave us.
 								    return nil, nil
 								end
-												fontlist: Cache categorized font info (for mupdf) (#6786)

Info about each face (l10n, name, family, style etc) is
now cached offline, so fonts can be queried ahead of time.

Fixes https://github.com/koreader/koreader/issues/6763
											
										
										
											2020-10-20 04:30:41 +00:00
+								--- Recursively scan directory for files inside
 								-- @string path
-												LuaSettings: Add a method to initialize a setting properly (#7371)

* LuaSettings/DocSettings: Updated readSetting API to allow proper initialization to default.
Use it to initialize tables, e.g., fixing corner-cases in readerFooter that could prevent settings from being saved.
(Fixes an issue reported on Gitter).
* LuaSettings/DocSettings: Add simpler API than the flip* ones to toggle boolean settings.
* Update LuaSettings/DocSettings usage throughout the codebase to use the dedicated boolean methods where appropriate, and clean up some of the more mind-bending uses.
* FileChooser: Implement an extended default exclusion list (fix #2360)
* ScreenSaver: Refactor to avoid the pile of kludges this was threatening to become. Code should be easier to follow and use, and fallbacks now behave as expected (fix #4418).
											
										
										
											2021-03-06 21:44:18 +00:00
+								-- @func callback(fullpath, name, attr)
-												fontlist: Cache categorized font info (for mupdf) (#6786)

Info about each face (l10n, name, family, style etc) is
now cached offline, so fonts can be queried ahead of time.

Fixes https://github.com/koreader/koreader/issues/6763
											
										
										
											2020-10-20 04:30:41 +00:00
+								function util.findFiles(dir, cb)
 								    local function scan(current)
 								        local ok, iter, dir_obj = pcall(lfs.dir, current)
 								        if not ok then return end
 								        for f in iter, dir_obj do
 								            local path = current.."/"..f
-												util.findFiles: Don't die on broken symlinks (#7829)


											
										
										
											2021-06-11 11:25:53 +00:00
+								            -- lfs can return nil here, as it will follow symlinks!
 								            local attr = lfs.attributes(path) or {}
-												fontlist: Cache categorized font info (for mupdf) (#6786)

Info about each face (l10n, name, family, style etc) is
now cached offline, so fonts can be queried ahead of time.

Fixes https://github.com/koreader/koreader/issues/6763
											
										
										
											2020-10-20 04:30:41 +00:00
+								            if attr.mode == "directory" then
 								                if f ~= "." and f ~= ".." then
 								                    scan(path)
 								                end
-												fontlist: Skip fonts with missing family_name (#6820)

* Also don't trip on pipes
											
										
										
											2020-10-24 11:23:05 +00:00
+								            elseif attr.mode == "file" or attr.mode == "link" then
-												fontlist: Cache categorized font info (for mupdf) (#6786)

Info about each face (l10n, name, family, style etc) is
now cached offline, so fonts can be queried ahead of time.

Fixes https://github.com/koreader/koreader/issues/6763
											
										
										
											2020-10-20 04:30:41 +00:00
+								                cb(path, f, attr)
 								            end
 								        end
 								    end
 								    scan(dir)
-												Sanitize filename for vfat, fix #2433 (#2464)


											
										
										
											2017-01-10 00:05:15 +00:00
+								end
-												ReaderDictionary: add info message about installing dictionaries

Fixes #2816.

											
										
										
											2017-04-26 06:12:25 +00:00
+								--- Checks if directory is empty.
 								---- @string path
 								---- @treturn bool
 								function util.isEmptyDir(path)
-												[fix] util.isEmptyDir() crashes on non-existent dir

lfs.dir will crash rather than return nil if directory doesn't exist

Proper fix for 9f5e44670183d046054f87b4820bf5d1b35b3b12 which is nothing but a workaround. However, I do think creating more of those data dirs automatically is more user-friendly because otherwise Android users will have to look it up or guess.

											
										
										
											2017-08-12 13:01:59 +00:00
+								    -- lfs.dir will crash rather than return nil if directory doesn't exist O_o
 								    local ok, iter, dir_obj = pcall(lfs.dir, path)
 								    if not ok then return end
 								    for filename in iter, dir_obj do
-												ReaderDictionary: add info message about installing dictionaries

Fixes #2816.

											
										
										
											2017-04-26 06:12:25 +00:00
+								        if filename ~= '.' and filename ~= '..' then
 								            return false
 								        end
 								    end
 								    return true
 								end
-												unified calibre plugin (#6177)

joins calibre metadata search and calibre wireless connections into a single plugin

search metadata changes:

- search directly into calibre metadata files.
- search can be performed on more than one library (configurable from a menu)
- device scans now find all calibre libraries under a given root
- search options can be configured from a menu. (case sensitive, find by title, author and path)
- removed legacy global variables.
- *option* to search from the reader
- *option* to generate a cache of books for faster searches.

calibre wireless connection changes:

- keep track of books in a library (includes pruning books from calibre metadata if the file was deleted locally)
- remove files on device from calibre
- support password protected connections
- FM integration: if we're in the inbox dir it will be updated each time a book is added or deleted.
- disconnect when requested by calibre, available on newer calibre versions (+4.17)
- remove unused opcodes.
- better report of client name, version and device id
- free disk space checks for all calibre versions
- bump supported extensions to match what KOReader can handle. Users can override this with their own list of extensions (or from calibre, by configuring the wireless device).
											
										
										
											2020-06-19 10:22:38 +00:00
+								--- check if the given path is a file
 								---- @string path
 								---- @treturn bool
 								function util.fileExists(path)
 								    -- Probe by attempting to open the path for reading.
 								    local handle = io.open(path, "r")
 								    if handle == nil then
 								        -- Could not be opened: nothing is returned (i.e. a falsy nil for the caller).
 								        return
 								    end
 								    -- We only wanted to know whether it opens; release the handle right away.
 								    handle:close()
 								    return true
 								end
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
+								--- Checks if the given path exists. Doesn't care if it's a file or directory.
 								---- @string path
 								---- @treturn bool
 								function util.pathExists(path)
 								    -- lfs.attributes yields nil when the path cannot be stat'ed; any mode at all means it exists.
 								    local mode = lfs.attributes(path, "mode")
 								    return mode ~= nil
 								end
-												[plugin] Add folder check in move-to-archive (#11262)


											
										
										
											2023-12-27 19:45:16 +00:00
+								--- Checks if the given directory exists.
 								function util.directoryExists(path)
 								    -- True only when the path can be stat'ed and its mode is "directory".
 								    local mode = lfs.attributes(path, "mode")
 								    return mode == "directory"
 								end
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
+								--- As `mkdir -p`.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								-- Unlike [lfs.mkdir](https://keplerproject.github.io/luafilesystem/manual.html#mkdir)(),
 								-- does not error if the directory already exists, and creates intermediate directories as needed.
 								-- @string path the directory to create
 								-- @treturn bool true on success; nil, err_message on error
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
+								function util.makePath(path)
-												util: Rewrite makePath (#10111)

The previous iteration, besides failing to handle leaf-only input,
was relying on splitFilePathName, which just doesn't do what's required
to incrementally build the directory tree the right way around ;).

This should more closely match mkdir -p, i.e., it will *fail* if any
part (or all of it) of the path exists but is *not* a directory.

Re #10074
											
										
										
											2023-02-12 22:48:33 +00:00
+								    if lfs.attributes(path, "mode") == "directory" then
 								        return true
 								    end
 								    local components
 								    if path:sub(1, 1) == "/" then
 								        -- Leading slash, remember that it's an absolute path
 								        components = "/"
 								    else
 								        -- Relative path
 								        components = ""
 								    end
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
-												util: Rewrite makePath (#10111)

The previous iteration, besides failing to handle leaf-only input,
was relying on splitFilePathName, which just doesn't do what's required
to incrementally build the directory tree the right way around ;).

This should more closely match mkdir -p, i.e., it will *fail* if any
part (or all of it) of the path exists but is *not* a directory.

Re #10074
											
										
										
											2023-02-12 22:48:33 +00:00
+								    local success, err
 								    -- NOTE: mkdir -p handles umask shenanigans for intermediate components, we don't
 								    for component in path:gmatch("([^/]+)") do
 								        -- The trailing slash ensures we properly fail via mkdir if the composite path already exists as a file/link
 								        components = components .. component .. "/"
-												util: Add removePath (#10129)

Will attempt to prune empty directories from the given path, going as far back up as possible.
											
										
										
											2023-02-17 22:24:42 +00:00
+								        if lfs.attributes(components, "mode") == nil then
-												util: Rewrite makePath (#10111)

The previous iteration, besides failing to handle leaf-only input,
was relying on splitFilePathName, which just doesn't do what's required
to incrementally build the directory tree the right way around ;).

This should more closely match mkdir -p, i.e., it will *fail* if any
part (or all of it) of the path exists but is *not* a directory.

Re #10074
											
										
										
											2023-02-12 22:48:33 +00:00
+								            success, err = lfs.mkdir(components)
 								            if not success then
 								                return nil, err .. " (creating `" .. components .. "` for `" .. path .. "`)"
 								            end
 								        end
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
+								    end
-												util: Rewrite makePath (#10111)

The previous iteration, besides failing to handle leaf-only input,
was relying on splitFilePathName, which just doesn't do what's required
to incrementally build the directory tree the right way around ;).

This should more closely match mkdir -p, i.e., it will *fail* if any
part (or all of it) of the path exists but is *not* a directory.

Re #10074
											
										
										
											2023-02-12 22:48:33 +00:00
+								    return success, err
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
+								end
-												util: Add removePath (#10129)

Will attempt to prune empty directories from the given path, going as far back up as possible.
											
										
										
											2023-02-17 22:24:42 +00:00
+								--- Remove as many of the empty directories specified in path, children-first.
 								-- Does not fail if the directory is already gone.
 								-- @string path the directory tree to prune
 								-- @treturn bool true on success; nil, err_message on error
 								function util.removePath(path)
 								    -- Walk from the deepest component up towards the root, removing directories as we go.
 								    local current = path
 								    while true do
 								        local mode = lfs.attributes(current, "mode")
 								        if mode == "directory" then
 								            local ok, err = lfs.rmdir(current)
 								            if not ok then
 								                -- Most likely because ENOTEMPTY ;)
 								                return nil, err .. " (removing `" .. current .. "` for `" .. path .. "`)"
 								            end
 								        elseif mode ~= nil then
 								            -- Exists, but is something other than a directory: refuse to go on.
 								            return nil, "Encountered a component that isn't a directory (removing `" .. current .. "` for `" .. path .. "`)"
 								        end
 								        -- A nil mode means this component is already gone; simply move on to its parent.
 								        current = BaseUtil.dirname(current)
 								        if current == "." or current == "/" then
 								            -- Reached the top of the tree (relative or absolute), we're done.
 								            return true, nil
 								        end
 								    end
 								end
-												unified calibre plugin (#6177)

joins calibre metadata search and calibre wireless connections into a single plugin

search metadata changes:

- search directly into calibre metadata files.
- search can be performed on more than one library (configurable from a menu)
- device scans now find all calibre libraries under a given root
- search options can be configured from a menu. (case sensitive, find by title, author and path)
- removed legacy global variables.
- *option* to search from the reader
- *option* to generate a cache of books for faster searches.

calibre wireless connection changes:

- keep track of books in a library (includes pruning books from calibre metadata if the file was deleted locally)
- remove files on device from calibre
- support password protected connections
- FM integration: if we're in the inbox dir it will be updated each time a book is added or deleted.
- disconnect when requested by calibre, available on newer calibre versions (+4.17)
- remove unused opcodes.
- better report of client name, version and device id
- free disk space checks for all calibre versions
- bump supported extensions to match what KOReader can handle. Users can override this with their own list of extensions (or from calibre, by configuring the wireless device).
											
										
										
											2020-06-19 10:22:38 +00:00
+								--- As `rm`
 								-- @string path of the file to remove
 								-- @treturn bool true on success; nil, err_message on error
 								function util.removeFile(file)
 								    -- Reject a missing argument first, so the checks below can assume a value.
 								    if not file then
 								        return nil, "file is nil"
 								    end
 								    -- Only regular files are removed; directories, links to nowhere, etc. are refused.
 								    if lfs.attributes(file, "mode") ~= "file" then
 								        return nil, file .. " is not a file"
 								    end
 								    -- Hand back os.remove's own results (true on success; nil, err_message on error).
 								    return os.remove(file)
 								end
 								-- Gets total, used and available bytes for the mountpoint that holds a given directory.
 								-- @string path of the directory
 								-- @treturn table with total, used and available bytes
 								function util.diskUsage(dir)
 								    -- safe way of testing df & awk
 								    local function doCommand(d)
-												rM: Work around LD_PRELOAD weirdness on forks on rM 2 (#10490)


											
										
										
											2023-05-26 18:51:15 +00:00
+								        local handle = io.popen("df -k " .. d .. " 2>/dev/null | awk '$3 ~ /[0-9]+/ { print $2,$3,$4 }' 2>/dev/null || echo ::ERROR::")
-												unified calibre plugin (#6177)

joins calibre metadata search and calibre wireless connections into a single plugin

search metadata changes:

- search directly into calibre metadata files.
- search can be performed on more than one library (configurable from a menu)
- device scans now find all calibre libraries under a given root
- search options can be configured from a menu. (case sensitive, find by title, author and path)
- removed legacy global variables.
- *option* to search from the reader
- *option* to generate a cache of books for faster searches.

calibre wireless connection changes:

- keep track of books in a library (includes pruning books from calibre metadata if the file was deleted locally)
- remove files on device from calibre
- support password protected connections
- FM integration: if we're in the inbox dir it will be updated each time a book is added or deleted.
- disconnect when requested by calibre, available on newer calibre versions (+4.17)
- remove unused opcodes.
- better report of client name, version and device id
- free disk space checks for all calibre versions
- bump supported extensions to match what KOReader can handle. Users can override this with their own list of extensions (or from calibre, by configuring the wireless device).
											
										
										
											2020-06-19 10:22:38 +00:00
+								        if not handle then return end
 								        local output = handle:read("*all")
 								        handle:close()
 								        if not output:find "::ERROR::" then
 								            return output
 								        end
 								    end
 								    local err = { total = nil, used = nil, available = nil }
 								    if not dir or lfs.attributes(dir, "mode") ~= "directory" then return err end
 								    local usage = doCommand(dir)
 								    if not usage then return err end
 								    local stage, result = {}, {}
 								    for size in usage:gmatch("%w+") do
 								        table.insert(stage, size)
 								    end
 								    for k, v in pairs({"total", "used", "available"}) do
 								        if stage[k] ~= nil then
 								            -- sizes are in kb, return bytes here
 								            result[v] = stage[k] * 1024
 								        end
 								    end
 								    return result
 								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Replaces characters that are invalid in filenames.
 								--
 								-- Replaces the characters <code>\/:*?"<>|</code> with an <code>_</code>.
 								-- These characters are problematic on Windows filesystems. On Linux only
 								-- <code>/</code> poses a problem.
 								---- @string str filename
 								---- @treturn string sanitized filename
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								local function replaceAllInvalidChars(str)
-												Fix crash when downloading books with unknown characters in title (#2628)


											
										
										
											2017-03-15 07:59:42 +00:00
+								    if str then
-												Added util.fixUtf8 (#2704)

* Remove invalid UTF-8 chars from OPDS
* add unit tests
											
										
										
											2017-04-02 14:17:49 +00:00
+								        return str:gsub('[\\,%/,:,%*,%?,%",%<,%>,%|]','_')
-												Fix crash when downloading books with unknown characters in title (#2628)


											
										
										
											2017-03-15 07:59:42 +00:00
+								    end
-												Sanitize filename for vfat, fix #2433 (#2464)


											
										
										
											2017-01-10 00:05:15 +00:00
+								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Replaces slash with an underscore.
 								---- @string str
 								---- @treturn string
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								local function replaceSlashChar(str)
-												Fix crash when downloading books with unknown characters in title (#2628)


											
										
										
											2017-03-15 07:59:42 +00:00
+								    if str then
-												Added util.fixUtf8 (#2704)

* Remove invalid UTF-8 chars from OPDS
* add unit tests
											
										
										
											2017-04-02 14:17:49 +00:00
+								        return str:gsub('%/','_')
-												Fix crash when downloading books with unknown characters in title (#2628)


											
										
										
											2017-03-15 07:59:42 +00:00
+								    end
-												Sanitize filename for vfat, fix #2433 (#2464)


											
										
										
											2017-01-10 00:05:15 +00:00
+								end
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--[[--
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								Replaces characters that are invalid in filenames.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
 								Replaces the characters `\/:*?"<>|` with an `_` unless an optional path is provided. These characters are problematic on Windows filesystems. On Linux only the `/` poses a problem.
 								If an optional path is provided, @{util.getFilesystemType}() will be used to determine whether stricter VFAT restrictions should be applied.
 								]]
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								---- @string str
 								---- @string path
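 								---- @int limit maximum length, in bytes, of the name part (defaults to 240)
 								---- @int limit_ext longest suffix still treated as a file extension (defaults to 10)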
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								---- @treturn string safe filename
-												[fix] util.getSafeFilename() maximum extension length (#5067)

Strip HTML and do some semi-intelligent detection of faux extensions (i.e., more than 10 characters probably isn't one).

Fixes #5049.
											
										
										
											2019-06-10 15:06:13 +00:00
+								function util.getSafeFilename(str, path, limit, limit_ext)
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								    local filename, suffix = util.splitFileNameSuffix(str)
-												[fix] util.getSafeFilename() default to worst-case scenario (#5036)

Also pass path from Wallabag plugin.

Fixes #5025.
											
										
										
											2019-05-22 09:34:46 +00:00
+								    local replaceFunc = replaceAllInvalidChars
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								    local safe_filename
 								    -- VFAT supports a maximum of 255 UCS-2 characters, although it's probably treated as UTF-16 by Windows
 								    -- default to a slightly lower limit just in case
 								    limit = limit or 240
-												[fix] util.getSafeFilename() maximum extension length (#5067)

Strip HTML and do some semi-intelligent detection of faux extensions (i.e., more than 10 characters probably isn't one).

Fixes #5049.
											
										
										
											2019-06-10 15:06:13 +00:00
+								    limit_ext = limit_ext or 10
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
-												Util: Always sanitize filenames on Android

Fix #7837

											
										
										
											2021-06-15 16:06:19 +00:00
+								    -- Always assume the worst on Android (#7837)
 								    if path and not BaseUtil.isAndroid() then
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								        local file_system = util.getFilesystemType(path)
-												[fix] util.getSafeFilename() default to worst-case scenario (#5036)

Also pass path from Wallabag plugin.

Fixes #5025.
											
										
										
											2019-05-22 09:34:46 +00:00
+								        if file_system ~= "vfat" and file_system ~= "fuse.fsp" then
 								            replaceFunc = replaceSlashChar
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								        end
 								    end
-												[fix] util.getSafeFilename() maximum extension length (#5067)

Strip HTML and do some semi-intelligent detection of faux extensions (i.e., more than 10 characters probably isn't one).

Fixes #5049.
											
										
										
											2019-06-10 15:06:13 +00:00
+								    if suffix:len() > limit_ext then
 								        -- probably not an actual file extension, or at least not one we'd be
 								        -- dealing with, so strip the whole string
 								        filename = str
 								        suffix = nil
 								    end
 								    filename = util.htmlToPlainTextIfHtml(filename)
-												[fix, chore] Abstract filename logic in util.getSafeFilename() (#5026)

Fixes https://github.com/koreader/koreader/issues/5025

The OPDS browser was doing some fancier stuff in a way that should be abstracted away in util (because it applies anywhere files will be saved):

https://github.com/koreader/koreader/blob/eace8d25c1cbf9bd13e98220098494e8fb63c18f/frontend/ui/widget/opdsbrowser.lua#L482-L491
											
										
										
											2019-05-14 17:10:41 +00:00
+								    filename = filename:sub(1, limit)
 								    -- the limit might result in broken UTF-8, which we don't want in the result
 								    filename = util.fixUtf8(filename, "")
 								    if suffix and suffix ~= "" then
 								        safe_filename = replaceFunc(filename) .. "." .. replaceFunc(suffix)
 								    else
 								        safe_filename = replaceFunc(filename)
 								    end
 								    return safe_filename
 								end
-												Basic fixes to calibre-sync (#3558)

* Properly create intermediate directories when receiving books from Calibre.

This fixes an issue where you can't receive books except into directories that
already exist on the Kobo, which, in particular, causes problems when your
configuration in Calibre is something like "put books in $Author/$Title.epub"
and you haven't previously synced any books by that author.

* Wake up periodically to process ZMQs if any are registered.

This fixes an issue where if there are any timed events (such as the suspend
timer) in the queue, ZMQ events may not get processed until the timed event
fires, which is a problem when (for example) the suspend timer goes off in
an hour and you have something trying to send a book to the kobo over wifi
*right now*.

With this change, the event loop will wake up every 50ms to check for ZMQ
events and process them if necessary. If there are no ZMQs registered (which
is typical), it uses the original behaviour -- so this won't affect battery
life under normal usage.

											
										
										
											2018-01-01 14:40:28 +00:00
+								--- Splits a file into its directory path and file name.
 								--- If the given path has a trailing /, returns the entire path as the directory
 								--- path and "" as the file name.
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								---- @string file
-												util: Simplify splitFile* util functions
* Use a single match call instead of two gsubs

											
										
										
											2020-12-10 19:51:21 +00:00
+								---- @treturn string directory, filename
-												evernote: ReadHistory integration and text file output (#2498)



											
										
										
											2017-01-21 09:32:42 +00:00
+								function util.splitFilePathName(file)
 								    if file == nil or file == "" then return "", "" end
 								    if string.find(file, "/") == nil then return "", file end
-												BookInfoManager: Convert thumbnail compression to ZSTD
* Avoid the Lua string copies when compressing/uncompressing the bb
  thumbs
* Delay the statement reset so that the pointer returned by the BLOB
  query is actually usable ;).
* Re-use a ZSTD decompression context
* Store date & mtime for successfully extracted documents
  c.f., https://github.com/NiLuJe/koreader/commit/10477501f90471e84df68f561274f19a9a202dfb#r44914917
* Switch to user_version pragma to handle DB schema versioning
* Update file size/date on *each* extraction attempt.
  @poirez-z raised a good point, this ought to be useful to deem a file
  re-parseable after an update.
* Remove xutil
  Functionality has been moved to ffi/zlib & ffi/zstd
* Preserve settings when migrating the BookInfo DB
* Warn via InfoMessage that the DB was updated
* Only store the series name in series, and move the index in series_index
  (Column names chosen to match Calibre's).
* Handle the new series_index column in BookInfo consumers
* This allows us to get rid of the code that stripped empty decimals
  from series index in *most* places, since it's now a real Lua
  number, and the string formatting library does that magic on its
  own.

											
										
										
											2020-12-06 23:09:47 +00:00
+								    return file:match("(.*/)(.*)")
-												evernote: ReadHistory integration and text file output (#2498)



											
										
										
											2017-01-21 09:32:42 +00:00
+								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Splits a file name into its pure file name and suffix
 								---- @string file
 								---- @treturn string path, extension
-												evernote: ReadHistory integration and text file output (#2498)



											
										
										
											2017-01-21 09:32:42 +00:00
+								function util.splitFileNameSuffix(file)
 								    if file == nil or file == "" then return "", "" end
 								    if string.find(file, "%.") == nil then return file, "" end
-												BookInfoManager: Convert thumbnail compression to ZSTD
* Avoid the Lua string copies when compressing/uncompressing the bb
  thumbs
* Delay the statement reset so that the pointer returned by the BLOB
  query is actually usable ;).
* Re-use a ZSTD decompression context
* Store date & mtime for successfully extracted documents
  c.f., https://github.com/NiLuJe/koreader/commit/10477501f90471e84df68f561274f19a9a202dfb#r44914917
* Switch to user_version pragma to handle DB schema versioning
* Update file size/date on *each* extraction attempt.
  @poirez-z raised a good point, this ought to be useful to deem a file
  re-parseable after an update.
* Remove xutil
  Functionality has been moved to ffi/zlib & ffi/zstd
* Preserve settings when migrating the BookInfo DB
* Warn via InfoMessage that the DB was updated
* Only store the series name in series, and move the index in series_index
  (Column names chosen to match Calibre's).
* Handle the new series_index column in BookInfo consumers
* This allows us to get rid of the code that stripped empty decimals
  from series index in *most* places, since it's now a real Lua
  number, and the string formatting library does that magic on its
  own.

											
										
										
											2020-12-06 23:09:47 +00:00
+								    return file:match("(.*)%.(.*)")
-												evernote: ReadHistory integration and text file output (#2498)



											
										
										
											2017-01-21 09:32:42 +00:00
+								end
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								--- Gets file extension
 								---- @string file
 								---- @treturn string extension
-												More sorting orders in file manager (#2535)


											
										
										
											2017-02-12 02:55:31 +00:00
+								function util.getFileNameSuffix(file)
 								    -- splitFileNameSuffix() yields (name, suffix); keep only the second
 								    -- value. The outer parentheses truncate the result to exactly one
 								    -- value, so the caller always receives a single return.
 								    return (select(2, util.splitFileNameSuffix(file)))
 								end
-												Allow running shell scripts from the FileManager/Favorites (#5804)

* Allow running Shell/Python scripts from the FM

* Show an InfoMessage before/after running the script

Since we're blocking the UI ;).

* Allow running scripts from the favorites menu, too.
											
										
										
											2020-02-03 19:08:18 +00:00
+								--- Companion helper function that returns the script's language,
-												util: Simplify splitFile* util functions
* Use a single match call instead of two gsubs

											
										
										
											2020-12-10 19:51:21 +00:00
+								--- based on the file extension.
-												Allow running shell scripts from the FileManager/Favorites (#5804)

* Allow running Shell/Python scripts from the FM

* Show an InfoMessage before/after running the script

Since we're blocking the UI ;).

* Allow running scripts from the favorites menu, too.
											
										
										
											2020-02-03 19:08:18 +00:00
+								---- @string filename
-												[fix] Remove isAllowedScript leftovers (#6366)

Fixes #6365
											
										
										
											2020-07-09 16:11:56 +00:00
+								---- @treturn string (lowercase) (or nil if not Device:canExecuteScript(file))
-												Allow running shell scripts from the FileManager/Favorites (#5804)

* Allow running Shell/Python scripts from the FM

* Show an InfoMessage before/after running the script

Since we're blocking the UI ;).

* Allow running scripts from the favorites menu, too.
											
										
										
											2020-02-03 19:08:18 +00:00
+								function util.getScriptType(file)
 								    -- Compare the extension case-insensitively by lowercasing it first.
 								    local ext = util.getFileNameSuffix(file)
 								    ext = string.lower(ext)
 								    if ext == "py" then
 								        return "python"
 								    end
 								    if ext == "sh" then
 								        return "shell"
 								    end
 								    -- Any other extension: fall through and return nothing.
 								end
-												Added util.getFriendlySize() (#3381)

* Added util.getFriendlySize()

* Allow for GB

											
										
										
											2017-10-20 15:48:32 +00:00
+								--- Gets human friendly size as string
 								---- @int size (bytes)
-												util.getFriendlySize(): add option to right align

Left align by default, but allow right alignment by
padding left with spaces.

											
										
										
											2019-12-17 12:00:35 +00:00
+								---- @bool right_align (by padding with spaces on the left)
-												Added util.getFriendlySize() (#3381)

* Added util.getFriendlySize()

* Allow for GB

											
										
										
											2017-10-20 15:48:32 +00:00
+								---- @treturn string
-												util.getFriendlySize(): add option to right align

Left align by default, but allow right alignment by
padding left with spaces.

											
										
										
											2019-12-17 12:00:35 +00:00
+								function util.getFriendlySize(size, right_align)
 								    local frac_format = right_align and "%6.1f" or "%.1f"
 								    local deci_format = right_align and "%6d" or "%d"
-												[feat] Add dictionary download option (#3176)

You can now download pretty much all of the easily available freely licensed dictionaries I could find.
											
										
										
											2018-12-13 06:27:49 +00:00
+								    size = tonumber(size)
-												[spec] util_spec: tests for util.getFriendlySize() (#3650)


											
										
										
											2018-01-31 16:22:34 +00:00
+								    if not size or type(size) ~= "number" then return end
-												Address assorted Weblate comments (#7154)

* Fix typo in dropbox

Reported by @lescheck

* Rephrase text justification explanation more elegantly

* CoverBrowser: fix up some plurals

* Statistics: remove random use of template function

* Use ngettext for minute/minutes and second/seconds

* Change KB/MB/GB to kB/MB/GB SI units
											
										
										
											2021-01-16 20:40:00 +00:00
+								    if size > 1000*1000*1000 then
-												Autoturn: change to minutes and seconds (#9055)

fix #9051
and add some internal improvements (default values) to the datetimewidget.
											
										
										
											2022-05-31 22:04:46 +00:00
+								        return T(C_("Data storage size", "%1 GB"), string.format(frac_format, size/1000/1000/1000))
-												Translate file size unit (#5651)

Close #5649
											
										
										
											2019-11-28 22:22:07 +00:00
+								    end
-												Address assorted Weblate comments (#7154)

* Fix typo in dropbox

Reported by @lescheck

* Rephrase text justification explanation more elegantly

* CoverBrowser: fix up some plurals

* Statistics: remove random use of template function

* Use ngettext for minute/minutes and second/seconds

* Change KB/MB/GB to kB/MB/GB SI units
											
										
										
											2021-01-16 20:40:00 +00:00
+								    if size > 1000*1000 then
-												Autoturn: change to minutes and seconds (#9055)

fix #9051
and add some internal improvements (default values) to the datetimewidget.
											
										
										
											2022-05-31 22:04:46 +00:00
+								        return T(C_("Data storage size", "%1 MB"), string.format(frac_format, size/1000/1000))
-												Translate file size unit (#5651)

Close #5649
											
										
										
											2019-11-28 22:22:07 +00:00
+								    end
-												Address assorted Weblate comments (#7154)

* Fix typo in dropbox

Reported by @lescheck

* Rephrase text justification explanation more elegantly

* CoverBrowser: fix up some plurals

* Statistics: remove random use of template function

* Use ngettext for minute/minutes and second/seconds

* Change KB/MB/GB to kB/MB/GB SI units
											
										
										
											2021-01-16 20:40:00 +00:00
+								    if size > 1000 then
-												Autoturn: change to minutes and seconds (#9055)

fix #9051
and add some internal improvements (default values) to the datetimewidget.
											
										
										
											2022-05-31 22:04:46 +00:00
+								        return T(C_("Data storage size", "%1 kB"), string.format(frac_format, size/1000))
-												Added util.getFriendlySize() (#3381)

* Added util.getFriendlySize()

* Allow for GB

											
										
										
											2017-10-20 15:48:32 +00:00
+								    else
-												Autoturn: change to minutes and seconds (#9055)

fix #9051
and add some internal improvements (default values) to the datetimewidget.
											
										
										
											2022-05-31 22:04:46 +00:00
+								        return T(C_("Data storage size", "%1 B"), string.format(deci_format, size))
-												Added util.getFriendlySize() (#3381)

* Added util.getFriendlySize()

* Allow for GB

											
										
										
											2017-10-20 15:48:32 +00:00
+								    end
 								end
-												Added util.getFormattedSize() (#3383)


											
										
										
											2017-10-20 17:29:52 +00:00
+								--- Gets formatted size as string (1273334 => "1,273,334")
 								---- @int size (bytes)
 								---- @treturn string
 								function util.getFormattedSize(size)
 								    -- Inserts a "," between every group of three digits of the integer part
 								    -- of `size` (1273334 => "1,273,334").
 								    -- Only the leading run of digits is grouped: an optional sign in front
 								    -- and anything after the digits (fraction, exponent) are passed through
 								    -- untouched, so -123 stays "-123" instead of becoming "-,123" and
 								    -- fractional digits are never split by separators.
 								    -- Values whose string form does not start with [sign]digits
 								    -- (e.g. nil, "inf") are returned as their plain tostring() form.
 								    local s = tostring(size)
 								    local sign, digits, rest = s:match("^([%-+]?)(%d+)(.*)$")
 								    if not digits then
 								        return s
 								    end
 								    -- Group from the right: reverse, tag every 3 digits, reverse back, then
 								    -- drop the separator that ends up in front when the digit count is a
 								    -- multiple of three. Assigning to one variable discards gsub's count.
 								    local grouped = digits:reverse():gsub("(%d%d%d)", "%1,"):reverse():gsub("^,", "")
 								    return sign .. grouped .. rest
 								end
-												Add hash-based document metadata storage option (#10945)

This option saves metadata sidecar (sdr) directories not next to the book or in koreader/docsettings/, but in koreader/hashdocsettings/ using the partial md5 hash of each documents, allowing users to move, rename, and copy their documents outside of KOReader without accidentally losing their highlights/notes/progress. Included are various warnings and info to users of the benefits and drawbacks of this non-default option.

Closes #10892.
											
										
										
											2023-10-11 08:39:33 +00:00
+								--- Calculates the partial MD5 digest of the file at `filepath`. As for the calculating mechanism itself,
 								-- since only PDF documents could be modified by KOReader by appending data
 								-- at the end of the files when highlighting, we use a non-even sampling
 								-- algorithm which samples with larger weight at file head and much smaller
 								-- weight at file tail, thus reduces the probability that appended data may change
 								-- the digest value.
 								-- Note that if PDF file size is around 1024, 4096, 16384, 65536, 262144
 								-- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, appending data
 								-- by highlighting in KOReader may change the digest value.
-												md5: centralize and deduplicate (#11003)

Document partial md5 hash is calculated by util.partialMD5() and stored in doc_settings as "partial_md5_checksum" on the first document opening.
											
										
										
											2023-10-15 04:47:09 +00:00
+								function util.partialMD5(filepath)
 								    if not filepath then return end
 								    local file = io.open(filepath, "rb")
 								    if not file then return end
-												Add hash-based document metadata storage option (#10945)

This option saves metadata sidecar (sdr) directories not next to the book or in koreader/docsettings/, but in koreader/hashdocsettings/ using the partial md5 hash of each documents, allowing users to move, rename, and copy their documents outside of KOReader without accidentally losing their highlights/notes/progress. Included are various warnings and info to users of the benefits and drawbacks of this non-default option.

Closes #10892.
											
										
										
											2023-10-11 08:39:33 +00:00
+								    local step, size = 1024, 1024
 								    local update = md5()
 								    for i = -1, 10 do
-												md5: centralize and deduplicate (#11003)

Document partial md5 hash is calculated by util.partialMD5() and stored in doc_settings as "partial_md5_checksum" on the first document opening.
											
										
										
											2023-10-15 04:47:09 +00:00
+								        file:seek("set", lshift(step, 2*i))
-												Add hash-based document metadata storage option (#10945)

This option saves metadata sidecar (sdr) directories not next to the book or in koreader/docsettings/, but in koreader/hashdocsettings/ using the partial md5 hash of each documents, allowing users to move, rename, and copy their documents outside of KOReader without accidentally losing their highlights/notes/progress. Included are various warnings and info to users of the benefits and drawbacks of this non-default option.

Closes #10892.
											
										
										
											2023-10-11 08:39:33 +00:00
+								        local sample = file:read(size)
 								        if sample then
 								            update(sample)
 								        else
 								            break
 								        end
 								    end
-												md5: centralize and deduplicate (#11003)

Document partial md5 hash is calculated by util.partialMD5() and stored in doc_settings as "partial_md5_checksum" on the first document opening.
											
										
										
											2023-10-15 04:47:09 +00:00
+								    file:close()
-												Add hash-based document metadata storage option (#10945)

This option saves metadata sidecar (sdr) directories not next to the book or in koreader/docsettings/, but in koreader/hashdocsettings/ using the partial md5 hash of each documents, allowing users to move, rename, and copy their documents outside of KOReader without accidentally losing their highlights/notes/progress. Included are various warnings and info to users of the benefits and drawbacks of this non-default option.

Closes #10892.
											
										
										
											2023-10-11 08:39:33 +00:00
+								    return update()
 								end
-												writeToFile: centralize (#11012)


											
										
										
											2023-10-17 04:42:07 +00:00
+								function util.writeToFile(data, filepath, force_flush, lua_dofile_ready, directory_updated)
 								    -- Writes `data` to `filepath`, replacing any existing content
 								    -- (the file is opened in "wb" mode).
 								    --   data              string to write
 								    --   filepath          destination path; when nil/false nothing is done
 								    --   force_flush       when truthy, BaseUtil.fsyncOpenedFile() is called on the handle before closing
 								    --   lua_dofile_ready  when truthy, `data` is wrapped as "-- <filepath>\nreturn <data>\n"
 								    --   directory_updated when truthy, BaseUtil.fsyncDirectory() is called with `filepath` after closing
 								    -- Returns true on success, nil plus an error message if the file cannot
 								    -- be opened or written, and nothing when `filepath` is missing.
 								    if not filepath then return end
 								    if lua_dofile_ready then
 								        local t = { "-- ", filepath, "\nreturn ", data, "\n" }
 								        data = table.concat(t)
 								    end
 								    local file, err = io.open(filepath, "wb")
 								    if not file then
 								        return nil, err
 								    end
 								    -- A failed write (e.g. no space left on the device) must not be reported
 								    -- as success: close the handle and surface the error to the caller.
 								    local written, write_err = file:write(data)
 								    if not written then
 								        file:close()
 								        return nil, write_err
 								    end
 								    if force_flush then
 								        BaseUtil.fsyncOpenedFile(file)
 								    end
 								    file:close()
 								    if directory_updated then
 								        BaseUtil.fsyncDirectory(filepath)
 								    end
 								    return true
 								end
-												Add hash-based document metadata storage option (#10945)

This option saves metadata sidecar (sdr) directories not next to the book or in koreader/docsettings/, but in koreader/hashdocsettings/ using the partial md5 hash of each documents, allowing users to move, rename, and copy their documents outside of KOReader without accidentally losing their highlights/notes/progress. Included are various warnings and info to users of the benefits and drawbacks of this non-default option.

Closes #10892.
											
										
										
											2023-10-11 08:39:33 +00:00
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--[[--
 								Replaces invalid UTF-8 characters with a replacement string.
 								Based on <http://notebook.kulchenko.com/programming/fixing-malformed-utf8-in-lua>.
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								c.f.,    FixUTF8 @ <https://github.com/pkulchenko/ZeroBraneStudio/blob/master/src/util.lua>.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								@string str the string to be checked for invalid characters
 								@string replacement the string to replace invalid characters with
 								@treturn string valid UTF-8
 								]]
-												Added util.fixUtf8 (#2704)

* Remove invalid UTF-8 chars from OPDS
* add unit tests
											
										
										
											2017-04-02 14:17:49 +00:00
+								function util.fixUtf8(str, replacement)
 								    local pos = 1
 								    local len = #str
 								    while pos <= len do
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								        if     str:find("^[%z\1-\127]", pos) then pos = pos + 1
 								        elseif str:find("^[\194-\223][\128-\191]", pos) then pos = pos + 2
 								        elseif str:find(       "^\224[\160-\191][\128-\191]", pos)
 								            or str:find("^[\225-\236][\128-\191][\128-\191]", pos)
 								            or str:find(       "^\237[\128-\159][\128-\191]", pos)
 								            or str:find("^[\238-\239][\128-\191][\128-\191]", pos) then pos = pos + 3
 								        elseif str:find(       "^\240[\144-\191][\128-\191][\128-\191]", pos)
 								            or str:find("^[\241-\243][\128-\191][\128-\191][\128-\191]", pos)
 								            or str:find(       "^\244[\128-\143][\128-\191][\128-\191]", pos) then pos = pos + 4
-												Added util.fixUtf8 (#2704)

* Remove invalid UTF-8 chars from OPDS
* add unit tests
											
										
										
											2017-04-02 14:17:49 +00:00
+								        else
 								            str = str:sub(1, pos - 1) .. replacement .. str:sub(pos + 1)
 								            pos = pos + #replacement
 								            len = len + #replacement - 1
 								        end
 								    end
 								    return str
 								end
-												Merge various information into systemstat (#2764)

* Merge various information to systemstat
											
										
										
											2017-04-14 19:12:28 +00:00
+								--- Splits input string with the splitter into a table. This function ignores the last empty entity.
 								--
 								--- @string str the string to be split
 								--- @string splitter
 								--- @bool capture_empty_entity
 								--- @treturn an array-like table
 								function util.splitToArray(str, splitter, capture_empty_entity)
 								    local result = {}
 								    for word in util.gsplit(str, splitter, false, capture_empty_entity) do
 								        table.insert(result, word)
 								    end
 								    return result
 								end
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								--- Convert a Unicode codepoint (number) to UTF-8 char
 								--- c.f., <https://stackoverflow.com/a/4609989>
 								---     & <https://stackoverflow.com/a/38492214>
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								--- See utf8charcode in ffi/util for a decoder.
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								--
 								--- @int c Unicode codepoint
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								--- @treturn string UTF-8 char
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								function util.unicodeCodepointToUtf8(c)
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								    if c < 0x80 then
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								        return string.char(c)
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								    elseif c < 0x800 then
 								        return string.char(
 								                bor(0xC0, rshift(c, 6)),
 								                bor(0x80, band(c, 0x3F))
 								        )
 								    elseif c < 0x10000 then
 								        if c >= 0xD800 and c <= 0xDFFF then
 								            return '<EFBFBD>' -- Surrogates -> U+FFFD REPLACEMENT CHARACTER
 								        end
 								        return string.char(
 								                bor(0xE0, rshift(c, 12)),
 								                bor(0x80, band(rshift(c, 6), 0x3F)),
 								                bor(0x80, band(c, 0x3F))
 								        )
 								    elseif c < 0x110000 then
 								        return string.char(
 								                bor(0xF0, rshift(c, 18)),
 								                bor(0x80, band(rshift(c, 12), 0x3F)),
 								                bor(0x80, band(rshift(c, 6), 0x3F)),
 								                bor(0x80, band(c, 0x3F))
 								        )
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    else
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								        return '<EFBFBD>' -- Invalid -> U+FFFD REPLACEMENT CHARACTER
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    end
 								end
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								-- we need to use an array of arrays to keep them ordered as written
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								local HTML_ENTITIES_TO_UTF8 = {
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								    {"&lt;", "<"},
 								    {"&gt;", ">"},
 								    {"&quot;", '"'},
-												util.htmlEntitiesToUtf8: add some more semi common entities (#10979)


											
										
										
											2023-10-12 12:46:37 +00:00
+								    {"&lsquo;", '‘'},
 								    {"&rsquo;", '’'},
 								    {"&ldquo;", '“'},
 								    {"&rdquo;", '”'},
 								    {"&mdash;", '—'},
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								    {"&apos;", "'"},
-												[chore] replace utf8 bytes with Unicode escape sequence

											
										
										
											2023-08-01 08:09:29 +00:00
+								    {"&nbsp;", "\u{00A0}"},
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								    {"&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end},
-												Rewrite unicodeCodepointToUtf8 w/ bitopt (#5625)

* Rewrite unicodeCodepointToUtf8 w/ bitopt

Avoids costly divs & modulos

											
										
										
											2019-11-22 18:50:58 +00:00
+								    {"&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x, 16)) end},
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								    {"&amp;", "&"}, -- must be last
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								}
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--[[--
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								Replace HTML entities with their UTF-8 encoded equivalent in text.
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
 								Supports only basic ones and those with numbers (no support for named entities like `&eacute;`).
 								@int string text with HTML entities
-												Other minor frontend.util cleanups (#5629)

* Resync fixUtf8 w/ upstream
* Fix lastIndexOf desc
* Drop unichar usage, it's a crappier unicodeCodepointToUtf8 ;).

											
										
										
											2019-11-23 23:27:27 +00:00
+								@treturn string UTF-8 text
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								]]
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								function util.htmlEntitiesToUtf8(text)
-												Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove percent-encoded URLs tweaks for crengine now that crengine
correctly supports them (each percent encode handled as an UTF8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine

											
										
										
											2018-04-10 16:30:27 +00:00
+								    for _, t in ipairs(HTML_ENTITIES_TO_UTF8) do
 								        text = text:gsub(t[1], t[2])
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    end
 								    return text
 								end
-												[doc] Tag @todo, @fixme and @warning (#5244)

This commit standardizes the various todos around the code a bit in a manner recognized by LDoc.

Besides drawing more attention by being displayed in the developer docs, they're also extractable with LDoc on the command line:

```sh
ldoc --tags todo,fixme *.lua
```

However, whether that particular usage offers any advantage over other search tools is questionable at best.

* and some random beautification
											
										
										
											2019-08-23 17:53:53 +00:00
+								--[[--
 								Convert simple HTML to plain text.
 								This may fail on complex HTML (with styles, scripts, comments), but should be fine enough with simple HTML as found in EPUB's `<dc:description>`.
 								@string text HTML text
 								@treturn string plain text
 								]]
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								function util.htmlToPlainText(text)
-												htmlToPlainText(): tweak format paragraphs with indentation (#7095)

Rework dafaf966, with a tabulation instead of multiple
&nbsp; to ensure a constant indentation when text
is justified.
											
										
										
											2020-12-31 22:23:05 +00:00
+								    -- Replace <br> with \n
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    text = text:gsub("%s*<%s*br%s*/?>%s*", "\n") -- <br> and <br/>
-												htmlToPlainText(): tweak format paragraphs with indentation (#7095)

Rework dafaf966, with a tabulation instead of multiple
&nbsp; to ensure a constant indentation when text
is justified.
											
										
										
											2020-12-31 22:23:05 +00:00
+								    -- Replace <p> with \n\t (\t, unlike any combination of spaces,
 								    -- ensures a constant indentation when text is justified.)
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    text = text:gsub("%s*</%s*p%s*>%s*", "\n") -- </p>
 								    text = text:gsub("%s*<%s*p%s*/>%s*", "\n") -- standalone <p/>
-												htmlToPlainText(): tweak format paragraphs with indentation (#7095)

Rework dafaf966, with a tabulation instead of multiple
&nbsp; to ensure a constant indentation when text
is justified.
											
										
										
											2020-12-31 22:23:05 +00:00
+								    text = text:gsub("%s*<%s*p%s*>%s*", "\n\t") -- <p>
 								        -- (this one last, so \t is not removed by the others' %s)
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    -- Remove all HTML tags
 								    text = text:gsub("<[^>]*>", "")
 								    -- Convert HTML entities
 								    text = util.htmlEntitiesToUtf8(text)
-												htmlToPlainText(): tweak format paragraphs with indentation (#7095)

Rework dafaf966, with a tabulation instead of multiple
&nbsp; to ensure a constant indentation when text
is justified.
											
										
										
											2020-12-31 22:23:05 +00:00
+								    -- Trim spaces and new lines at start and end, including
 								    -- the \t we added (this looks fine enough with multiple
 								    -- paragraphs, but feels nicer with a single paragraph,
 								    -- whether it contains <br>s or not).
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
+								    text = text:gsub("^[\n%s]*", "")
 								    text = text:gsub("[\n%s]*$", "")
 								    return text
 								end
 								--- Convert HTML to plain text if text seems to be HTML
 								-- Detection is deliberately simple (it may give false positives or
 								-- negatives): text counts as HTML when it contains something shaped like
 								-- a `<tag ...>`, or an entity-encoded tag such as `&lt;br&gt;`.
 								-- Meant for text found in EPUB's <dc:description>.
 								--
 								--- @string text the string with possibly some HTML
 								--- @treturn string cleaned text
 								function util.htmlToPlainTextIfHtml(text)
 								    -- gsub's second result is the number of matches: use it as a counter.
 								    local tag_count = select(2, text:gsub("<%w+.->", ""))
 								    local looks_like_html = tag_count > 0
 								    if not looks_like_html then
 								        -- no <tag> found, but the text may be badly/twice encoded HTML
 								        -- containing things like "&lt;br&gt;"
 								        local encoded_tag_count = select(2, text:gsub("&lt;%a+&gt;", ""))
 								        if encoded_tag_count > 0 then
 								            looks_like_html = true
 								            -- decode one of the two encodes
 								            text = util.htmlEntitiesToUtf8(text)
 								        end
 								    end
 								    if looks_like_html then
 								        text = util.htmlToPlainText(text)
 								    else
 								        -- A trailing "]]>" probably comes from a <![CDATA[ .. ]]> section
 								        -- whose ending marker was left in by the extractor: remove it.
 								        text = text:gsub("]]>%s*$", "")
 								    end
 								    return text
 								end
-												HTML dictionary support (#3573)

* Adds a generic HTML widget modeled after the text widget, and HTML dictionary support. HTML dictionaries can have their own CSS (for X.ifo it must be X.css). The base CSS just resets the margin and sets the font.

Note that the widget doesn't handle links, that wasn't needed for the dictionary.

Closes <https://github.com/koreader/koreader/issues/1776>.

* Show tag stripped HTML if the dictionary entry isn't valid HTML

* Simulate the normal <br/> behavior

* Bump base

											
										
										
											2018-01-07 19:24:15 +00:00
+								--- Encode the HTML entities in a string
-												HTML dictionary link support (#3603)


											
										
										
											2018-01-15 22:51:43 +00:00
+								--- @string text the string to escape
-												HTML dictionary support (#3573)

* Adds a generic HTML widget modeled after the text widget, and HTML dictionary support. HTML dictionaries can have their own CSS (for X.ifo it must be X.css). The base CSS just resets the margin and sets the font.

Note that the widget doesn't handle links, that wasn't needed for the dictionary.

Closes <https://github.com/koreader/koreader/issues/1776>.

* Show tag stripped HTML if the dictionary entry isn't valid HTML

* Simulate the normal <br/> behavior

* Bump base

											
										
										
											2018-01-07 19:24:15 +00:00
+								-- Taken from https://github.com/kernelsauce/turbo/blob/e4a35c2e3fb63f07464f8f8e17252bea3a029685/turbo/escape.lua#L58-L70
 								function util.htmlEscape(text)
 								    -- Lookup table handed to gsub: a matched character that has no entry
 								    -- here ('{' and '}') is kept unchanged in the result.
 								    local entity_for = {
 								        ["&"] = "&amp;",
 								        ["<"] = "&lt;",
 								        [">"] = "&gt;",
 								        ['"'] = "&quot;",
 								        ["'"] = "&#39;",
 								        ["/"] = "&#47;",
 								    }
 								    -- Both gsub results (escaped string, number of matches) go back to the caller.
 								    local escaped, nb_matches = text:gsub("[}{\">/<'&]", entity_for)
 								    return escaped, nb_matches
 								end
-												Style tweaks: adds "Book-specific tweak" menu item (#6244)

Allows editing a CSS snippet to be applied to this
book only, without the need to create and edit
a User style tweak.
Allows copying any other tweak CSS by just tapping
on it (and pasting into this with Hold).
Limit User style tweaks nb of items per menu page
to 6 (like we try to do for other tweaks menus).
											
										
										
											2020-06-08 18:47:31 +00:00
+								--- Prettify a CSS stylesheet
 								-- Not perfect, but enough to make some ugly CSS readable.
 								-- By default, each selector and each property is put on its own line.
 								-- With condensed=true, condense each full declaration on a single line.
 								--
 								--- @string CSS string
 								--- @boolean condensed[opt=false] true to condense each declaration on a line
 								--- @treturn string the CSS prettified
 								function util.prettifyCSS(css_text, condensed)
 								    -- Returns the reformatted stylesheet as a single string.
 								    -- Strategy: temporarily swap ',', ':' and ';' that must not be touched
 								    -- (in comments, inside parentheses, inside declarations) for control
 								    -- characters, reflow what is left, then swap them back at the end.
 								    if not condensed then
 								        -- Get rid of \t
 								        css_text = css_text:gsub("\t", " ")
 								        -- Get rid of \r
 								        css_text = css_text:gsub("\r", "")
 								        -- Protect ',:;' in comments by replacing them with rare control chars
 								        css_text = css_text:gsub("/%*.-%*/", function(s)
 								            s = s:gsub(",", "\v")
 								            s = s:gsub(":", "\f")
 								            s = s:gsub(";", "\b")
 								            return s
 								        end)
 								        -- Protect ',' inside () (ie. ":is(td, th)") by replacing them with rare control chars.
 								        -- The pattern must be the bare balanced match "%b()": requiring any character
 								        -- after the closing parenthesis would leave selectors like ":is(td, th)"
 								        -- unprotected, and their commas would be split as selector separators below.
 								        css_text = css_text:gsub("%b()", function(s)
 								            s = s:gsub(",", "\v")
 								            return s
 								        end)
 								        -- Cleanup declarations (the most nested ones only, which may be
 								        -- contained in "@supports (...) {...}" or "@media (...) {...}")
 								        css_text = css_text:gsub(" *{([^{}]*)} *", function(s)
 								            -- Comments inside declaration may be mixed with properties, on a same line,
 								            -- before or after them, and we don't know if they apply to what's before or
 								            -- what's after, except when they are standalone and probably apply to the
 								            -- next line. So, when not standalone, double indent them (so it looks like
 								            -- they apply to what's above - but will still look fine if they are about
 								            -- what's after.
 								            s = "\n" .. s -- so the next one match on the first line
 								            s = s:gsub("\n */%*", "\a/*")          -- '/*' with only blank before: mark them with '\a'
 								            s = s:gsub(" *([^\a])/%*", "\n\t/*")   -- unmarked '/*' (content before): marked, more indentation later
 								            s = s:gsub("\a", "")                   -- remove mark
 								            s = s:gsub("\t", "\a")                 -- replace mark by one that is not caught by '%s'
 								            s = s:gsub("%*/%s*", "*/\n")           -- '*/' end of css comment: newline after
 								            s = s:gsub("%s*;%s*", ";\n")           -- newline after ';'
 								            s = s:gsub("\n+%s*", "\n    ")         -- remove blank lines, 4 spaces indent on all lines
 								            s = s:gsub("\a", "    ")               -- expand our \a marks to have these /* more indented
 								            s = s:gsub("%s*:%s*", ": ")            -- normalize spacing in "keyword: value"
 								            s = s:gsub("^%s*(.-)%s*$", "\n    %1") -- remove leading and trailing spaces, indent first line
 								            s = s:gsub("^%s*$", "")                -- but have empty declaration really empty
 								            -- less indent for these crengine specific tweaks to the followup properties
 								            s = s:gsub("\n    %-cr%-hint: late", "\n -cr-hint: late")
 								            s = s:gsub("\n    %-cr%-only%-if", "\n -cr-only-if")
 								            -- Protect and normalize ',' in declarations (ie. in font-family list, rgb()...)
 								            s = s:gsub("%s*,%s*", "\v ")
 								            return " {" .. s .. "\n}"
 								        end)
 								        -- Have each selector (separated by ',') on a new line
 								        css_text = css_text:gsub("%s*,%s*", " ,\n")
 								        css_text = css_text:gsub("\n *([^\n]+),", "\n%1,") -- remove leading spaces on the first one
 								        css_text = css_text:gsub("\n *([^\n]+){", "\n%1{") -- remove leading spaces on a standalone one
 								        -- Make sure { is on the same line with the selector it follows
 								        css_text = css_text:gsub("%s*\n *{", " {")
 								        -- Make sure we have a newline after our }
 								        css_text = css_text:gsub("\n} *([^\n]+)", "\n}\n%1")
 								        -- Restore all protected chars
 								        css_text = css_text:gsub("\v", ",")
 								        css_text = css_text:gsub("\f", ":")
 								        css_text = css_text:gsub("\b", ";")
 								    else
 								        -- Go thru previous method to have something standard to work on
 								        css_text = util.prettifyCSS(css_text)
 								        -- And condense that
 								        css_text = css_text:gsub(" {\n    ", " { ")
 								        css_text = css_text:gsub(";\n    ", "; ")
 								        css_text = css_text:gsub("\n}", " }")
 								        css_text = css_text:gsub(" ,\n", ", ")
 								    end
 								    return css_text
 								end
-												HTML dictionary link support (#3603)


											
										
										
											2018-01-15 22:51:43 +00:00
+								--- Escape list for shell usage
 								--- @table args the list of arguments to escape
 								--- @treturn string the escaped and concatenated arguments
 								function util.shell_escape(args)
 								    local quoted = {}
 								    for _, arg in ipairs(args) do
 								        -- Close the quote, emit an escaped single quote, reopen the quote: ' -> '\''
 								        local inner = arg:gsub("'", "'\\''")
 								        -- Wrap each argument in single quotes
 								        quoted[#quoted + 1] = "'" .. inner .. "'"
 								    end
 								    -- Arguments are joined with a single space
 								    return table.concat(quoted, " ")
 								end
-												Misc: Get rid of the legacy defaults.lua globals (#9546)

* This removes support for the following deprecated constants: `DTAP_ZONE_FLIPPING`, `DTAP_ZONE_BOOKMARK`, `DCREREADER_CONFIG_DEFAULT_FONT_GAMMA`
* The "Advanced settings" panel now highlights modified values in bold (think about:config in Firefox ;)).
* LuaData: Isolate global table lookup shenanigans, and fix a few issues in unused-in-prod codepaths.
* CodeStyle: Require module locals for Lua/C modules, too.
* ScreenSaver: Actually garbage collect our widget on close (ScreenSaver itself is not an instantiated object).
* DateTimeWidget: Code cleanups to ensure child widgets can be GC'ed.
											
										
										
											2022-09-27 23:10:50 +00:00
+								--- Clear all the elements from an array without reassignment.
 								--- @table t the array to be cleared
-												Do not execute background runner if device is suspended (#3608)


											
										
										
											2018-01-17 08:17:53 +00:00
+								function util.clearTable(t)
 								    local c = #t
 								    for i = 0, c do t[i] = nil end
 								end
-												CloudStorage: Allow use reserved characters in FTP username and FTP password  (#3924)

Depends on RFC 3986 compliant util.urlEncode() and adds unit tests for the new functions.
											
										
										
											2018-05-04 15:06:58 +00:00
+								--- Encode URL also known as percent-encoding see https://en.wikipedia.org/wiki/Percent-encoding
 								--- @string url the string to encode (nil is accepted)
 								--- @treturn string the percent-encoded string (nothing is returned when url is nil)
 								--- Taken from https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
 								function util.urlEncode(url)
 								    if url == nil then
 								        return
 								    end
 								    -- Turns one character into its "%XX" uppercase hexadecimal form
 								    local function percent(ch)
 								        return ("%%%02X"):format(ch:byte())
 								    end
 								    -- Newlines are expanded to CR LF before being encoded
 								    local with_crlf = url:gsub("\n", "\r\n")
 								    -- Everything but alphanumerics and -._~!*'() gets percent-encoded
 								    local encoded = with_crlf:gsub("([^%w%-%.%_%~%!%*%'%(%)])", percent)
 								    return encoded
 								end
 								--- Decode URL (reverse process to util.urlEncode())
 								--- @string url the string to decode (nil is accepted)
 								--- @treturn string the decoded string (nothing is returned when url is nil)
 								--- Taken from https://gist.github.com/liukun/f9ce7d6d14fa45fe9b924a3eed5c3d99
 								function util.urlDecode(url)
 								    if url == nil then
 								        return
 								    end
 								    -- Each "%XX" (two hexadecimal digits) becomes the byte with that value;
 								    -- a '%' not followed by two hex digits is left alone.
 								    local decoded = url:gsub("%%(%x%x)", function(hex)
 								        return string.char(tonumber(hex, 16))
 								    end)
 								    return decoded
 								end
-												Text editor plugin, InputDialog enhancements (#4135)

This plugin mostly sets up a "Text editor>" submenu, that allows
browsing files, creating a new file, and managing a history of
previously opened files for easier re-opening.
It restores previous scroll and cursor positions on re-opening.
An additional "Check lua" syntax button is added when editing
a .lua file, and saving is prevented if there are errors.
The text editing is mainly provided by the enhanced InputDialog.

InputDialog: added a few more options, the main one being
'save_callback', which will add a Save and Close buttons
and manage saving/discarding/exiting.
If "fullscreen" and "add_nav_bar", will add a show/hide keyboard
button to it.
Moved the preset buttons setup code in their own InputDialog
methods for clarity of the main init code.
Buttons are now enabled/disabled depending on context for feedback
(eg: Save is disabled as long as text has not been modified).

Added util.checkLuaSyntax(lua_string), might be useful elsewhere.
											
										
										
											2018-08-06 19:16:30 +00:00
+								--- Check lua syntax of string
 								--- @string text lua code text
 								--- @treturn string with parsing error, nil if syntax ok
 								function util.checkLuaSyntax(lua_text)
 								    -- loadstring only compiles the text (it does not run it); on failure
 								    -- it gives back nil plus the parser's error message.
 								    local chunk, message = loadstring(lua_text)
 								    if chunk then
 								        return nil
 								    end
 								    if message then
 								        -- Replace: [string "blah blah..."]:3: '=' expected near '123'
 								        -- with: Line 3: '=' expected near '123'
 								        message = message:gsub("%[string \".-%\"]:", "Line ")
 								    end
 								    return message
 								end
-												[chore, documentation] Fix up util.stringStartsWith/stringEndsWith LDoc comments (#7292)

Cf. <https://github.com/koreader/koreader/issues/7286#issuecomment-778855817>.
											
										
										
											2021-02-15 08:28:22 +00:00
+								--- Simple startsWith string helper.
 								--
 								-- C.f., <http://lua-users.org/wiki/StringRecipes>.
 								-- @string str source string
 								-- @string start string to match
 								-- @treturn bool true on success
-												Allow toggling CRe's new dithering & scaling (#4922)

Smooth scaling toggle is per document, in the gear tab.
Dithering is in the Dev top menu ;).
											
										
										
											2019-04-18 21:26:53 +00:00
+								function util.stringStartsWith(str, start)
 								   return str:sub(1, #start) == start
 								end
-												[chore, documentation] Fix up util.stringStartsWith/stringEndsWith LDoc comments (#7292)

Cf. <https://github.com/koreader/koreader/issues/7286#issuecomment-778855817>.
											
										
										
											2021-02-15 08:28:22 +00:00
+								--- Simple endsWith string helper.
 								-- @string str source string
 								-- @string ending string to match
 								-- @treturn bool true on success
-												Allow toggling CRe's new dithering & scaling (#4922)

Smooth scaling toggle is per document, in the gear tab.
Dithering is in the Dev top menu ;).
											
										
										
											2019-04-18 21:26:53 +00:00
+								function util.stringEndsWith(str, ending)
 								   return ending == "" or str:sub(-#ending) == ending
 								end
-												TextViewer: add Find (#9507)


											
										
										
											2022-09-13 21:09:49 +00:00
+								--- Search a string in a text.
 								-- @string or table txt Text (char list) to search in
 								-- @string str String to search for
 								-- @boolean case_sensitive
 								-- @number start_pos Position number in text to start search from
 								-- @treturn number Position number or 0 if not found
-												TextViewer: make find result bold (#11427)


											
										
										
											2024-02-03 08:32:33 +00:00
+								-- @treturn table Text char list
 								-- @treturn table Search string char list
-												TextViewer: add Find (#9507)


											
										
										
											2022-09-13 21:09:49 +00:00
+								function util.stringSearch(txt, str, case_sensitive, start_pos)
 								    if not case_sensitive then
 								        str = Utf8Proc.lowercase(util.fixUtf8(str, "?"))
 								    end
 								    local txt_charlist = type(txt) == "table" and txt or util.splitToChars(txt)
 								    local str_charlist = util.splitToChars(str)
 								    local str_len = #str_charlist
 								    local char_pos, found = 0, 0
 								    for i = start_pos - 1, #txt_charlist - str_len do
 								        for j = 1, str_len do
 								            local char_txt = txt_charlist[i + j]
 								            local char_str = str_charlist[j]
 								            if not case_sensitive then
 								                char_txt = Utf8Proc.lowercase(util.fixUtf8(char_txt, "?"))
 								            end
 								            if char_txt ~= char_str then
 								                found = 0
 								                break
 								            end
 								            found = found + 1
 								        end
 								        if found == str_len then
 								            char_pos = i + 1
 								            break
 								        end
 								    end
-												TextViewer: make find result bold (#11427)


											
										
										
											2024-02-03 08:32:33 +00:00
+								    -- Returned charlists are used in TextViewer find,
 								    -- to avoid double call of util.splitToChars()
 								    return char_pos, txt_charlist, str_charlist
-												TextViewer: add Find (#9507)


											
										
										
											2022-09-13 21:09:49 +00:00
+								end
-												util: add reversible table method wrapping helper

In some cases, it's useful to be able to wrap a function and either
replace its contents entirely or have some callback be run before
calling the underlying function.

The most obvious users for this feature are the Japanese and Korean
keyboards (both of which need to wrap the inputbox methods with either
their own versions or have basic callbacks be run before the method is
executed).

This is loosely based on how busted/luassert spies work.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>

											
										
										
											2021-11-06 05:09:12 +00:00
+								local WrappedFunction_mt = {
 								    __call = function(self, ...)
 								        if self.before_callback then
 								            self.before_callback(self.target_table, ...)
 								        end
 								        if self.func then
 								            return self.func(...)
 								        end
 								    end,
 								}
 								--- Wrap (or replace) a table method with a custom method, in a revertable way.
 								-- This allows you extend the features of an existing module by modifying its
 								-- internal methods, and then revert them back to normal later if necessary.
 								--
 								-- The most notable use-case for this is VirtualKeyboard's inputbox method
 								-- wrapping to allow keyboards to add more complicated state-machines to modify
 								-- how characters are input.
 								--
 								-- The returned table is the same table `target_table[target_field_name]` is
 								-- set to. In addition to being callable, the new method has three sub-methods:
 								--
 								--  * `:revert()` will un-wrap the method and revert it to the original state.
 								--
 								--    Note that if a method is wrapped multiple times, reverting it will revert
 								--    it to the state of the method when util.wrapMethod was called (and if
 								--    called on the table returned from util.wrapMethod, that is the state when
 								--    that particular util.wrapMethod was called).
 								--
 								--  * `:raw_call(...)` will call the original method with the given arguments
 								--    and return whatever it returns.
 								--
 								--    This makes it more ergonomic to use the wrapped table methods in the case
 								--    where you've replaced the regular function with your own implementation
 								--    but you need to call the original functions inside your implementation.
 								--
 								--  * `:raw_method_call(...)` will call the original method with the arguments
 								--    `(target_table, ...)` and return whatever it returns. Note that the
 								--    target_table used is the one associated with the util.wrapMethod call.
 								--
 								--    This makes it more ergonomic to use the wrapped table methods in the case
 								--    where you've replaced the regular function with your own implementation
 								--    but you need to call the original functions inside your implementation.
 								--
 								--    This is effectively short-hand for `:raw_call(target_table, ...)`.
 								--
 								-- This is loosely based on busted/luassert's spies implementation (MIT).
 								--   <https://github.com/Olivine-Labs/luassert/blob/v1.7.11/src/spy.lua>
 								--
 								-- @tparam table target_table The table whose method will be wrapped.
 								-- @tparam string target_field_name The name of the field to wrap.
 								-- @tparam nil|func new_func If non-nil, this function will be called instead of the original function after wrapping.
 								-- @tparam nil|func before_callback If non-nil, this function will be called (with the arguments (target_table, ...)) before the function is called.
 								function util.wrapMethod(target_table, target_field_name, new_func, before_callback)
 								    -- Whatever is currently stored in the field (possibly nil, possibly a previous wrapper)
 								    local original = target_table[target_field_name]
 								    -- Puts the saved original back into the target table (only done once)
 								    local function revert(self)
 								        if self.reverted then
 								            return
 								        end
 								        self.target_table[self.target_field_name] = self.old_func
 								        self.reverted = true
 								    end
 								    -- Calls the saved original with exactly the given arguments
 								    local function raw_call(self, ...)
 								        local saved = self.old_func
 								        if saved then
 								            return saved(...)
 								        end
 								    end
 								    -- Same as raw_call, with the target table passed as first argument
 								    local function raw_method_call(self, ...)
 								        return self:raw_call(self.target_table, ...)
 								    end
 								    local wrapped = {
 								        target_table = target_table,
 								        target_field_name = target_field_name,
 								        old_func = original,
 								        before_callback = before_callback,
 								        -- new_func replaces the original when provided
 								        func = new_func or original,
 								        revert = revert,
 								        raw_call = raw_call,
 								        raw_method_call = raw_method_call,
 								    }
 								    setmetatable(wrapped, WrappedFunction_mt)
 								    -- Install the callable wrapper in place of the original
 								    target_table[target_field_name] = wrapped
 								    return wrapped
 								end
-												FileChooser collates: use "item.sort_percent" for sorting + extra `on-hold` handling (#11592)

re https://github.com/koreader/koreader/pull/11524#issuecomment-1984110990
re https://github.com/koreader/koreader/pull/11542#issuecomment-2018164346

re #11592
											
										
										
											2024-03-30 13:04:43 +00:00
+								-- Round a given "num" to the decimal points of "points"
 								-- (i.e. `round_decimal(0.000000001, 2)` will yield `0.00`)
 								function util.round_decimal(num, points)
 								    -- Shift the wanted decimals left of the decimal point, drop the rest, shift back.
 								    -- Note that math.floor is used: the value is rounded down, not to the nearest.
 								    local scale = 10 ^ points
 								    local floored = math.floor(num * scale)
 								    return floored / scale
 								end
-												strip punctuations around word before searching
This should fix #1337.

											
										
										
											2015-02-01 09:40:34 +00:00
+								return util