2016-02-04 18:24:39 +00:00
--[[--
This module contains miscellaneous helper functions for the KOReader frontend.
]]
2016-02-04 18:24:39 +00:00
2016-06-05 07:33:31 +00:00
-- Base-layer helper module; util.splitToChars calls BaseUtil.utf8charcode.
local BaseUtil = require("ffi/util")
-- Module table: every public helper in this file is attached to it.
local util = {}
2016-12-13 16:06:02 +00:00
--- Strips punctuation surrounding a string.
--
-- Removes every code point in U+2000..U+20FF (UTF-8 lead byte 0xE2 followed
-- by 0x80..0x83, i.e. General Punctuation and the blocks right after it)
-- anywhere in the text, then trims ASCII punctuation from both ends.
-- Punctuation and spaces inside the text are left alone.
---- @string text the string to be stripped
---- @treturn string stripped text (nothing if text is nil or false)
function util.stripePunctuations(text)
    if not text then return end
    -- Assign each gsub result to a single variable so the substitution
    -- count (gsub's second result) is not leaked to the caller.
    local stripped = text:gsub("\226[\128-\131][\128-\191]", "")
    stripped = stripped:gsub("^%p+", "")
    stripped = stripped:gsub("%p+$", "")
    return stripped
end
2017-02-25 17:52:34 +00:00
--[[--
Splits a string by a pattern.

Lua doesn't have a string.split() function and most of the time
you don't really need it because string.gmatch() is enough.
However string.gmatch() has one significant disadvantage:
you can't split a string while matching both the delimited
strings and the delimiters themselves without tracking positions
and substrings. The gsplit function below takes care of
this problem.

Author: Peter Odding

License: MIT/X11

Source: <a href="http://snippets.luacode.org/snippets/String_splitting_130">http://snippets.luacode.org/snippets/String_splitting_130</a>
]]
---- @string str string to split
---- @param pattern the Lua pattern to split against (defaults to "%s+")
---- @bool capture if true, the matched delimiters are yielded as well
---- @bool capture_empty_entity if true, an empty field in front of a delimiter is yielded as ""
---- @treturn function iterator yielding the pieces in order
function util.gsplit(str, pattern, capture, capture_empty_entity)
    pattern = pattern and tostring(pattern) or '%s+'
    -- A pattern that matches "" would never advance through the string.
    if (''):find(pattern) then
        error('pattern matches empty string!', 2)
    end
    return coroutine.wrap(function()
        local index = 1
        repeat
            local first, last = str:find(pattern, index)
            if first and last then
                -- text between the previous delimiter and this one
                if index < first or (index == first and capture_empty_entity) then
                    coroutine.yield(str:sub(index, first - 1))
                end
                if capture then
                    coroutine.yield(str:sub(first, last))
                end
                index = last + 1
            else
                -- no delimiter left: the remainder is the last piece
                if index <= #str then
                    coroutine.yield(str:sub(index))
                end
                break
            end
        until index > #str
    end)
end
2017-04-04 07:57:14 +00:00
--[[--
Converts seconds to a clock string.

Source: <a href="https://gist.github.com/jesseadams/791673">https://gist.github.com/jesseadams/791673</a>
]]
---- @int seconds number of seconds (a value tonumber() cannot convert is treated as 0)
---- @bool withoutSeconds if true 00:00, if false 00:00:00
---- @treturn string clock string in the form of 00:00 or 00:00:00
function util.secondsToClock(seconds, withoutSeconds)
    seconds = tonumber(seconds)
    -- nil (not convertible), zero and NaN all render as a zeroed clock
    if not seconds or seconds == 0 or seconds ~= seconds then
        if withoutSeconds then
            return "00:00"
        end
        return "00:00:00"
    end
    -- Keep the parts numeric; formatting happens once at the end so no
    -- string -> number coercion is needed for the arithmetic.
    local hours = math.floor(seconds / 3600)
    local mins = math.floor(seconds / 60 - hours * 60)
    if withoutSeconds then
        return string.format("%02.f:%02.f", hours, mins)
    end
    local secs = math.floor(seconds - hours * 3600 - mins * 60)
    return string.format("%02.f:%02.f:%02.f", hours, mins, secs)
end
2016-02-04 18:24:39 +00:00
--- Returns number of keys in a table.
--
-- Counts every key visited by pairs(), not only the array part.
---- @param T Lua table
---- @treturn int number of keys in table T
function util.tableSize(T)
    local count = 0
    for _ in pairs(T) do
        count = count + 1
    end
    return count
end
2017-04-04 07:57:14 +00:00
--- Appends all elements from t2 to the end of t1.
--
-- t1 is modified in place; only the elements of t2 walked by ipairs are copied.
---- @param t1 Lua table
---- @param t2 Lua table
function util.arrayAppend(t1, t2)
    for _, v in ipairs(t2) do
        table.insert(t1, v)
    end
end
2017-04-04 07:57:14 +00:00
--[[--
Gets the last index of a character in a string.

Returns the index within this string of the last occurrence of the specified character
or -1 if the character does not occur.

ch is spliced into a Lua pattern as is, so magic characters must be escaped:
to find . you need to pass %.
]]
---- @string string
---- @string ch
---- @treturn int last occurrence or -1 if not found
function util.lastIndexOf(string, ch)
    -- The greedy ".*" pushes the match to the last occurrence; "()" captures
    -- the position just past it.
    local i = string:match(".*" .. ch .. "()")
    if i == nil then return -1 else return i - 1 end
end
2016-04-21 14:13:10 +00:00
2016-12-13 16:06:02 +00:00
--- Splits string into a list of UTF-8 characters.
---- @string text the string to be split.
---- @treturn table list of UTF-8 chars (empty if text is nil)
function util.splitToChars(text)
    local tab = {}
    if text ~= nil then
        local prevcharcode, charcode = 0
        -- One lead byte (ASCII or 0xC2..0xF4) followed by any continuation bytes.
        -- string.gmatch replaces string.gfind, the deprecated Lua 5.0 name.
        for uchar in string.gmatch(text, "([%z\1-\127\194-\244][\128-\191]*)") do
            charcode = BaseUtil.utf8charcode(uchar)
            -- NOTE(review): a char is skipped only when the *previous* one got
            -- a nil/false char code; presumably a guard against malformed
            -- input - confirm against BaseUtil.utf8charcode.
            if prevcharcode then -- utf8
                table.insert(tab, uchar)
            end
            prevcharcode = charcode
        end
    end
    return tab
end
2017-04-04 07:57:14 +00:00
--- Tests whether c is a CJK character.
--
-- True when c is exactly three bytes long, starts with a lead byte in
-- 0xE4..0xEA and continues with a byte in 0x80..0xBF (the third byte is not
-- checked).
---- @string c
---- @treturn boolean true if CJK
function util.isCJKChar(c)
    return string.match(c, "[\228-\234][\128-\191].") == c
end
2017-04-04 07:57:14 +00:00
--- Tests whether str contains CJK characters.
--
-- Looks anywhere in str for a lead byte in 0xE4..0xEA followed by a byte in
-- 0x80..0xBF and one more byte.
---- @string str
---- @treturn boolean true if CJK
function util.hasCJKChar(str)
    return string.match(str, "[\228-\234][\128-\191].") ~= nil
end
2016-12-13 16:06:02 +00:00
--- Split texts into a list of words, spaces and punctuation.
--
-- Runs of whitespace/ASCII punctuation are kept as their own entries. A word
-- for which util.hasCJKChar is true is split further: every multi-byte
-- character (lead byte 0xC0..0xFF plus continuation bytes) becomes its own
-- entry, while single-byte runs between them stay together.
---- @string text text to split
---- @treturn table list of words, spaces and punctuation
function util.splitToWords(text)
    local wlist = {}
    for word in util.gsplit(text, "[%s%p]+", true) do
        -- if space splitted word contains CJK characters
        if util.hasCJKChar(word) then
            -- split with CJK characters
            for char in util.gsplit(word, "[\228-\234\192-\255][\128-\191]+", true) do
                table.insert(wlist, char)
            end
        else
            table.insert(wlist, word)
        end
    end
    return wlist
end
2016-12-06 21:10:25 +00:00
-- We don't want to split on a space if it is followed by some
-- specific punctuation : e.g. "word :" or "word )"
-- (In french, there is a space before a colon, and it better
-- not be wrapped there.)
-- The set must not contain a space itself, or a space followed by another
-- space would be treated as non-splittable.
local non_splittable_space_tailers = ":;,.!?)]}$%=-+*/|<>»”"
-- Same if a space has some specific other punctuation before it
local non_splittable_space_leaders = "([{$=-+*/|<>«“"
2016-12-06 21:10:25 +00:00
2016-12-15 07:58:58 +00:00
-- Similar rules exist for CJK text. Taken from:
-- https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages
--
-- Each table below is flattened into one string with table.concat;
-- util.isSplittable then does plain substring searches in it.
-- NOTE(review): several literals repeat ASCII punctuation they already list
-- and one contains an unescaped double quote - presumably fullwidth forms
-- were folded to ASCII somewhere along the way; verify against the
-- Wikipedia table before relying on them.
--
-- Characters searched for next_c in util.isSplittable (not permitted at start of line).
2017-04-04 07:57:14 +00:00
local cjk_non_splittable_tailers = table.concat ( {
2016-12-15 07:58:58 +00:00
-- Simplified Chinese
" !%),.:;?]}¢°·’ \" †‡›℃∶、。〃〆〕〗〞﹚﹜!"%'),.:;?!]}~ " ,
-- Traditional Chinese
" !),.:;?]}¢·–—’ \" •、。〆〞〕〉》」︰︱︲︳﹐﹑﹒﹔﹕﹖﹘﹚﹜!),.:;?︶︸︺︼︾﹀﹂﹗]|}、 " ,
-- Japanese
" )]}〕〉》」』】〙〗〟’ \" ⦆»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。. " ,
-- Korean
" !%),.:;?]}¢°’ \" †‡℃〆〈《「『〕!%),.:;?]} " ,
} )
-- Characters searched for prev_c in util.isSplittable (not permitted at end of line).
2017-04-04 07:57:14 +00:00
local cjk_non_splittable_leaders = table.concat ( {
2016-12-15 07:58:58 +00:00
-- Simplified Chinese
" $(£¥·‘ \" 〈《「『【〔〖〝﹙﹛$(.[{£¥ " ,
-- Traditional Chinese
" ([{£¥‘ \" ‵〈《「『〔〝︴﹙﹛({︵︷︹︻︽︿﹁﹃﹏ " ,
-- Japanese
" ([{〔〈《「『【〘〖〝‘ \" ⦅« " ,
-- Korean
" $([{£¥‘ \" 々〇〉》」〔$([{⦆¥₩# " ,
} )
-- Characters searched for c itself in util.isSplittable (never split on these).
2017-04-04 07:57:14 +00:00
local cjk_non_splittable = table.concat ( {
2016-12-15 07:58:58 +00:00
-- Japanese
" —…‥〳〴〵 " ,
} )
2017-04-04 07:57:14 +00:00
--- Test whether a string can be separated by this char for multi-line rendering.
--
-- Optional next or prev chars may be provided to help make the decision.
-- Only CJK characters (per util.isCJKChar) and the plain space are ever
-- splittable; everything else sticks to its neighbours.
---- @string c
---- @string next_c
---- @string prev_c
---- @treturn boolean true if splittable, false if not
function util.isSplittable(c, next_c, prev_c)
    if util.isCJKChar(c) then
        -- a CJKChar is a word in itself, and so is splittable...
        if cjk_non_splittable:find(c, 1, true) then
            -- ...except a few of them
            return false
        end
        if next_c and cjk_non_splittable_tailers:find(next_c, 1, true) then
            -- ...or when followed by a char that is not permitted at start of line
            return false
        end
        if prev_c and cjk_non_splittable_leaders:find(prev_c, 1, true) then
            -- ...or when preceded by a char that is not permitted at end of line
            return false
        end
        -- we can split on this CJKchar
        return true
    end
    if c == " " then
        -- we only split on a space (so punctuation sticks to prev word)
        -- if next_c or prev_c is provided, we can make a better decision
        if next_c and non_splittable_space_tailers:find(next_c, 1, true) then
            -- this space is followed by some punctuation that is better kept with us
            return false
        end
        if prev_c and non_splittable_space_leaders:find(prev_c, 1, true) then
            -- this space is lead by some punctuation that is better kept with us
            return false
        end
        -- we can split on this space
        return true
    end
    -- otherwise, non splittable
    return false
end
2017-04-04 07:57:14 +00:00
--- Gets filesystem type of a path.
--
-- Scans <code>/proc/mounts</code> and returns the type of the first mount
-- point other than <code>/</code> whose name occurs in the path. The type of
-- a <code>/</code> entry is used when no such mount point is found.
---- @string path an absolute path
---- @treturn string filesystem type, or nil if /proc/mounts cannot be opened or nothing matches
function util.getFilesystemType(path)
    local mounts = io.open("/proc/mounts", "r")
    if not mounts then return nil end
    local fs_type
    for line in mounts:lines() do
        -- whitespace-separated fields; the 2nd is the mount point, the 3rd the type
        local mount = {}
        for param in line:gmatch("%S+") do table.insert(mount, param) end
        local mount_point = mount[2]
        -- Plain search: a mount point is a literal path, not a Lua pattern, so
        -- characters such as "-", "." or "%" must not be interpreted.
        if mount_point and path:find(mount_point, 1, true) then
            fs_type = mount[3]
            if mount_point ~= '/' then
                break
            end
        end
    end
    mounts:close()
    return fs_type
end
2017-04-04 07:57:14 +00:00
--- Replaces characters that are invalid in filenames.
--
-- Replaces the characters <code>\/:*?"<>|</code> with an <code>_</code>.
-- These characters are problematic on Windows filesystems. On Linux only
-- <code>/</code> poses a problem.
---- @string str filename
---- @treturn string sanitized filename (nothing if str is nil or false)
function util.replaceInvalidChars(str)
    if str then
        -- No separators inside the set: a comma there is a set member and
        -- would be replaced as well.
        local sanitized = str:gsub('[\\/:*?"<>|]', '_')
        -- return only the string, not gsub's substitution count
        return sanitized
    end
end
2017-04-04 07:57:14 +00:00
--- Replaces slash with an underscore.
---- @string str
---- @treturn string str with every / replaced by _ (nothing if str is nil or false)
function util.replaceSlashChar(str)
    if str then
        local replaced = str:gsub('/', '_')
        -- return only the string, not gsub's substitution count
        return replaced
    end
end
2017-04-04 07:57:14 +00:00
--- Splits a file into its path and name.
--
-- The path keeps its trailing slash; a file without any slash has an empty
-- path. nil or "" yields two empty strings.
---- @string file
---- @treturn string path
---- @treturn string filename
function util.splitFilePathName(file)
    if file == nil or file == "" then return "", "" end
    -- The greedy ".*/" runs up to the last slash. Using captures instead of
    -- two gsub calls returns exactly two values (no trailing gsub count).
    local path, name = file:match("^(.*/)(.*)$")
    if not path then return "", file end
    return path, name
end
2017-04-04 07:57:14 +00:00
--- Splits a file name into its pure file name and suffix.
--
-- The split happens at the last dot of the whole string, and the dot itself
-- is dropped. A name without any dot has an empty suffix. nil or "" yields
-- two empty strings.
---- @string file
---- @treturn string name without suffix
---- @treturn string extension
function util.splitFileNameSuffix(file)
    if file == nil or file == "" then return "", "" end
    -- The greedy "(.*)" runs up to the last dot. Using captures instead of
    -- two gsub calls returns exactly two values (no trailing gsub count).
    local name, suffix = file:match("^(.*)%.(.*)$")
    if not name then return file, "" end
    return name, suffix
end
2017-04-04 07:57:14 +00:00
--- Gets file extension
---- @string file
---- @treturn string extension
2017-02-12 02:55:31 +00:00
function util.getFileNameSuffix(file)
    -- Only the second result of the split (the extension) is wanted; the
    -- outer parentheses truncate the result to that single value.
    return (select(2, util.splitFileNameSuffix(file)))
end
2017-04-04 07:57:14 +00:00
--- Gets the text of a menu item, adding an arrow to touch menu items with a submenu.
---- @param item menu item table; text_func, text and sub_item_table are read
---- @treturn string menu text
function util.getMenuText(item)
    local text
    if item.text_func then
        text = item.text_func()
    else
        text = item.text
    end
    if item.sub_item_table ~= nil then
        -- U+25B8 as UTF-8; the three bytes must stay adjacent to form one glyph
        text = text .. " \226\150\184"
    end
    return text
end
2017-04-02 14:17:49 +00:00
-- Returns the byte length (1..4) of the well-formed UTF-8 sequence that starts
-- at pos in s, or nil if the bytes there do not form one. Every pattern is
-- anchored with "^", which ties the match to pos instead of scanning ahead.
local function validUtf8SequenceLength(s, pos)
    if s:find("^[%z\1-\127]", pos) then
        return 1
    end
    if s:find("^[\194-\223][\128-\191]", pos) then
        return 2
    end
    if s:find("^\224[\160-\191][\128-\191]", pos)
        or s:find("^[\225-\236][\128-\191][\128-\191]", pos)
        or s:find("^\237[\128-\159][\128-\191]", pos)
        or s:find("^[\238-\239][\128-\191][\128-\191]", pos) then
        return 3
    end
    if s:find("^\240[\144-\191][\128-\191][\128-\191]", pos)
        or s:find("^[\241-\243][\128-\191][\128-\191][\128-\191]", pos)
        or s:find("^\244[\128-\143][\128-\191][\128-\191]", pos) then
        return 4
    end
    return nil
end

--- Replaces invalid UTF-8 characters with a replacement string.
--
-- Every byte that does not start a well-formed UTF-8 sequence is replaced
-- individually; the replacement text itself is not re-validated.
--
-- Based on <a href="http://notebook.kulchenko.com/programming/fixing-malformed-utf8-in-lua">http://notebook.kulchenko.com/programming/fixing-malformed-utf8-in-lua</a>
---- @string str the string to be checked for invalid characters
---- @string replacement the string to replace invalid characters with
---- @treturn string valid UTF-8
function util.fixUtf8(str, replacement)
    local len = #str
    local pos = 1
    local chunks = {}    -- repaired pieces, joined once at the end
    local run_start = 1  -- first byte of the current run of valid bytes
    while pos <= len do
        local seq_len = validUtf8SequenceLength(str, pos)
        if seq_len then
            pos = pos + seq_len
        else
            -- flush the valid run, then substitute the single offending byte
            chunks[#chunks + 1] = str:sub(run_start, pos - 1) .. replacement
            pos = pos + 1
            run_start = pos
        end
    end
    if run_start == 1 then
        -- nothing was replaced
        return str
    end
    chunks[#chunks + 1] = str:sub(run_start)
    return table.concat(chunks)
end
2017-04-14 19:12:28 +00:00
--- Collects the pieces of a split string into an array.
--
-- Thin wrapper around util.gsplit with delimiter capture turned off. As with
-- gsplit, an empty entity after a trailing splitter is never produced.
---- @string str the string to be split
---- @string splitter pattern handed to util.gsplit
---- @bool capture_empty_entity passed through to util.gsplit
---- @treturn table an array-like table
function util.splitToArray(str, splitter, capture_empty_entity)
    local pieces = {}
    local n = 0
    for piece in util.gsplit(str, splitter, false, capture_empty_entity) do
        n = n + 1
        pieces[n] = piece
    end
    return pieces
end
2015-02-01 09:40:34 +00:00
-- Export the module table.
return util