koreader/spec/unit/util_spec.lua

describe("util module", function()
    -- Module under test. Declared here so every `it` block below can see it;
    -- the value is assigned inside setup().
    local util
    -- Load order matters: "commonrequire" is required before "util".
    -- NOTE(review): presumably commonrequire prepares the environment (e.g.
    -- package paths) that require("util") relies on -- confirm.
    setup(function()
        require("commonrequire")
        util = require("util")
    end)

    -- util.stripePunctuations is expected to drop punctuation surrounding a
    -- phrase (ASCII and typographic quotes, a trailing question mark) while
    -- keeping punctuation in its interior, and to give back "" for "" and
    -- nil for nil.
    it("should strip punctuations around word", function()
        -- luassert takes (expected, actual). The extra parentheses truncate
        -- each call to exactly one value, as the original (non-last) argument
        -- position did, so a multi-value or zero-value return cannot shift
        -- the assertion's arguments.
        assert.is_equal("hello world", (util.stripePunctuations("\"hello world\"")))
        assert.is_equal("hello world", (util.stripePunctuations("\"hello world?\"")))
        assert.is_equal("hello, world", (util.stripePunctuations("\"hello, world?\"")))
        assert.is_equal("你好", (util.stripePunctuations("“你好“")))
        assert.is_equal("你好", (util.stripePunctuations("“你好?“")))
        assert.is_equal("", (util.stripePunctuations("")))
        assert.is_equal(nil, (util.stripePunctuations(nil)))
    end)

    -- Splitting on the Lua pattern "%s+" should yield the whitespace-separated
    -- chunks of the sentence, punctuation still attached.
    it("should split string with patterns", function()
        local sentence = "Hello world, welcome to KOReader!"
        local words = {}
        for word in util.gsplit(sentence, "%s+", false) do
            table.insert(words, word)
        end
        -- luassert argument order is (expected, actual).
        assert.are_same({"Hello", "world,", "welcome", "to", "KOReader!"}, words)
    end)

    -- Tokenizes a shell-like command line by nesting three gsplit passes and
    -- checks that quoted arguments survive as single entries.
    -- NOTE(review): the third gsplit argument appears to control whether the
    -- text matched by the pattern is yielded as well -- confirm against
    -- util.gsplit.
    it("should split command line arguments with quotation", function()
        local command = "./sdcv -nj \"words\" \"a lot\" 'more or less' --data-dir=dict"
        local argv = {}
        -- Pass 1: split around quoted runs ("..." or '...').
        for arg1 in util.gsplit(command, "[\"'].-[\"']", true) do
            -- Pass 2: split on a leading unquoted run ending in whitespace.
            for arg2 in util.gsplit(arg1, "^[^\"'].-%s+", true) do
                -- Pass 3: remove the quote characters themselves.
                for arg3 in util.gsplit(arg2, "[\"']", false) do
                    -- Only gsub's first result (the string) is kept; the
                    -- substitution count is discarded by the assignment.
                    local trimmed = arg3:gsub("^%s*(.-)%s*$", "%1")
                    if trimmed ~= "" then
                        table.insert(argv, trimmed)
                    end
                end
            end
        end
        -- luassert argument order is (expected, actual).
        assert.are_same({"./sdcv", "-nj", "words", "a lot", "more or less", "--data-dir=dict"}, argv)
    end)

    -- A single-character separator between items yields each item once.
    it("should split with splitter", function()
        local words = {}
        for word in util.gsplit("a-b-c-d", "-", false) do
            table.insert(words, word)
        end
        -- luassert argument order is (expected, actual).
        assert.are_same({"a", "b", "c", "d"}, words)
    end)

    -- A trailing separator must not produce an extra (empty) entry.
    it("should also split with splitter", function()
        local words = {}
        for word in util.gsplit("a-b-c-d-", "-", false) do
            table.insert(words, word)
        end
        -- luassert argument order is (expected, actual).
        assert.are_same({"a", "b", "c", "d"}, words)
    end)

    -- util.splitToWords is expected to return words and the separator runs
    -- between them (spaces, punctuation) as distinct, ordered entries.
    it("should split line into words", function()
        local words = util.splitToWords("one two,three  four . five")
        -- luassert argument order is (expected, actual).
        assert.are_same({
            "one",
            " ",
            "two",
            ",",
            "three",
            "  ",
            "four",
            " . ",
            "five",
        }, words)
    end)

    -- Greek words (including polytonic letters) must stay whole; only the
    -- spaces and the final period are separate entries.
    it("should split ancient greek words", function()
        local words = util.splitToWords("Λαρισαῖος Λευκοθέα Λιγυαστάδης.")
        -- luassert argument order is (expected, actual).
        assert.are_same({
            "Λαρισαῖος",
            " ",
            "Λευκοθέα",
            " ",
            "Λιγυαστάδης",
            "."
        }, words)
    end)

    -- Every Chinese character, and the closing full stop, is its own entry.
    it("should split Chinese words", function()
        local words = util.splitToWords("彩虹是通过太阳光的折射引起的。")
        -- luassert argument order is (expected, actual).
        assert.are_same({
            "彩","虹","是","通","过","太","阳","光","的","折","射","引","起","的","。",
        }, words)
    end)

    -- A Latin run stays one word while each following Chinese character is
    -- split off individually.
    it("should split words of multilingual text", function()
        local words = util.splitToWords("BBC纪录片")
        -- luassert argument order is (expected, actual).
        assert.are_same({"BBC", "纪", "录", "片"}, words)
    end)

    -- Rebuilds line-break chunks character by character: characters are
    -- accumulated into `word`, which is flushed whenever util.isSplittable(c)
    -- reports a break opportunity. Accented Latin letters must not split a word.
    it("should split text to line - unicode", function()
        local text = "Pójdźże, chmurność glück schließen Štěstí neštěstí. Uñas gavilán"
        local word = ""
        local table_of_words = {}
        local table_chars = util.splitToChars(text)
        for i = 1, #table_chars do
            local c = table_chars[i]
            word = word .. c
            if util.isSplittable(c) then
                table.insert(table_of_words, word)
                word = ""
            end
            -- Flush whatever is pending once the last character is consumed.
            if i == #table_chars then table.insert(table_of_words, word) end
        end
        -- luassert argument order is (expected, actual).
        assert.are_same({
            "Pójdźże, ",
            "chmurność ",
            "glück ",
            "schließen ",
            "Štěstí ",
            "neštěstí. ",
            "Uñas ",
            "gavilán",
        }, table_of_words)
    end)

    -- Same character-by-character chunking as the unicode case, applied to
    -- Chinese text: the expected result has one chunk per character.
    it("should split text to line - CJK", function()
        local text = "彩虹是通过太阳光的折射引起的。"
        local word = ""
        local table_of_words = {}
        local table_chars = util.splitToChars(text)
        for i = 1, #table_chars do
            local c = table_chars[i]
            word = word .. c
            if util.isSplittable(c) then
                table.insert(table_of_words, word)
                word = ""
            end
            -- Flush whatever is pending once the last character is consumed.
            if i == #table_chars then table.insert(table_of_words, word) end
        end
        -- luassert argument order is (expected, actual).
        assert.are_same({
            "彩","虹","是","通","过","太","阳","光","的","折","射","引","起","的","。",
        }, table_of_words)
    end)

    -- Chunking with look-ahead: util.isSplittable also receives the following
    -- character. Per the expected chunks, a space directly before punctuation
    -- such as ":" ";" ")" "%" "?" is not used as a break.
    it("should split text to line with next_c - unicode", function()
        local text = "Ce test : 1) est très simple ; 2 ) simple comme ( 2/2 ) > 50 % ? ok."
        local word = ""
        local table_of_words = {}
        local table_chars = util.splitToChars(text)
        for i = 1, #table_chars do
            local c = table_chars[i]
            -- nil when c is the last character.
            local next_c = i < #table_chars and table_chars[i+1] or nil
            word = word .. c
            if util.isSplittable(c, next_c) then
                table.insert(table_of_words, word)
                word = ""
            end
            -- Flush whatever is pending once the last character is consumed.
            if i == #table_chars then table.insert(table_of_words, word) end
        end
        -- luassert argument order is (expected, actual).
        assert.are_same({
            "Ce ",
            "test : ",
            "1) ",
            "est ",
            "très ",
            "simple ; ",
            "2 ) ",
            "simple ",
            "comme ",
            "( ",
            "2/2 ) > ",
            "50 % ? ",
            "ok."
        }, table_of_words)
    end)

    -- Chunking with both look-ahead and look-behind: util.isSplittable gets
    -- the following and the preceding character. Per the expected chunks, a
    -- space after an opening "«" or "(" is additionally not used as a break.
    it("should split text to line with next_c and prev_c - unicode", function()
        local text = "Ce test : 1) est « très simple » ; 2 ) simple comme ( 2/2 ) > 50 % ? ok."
        local word = ""
        local table_of_words = {}
        local table_chars = util.splitToChars(text)
        for i = 1, #table_chars do
            local c = table_chars[i]
            -- nil at the respective end of the character list.
            local next_c = i < #table_chars and table_chars[i+1] or nil
            local prev_c = i > 1 and table_chars[i-1] or nil
            word = word .. c
            if util.isSplittable(c, next_c, prev_c) then
                table.insert(table_of_words, word)
                word = ""
            end
            -- Flush whatever is pending once the last character is consumed.
            if i == #table_chars then table.insert(table_of_words, word) end
        end
        -- luassert argument order is (expected, actual).
        assert.are_same({
            "Ce ",
            "test : ",
            "1) ",
            "est ",
            "« très ",
            "simple » ; ",
            "2 ) ",
            "simple ",
            "comme ",
            "( 2/2 ) > 50 % ? ",
            "ok."
        }, table_of_words)
    end)

    -- util.splitFilePathName(full) returns two values: the directory part
    -- (trailing slashes included) and the file name. Empty and nil input are
    -- expected to give "" for both.
    it("should split file path and name", function()
        -- Checks both return values for one input; luassert argument order
        -- is (expected, actual).
        local test = function(full, path, name)
            local p, n = util.splitFilePathName(full)
            assert.are_same(path, p)
            assert.are_same(name, n)
        end
        test("/a/b/c.txt", "/a/b/", "c.txt")
        test("/a/b////c.txt", "/a/b////", "c.txt")
        test("/a/b/", "/a/b/", "")
        test("c.txt", "", "c.txt")
        test("", "", "")
        test(nil, "", "")
        test("a/b", "a/", "b")
        test("/b", "/", "b")
        -- The parentheses keep only the first return value (the path), which
        -- is what the call's original non-last argument position did.
        assert.are_same("/a/b/", (util.splitFilePathName("/a/b/c.txt")))
    end)

    -- util.splitFileNameSuffix(full) returns two values: the name without its
    -- extension and the extension without the dot. Inputs lacking a suffix,
    -- and empty/nil input, are expected to give "" as the suffix.
    it("should split file name and suffix", function()
        -- Checks both return values for one input; luassert argument order
        -- is (expected, actual).
        local test = function(full, name, suffix)
            local n, s = util.splitFileNameSuffix(full)
            assert.are_same(name, n)
            assert.are_same(suffix, s)
        end
        test("a.txt", "a", "txt")
        test("/a/b.txt", "/a/b", "txt")
        test("a", "a", "")
        test("/a/b", "/a/b", "")
        test("/a/", "/a/", "")
        test("/a/.txt", "/a/", "txt")
        test(nil, "", "")
        test("", "", "")
        -- The parentheses keep only the first return value (the name), which
        -- is what the call's original non-last argument position did.
        assert.are_same("a", (util.splitFileNameSuffix("a.txt")))
    end)

    -- "\128" (a lone continuation byte) and "\194" (a lead byte followed by
    -- the ASCII byte "\127") are each replaced; the ASCII bytes are kept.
    it("should replace invalid UTF-8 characters with an underscore", function()
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local fixed = util.fixUtf8("\127 \128 \194\127 ", "_")
        assert.is_equal("\127 _ _\127 ", fixed)
    end)

    -- The replacement may be longer than one character; it is inserted once
    -- per invalid byte.
    it("should replace invalid UTF-8 characters with multiple characters", function()
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local fixed = util.fixUtf8("\127 \128 \194\127 ", "__")
        assert.is_equal("\127 __ __\127 ", fixed)
    end)

    -- An empty replacement simply removes the invalid bytes.
    it("should replace invalid UTF-8 characters with empty char", function()
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local fixed = util.fixUtf8("\127 \128 \194\127 ", "")
        assert.is_equal("\127  \127 ", fixed)
    end)

    -- U+FFFD REPLACEMENT CHARACTER is itself valid UTF-8 and must be left
    -- untouched by util.fixUtf8.
    -- NOTE(review): the previous literals contained the ASCII placeholders
    -- "<EFBFBD>" and "<20>", which look like a lossy rendering of U+FFFD
    -- (UTF-8 bytes EF BF BD); with them the test never fed a multi-byte
    -- character to fixUtf8. Confirm against the upstream spec.
    it("should not replace valid UTF-8 U+FFFD character", function()
        -- Spelled as decimal byte escapes so the character survives any
        -- editor or transport re-encoding of this file.
        local fffd = "\239\191\189"
        local text = fffd .. "valid " .. fffd .. " char " .. fffd
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local fixed = util.fixUtf8(text, "__")
        assert.is_equal(text, fixed)
    end)

    -- An ASCII byte and a well-formed four-byte sequence pass through as-is.
    it("should not replace valid UTF-8 characters", function()
        local text = "\99 \244\129\130\190"
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local fixed = util.fixUtf8(text, "_")
        assert.is_equal(text, fixed)
    end)

    -- Polish diacritics (two-byte sequences) pass through unchanged.
    it("should not replace valid UTF-8 characters Polish chars", function()
        local text = "Pójdźże źółć"
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local fixed = util.fixUtf8(text, "_")
        assert.is_equal(text, fixed)
    end)

    -- German umlaut and sharp s (two-byte sequences) pass through unchanged.
    it("should not replace valid UTF-8 characters German chars", function()
        local text = "glück schließen"
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local fixed = util.fixUtf8(text, "_")
        assert.is_equal(text, fixed)
    end)

    -- With the third argument true, the empty field between two consecutive
    -- tabs is kept; the trailing tab adds no extra entry.
    it("should split input to array", function()
        local fields = util.splitToArray("100\tabc\t\tdef\tghi200\t", "\t", true)
        -- luassert argument order is (expected, actual).
        assert.are_same({"100", "abc", "", "def", "ghi200"}, fields)
    end)

    -- With the third argument true, a leading separator yields a leading
    -- empty entry; the trailing separator adds none.
    it("should also split input to array", function()
        local fields = util.splitToArray("abcabcabcabca", "a", true)
        -- luassert argument order is (expected, actual).
        assert.are_same({"", "bc", "bc", "bc", "bc"}, fields)
    end)

    -- With the third argument false, runs of separators produce no empty
    -- entries at all.
    it("should split input to array without empty entities", function()
        local fields = util.splitToArray("100  abc   def ghi200  ", " ", false)
        -- luassert argument order is (expected, actual).
        assert.are_same({"100", "abc", "def", "ghi200"}, fields)
    end)

    -- Text that merely contains "<", "&&" and an entity-looking "&amp;" must
    -- not be treated as HTML: the input is expected back unchanged.
    it("should guess it is not HTML and let is as is", function()
        local s = "if (i < 0 && j < 0) j = i&amp;"
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local converted = util.htmlToPlainTextIfHtml(s)
        assert.is_equal(s, converted)
    end)
    -- Real markup is converted: tags are dropped, named and numeric entities
    -- are decoded, and surrounding whitespace/line breaks are trimmed.
    it("should guess it is HTML and convert it to text", function()
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local converted = util.htmlToPlainTextIfHtml("<div> <br> Making <b>unit&nbsp;tests</b> is <i class='notreally'>fun &amp; n&#xE9;c&#233;ssaire</i><br/> </div>")
        assert.is_equal("Making unit tests is fun & nécéssaire", converted)
    end)
    -- Markup whose tags are themselves entity-encoded ("&lt;br&gt;",
    -- "&amp;amp;") is still recognized: <br> becomes a newline and the
    -- doubly-encoded ampersand becomes "&".
    it("should guess it is double encoded HTML and convert it to text", function()
        -- A local keeps exactly one return value; luassert argument order is
        -- (expected, actual).
        local converted = util.htmlToPlainTextIfHtml("Deux parties.&lt;br&gt;Prologue.Désespérée, elle le tue...&lt;br&gt;Première partie. Sur la route &amp;amp; dans la nuit")
        assert.is_equal("Deux parties.\nPrologue.Désespérée, elle le tue...\nPremière partie. Sur la route & dans la nuit", converted)
    end)
end)
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								describe("util module", function()
-												kobo: fix screen probe for touch

											
										
										
											2016-04-03 04:52:30 +00:00
+								    local util
 								    setup(function()
 								        require("commonrequire")
 								        util = require("util")
 								    end)
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								    it("should strip punctuations around word", function()
 								        assert.is_equal(util.stripePunctuations("\"hello world\""), "hello world")
 								        assert.is_equal(util.stripePunctuations("\"hello world?\""), "hello world")
 								        assert.is_equal(util.stripePunctuations("\"hello, world?\""), "hello, world")
 								        assert.is_equal(util.stripePunctuations("“你好“"), "你好")
 								        assert.is_equal(util.stripePunctuations("“你好?“"), "你好")
-												test: add optmath spec stub (#2950)

* test: add optmath spec stub
											
										
										
											2017-06-13 16:40:56 +00:00
+								        assert.is_equal(util.stripePunctuations(""), "")
 								        assert.is_equal(util.stripePunctuations(nil), nil)
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								    end)
-												kobo: fix screen probe for touch

											
										
										
											2016-04-03 04:52:30 +00:00
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								    it("should split string with patterns", function()
-												Removed a bunch of "successfully", replaced an ellipsis, and some deviant spellings of KOReader.

											
										
										
											2016-04-16 10:21:49 +00:00
+								        local sentence = "Hello world, welcome to KOReader!"
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								        local words = {}
 								        for word in util.gsplit(sentence, "%s+", false) do
 								            table.insert(words, word)
 								        end
-												Removed a bunch of "successfully", replaced an ellipsis, and some deviant spellings of KOReader.

											
										
										
											2016-04-16 10:21:49 +00:00
+								        assert.are_same(words, {"Hello", "world,", "welcome", "to", "KOReader!"})
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								    end)
-												kobo: fix screen probe for touch

											
										
										
											2016-04-03 04:52:30 +00:00
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								    it("should split command line arguments with quotation", function()
 								        local command = "./sdcv -nj \"words\" \"a lot\" 'more or less' --data-dir=dict"
 								        local argv = {}
 								        for arg1 in util.gsplit(command, "[\"'].-[\"']", true) do
 								            for arg2 in util.gsplit(arg1, "^[^\"'].-%s+", true) do
 								                for arg3 in util.gsplit(arg2, "[\"']", false) do
 								                    local trimed = arg3:gsub("^%s*(.-)%s*$", "%1")
 								                    if trimed ~= "" then
 								                        table.insert(argv, trimed)
 								                    end
 								                end
 								            end
 								        end
 								        assert.are_same(argv, {"./sdcv", "-nj", "words", "a lot", "more or less", "--data-dir=dict"})
 								    end)
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
-												Merge various information into systemstat (#2764)

* Merge various information to systemstat
											
										
										
											2017-04-14 19:12:28 +00:00
+								    it("should split with splitter", function()
 								        local words = {}
 								        for word in util.gsplit("a-b-c-d", "-", false) do
 								            table.insert(words, word)
 								        end
 								        assert.are_same(words, {"a", "b", "c", "d"})
 								    end)
 								    it("should also split with splitter", function()
 								        local words = {}
 								        for word in util.gsplit("a-b-c-d-", "-", false) do
 								            table.insert(words, word)
 								        end
 								        assert.are_same(words, {"a", "b", "c", "d"})
 								    end)
-												textboxwidget(fix): handle onHoldWord event

											
										
										
											2016-06-05 07:33:31 +00:00
+								    it("should split line into words", function()
 								        local words = util.splitToWords("one two,three  four . five")
 								        assert.are_same(words, {
 								            "one",
 								            " ",
 								            "two",
 								            ",",
 								            "three",
 								            "  ",
 								            "four",
 								            " . ",
 								            "five",
 								        })
 								    end)
-												split accient greek words with spacing character
This should fix #1705.

											
										
										
											2016-06-28 15:50:21 +00:00
 								    it("should split ancient greek words", function()
 								        local words = util.splitToWords("Λαρισαῖος Λευκοθέα Λιγυαστάδης.")
 								        assert.are_same(words, {
 								            "Λαρισαῖος",
 								            " ",
 								            "Λευκοθέα",
 								            " ",
 								            "Λιγυαστάδης",
 								            "."
 								        })
 								    end)
 								    it("should split Chinese words", function()
 								        local words = util.splitToWords("彩虹是通过太阳光的折射引起的。")
 								        assert.are_same(words, {
 								            "彩","虹","是","通","过","太","阳","光","的","折","射","引","起","的","。",
 								        })
 								    end)
 								    it("should split words of multilingual text", function()
 								        local words = util.splitToWords("BBC纪录片")
 								        assert.are_same(words, {"BBC", "纪", "录", "片"})
 								    end)
-												Fix hyphenation words with unicode character in texboxwidget (#2356)


											
										
										
											2016-11-19 20:26:53 +00:00
 								    it("should split text to line - unicode", function()
 								        local text = "Pójdźże, chmurność glück schließen Štěstí neštěstí. Uñas gavilán"
 								        local word = ""
 								        local table_of_words = {}
 								        local c
 								        local table_chars = util.splitToChars(text)
 								        for i = 1, #table_chars  do
 								            c = table_chars[i]
 								            word = word .. c
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								            if util.isSplittable(c) then
-												Fix hyphenation words with unicode character in texboxwidget (#2356)


											
										
										
											2016-11-19 20:26:53 +00:00
+								                table.insert(table_of_words, word)
 								                word = ""
 								            end
 								            if i == #table_chars then table.insert(table_of_words, word) end
 								        end
 								        assert.are_same(table_of_words, {
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								            "Pójdźże, ",
-												Fix hyphenation words with unicode character in texboxwidget (#2356)


											
										
										
											2016-11-19 20:26:53 +00:00
+								            "chmurność ",
 								            "glück ",
 								            "schließen ",
 								            "Štěstí ",
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								            "neštěstí. ",
-												Fix hyphenation words with unicode character in texboxwidget (#2356)


											
										
										
											2016-11-19 20:26:53 +00:00
+								            "Uñas ",
 								            "gavilán",
 								        })
 								    end)
-												PR #2356 breaks CJK character splitting

											
										
										
											2016-11-26 00:46:56 +00:00
+								    it("should split text to line - CJK", function()
 								        local text = "彩虹是通过太阳光的折射引起的。"
 								        local word = ""
 								        local table_of_words = {}
 								        local c
 								        local table_chars = util.splitToChars(text)
 								        for i = 1, #table_chars  do
 								            c = table_chars[i]
 								            word = word .. c
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								            if util.isSplittable(c) then
-												PR #2356 breaks CJK character splitting

											
										
										
											2016-11-26 00:46:56 +00:00
+								                table.insert(table_of_words, word)
 								                word = ""
 								            end
 								            if i == #table_chars then table.insert(table_of_words, word) end
 								        end
 								        assert.are_same(table_of_words, {
 								            "彩","虹","是","通","过","太","阳","光","的","折","射","引","起","的","。",
 								        })
 								    end)
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								    it("should split text to line with next_c - unicode", function()
 								        local text = "Ce test : 1) est très simple ; 2 ) simple comme ( 2/2 ) > 50 % ? ok."
 								        local word = ""
 								        local table_of_words = {}
-												Travis: run luacheck on unit tests (#3059)

* Travis: run luacheck on unit tests
											
										
										
											2017-08-08 20:35:40 +00:00
+								        local c, next_c
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								        local table_chars = util.splitToChars(text)
 								        for i = 1, #table_chars  do
 								            c = table_chars[i]
 								            next_c = i < #table_chars and table_chars[i+1] or nil
 								            word = word .. c
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								            if util.isSplittable(c, next_c) then
-												textboxwidget and scrolltextwidget enhancements (#2393)

util: made isSplitable() accept an optional next_char
for wiser decision

textboxwidget: speed up rendering, enhanced text wrapping,
allow selection of multiple words with Hold.

scrolltextwidget: allow scrolling with Tap.

Details in #2393

											
										
										
											2016-12-06 21:10:25 +00:00
+								                table.insert(table_of_words, word)
 								                word = ""
 								            end
 								            if i == #table_chars then table.insert(table_of_words, word) end
 								        end
 								        assert.are_same(table_of_words, {
 								            "Ce ",
 								            "test : ",
 								            "1) ",
 								            "est ",
 								            "très ",
 								            "simple ; ",
 								            "2 ) ",
 								            "simple ",
 								            "comme ",
 								            "( ",
 								            "2/2 ) > ",
 								            "50 % ? ",
 								            "ok."
 								        })
 								    end)
-												textboxwidget: even better text wrapping

util.isSplitable() accepts now also the previous char to help
decide if a space can be used to split a line.
TextBoxWidget:_splitCharWidthList() : simplified logic

											
										
										
											2016-12-12 22:41:16 +00:00
+								    it("should split text to line with next_c and prev_c - unicode", function()
 								        local text = "Ce test : 1) est « très simple » ; 2 ) simple comme ( 2/2 ) > 50 % ? ok."
 								        local word = ""
 								        local table_of_words = {}
-												Travis: run luacheck on unit tests (#3059)

* Travis: run luacheck on unit tests
											
										
										
											2017-08-08 20:35:40 +00:00
+								        local c, next_c, prev_c
-												textboxwidget: even better text wrapping

util.isSplitable() accepts now also the previous char to help
decide if a space can be used to split a line.
TextBoxWidget:_splitCharWidthList() : simplified logic

											
										
										
											2016-12-12 22:41:16 +00:00
+								        local table_chars = util.splitToChars(text)
 								        for i = 1, #table_chars  do
 								            c = table_chars[i]
 								            next_c = i < #table_chars and table_chars[i+1] or nil
 								            prev_c = i > 1 and table_chars[i-1] or nil
 								            word = word .. c
-												Developer documentation improvements

* Fixed up all of util and added when absent
* Updated widget examples to new coding style

											
										
										
											2017-04-04 07:57:14 +00:00
+								            if util.isSplittable(c, next_c, prev_c) then
-												textboxwidget: even better text wrapping

util.isSplitable() accepts now also the previous char to help
decide if a space can be used to split a line.
TextBoxWidget:_splitCharWidthList() : simplified logic

											
										
										
											2016-12-12 22:41:16 +00:00
+								                table.insert(table_of_words, word)
 								                word = ""
 								            end
 								            if i == #table_chars then table.insert(table_of_words, word) end
 								        end
 								        assert.are_same(table_of_words, {
 								            "Ce ",
 								            "test : ",
 								            "1) ",
 								            "est ",
 								            "« très ",
 								            "simple » ; ",
 								            "2 ) ",
 								            "simple ",
 								            "comme ",
 								            "( 2/2 ) > 50 % ? ",
 								            "ok."
 								        })
 								    end)
-												evernote: ReadHistory integration and text file output (#2498)



											
										
										
											2017-01-21 09:32:42 +00:00
+								    it("should split file path and name", function()
 								        local test = function(full, path, name)
 								            local p, n = util.splitFilePathName(full)
 								            assert.are_same(p, path)
 								            assert.are_same(n, name)
 								        end
 								        test("/a/b/c.txt", "/a/b/", "c.txt")
 								        test("/a/b////c.txt", "/a/b////", "c.txt")
 								        test("/a/b/", "/a/b/", "")
 								        test("c.txt", "", "c.txt")
 								        test("", "", "")
 								        test(nil, "", "")
 								        test("a/b", "a/", "b")
 								        test("/b", "/", "b")
 								        assert.are_same(util.splitFilePathName("/a/b/c.txt"), "/a/b/")
 								    end)
-												textboxwidget: even better text wrapping

util.isSplitable() accepts now also the previous char to help
decide if a space can be used to split a line.
TextBoxWidget:_splitCharWidthList() : simplified logic

											
										
										
											2016-12-12 22:41:16 +00:00
-												evernote: ReadHistory integration and text file output (#2498)



											
										
										
											2017-01-21 09:32:42 +00:00
+								    it("should split file name and suffix", function()
 								        local test = function(full, name, suffix)
 								            local n, s = util.splitFileNameSuffix(full)
 								            assert.are_same(n, name)
 								            assert.are_same(s, suffix)
 								        end
 								        test("a.txt", "a", "txt")
 								        test("/a/b.txt", "/a/b", "txt")
 								        test("a", "a", "")
 								        test("/a/b", "/a/b", "")
 								        test("/a/", "/a/", "")
 								        test("/a/.txt", "/a/", "txt")
 								        test(nil, "", "")
 								        test("", "", "")
 								        assert.are_same(util.splitFileNameSuffix("a.txt"), "a")
 								    end)
-												Added util.fixUtf8 (#2704)

* Remove invalid UTF-8 chars from OPDS
* add unit tests
											
										
										
											2017-04-02 14:17:49 +00:00
 								    it("should replace invalid UTF-8 characters with an underscore", function()
 								        assert.is_equal(util.fixUtf8("\127 \128 \194\127 ", "_"), "\127 _ _\127 ")
 								    end)
 								    it("should replace invalid UTF-8 characters with multiple characters", function()
 								        assert.is_equal(util.fixUtf8("\127 \128 \194\127 ", "__"), "\127 __ __\127 ")
 								    end)
 								    it("should replace invalid UTF-8 characters with empty char", function()
 								        assert.is_equal(util.fixUtf8("\127 \128 \194\127 ", ""), "\127  \127 ")
 								    end)
 								    it("should not replace valid UTF-8 <20> character", function()
 								        assert.is_equal(util.fixUtf8("<EFBFBD>valid <20> char <20>", "__"), "<EFBFBD>valid <20> char <20>")
 								    end)
 								    it("should not replace valid UTF-8 characters", function()
 								        assert.is_equal(util.fixUtf8("\99 \244\129\130\190", "_"), "\99 \244\129\130\190")
 								    end)
 								    it("should not replace valid UTF-8 characters Polish chars", function()
 								        assert.is_equal(util.fixUtf8("Pójdźże źółć", "_"), "Pójdźże źółć")
 								    end)
 								    it("should not replace valid UTF-8 characters German chars", function()
 								        assert.is_equal(util.fixUtf8("glück schließen", "_"), "glück schließen")
 								    end)
-												Merge various information into systemstat (#2764)

* Merge various information to systemstat
											
										
										
											2017-04-14 19:12:28 +00:00
+								    it("should split input to array", function()
 								        assert.are_same(util.splitToArray("100\tabc\t\tdef\tghi200\t", "\t", true),
 								                        {"100", "abc", "", "def", "ghi200"})
 								    end)
 								    it("should also split input to array", function()
 								        assert.are_same(util.splitToArray("abcabcabcabca", "a", true),
 								                        {"", "bc", "bc", "bc", "bc"})
 								    end)
 								    it("should split input to array without empty entities", function()
 								        assert.are_same(util.splitToArray("100  abc   def ghi200  ", " ", false),
 								                        {"100", "abc", "def", "ghi200"})
 								    end)
-												Book information: refactored and additional features

- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua
to new filemanagerbookinfo.lua (and other common code to filemanagerutil.lua).
- Uses sidecar files' new doc_props and doc_pages settings, or fallback to
old 'stats' settings, or to opening document.
- Shows filename, filetype and directory.
- Shows description (Hold to see whole truncated text), keywords, and
cover image (tap to extract image from document and display it if available).
- Book information now available from reader menu, to display info about
the currently opened book.
- Convert possibly HTML description to plain text via added
util.htmlToPlainTextIfHtml() (for simple HTML conversion).

											
										
										
											2017-07-01 10:11:44 +00:00
 								    it("should guess it is not HTML and let is as is", function()
 								        local s = "if (i < 0 && j < 0) j = i&amp;"
 								        assert.is_equal(util.htmlToPlainTextIfHtml(s), s)
 								    end)
 								    it("should guess it is HTML and convert it to text", function()
 								        assert.is_equal(util.htmlToPlainTextIfHtml("<div> <br> Making <b>unit&nbsp;tests</b> is <i class='notreally'>fun &amp; n&#xE9;c&#233;ssaire</i><br/> </div>"),
 								                    "Making unit tests is fun & nécéssaire")
 								    end)
 								    it("should guess it is double encoded HTML and convert it to text", function()
 								        assert.is_equal(util.htmlToPlainTextIfHtml("Deux parties.&lt;br&gt;Prologue.Désespérée, elle le tue...&lt;br&gt;Première partie. Sur la route &amp;amp; dans la nuit"),
 								                    "Deux parties.\nPrologue.Désespérée, elle le tue...\nPremière partie. Sur la route & dans la nuit")
 								    end)
-												Refactor out string.gsplit to util.gsplit

											
										
										
											2015-04-22 06:17:06 +00:00
+								end)