koreader/spec/unit/util_spec.lua

describe("util module", function()
    -- Module under test; assigned once by the suite-level setup hook below
    -- and shared by every test case in this describe block.
    local util

    -- Requires "commonrequire" before "util", preserving the original order
    -- (presumably it prepares the test environment -- TODO confirm).
    local function load_util()
        require("commonrequire")
        util = require("util")
    end
    setup(load_util)

    -- util.stripePunctuations is expected to remove punctuation at both ends
    -- of the input while leaving punctuation inside it untouched (see the
    -- "hello, world" case). Covers ASCII quotes and CJK text in curly quotes.
    it("should strip punctuations around word", function()
        -- luassert takes (expected, actual); with the arguments in that order
        -- a failure report labels the two values correctly.
        assert.is_equal("hello world", util.stripePunctuations("\"hello world\""))
        assert.is_equal("hello world", util.stripePunctuations("\"hello world?\""))
        assert.is_equal("hello, world", util.stripePunctuations("\"hello, world?\""))
        assert.is_equal("你好", util.stripePunctuations("“你好“"))
        assert.is_equal("你好", util.stripePunctuations("“你好?“"))
    end)

    -- util.gsplit is used as a generic-for iterator that yields the pieces of
    -- a string separated by a Lua pattern. With the third argument set to
    -- false the whitespace separators do not show up in the collected pieces.
    it("should split string with patterns", function()
        local sentence = "Hello world, welcome to KOReader!"
        local words = {}
        for word in util.gsplit(sentence, "%s+", false) do
            table.insert(words, word)
        end
        -- luassert takes (expected, actual), so failure output is labelled correctly.
        assert.are_same({"Hello", "world,", "welcome", "to", "KOReader!"}, words)
    end)

    -- Tokenizes a shell-like command line with three nested util.gsplit
    -- passes and checks that quoted arguments stay whole, with their quote
    -- characters removed, while unquoted arguments are split on whitespace.
    it("should split command line arguments with quotation", function()
        local command = "./sdcv -nj \"words\" \"a lot\" 'more or less' --data-dir=dict"
        local argv = {}
        -- Pass 1: split around quoted spans. The quoted text ends up in argv,
        -- so with `true` the matched spans are evidently yielded as well.
        for arg1 in util.gsplit(command, "[\"'].-[\"']", true) do
            -- Pass 2: for pieces that do not start with a quote, split off
            -- leading whitespace-terminated words.
            for arg2 in util.gsplit(arg1, "^[^\"'].-%s+", true) do
                -- Pass 3: split on the quote characters themselves so they
                -- are dropped from the final arguments.
                for arg3 in util.gsplit(arg2, "[\"']", false) do
                    -- gsub also returns a substitution count; the
                    -- single-target assignment keeps only the trimmed string.
                    local trimmed = arg3:gsub("^%s*(.-)%s*$", "%1")
                    if trimmed ~= "" then
                        table.insert(argv, trimmed)
                    end
                end
            end
        end
        -- luassert takes (expected, actual), so failure output is labelled correctly.
        assert.are_same({"./sdcv", "-nj", "words", "a lot", "more or less", "--data-dir=dict"}, argv)
    end)

    -- util.splitToWords is expected to return words and the separator runs
    -- between them as distinct entries: consecutive spaces and punctuation
    -- are kept together (e.g. "  " and " . ") rather than dropped.
    it("should split line into words", function()
        local words = util.splitToWords("one two,three  four . five")
        -- luassert takes (expected, actual), so failure output is labelled correctly.
        assert.are_same({
            "one",
            " ",
            "two",
            ",",
            "three",
            "  ",
            "four",
            " . ",
            "five",
        }, words)
    end)

    -- Polytonic Greek words must stay whole: only the spaces and the final
    -- full stop are expected as separate entries.
    it("should split ancient greek words", function()
        local words = util.splitToWords("Λαρισαῖος Λευκοθέα Λιγυαστάδης.")
        -- luassert takes (expected, actual), so failure output is labelled correctly.
        assert.are_same({
            "Λαρισαῖος",
            " ",
            "Λευκοθέα",
            " ",
            "Λιγυαστάδης",
            "."
        }, words)
    end)

    -- For Chinese text every character, including the closing full stop,
    -- is expected as its own entry.
    it("should split Chinese words", function()
        local words = util.splitToWords("彩虹是通过太阳光的折射引起的。")
        -- luassert takes (expected, actual), so failure output is labelled correctly.
        assert.are_same({
            "彩","虹","是","通","过","太","阳","光","的","折","射","引","起","的","。",
        }, words)
    end)

    -- Mixed-script input: the Latin run stays one word while each Chinese
    -- character that follows is expected as a separate entry.
    it("should split words of multilingual text", function()
        local words = util.splitToWords("BBC纪录片")
        -- luassert takes (expected, actual), so failure output is labelled correctly.
        assert.are_same({"BBC", "纪", "录", "片"}, words)
    end)

    -- Rebuilds line-break chunks from util.splitToChars output, closing a
    -- chunk after every character for which util.isSplitable returns true.
    -- Accented Latin letters must not break a word apart; the expected list
    -- shows chunks ending right after a space or a punctuation mark.
    it("should split text to line - unicode", function()
        local text = "Pójdźże, chmurność glück schließen Štěstí neštěstí. Uñas gavilán"
        local word = ""
        local table_of_words = {}
        local table_chars = util.splitToChars(text)
        local char_count = #table_chars
        for i = 1, char_count do
            local c = table_chars[i]
            word = word .. c
            if util.isSplitable(c) then
                table.insert(table_of_words, word)
                word = ""
            end
            -- Flush whatever is still pending once the last character is consumed.
            if i == char_count then table.insert(table_of_words, word) end
        end
        -- luassert takes (expected, actual), so failure output is labelled correctly.
        assert.are_same({
            "Pójdźże,",
            " ",
            "chmurność ",
            "glück ",
            "schließen ",
            "Štěstí ",
            "neštěstí.",
            " ",
            "Uñas ",
            "gavilán",
        }, table_of_words)
    end)

end)
Refactor out string.gsplit to util.gsplit 2015-04-22 06:17:06 +00:00			`describe("util module", function()`
kobo: fix screen probe for touch 2016-04-03 04:52:30 +00:00			`local util`
			`setup(function()`
			`require("commonrequire")`
			`util = require("util")`
			`end)`

Refactor out string.gsplit to util.gsplit 2015-04-22 06:17:06 +00:00			`it("should strip punctuations around word", function()`
			`assert.is_equal(util.stripePunctuations("\"hello world\""), "hello world")`
			`assert.is_equal(util.stripePunctuations("\"hello world?\""), "hello world")`
			`assert.is_equal(util.stripePunctuations("\"hello, world?\""), "hello, world")`
			`assert.is_equal(util.stripePunctuations("“你好“"), "你好")`
			`assert.is_equal(util.stripePunctuations("“你好?“"), "你好")`
			`end)`
kobo: fix screen probe for touch 2016-04-03 04:52:30 +00:00
Refactor out string.gsplit to util.gsplit 2015-04-22 06:17:06 +00:00			`it("should split string with patterns", function()`
Removed a bunch of "successfully", replaced an ellipsis, and some deviant spellings of KOReader. 2016-04-16 10:21:49 +00:00			`local sentence = "Hello world, welcome to KOReader!"`
Refactor out string.gsplit to util.gsplit 2015-04-22 06:17:06 +00:00			`local words = {}`
			`for word in util.gsplit(sentence, "%s+", false) do`
			`table.insert(words, word)`
			`end`
Removed a bunch of "successfully", replaced an ellipsis, and some deviant spellings of KOReader. 2016-04-16 10:21:49 +00:00			`assert.are_same(words, {"Hello", "world,", "welcome", "to", "KOReader!"})`
Refactor out string.gsplit to util.gsplit 2015-04-22 06:17:06 +00:00			`end)`
kobo: fix screen probe for touch 2016-04-03 04:52:30 +00:00
Refactor out string.gsplit to util.gsplit 2015-04-22 06:17:06 +00:00			`it("should split command line arguments with quotation", function()`
			`local command = "./sdcv -nj \"words\" \"a lot\" 'more or less' --data-dir=dict"`
			`local argv = {}`
			`for arg1 in util.gsplit(command, "[\"'].-[\"']", true) do`
			`for arg2 in util.gsplit(arg1, "^[^\"'].-%s+", true) do`
			`for arg3 in util.gsplit(arg2, "[\"']", false) do`
			`local trimed = arg3:gsub("^%s(.-)%s$", "%1")`
			`if trimed ~= "" then`
			`table.insert(argv, trimed)`
			`end`
			`end`
			`end`
			`end`
			`assert.are_same(argv, {"./sdcv", "-nj", "words", "a lot", "more or less", "--data-dir=dict"})`
			`end)`
textboxwidget(fix): handle onHoldWord event 2016-06-05 07:33:31 +00:00
			`it("should split line into words", function()`
			`local words = util.splitToWords("one two,three four . five")`
			`assert.are_same(words, {`
			`"one",`
			`" ",`
			`"two",`
			`",",`
			`"three",`
			`" ",`
			`"four",`
			`" . ",`
			`"five",`
			`})`
			`end)`
split ancient greek words with spacing character. This should fix #1705. 2016-06-28 15:50:21 +00:00
			`it("should split ancient greek words", function()`
			`local words = util.splitToWords("Λαρισαῖος Λευκοθέα Λιγυαστάδης.")`
			`assert.are_same(words, {`
			`"Λαρισαῖος",`
			`" ",`
			`"Λευκοθέα",`
			`" ",`
			`"Λιγυαστάδης",`
			`"."`
			`})`
			`end)`

			`it("should split Chinese words", function()`
			`local words = util.splitToWords("彩虹是通过太阳光的折射引起的。")`
			`assert.are_same(words, {`
			`"彩","虹","是","通","过","太","阳","光","的","折","射","引","起","的","。",`
			`})`
			`end)`

			`it("should split words of multilingual text", function()`
			`local words = util.splitToWords("BBC纪录片")`
			`assert.are_same(words, {"BBC", "纪", "录", "片"})`
			`end)`
Fix hyphenation words with unicode character in textboxwidget (#2356) 2016-11-19 20:26:53 +00:00
			`it("should split text to line - unicode", function()`
			`local text = "Pójdźże, chmurność glück schließen Štěstí neštěstí. Uñas gavilán"`
			`local word = ""`
			`local table_of_words = {}`
			`local c`
			`local table_chars = util.splitToChars(text)`
			`for i = 1, #table_chars do`
			`c = table_chars[i]`
			`word = word .. c`
			`if util.isSplitable(c) then`
			`table.insert(table_of_words, word)`
			`word = ""`
			`end`
			`if i == #table_chars then table.insert(table_of_words, word) end`
			`end`
			`assert.are_same(table_of_words, {`
			`"Pójdźże,",`
			`" ",`
			`"chmurność ",`
			`"glück ",`
			`"schließen ",`
			`"Štěstí ",`
			`"neštěstí.",`
			`" ",`
			`"Uñas ",`
			`"gavilán",`
			`})`
			`end)`

Refactor out string.gsplit to util.gsplit 2015-04-22 06:17:06 +00:00			`end)`