2
0
mirror of https://github.com/koreader/koreader synced 2024-10-31 21:20:20 +00:00
koreader/spec/unit/util_spec.lua
2016-06-28 23:50:25 +08:00

80 lines
2.8 KiB
Lua

describe("util module", function()
-- Module under test; assigned once in setup() and read by the specs below.
local util
setup(function()
    -- The shared spec bootstrap is required before the module under test.
    require("commonrequire")
    util = require("util")
end)
it("should strip punctuations around word", function()
    -- luassert takes the expected value first and the actual value second;
    -- keeping that order makes failure reports label each side correctly.
    assert.is_equal("hello world", util.stripePunctuations("\"hello world\""))
    assert.is_equal("hello world", util.stripePunctuations("\"hello world?\""))
    -- The comma inside the phrase is expected to survive; only the
    -- surrounding marks are removed.
    assert.is_equal("hello, world", util.stripePunctuations("\"hello, world?\""))
    -- Same expectations for CJK text wrapped in curly quotation marks.
    assert.is_equal("你好", util.stripePunctuations("“你好“"))
    assert.is_equal("你好", util.stripePunctuations("“你好?“"))
end)
it("should split string with patterns", function()
    local sentence = "Hello world, welcome to KOReader!"
    local words = {}
    -- Split on runs of whitespace. With the third argument set to false the
    -- expected result below holds only the pieces between separators.
    for word in util.gsplit(sentence, "%s+", false) do
        words[#words + 1] = word
    end
    -- Expected value first, actual value second (luassert convention), so a
    -- failure report labels the two tables correctly.
    assert.are_same({"Hello", "world,", "welcome", "to", "KOReader!"}, words)
end)
it("should split command line arguments with quotation", function()
    local command = "./sdcv -nj \"words\" \"a lot\" 'more or less' --data-dir=dict"
    local argv = {}
    -- Three nested splitting passes over the command line:
    --   1. the pattern matches a span enclosed in single or double quotes,
    --   2. the pattern matches a leading unquoted run up to trailing whitespace,
    --   3. the pattern matches a single quote character.
    -- NOTE(review): passes 1 and 2 pass true as the third argument, presumably
    -- so the matched spans are yielded too -- confirm against util.gsplit.
    for quoted in util.gsplit(command, "[\"'].-[\"']", true) do
        for token in util.gsplit(quoted, "^[^\"'].-%s+", true) do
            for piece in util.gsplit(token, "[\"']", false) do
                -- gsub returns (string, count); the single local keeps only
                -- the whitespace-trimmed string.
                local trimmed = piece:gsub("^%s*(.-)%s*$", "%1")
                if trimmed ~= "" then
                    argv[#argv + 1] = trimmed
                end
            end
        end
    end
    -- Expected value first, actual value second (luassert convention).
    assert.are_same({"./sdcv", "-nj", "words", "a lot", "more or less", "--data-dir=dict"}, argv)
end)
it("should split line into words", function()
    local words = util.splitToWords("one two,three four . five")
    -- The expected list interleaves words with the separator runs found
    -- between them (spaces, a comma, and " . ").
    -- Expected value first, actual value second (luassert convention), so a
    -- failure report labels the two tables correctly.
    assert.are_same({
        "one",
        " ",
        "two",
        ",",
        "three",
        " ",
        "four",
        " . ",
        "five",
    }, words)
end)
it("should split ancient greek words", function()
    local words = util.splitToWords("Λαρισαῖος Λευκοθέα Λιγυαστάδης.")
    -- Multi-byte Greek letters (including the accented one) must stay inside
    -- their word; only the spaces and the final period are separate entries.
    -- Expected value first, actual value second (luassert convention).
    assert.are_same({
        "Λαρισαῖος",
        " ",
        "Λευκοθέα",
        " ",
        "Λιγυαστάδης",
        ".",
    }, words)
end)
it("should split Chinese words", function()
    local words = util.splitToWords("彩虹是通过太阳光的折射引起的。")
    -- One expected entry per character of the input, closing full stop
    -- included (15 in total). The entries had degraded to empty strings,
    -- which made the expectation meaningless.
    -- Expected value first, actual value second (luassert convention).
    assert.are_same({
        "彩", "虹", "是", "通", "过", "太", "阳", "光", "的", "折", "射", "引", "起", "的", "。",
    }, words)
end)
it("should split words of multilingual text", function()
    local words = util.splitToWords("BBC纪录片")
    -- The Latin run is expected as one word, followed by one entry per CJK
    -- character. Those three entries had degraded to empty strings.
    -- Expected value first, actual value second (luassert convention).
    assert.are_same({"BBC", "纪", "录", "片"}, words)
end)
end)