Dictionary: disable fuzzy search for CJK words (#8458)

Fuzzy searching doesn't work with CJK text: with Japanese,
we get large numbers of useless results because sdcv
decides to strip off the wrong part of the word.
It seems unlikely that sdcv correctly handles Korean
or Chinese either, so just disable fuzzy searching for
lookups in which every candidate word contains a CJK character.
pull/8462/head
Aleksa Sarai 2 years ago committed by GitHub
parent 8ef426d79f
commit 3461f8af31
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -712,7 +712,6 @@ function ReaderDictionary:onShowDictionaryLookup()
end
function ReaderDictionary:rawSdcv(words, dict_names, fuzzy_search, lookup_progress_msg)
local all_results = {}
-- Allow for two sdcv calls : one in the classic data/dict, and
-- another one in data/dict_ext if it exists
-- We could put in data/dict_ext dictionaries with a great number of words
@ -728,6 +727,7 @@ function ReaderDictionary:rawSdcv(words, dict_names, fuzzy_search, lookup_progre
if dictDirsEmpty(dict_dirs) then
return false, nil
end
local all_results = {}
local lookup_cancelled = false
for _, dict_dir in ipairs(dict_dirs) do
if lookup_cancelled then
@ -811,6 +811,22 @@ function ReaderDictionary:startSdcv(word, dict_names, fuzzy_search)
end
end
-- If every word contains a CJK character, every word candidate is
-- (probably) a CJK word. We don't want fuzzy searching in this case
-- because sdcv cannot handle CJK text properly when fuzzy searching (with
-- Japanese, it returns hundreds of useless results).
local shouldnt_fuzzy_search = true
for _, word in ipairs(words) do
if not util.hasCJKChar(word) then
shouldnt_fuzzy_search = false
break
end
end
if shouldnt_fuzzy_search then
logger.dbg("disabling fuzzy searching for all-CJK word search:", words)
fuzzy_search = false
end
local lookup_cancelled, results = self:rawSdcv(words, dict_names, fuzzy_search, self.lookup_progress_msg or false)
if results == nil then -- no dictionaries found
return {

Loading…
Cancel
Save