From 3461f8af31c7b9143184498dd0799468b6f170f8 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 22 Nov 2021 05:13:29 +1100 Subject: [PATCH] Dictionary: disable fuzzy search for CJK words (#8458) Fuzzy searching doesn't work with CJK text: with Japanese, we get large numbers of useless results because sdcv decides to strip off the wrong part of the word. It seems unlikely that sdcv correctly handles Korean or Chinese, so just disable fuzzy searching on all CJK-containing word lookups. --- .../apps/reader/modules/readerdictionary.lua | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/frontend/apps/reader/modules/readerdictionary.lua b/frontend/apps/reader/modules/readerdictionary.lua index 43bc645d0..1e7084260 100644 --- a/frontend/apps/reader/modules/readerdictionary.lua +++ b/frontend/apps/reader/modules/readerdictionary.lua @@ -712,7 +712,6 @@ function ReaderDictionary:onShowDictionaryLookup() end function ReaderDictionary:rawSdcv(words, dict_names, fuzzy_search, lookup_progress_msg) - local all_results = {} -- Allow for two sdcv calls : one in the classic data/dict, and -- another one in data/dict_ext if it exists -- We could put in data/dict_ext dictionaries with a great number of words @@ -728,6 +727,7 @@ function ReaderDictionary:rawSdcv(words, dict_names, fuzzy_search, lookup_progre if dictDirsEmpty(dict_dirs) then return false, nil end + local all_results = {} local lookup_cancelled = false for _, dict_dir in ipairs(dict_dirs) do if lookup_cancelled then @@ -811,6 +811,22 @@ function ReaderDictionary:startSdcv(word, dict_names, fuzzy_search) end end + -- If every word contains a CJK character, every word candidate is + -- (probably) a CJK word. We don't want fuzzy searching in this case + -- because sdcv cannot handle CJK text properly when fuzzy searching (with + -- Japanese, it returns hundreds of useless results).
+ local shouldnt_fuzzy_search = true + for _, word in ipairs(words) do + if not util.hasCJKChar(word) then + shouldnt_fuzzy_search = false + break + end + end + if shouldnt_fuzzy_search then + logger.dbg("disabling fuzzy searching for all-CJK word search:", words) + fuzzy_search = false + end + local lookup_cancelled, results = self:rawSdcv(words, dict_names, fuzzy_search, self.lookup_progress_msg or false) if results == nil then -- no dictionaries found return {