From 7cdd31440e621937550072c3f73e68f644554842 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 12 Jan 2022 18:08:48 +0100 Subject: [PATCH] [fix] external bangs: don't overwrite Bangs in data trie Bangs with a `*` suffix (e.g. `!!d*`) overwrite Bangs with the same prefix (e.g. `!!d`) [1]. This can be avoided when a non-printable character is used to tag a LEAF_KEY. [1] https://github.com/searxng/searxng/pull/740#issuecomment-1010411888 Signed-off-by: Markus Heiser --- searx/external_bang.py | 6 ++-- searxng_extra/update/update_external_bangs.py | 36 ++++++++++--------- tests/unit/test_external_bangs.py | 20 +++++++---- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/searx/external_bang.py b/searx/external_bang.py index ec5a46ed..a56737c7 100644 --- a/searx/external_bang.py +++ b/searx/external_bang.py @@ -2,6 +2,8 @@ from searx.data import EXTERNAL_BANGS +LEAF_KEY = chr(16) + def get_node(external_bangs_db, bang): node = external_bangs_db['trie'] @@ -26,8 +28,8 @@ def get_bang_definition_and_ac(external_bangs_db, bang): if k.startswith(after): bang_ac_list.append(before + k) elif isinstance(node, dict): - bang_definition = node.get('*') - bang_ac_list = [before + k for k in node.keys() if k != '*'] + bang_definition = node.get(LEAF_KEY) + bang_ac_list = [before + k for k in node.keys() if k != LEAF_KEY] elif isinstance(node, str): bang_definition = node bang_ac_list = [] diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py index be3aade0..7b826265 100755 --- a/searxng_extra/update/update_external_bangs.py +++ b/searxng_extra/update/update_external_bangs.py @@ -25,7 +25,7 @@ from os.path import join import httpx from searx import searx_dir # pylint: disable=E0401 C0413 - +from searx.external_bang import LEAF_KEY # from https://duckduckgo.com/newbang URL_BV1 = 'https://duckduckgo.com/bv1.js' @@ -51,18 +51,22 @@ def fetch_ddg_bangs(url): def merge_when_no_leaf(node): """Minimize the number of 
nodes - A -> B -> C - B is child of A - C is child of B + ``A -> B -> C`` + + - ``B`` is child of ``A`` + - ``C`` is child of ``B`` + + If there are no ``C`` equals to ``<LEAF_KEY>``, then each ``C`` are merged + into ``A``. For example (5 nodes):: + + d -> d -> g -> <LEAF_KEY> (ddg) + -> i -> g -> <LEAF_KEY> (dig) + + becomes (3 nodes):: - If there are no C equals to '*', then each C are merged into A + d -> dg -> <LEAF_KEY> + -> ig -> <LEAF_KEY> - For example: - d -> d -> g -> * (ddg*) - -> i -> g -> * (dig*) - becomes - d -> dg -> * - -> ig -> * """ restart = False if not isinstance(node, dict): @@ -72,12 +76,12 @@ def merge_when_no_leaf(node): keys = list(node.keys()) for key in keys: - if key == '*': + if key == LEAF_KEY: continue value = node[key] value_keys = list(value.keys()) - if '*' not in value_keys: + if LEAF_KEY not in value_keys: for value_key in value_keys: node[key + value_key] = value[value_key] merge_when_no_leaf(node[key + value_key]) @@ -94,8 +98,8 @@ def optimize_leaf(parent, parent_key, node): if not isinstance(node, dict): return - if len(node) == 1 and '*' in node and parent is not None: - parent[parent_key] = node['*'] + if len(node) == 1 and LEAF_KEY in node and parent is not None: + parent[parent_key] = node[LEAF_KEY] else: for key, value in node.items(): optimize_leaf(node, key, value) @@ -138,7 +142,7 @@ def parse_ddg_bangs(ddg_bangs): t = bang_trie for bang_letter in bang: t = t.setdefault(bang_letter, {}) - t = t.setdefault('*', bang_def_output) + t = t.setdefault(LEAF_KEY, bang_def_output) # optimize the trie merge_when_no_leaf(bang_trie) diff --git a/tests/unit/test_external_bangs.py b/tests/unit/test_external_bangs.py index 698ce36c..794edf15 100644 --- a/tests/unit/test_external_bangs.py +++ b/tests/unit/test_external_bangs.py @@ -1,4 +1,10 @@ -from searx.external_bang import get_node, resolve_bang_definition, get_bang_url, get_bang_definition_and_autocomplete +from searx.external_bang import ( + get_node, + resolve_bang_definition, + get_bang_url, + 
get_bang_definition_and_autocomplete, + LEAF_KEY, +) from searx.search import SearchQuery, EngineRef from tests import SearxTestCase @@ -7,12 +13,12 @@ TEST_DB = { 'trie': { 'exam': { 'ple': '//example.com/' + chr(2) + chr(1) + '0', - '*': '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0', + LEAF_KEY: '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0', }, 'sea': { - '*': 'sea' + chr(2) + chr(1) + '0', + LEAF_KEY: 'sea' + chr(2) + chr(1) + '0', 'rch': { - '*': 'search' + chr(2) + chr(1) + '0', + LEAF_KEY: 'search' + chr(2) + chr(1) + '0', 'ing': 'searching' + chr(2) + chr(1) + '0', }, 's': { @@ -31,7 +37,7 @@ class TestGetNode(SearxTestCase): 'trie': { 'exam': { 'ple': 'test', - '*': 'not used', + LEAF_KEY: 'not used', } } } @@ -71,7 +77,7 @@ class TestResolveBangDefinition(SearxTestCase): class TestGetBangDefinitionAndAutocomplete(SearxTestCase): def test_found(self): bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB) - self.assertEqual(bang_definition, TEST_DB['trie']['exam']['*']) + self.assertEqual(bang_definition, TEST_DB['trie']['exam'][LEAF_KEY]) self.assertEqual(new_autocomplete, ['example']) def test_found_optimized(self): @@ -86,7 +92,7 @@ class TestGetBangDefinitionAndAutocomplete(SearxTestCase): def test_partial2(self): bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('sea', external_bangs_db=TEST_DB) - self.assertEqual(bang_definition, TEST_DB['trie']['sea']['*']) + self.assertEqual(bang_definition, TEST_DB['trie']['sea'][LEAF_KEY]) self.assertEqual(new_autocomplete, ['search', 'searching', 'seascapes', 'season']) def test_error(self):