From 7e201cbf65bdfe961fd5c9b97d92e822824a1dcb Mon Sep 17 00:00:00 2001 From: Dalf Date: Thu, 18 Jul 2019 21:32:17 +0200 Subject: [PATCH 1/2] [mod] use cache in _match_language function to speed up searx start time significantly --- searx/utils.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/searx/utils.py b/searx/utils.py index 6619dd0ae..b7e914557 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -47,6 +47,8 @@ blocked_tags = ('script', useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__)) + "/data/useragents.json", 'r', encoding='utf-8').read()) +lang_to_lc_cache = dict() + def searx_useragent(): return 'searx/{searx_version} {suffix}'.format( @@ -183,7 +185,7 @@ def get_resources_directory(searx_directory, subdirectory, resources_directory): if not resources_directory: resources_directory = os.path.join(searx_directory, subdirectory) if not os.path.isdir(resources_directory): - raise Exception(directory + " is not a directory") + raise Exception(resources_directory + " is not a directory") return resources_directory @@ -314,6 +316,17 @@ def is_valid_lang(lang): return False +def _get_lang_to_lc_dict(lang_list): + key = str(lang_list) + value = lang_to_lc_cache.get(key, None) + if value is None: + value = dict() + for lc in lang_list: + value.setdefault(lc.split('-')[0], lc) + lang_to_lc_cache[key] = value + return value + + # auxiliary function to match lang_code in lang_list def _match_language(lang_code, lang_list=[], custom_aliases={}): # replace language code with a custom alias if necessary @@ -334,11 +347,7 @@ def _match_language(lang_code, lang_list=[], custom_aliases={}): return new_code # try to get the any supported country for this language - for lc in lang_list: - if lang_code == lc.split('-')[0]: - return lc - - return None + return _get_lang_to_lc_dict(lang_list).get(lang_code, None) # get the language code from lang_list that best matches locale_code From da0ce5880fc636acb3356bfa93801a90ec06fa35 Mon Sep 17 00:00:00 2001 From: Dalf Date: Thu, 18 Jul 2019 21:34:07 +0200 Subject: [PATCH 2/2] [fix] fix soundcloud engine, speed up searx start time --- searx/engines/soundcloud.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index d59755e04..3ba9a7f39 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -28,8 +28,10 @@ categories = ['music'] paging = True # search-url -url = 'https://api.soundcloud.com/' +# missing attribute: user_id, app_version, app_locale +url = 'https://api-v2.soundcloud.com/' search_url = url + 'search?{query}'\ + '&variant_ids='\ '&facet=model'\ '&limit=20'\ '&offset={offset}'\ @@ -57,7 +59,7 @@ def get_client_id(): # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: - cids = cid_re.search(response.text) + cids = cid_re.search(response.content.decode("utf-8")) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")