From a15dfa5ee11228881f7a846f89196cbdb69021fb Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 19 Aug 2022 17:43:17 +0200 Subject: [PATCH 1/2] [fix] engine woxikon.de - don't raise exception on empty result list Woxikon expects a word in German, so with query "foo" the site finds nothing and responds with a 404: httpx.HTTPStatusError: Client error '404 Not Found' \ for url 'https://synonyme.woxikon.de/synonyme/foo.php' [1] https://github.com/searxng/searxng/issues/1543#issuecomment-1193317054 Closes: https://github.com/searxng/searxng/issues/1543 Suggested-by: @allendema [1] Signed-off-by: Markus Heiser --- searx/engines/xpath.py | 5 ++++- searx/settings.yml | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 705a5211d..97656705a 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -60,6 +60,9 @@ lang_all = 'en' '''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is selected. ''' +raise_for_httperror = True +'''True by default: raise an exception if the HTTP code of response is ``>= +300``''' soft_max_redirects = 0 '''Maximum redirects, soft limit. 
Record an error but don't stop the engine''' @@ -176,7 +179,7 @@ def request(query, params): params['url'] = search_url.format(**fargs) params['soft_max_redirects'] = soft_max_redirects - + params['raise_for_httperror'] = raise_for_httperror return params diff --git a/searx/settings.yml b/searx/settings.yml index 416450d9b..f83c104fd 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1808,6 +1808,7 @@ engines: url_xpath: //div[@class="upper-synonyms"]/a/@href content_xpath: //div[@class="synonyms-list-group"] title_xpath: //div[@class="upper-synonyms"]/a + raise_for_httperror: false about: website: https://www.woxikon.de/ wikidata_id: # No Wikidata ID From dd0887be186d208846cdc7c3df13dde020dfa957 Mon Sep 17 00:00:00 2001 From: Alexandre FLAMENT Date: Fri, 2 Sep 2022 07:33:20 +0000 Subject: [PATCH 2/2] xpath engine: change raise_for_httperror to no_result_for_http_status no_result_for_http_status contains a list of HTTP status codes. These HTTP status codes are treated as an empty result list. In other cases an exception is thrown as usual. Previously raise_for_httperror was ignoring all HTTP errors, which made defective engines invisible in the stats. --- searx/engines/xpath.py | 23 ++++++++++++++++++----- searx/settings.yml | 2 +- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 97656705a..f9528e92d 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -22,6 +22,7 @@ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list +from searx.network import raise_for_httperror search_url = None """ @@ -60,9 +61,14 @@ lang_all = 'en' '''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is selected. 
''' -raise_for_httperror = True -'''True by default: raise an exception if the HTTP code of response is ``>= -300``''' + +no_result_for_http_status = [] +'''Return empty result for these HTTP status codes instead of throwing an error. + +.. code:: yaml + + no_result_for_http_status: [] +''' soft_max_redirects = 0 '''Maximum redirects, soft limit. Record an error but don't stop the engine''' @@ -179,12 +185,19 @@ def request(query, params): params['url'] = search_url.format(**fargs) params['soft_max_redirects'] = soft_max_redirects - params['raise_for_httperror'] = raise_for_httperror + + params['raise_for_httperror'] = False + return params -def response(resp): +def response(resp): # pylint: disable=too-many-branches '''Scrap *results* from the response (see :ref:`engine results`).''' + if no_result_for_http_status and resp.status_code in no_result_for_http_status: + return [] + + raise_for_httperror(resp) + results = [] dom = html.fromstring(resp.text) is_onion = 'onions' in categories diff --git a/searx/settings.yml b/searx/settings.yml index f83c104fd..5587ed51a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1808,7 +1808,7 @@ engines: url_xpath: //div[@class="upper-synonyms"]/a/@href content_xpath: //div[@class="synonyms-list-group"] title_xpath: //div[@class="upper-synonyms"]/a - raise_for_httperror: false + no_result_for_http_status: [404] about: website: https://www.woxikon.de/ wikidata_id: # No Wikidata ID