From b1557b544368b416c158c13f12946859abbe00e0 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Mon, 26 Apr 2021 11:12:02 +0200 Subject: [PATCH 1/2] [mod] processors: show identical error messages on /search and /stats --- searx/search/processors/abstract.py | 26 ++++++---- searx/search/processors/offline.py | 2 +- searx/search/processors/online.py | 12 ++--- searx/webapp.py | 76 +++++++++++++++++------------ 4 files changed, 67 insertions(+), 49 deletions(-) diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 854f6df6a..2a36222d4 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -59,22 +59,28 @@ class EngineProcessor(ABC): key = id(key) if key else self.engine_name self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) - def handle_exception(self, result_container, reason, exception, suspend=False, display_exception=True): + def handle_exception(self, result_container, exception_or_message, suspend=False): # update result_container - error_message = str(exception) if display_exception and exception else None - result_container.add_unresponsive_engine(self.engine_name, reason, error_message) + if isinstance(exception_or_message, BaseException): + exception_class = exception_or_message.__class__ + module_name = getattr(exception_class, '__module__', 'builtins') + module_name = '' if module_name == 'builtins' else module_name + '.' 
+ error_message = module_name + exception_class.__qualname__ + else: + error_message = exception_or_message + result_container.add_unresponsive_engine(self.engine_name, error_message) # metrics counter_inc('engine', self.engine_name, 'search', 'count', 'error') - if exception: - count_exception(self.engine_name, exception) + if isinstance(exception_or_message, BaseException): + count_exception(self.engine_name, exception_or_message) else: - count_error(self.engine_name, reason) + count_error(self.engine_name, exception_or_message) # suspend the engine ? if suspend: suspended_time = None - if isinstance(exception, SearxEngineAccessDeniedException): - suspended_time = exception.suspended_time - self.suspended_status.suspend(suspended_time, reason) # pylint: disable=no-member + if isinstance(exception_or_message, SearxEngineAccessDeniedException): + suspended_time = exception_or_message.suspended_time + self.suspended_status.suspend(suspended_time, error_message) # pylint: disable=no-member def _extend_container_basic(self, result_container, start_time, search_results): # update result_container @@ -91,7 +97,7 @@ class EngineProcessor(ABC): def extend_container(self, result_container, start_time, search_results): if getattr(threading.current_thread(), '_timeout', False): # the main thread is not waiting anymore - self.handle_exception(result_container, 'Timeout', None) + self.handle_exception(result_container, 'timeout', None) else: # check if the engine accepted the request if search_results is not None: diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index 5186b346a..ad03fed4b 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -22,5 +22,5 @@ class OfflineProcessor(EngineProcessor): # do not record the error logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) except Exception as e: - self.handle_exception(result_container, 'unexpected crash', e) + 
self.handle_exception(result_container, e) logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index c39937023..57422c007 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -130,7 +130,7 @@ class OnlineProcessor(EngineProcessor): self.extend_container(result_container, start_time, search_results) except (httpx.TimeoutException, asyncio.TimeoutError) as e: # requests timeout (connect or read) - self.handle_exception(result_container, 'HTTP timeout', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.error("engine {0} : HTTP requests timeout" "(search duration : {1} s, timeout: {2} s) : {3}" .format(self.engine_name, time() - start_time, @@ -138,23 +138,23 @@ class OnlineProcessor(EngineProcessor): e.__class__.__name__)) except (httpx.HTTPError, httpx.StreamError) as e: # other requests exception - self.handle_exception(result_container, 'HTTP error', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.exception("engine {0} : requests exception" "(search duration : {1} s, timeout: {2} s) : {3}" .format(self.engine_name, time() - start_time, timeout_limit, e)) except SearxEngineCaptchaException as e: - self.handle_exception(result_container, 'CAPTCHA required', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.exception('engine {0} : CAPTCHA'.format(self.engine_name)) except SearxEngineTooManyRequestsException as e: - self.handle_exception(result_container, 'too many requests', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.exception('engine {0} : Too many requests'.format(self.engine_name)) except SearxEngineAccessDeniedException as e: - self.handle_exception(result_container, 'blocked', e, 
suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.exception('engine {0} : Searx is blocked'.format(self.engine_name)) except Exception as e: - self.handle_exception(result_container, 'unexpected crash', e, display_exception=False) + self.handle_exception(result_container, e) logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) def get_default_tests(self): diff --git a/searx/webapp.py b/searx/webapp.py index 70d2d662b..b8bc60ec5 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -172,28 +172,34 @@ _category_names = (gettext('files'), gettext('science')) # -exception_classname_to_label = { - "searx.exceptions.SearxEngineCaptchaException": gettext("CAPTCHA"), - "searx.exceptions.SearxEngineTooManyRequestsException": gettext("too many requests"), - "searx.exceptions.SearxEngineAccessDeniedException": gettext("access denied"), - "searx.exceptions.SearxEngineAPIException": gettext("server API error"), - "httpx.TimeoutException": gettext("HTTP timeout"), - "httpx.ConnectTimeout": gettext("HTTP timeout"), - "httpx.ReadTimeout": gettext("HTTP timeout"), - "httpx.WriteTimeout": gettext("HTTP timeout"), - "httpx.HTTPStatusError": gettext("HTTP error"), - "httpx.ConnectError": gettext("HTTP connection error"), - "httpx.RemoteProtocolError": gettext("HTTP protocol error"), - "httpx.LocalProtocolError": gettext("HTTP protocol error"), - "httpx.ProtocolError": gettext("HTTP protocol error"), - "httpx.ReadError": gettext("network error"), - "httpx.WriteError": gettext("network error"), - "httpx.ProxyError": gettext("proxy error"), - "searx.exceptions.SearxEngineXPathException": gettext("parsing error"), - "KeyError": gettext("parsing error"), - "json.decoder.JSONDecodeError": gettext("parsing error"), - "lxml.etree.ParserError": gettext("parsing error"), - None: gettext("unexpected crash"), +timeout_text = gettext('timeout') +parsing_error_text = gettext('parsing error') +http_protocol_error_text = 
gettext('HTTP protocol error') +network_error_text = gettext('network error') +exception_classname_to_text = { + None: gettext('unexpected crash'), + 'timeout': timeout_text, + 'asyncio.TimeoutError': timeout_text, + 'httpx.TimeoutException': timeout_text, + 'httpx.ConnectTimeout': timeout_text, + 'httpx.ReadTimeout': timeout_text, + 'httpx.WriteTimeout': timeout_text, + 'httpx.HTTPStatusError': gettext('HTTP error'), + 'httpx.ConnectError': gettext("HTTP connection error"), + 'httpx.RemoteProtocolError': http_protocol_error_text, + 'httpx.LocalProtocolError': http_protocol_error_text, + 'httpx.ProtocolError': http_protocol_error_text, + 'httpx.ReadError': network_error_text, + 'httpx.WriteError': network_error_text, + 'httpx.ProxyError': gettext("proxy error"), + 'searx.exceptions.SearxEngineCaptchaException': gettext("CAPTCHA"), + 'searx.exceptions.SearxEngineTooManyRequestsException': gettext("too many requests"), + 'searx.exceptions.SearxEngineAccessDeniedException': gettext("access denied"), + 'searx.exceptions.SearxEngineAPIException': gettext("server API error"), + 'searx.exceptions.SearxEngineXPathException': parsing_error_text, + 'KeyError': parsing_error_text, + 'json.decoder.JSONDecodeError': parsing_error_text, + 'lxml.etree.ParserError': parsing_error_text, } _flask_babel_get_translations = flask_babel.get_translations @@ -786,15 +792,21 @@ def search(): def __get_translated_errors(unresponsive_engines): - translated_errors = set() - for unresponsive_engine in unresponsive_engines: - error_msg = gettext(unresponsive_engine[1]) + translated_errors = [] + # make a copy unresponsive_engines to avoid "RuntimeError: Set changed size during iteration" + # it happens when an engine modifies the ResultContainer after the search_multiple_requests method + # has stopped waiting + for unresponsive_engine in list(unresponsive_engines): + error_user_text = exception_classname_to_text.get(unresponsive_engine[1]) + if not error_user_text: + error_user_text = 
exception_classname_to_text[None] + error_msg = gettext(error_user_text) if unresponsive_engine[2]: error_msg = "{} {}".format(error_msg, unresponsive_engine[2]) if unresponsive_engine[3]: error_msg = gettext('Suspended') + ': ' + error_msg - translated_errors.add((unresponsive_engine[0], error_msg)) - return translated_errors + translated_errors.append((unresponsive_engine[0], error_msg)) + return sorted(translated_errors, key=lambda e: e[0]) @app.route('/about', methods=['GET']) @@ -944,14 +956,14 @@ def preferences(): # the first element has the highest percentage rate. reliabilities_errors = [] for error in errors: - error_user_message = None + error_user_text = None if error.get('secondary') or 'exception_classname' not in error: continue - error_user_message = exception_classname_to_label.get(error.get('exception_classname')) + error_user_text = exception_classname_to_text.get(error.get('exception_classname')) if not error: - error_user_message = exception_classname_to_label[None] - if error_user_message not in reliabilities_errors: - reliabilities_errors.append(error_user_message) + error_user_text = exception_classname_to_text[None] + if error_user_text not in reliabilities_errors: + reliabilities_errors.append(error_user_text) reliabilities[e.name]['errors'] = reliabilities_errors # supports From 924f9afea37b6c545a03505a7ec291cf44654ca7 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 27 Apr 2021 15:13:39 +0200 Subject: [PATCH 2/2] [lint] pylint searx/search/processors files / BTW add some doc-strings Signed-off-by: Markus Heiser --- searx/search/processors/__init__.py | 32 ++++++++++++++------ searx/search/processors/abstract.py | 15 ++++++--- searx/search/processors/offline.py | 12 +++++--- searx/search/processors/online.py | 22 +++++++++----- searx/search/processors/online_currency.py | 15 +++++---- searx/search/processors/online_dictionary.py | 7 +++-- 6 files changed, 68 insertions(+), 35 deletions(-) diff --git 
a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index 4cae3cd0f..caac74e65 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -1,37 +1,49 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint + +"""Implement request processors used by engine-types. + +""" + +__all__ = [ + 'EngineProcessor', + 'OfflineProcessor', + 'OnlineProcessor', + 'OnlineDictionaryProcessor', + 'OnlineCurrencyProcessor', + 'processors', +] + +from searx import logger +import searx.engines as engines from .online import OnlineProcessor from .offline import OfflineProcessor from .online_dictionary import OnlineDictionaryProcessor from .online_currency import OnlineCurrencyProcessor from .abstract import EngineProcessor -from searx import logger -import searx.engines as engines - -__all__ = ['EngineProcessor', 'OfflineProcessor', 'OnlineProcessor', - 'OnlineDictionaryProcessor', 'OnlineCurrencyProcessor', 'processors'] logger = logger.getChild('search.processors') processors = {} - +"""Cache request processors, stored by *engine-name* (:py:func:`initialize`)""" def get_processor_class(engine_type): + """Return processor class according to the ``engine_type``""" for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]: if c.engine_type == engine_type: return c return None - def get_processor(engine, engine_name): + """Return processor instance that fits to ``engine.engine_type``""" engine_type = getattr(engine, 'engine_type', 'online') processor_class = get_processor_class(engine_type) if processor_class: return processor_class(engine, engine_name) - else: - return None - + return None def initialize(engine_list): + """Initialize all engines and store a processor for each engine in :py:obj:`processors`.""" engines.initialize_engines(engine_list) for engine_name, engine in engines.engines.items(): processor = get_processor(engine, engine_name) diff --git
a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 2a36222d4..38811d87c 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -1,4 +1,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint + +"""Abstract base classes for engine request processors. + +""" import threading from abc import abstractmethod, ABC @@ -10,12 +15,13 @@ from searx.network import get_time_for_thread, get_network from searx.metrics import histogram_observe, counter_inc, count_exception, count_error from searx.exceptions import SearxEngineAccessDeniedException - logger = logger.getChild('searx.search.processor') SUSPENDED_STATUS = {} +# pylint: disable=missing-function-docstring class SuspendedStatus: + """Class to handle suspend state.""" __slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock' @@ -49,6 +55,7 @@ class SuspendedStatus: class EngineProcessor(ABC): + """Base class used for all types of request processors.""" __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status' @@ -143,9 +150,7 @@ class EngineProcessor(ABC): if tests is None: tests = getattr(self.engine, 'additional_tests', {}) tests.update(self.get_default_tests()) - return tests - else: - return tests + return tests - def get_default_tests(self): + def get_default_tests(self): # pylint: disable=no-self-use return {} diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index ad03fed4b..f40626f39 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -1,13 +1,17 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint + +"""Processors for engine-type: ``offline`` + +""" from searx import logger -from searx.search.processors.abstract import EngineProcessor - +from .abstract import EngineProcessor logger = logger.getChild('searx.search.processor.offline') - class OfflineProcessor(EngineProcessor): + """Processor class used by ``offline``
engines""" engine_type = 'offline' @@ -21,6 +25,6 @@ class OfflineProcessor(EngineProcessor): except ValueError as e: # do not record the error logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) - except Exception as e: + except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 57422c007..93a9c6cbf 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -1,24 +1,29 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint + +"""Processors for engine-type: ``online`` + +""" from time import time import asyncio - import httpx import searx.network from searx import logger from searx.utils import gen_useragent -from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException, - SearxEngineTooManyRequestsException,) +from searx.exceptions import ( + SearxEngineAccessDeniedException, + SearxEngineCaptchaException, + SearxEngineTooManyRequestsException, +) from searx.metrics.error_recorder import count_error - -from searx.search.processors.abstract import EngineProcessor - +from .abstract import EngineProcessor logger = logger.getChild('searx.search.processor.online') - def default_request_params(): + """Default request parameters for ``online`` engines.""" return { 'method': 'GET', 'headers': {}, @@ -31,6 +36,7 @@ def default_request_params(): class OnlineProcessor(EngineProcessor): + """Processor class for ``online`` engines.""" engine_type = 'online' @@ -153,7 +159,7 @@ class OnlineProcessor(EngineProcessor): except SearxEngineAccessDeniedException as e: self.handle_exception(result_container, e, suspend=True) logger.exception('engine {0} : Searx is blocked'.format(self.engine_name)) - except Exception as e: + except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e) logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index 0dc3f3b6a..4f642fa72 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -1,4 +1,8 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Processors for engine-type: ``online_currency`` + +""" import unicodedata import re @@ -6,32 +10,31 @@ import re from searx.data import CURRENCIES from .online import OnlineProcessor - parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) +# pylint: disable=missing-function-docstring def normalize_name(name): name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() - def name_to_iso4217(name): - global CURRENCIES + global CURRENCIES # pylint: disable=global-statement name = normalize_name(name) currency = CURRENCIES['names'].get(name, [name]) if isinstance(currency, str): return currency return currency[0] - def iso4217_to_name(iso4217, language): - global CURRENCIES + global CURRENCIES # pylint: disable=global-statement return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217) - class OnlineCurrencyProcessor(OnlineProcessor): + """Processor class used by ``online_currency`` engines.""" + engine_type = 'online_currency' def get_params(self, search_query, engine_category): diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py index 987c710a1..11ca0335d 100644 --- a/searx/search/processors/online_dictionary.py +++ b/searx/search/processors/online_dictionary.py @@ -1,15 +1,18 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Processors for engine-type: ``online_dictionary`` + +""" import re from searx.utils import is_valid_lang from .online import
OnlineProcessor - parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) - class OnlineDictionaryProcessor(OnlineProcessor): + """Processor class used by ``online_dictionnary`` engines.""" engine_type = 'online_dictionnary'