From 32e8c2cf098ae59baae5672e70436e47299bec82 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Tue, 19 Jul 2022 23:40:11 +0200
Subject: [PATCH] searx.network: add "verify" option to the networks

Each network can define a verify option:

* false to disable certificate verification
* a path to an existing certificate

SearXNG uses SSL_CERT_FILE and SSL_CERT_DIR when they are defined,
see https://www.python-httpx.org/environment_variables/#ssl_cert_file
---
 docs/admin/engines/settings.rst   | 44 ++++++++++++++++++++++---------
 searx/network/client.py           |  9 ++-----
 searx/network/network.py          |  2 +-
 searx/search/processors/online.py | 18 ++++++++++---
 searx/settings.yml                |  5 ++++
 searx/settings_defaults.py        |  1 +
 searx/webapp.py                   |  3 +++
 7 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst
index cac9d286..086b3ccb 100644
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@@ -347,18 +347,27 @@ Communication with search engines.
   pool_maxsize: 10        # Number of allowable keep-alive connections, or null
                           # to always allow. The default is 10.
   enable_http2: true      # See https://www.python-httpx.org/http2/
-  # uncomment below section if you want to use a proxy
-  # proxies:
-  #   all://:
-  #     - http://proxy1:8080
-  #     - http://proxy2:8080
-  # uncomment below section only if you have more than one network interface
-  # which can be the source of outgoing search requests
-  # source_ips:
-  #   - 1.1.1.1
-  #   - 1.1.1.2
-  #   - fe80::/126
-
+  # uncomment below section if you want to use a custom server certificate
+  # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults
+  # and https://www.python-httpx.org/compatibility/#ssl-configuration
+  # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer
+  #
+  # uncomment below section if you want to use a proxy, see
+  # https://2.python-requests.org/en/latest/user/advanced/#proxies
+  # SOCKS proxies are also supported, see
+  # https://2.python-requests.org/en/latest/user/advanced/#socks
+  #
+  # proxies:
+  #   all://:
+  #     - http://proxy1:8080
+  #     - http://proxy2:8080
+  #
+  # using_tor_proxy: true
+  #
+  # Extra seconds to add in order to account for the time taken by the proxy
+  #
+  # extra_proxy_timeout: 10.0
+
 ``request_timeout`` :
   Global timeout of the requests made to others engines in seconds.  A bigger
@@ -408,6 +417,17 @@ Communication with search engines.
 ``enable_http2`` :
   Enable by default. Set to ``false`` to disable HTTP/2.

+.. _httpx verification defaults: https://www.python-httpx.org/advanced/#changing-the-verification-defaults
+.. _httpx ssl configuration: https://www.python-httpx.org/compatibility/#ssl-configuration
+
+``verify`` : ``$SSL_CERT_FILE``, ``$SSL_CERT_DIR``
+  Allows specifying a path to a certificate,
+  see `httpx verification defaults`_.
+
+  In addition to ``verify``, SearXNG supports the ``$SSL_CERT_FILE`` (for a file) and
+  ``$SSL_CERT_DIR`` (for a directory) OpenSSL variables.
+  See `httpx ssl configuration`_.
+
 ``max_redirects`` :
   30 by default. Maximum redirect before it is an error.
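For readers who want to see what the new setting maps onto, here is a minimal sketch (not part of the patch; the client names and paths are placeholders) of the three kinds of values httpx accepts for ``verify``:

    # Sketch only -- not SearXNG code; placeholder names and paths.
    # The ``verify`` value from settings.yml is eventually handed to httpx,
    # which accepts a bool or a path to a CA file.
    import httpx

    # verify: true  -> validate server certificates against the default CA bundle
    client_default = httpx.Client(verify=True)

    # verify: false -> accept any server certificate (debugging only)
    client_insecure = httpx.Client(verify=False)

    # verify: <path> -> trust a custom CA file instead of the default bundle
    # (the file must exist when the client is created, e.g. a mitmproxy CA cert)
    client_custom_ca = httpx.Client(verify="/etc/ssl/certs/mitmproxy-ca.pem")

httpx only consults $SSL_CERT_FILE and $SSL_CERT_DIR when trust_env is enabled, which is what the client.py changes below switch on by dropping the trust_env=False transport arguments.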
diff --git a/searx/network/client.py b/searx/network/client.py
index 11086dd3..f25aaf9a 100644
--- a/searx/network/client.py
+++ b/searx/network/client.py
@@ -26,9 +26,6 @@ else:
 logger = logger.getChild('searx.network.client')
 LOOP = None
 SSLCONTEXTS: Dict[Any, SSLContext] = {}
-TRANSPORT_KWARGS = {
-    'trust_env': False,
-}


 def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False):
@@ -74,7 +71,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
         rdns = True

     proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
-    verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify
+    verify = get_sslcontexts(proxy_url, None, verify, True, http2) if verify is True else verify
     return AsyncProxyTransportFixed(
         proxy_type=proxy_type,
         proxy_host=proxy_host,
@@ -88,12 +85,11 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
         local_address=local_address,
         limits=limit,
         retries=retries,
-        **TRANSPORT_KWARGS,
     )


 def get_transport(verify, http2, local_address, proxy_url, limit, retries):
-    verify = get_sslcontexts(None, None, True, False, http2) if verify is True else verify
+    verify = get_sslcontexts(None, None, verify, True, http2) if verify is True else verify
     return httpx.AsyncHTTPTransport(
         # pylint: disable=protected-access
         verify=verify,
@@ -102,7 +98,6 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries):
         proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,
         local_address=local_address,
         retries=retries,
-        **TRANSPORT_KWARGS,
     )


diff --git a/searx/network/network.py b/searx/network/network.py
index 677a908b..87c077f2 100644
--- a/searx/network/network.py
+++ b/searx/network/network.py
@@ -334,7 +334,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
     # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121  # pylint: disable=line-too-long
     default_params = {
         'enable_http': False,
-        'verify': True,
+        'verify': settings_outgoing['verify'],
         'enable_http2': settings_outgoing['enable_http2'],
         'max_connections': settings_outgoing['pool_connections'],
         'max_keepalive_connections': settings_outgoing['pool_maxsize'],
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index be1ca57f..dd5d1e36 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -7,6 +7,7 @@
 from timeit import default_timer
 import asyncio
+import ssl
 import httpx

 import searx.network
@@ -29,7 +30,6 @@ def default_request_params():
         'data': {},
         'url': '',
         'cookies': {},
-        'verify': True,
         'auth': None
         # fmt: on
     }
@@ -76,9 +76,15 @@ class OnlineProcessor(EngineProcessor):
     def _send_http_request(self, params):
         # create dictionary which contain all
         # information about the request
-        request_args = dict(
-            headers=params['headers'], cookies=params['cookies'], verify=params['verify'], auth=params['auth']
-        )
+        request_args = dict(headers=params['headers'], cookies=params['cookies'], auth=params['auth'])
+
+        # verify
+        # if not None, it overrides the verify value defined in the network.
+        # use False to accept any server certificate
+        # use a path to a file to specify a server certificate
+        verify = params.get('verify')
+        if verify is not None:
+            request_args['verify'] = params['verify']

         # max_redirects
         max_redirects = params.get('max_redirects')
@@ -153,6 +159,10 @@ class OnlineProcessor(EngineProcessor):
             # send requests and parse the results
             search_results = self._search_basic(query, params)
             self.extend_container(result_container, start_time, search_results)
+        except ssl.SSLError as e:
+            # certificate validation failed
+            self.handle_exception(result_container, e, suspend=True)
+            self.logger.error("SSLError {}, verify={}".format(e, searx.network.get_network(self.engine_name).verify))
         except (httpx.TimeoutException, asyncio.TimeoutError) as e:
             # requests timeout (connect or read)
             self.handle_exception(result_container, e, suspend=True)
diff --git a/searx/settings.yml b/searx/settings.yml
index f21d7f05..2304c6fe 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -145,6 +145,11 @@ outgoing:
   pool_maxsize: 20
   # See https://www.python-httpx.org/http2/
   enable_http2: true
+  # uncomment below section if you want to use a custom server certificate
+  # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults
+  # and https://www.python-httpx.org/compatibility/#ssl-configuration
+  # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer
+  #
   # uncomment below section if you want to use a proxyq see: SOCKS proxies
   # https://2.python-requests.org/en/latest/user/advanced/#proxies
   # are also supported: see
diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py
index 330878c1..6575b0b0 100644
--- a/searx/settings_defaults.py
+++ b/searx/settings_defaults.py
@@ -199,6 +199,7 @@ SCHEMA = {
         'useragent_suffix': SettingsValue(str, ''),
         'request_timeout': SettingsValue(numbers.Real, 3.0),
         'enable_http2': SettingsValue(bool, True),
+        'verify': SettingsValue((bool, str), True),
         'max_request_timeout': SettingsValue((None, numbers.Real), None),
         # Magic number kept from previous code
         'pool_connections': SettingsValue(int, 100),
diff --git a/searx/webapp.py b/searx/webapp.py
index 688698bb..5c3fbae8 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -165,6 +165,7 @@ timeout_text = gettext('timeout')
 parsing_error_text = gettext('parsing error')
 http_protocol_error_text = gettext('HTTP protocol error')
 network_error_text = gettext('network error')
+ssl_cert_error_text = gettext("SSL error: certificate validation has failed")
 exception_classname_to_text = {
     None: gettext('unexpected crash'),
     'timeout': timeout_text,
@@ -189,6 +190,8 @@ exception_classname_to_text = {
     'KeyError': parsing_error_text,
     'json.decoder.JSONDecodeError': parsing_error_text,
     'lxml.etree.ParserError': parsing_error_text,
+    'ssl.SSLCertVerificationError': ssl_cert_error_text,  # for Python > 3.7
+    'ssl.CertificateError': ssl_cert_error_text,  # for Python 3.7
 }
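The online processor change also gives individual engines a per-request escape hatch: anything an engine stores in params['verify'] overrides the network-level setting. A hypothetical engine module (the URL and certificate path are invented for illustration) could use it like this:

    # Hypothetical engine module -- base URL and certificate path are placeholders.
    # It relies only on the override added in searx/search/processors/online.py:
    # a non-None params['verify'] takes precedence over the network's ``verify``.

    base_url = 'https://self-signed.example.org'


    def request(query, params):
        params['url'] = base_url + '/search?q=' + query
        # False            -> accept any server certificate for this request only
        # '/path/to/ca.pem' -> validate against this CA file instead of the default
        params['verify'] = '/etc/searxng/example-ca.pem'
        return params

Engines that do not set params['verify'] keep the behaviour configured under outgoing.verify, whose schema default remains true.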