From 8e73438cbed62ca5c16f6d3c6ce7220b17550c4b Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sat, 7 Aug 2021 18:48:01 +0200 Subject: [PATCH 1/3] [upd] upgrade httpx 0.19.0 adjust searx.network module to the new internal API see https://github.com/encode/httpx/pull/1522 --- requirements.txt | 4 +- searx/network/__init__.py | 2 +- searx/network/client.py | 90 ++++++++++++++++++++------------------- searx/network/network.py | 8 ++-- 4 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index 371883b3e..b38aedcb4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,10 +7,10 @@ lxml==4.6.3 pygments==2.10.0 python-dateutil==2.8.2 pyyaml==5.4.1 -httpx[http2]==0.17.1 +httpx[http2]==0.19.0 Brotli==1.0.9 uvloop==0.16.0; python_version >= '3.7' uvloop==0.14.0; python_version < '3.7' -httpx-socks[asyncio]==0.3.1 +httpx-socks[asyncio]==0.4.1 langdetect==1.0.9 setproctitle==1.2.2 diff --git a/searx/network/__init__.py b/searx/network/__init__.py index 21c4c27b5..2bc233f46 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -172,7 +172,7 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs): async for chunk in response.aiter_raw(65536): if len(chunk) > 0: queue.put(chunk) - except httpx.ResponseClosed: + except httpx.StreamClosed: # the response was queued before the exception. # the exception was raised on aiter_raw. # we do nothing here: in the finally block, None will be queued diff --git a/searx/network/client.py b/searx/network/client.py index e1abff05a..46edf9b6b 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -5,6 +5,7 @@ import asyncio import logging import threading + import httpcore import httpx from httpx_socks import AsyncProxyTransport @@ -26,19 +27,22 @@ else: uvloop.install() -logger = logger.getChild('searx.http.client') +logger = logger.getChild('searx.network.client') LOOP = None SSLCONTEXTS = {} TRANSPORT_KWARGS = { - 'backend': 'asyncio', + # use anyio : + # * https://github.com/encode/httpcore/issues/344 + # * https://github.com/encode/httpx/discussions/1511 + 'backend': 'anyio', 'trust_env': False, } # pylint: disable=protected-access async def close_connections_for_url( - connection_pool: httpcore.AsyncConnectionPool, - url: httpcore._utils.URL ): + connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL +): origin = httpcore._utils.url_to_origin(url) logger.debug('Drop connections for %r', origin) @@ -47,7 +51,7 @@ async def close_connections_for_url( await connection_pool._remove_from_pool(connection) try: await connection.aclose() - except httpcore.NetworkError as e: + except httpx.NetworkError as e: logger.warning('Error closing an existing connection', exc_info=e) # pylint: enable=protected-access @@ -60,80 +64,78 @@ def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http return SSLCONTEXTS[key] -class AsyncHTTPTransportNoHttp(httpcore.AsyncHTTPTransport): +class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport): """Block HTTP request""" - async def arequest(self, method, url, headers=None, stream=None, ext=None): - raise httpcore.UnsupportedProtocol("HTTP protocol is disabled") + async def handle_async_request( + self, method, url, headers=None, stream=None, extensions=None + ): + raise httpx.UnsupportedProtocol('HTTP protocol is disabled') class AsyncProxyTransportFixed(AsyncProxyTransport): """Fix httpx_socks.AsyncProxyTransport - Map python_socks exceptions to httpcore.ProxyError - - Map socket.gaierror to httpcore.ConnectError + Map python_socks exceptions to httpx.ProxyError / httpx.ConnectError - Note: keepalive_expiry is ignored, AsyncProxyTransport should call: - * self._keepalive_sweep() - * self._response_closed(self, connection) + Map socket.gaierror to httpx.ConnectError Note: AsyncProxyTransport inherit from AsyncConnectionPool - - Note: the API is going to change on httpx 0.18.0 - see https://github.com/encode/httpx/pull/1522 """ - async def arequest(self, method, url, headers=None, stream=None, ext=None): + async def handle_async_request( + self, method, url, headers=None, stream=None, extensions=None + ): retry = 2 while retry > 0: retry -= 1 try: - return await super().arequest(method, url, headers, stream, ext) + return await super().handle_async_request( + method, url, headers=headers, stream=stream, extensions=extensions + ) except (ProxyConnectionError, ProxyTimeoutError, ProxyError) as e: - raise httpcore.ProxyError(e) + raise httpx.ProxyError from e except OSError as e: # socket.gaierror when DNS resolution fails - raise httpcore.NetworkError(e) - except httpcore.RemoteProtocolError as e: - # in case of httpcore.RemoteProtocolError: Server disconnected - await close_connections_for_url(self, url) - logger.warning('httpcore.RemoteProtocolError: retry', exc_info=e) - # retry - except (httpcore.NetworkError, httpcore.ProtocolError) as e: - # httpcore.WriteError on HTTP/2 connection leaves a new opened stream + raise httpx.ConnectError from e + except httpx.NetworkError as e: + # httpx.WriteError on HTTP/2 connection leaves a new opened stream # then each new request creates a new stream and raise the same WriteError await close_connections_for_url(self, url) raise e + except httpx.RemoteProtocolError as e: + # in case of httpx.RemoteProtocolError: Server disconnected + await close_connections_for_url(self, url) + logger.warning('httpx.RemoteProtocolError: retry', exc_info=e) + # retry class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): """Fix httpx.AsyncHTTPTransport""" - async def arequest(self, method, url, headers=None, stream=None, ext=None): + async def handle_async_request( + self, method, url, headers=None, stream=None, extensions=None + ): retry = 2 while retry > 0: retry -= 1 try: - return await super().arequest(method, url, headers, stream, ext) + return await super().handle_async_request( + method, url, headers=headers, stream=stream, extensions=extensions + ) except OSError as e: # socket.gaierror when DNS resolution fails - raise httpcore.ConnectError(e) - except httpcore.CloseError as e: - # httpcore.CloseError: [Errno 104] Connection reset by peer - # raised by _keepalive_sweep() - # from https://github.com/encode/httpcore/blob/4b662b5c42378a61e54d673b4c949420102379f5/httpcore/_backends/asyncio.py#L198 # pylint: disable=line-too-long + raise httpx.ConnectError from e + except httpx.NetworkError as e: + # httpx.WriteError on HTTP/2 connection leaves a new opened stream + # then each new request creates a new stream and raise the same WriteError await close_connections_for_url(self._pool, url) - logger.warning('httpcore.CloseError: retry', exc_info=e) - # retry - except httpcore.RemoteProtocolError as e: - # in case of httpcore.RemoteProtocolError: Server disconnected + raise e + except httpx.RemoteProtocolError as e: + # in case of httpx.RemoteProtocolError: Server disconnected await close_connections_for_url(self._pool, url) - logger.warning('httpcore.RemoteProtocolError: retry', exc_info=e) + logger.warning('httpx.RemoteProtocolError: retry', exc_info=e) # retry - except (httpcore.ProtocolError, httpcore.NetworkError) as e: - await close_connections_for_url(self._pool, url) - raise e def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries): @@ -206,7 +208,7 @@ def new_client( if not enable_http and (pattern == 'http' or pattern.startswith('http://')): continue if (proxy_url.startswith('socks4://') - or proxy_url.startswith('socks5://') + or proxy_url.startswith('socks5://') or proxy_url.startswith('socks5h://') ): mounts[pattern] = get_transport_for_socks_proxy( diff --git a/searx/network/network.py b/searx/network/network.py index 9954f0507..ed9cc6037 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -138,12 +138,10 @@ class Network: request = response.request status = f"{response.status_code} {response.reason_phrase}" response_line = f"{response.http_version} {status}" - if hasattr(response, "_elapsed"): - elapsed_time = f"{response.elapsed.total_seconds()} sec" - else: - elapsed_time = "stream" + content_type = response.headers.get("Content-Type") + content_type = f' ({content_type})' if content_type else '' self._logger.debug( - f'HTTP Request: {request.method} {request.url} "{response_line}" ({elapsed_time})' + f'HTTP Request: {request.method} {request.url} "{response_line}"{content_type}' ) def get_client(self, verify=None, max_redirects=None): From b10403d3a1c1da6a6c5bbb49bc891f26ff45f1f1 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Wed, 18 Aug 2021 08:39:13 +0200 Subject: [PATCH 2/3] [mod] searx.network: remove redundant code searx.client.new_client: the proxies parameter is a dictonnary, and the protocol (key of the dictionnary) is already normalized (see usage of searx.network.network.PROXY_PATTERN_MAPPING) --- searx/network/client.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/searx/network/client.py b/searx/network/client.py index 46edf9b6b..f388523b5 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -183,15 +183,6 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries): ) -def iter_proxies(proxies): - # https://www.python-httpx.org/compatibility/#proxy-keys - if isinstance(proxies, str): - yield 'all://', proxies - elif isinstance(proxies, dict): - for pattern, proxy_url in proxies.items(): - yield pattern, proxy_url - - def new_client( # pylint: disable=too-many-arguments enable_http, verify, enable_http2, @@ -204,11 +195,11 @@ def new_client( ) # See https://www.python-httpx.org/advanced/#routing mounts = {} - for pattern, proxy_url in iter_proxies(proxies): - if not enable_http and (pattern == 'http' or pattern.startswith('http://')): + for pattern, proxy_url in proxies.items(): + if not enable_http and pattern.startswith('http://'): continue if (proxy_url.startswith('socks4://') - or proxy_url.startswith('socks5://') + or proxy_url.startswith('socks5://') or proxy_url.startswith('socks5h://') ): mounts[pattern] = get_transport_for_socks_proxy( From 41f6359d06fd6fb93ddfde59cba17da57565f587 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Thu, 16 Sep 2021 18:05:31 +0200 Subject: [PATCH 3/3] [fix] error recorder: avoid RuntimeError on some rare occasion httpx.RequestError (subclass of httpx.HTTPError) has a property request. This property raises a RuntimeError if the attributes _request is None. To avoid a cascade of errors, this commit reads directly the _request attribute. --- searx/metrics/error_recorder.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index 6963cda2f..37594e5e8 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -74,9 +74,11 @@ def get_request_exception_messages(exc: HTTPError)\ status_code = None reason = None hostname = None - if hasattr(exc, 'request') and exc.request is not None: + if hasattr(exc, '_request') and exc._request is not None: + # exc.request is property that raise an RuntimeException + # if exc._request is not defined. url = exc.request.url - if url is None and hasattr(exc, 'response') and exc.respones is not None: + if url is None and hasattr(exc, 'response') and exc.response is not None: url = exc.response.url if url is not None: hostname = url.host