From 43fcaa642a63d75096b33d44ce7f7c0de1bce614 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sat, 14 Aug 2021 19:36:30 +0200 Subject: [PATCH] [fix] image_proxy: always close the httpx respone previously, when the content type was not an image and some other error, the httpx response was not closed --- searx/network/__init__.py | 23 +++++++++++++++++- searx/webapp.py | 49 ++++++++++++++++++++++++--------------- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/searx/network/__init__.py b/searx/network/__init__.py index aaebb792..9e80a30a 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -5,6 +5,7 @@ import asyncio import threading import concurrent.futures +from types import MethodType from timeit import default_timer import httpx @@ -161,6 +162,7 @@ def patch(url, data=None, **kwargs): def delete(url, **kwargs): return request('delete', url, **kwargs) + async def stream_chunk_to_queue(network, queue, method, url, **kwargs): try: async with network.stream(method, url, **kwargs) as response: @@ -170,12 +172,22 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs): async for chunk in response.aiter_raw(65536): if len(chunk) > 0: queue.put(chunk) + except httpx.ResponseClosed as e: + # the response was closed + pass except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e: queue.put(e) finally: queue.put(None) +def _close_response_method(self): + asyncio.run_coroutine_threadsafe( + self.aclose(), + get_loop() + ) + + def stream(method, url, **kwargs): """Replace httpx.stream. @@ -193,10 +205,19 @@ def stream(method, url, **kwargs): stream_chunk_to_queue(get_network(), queue, method, url, **kwargs), get_loop() ) + + # yield response + response = queue.get() + if isinstance(response, Exception): + raise response + response.close = MethodType(_close_response_method, response) + yield response + + # yield chunks chunk_or_exception = queue.get() while chunk_or_exception is not None: if isinstance(chunk_or_exception, Exception): raise chunk_or_exception yield chunk_or_exception chunk_or_exception = queue.get() - return future.result() + future.result() diff --git a/searx/webapp.py b/searx/webapp.py index a1abea88..0b60969a 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -1065,7 +1065,7 @@ def _is_selected_language_supported(engine, preferences): # pylint: disable=red @app.route('/image_proxy', methods=['GET']) def image_proxy(): - # pylint: disable=too-many-return-statements + # pylint: disable=too-many-return-statements, too-many-branches url = request.args.get('url') if not url: @@ -1076,14 +1076,20 @@ def image_proxy(): return '', 400 maximum_size = 5 * 1024 * 1024 - + forward_resp = False + resp = None try: - headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) - headers['User-Agent'] = gen_useragent() + request_headers = { + 'User-Agent': gen_useragent(), + 'Accept': 'image/webp,*/*', + 'Accept-Encoding': 'gzip, deflate', + 'Sec-GPC': '1', + 'DNT': '1', + } stream = http_stream( method = 'GET', url = url, - headers = headers, + headers = request_headers, timeout = settings['outgoing']['request_timeout'], allow_redirects = True, max_redirects = 20 @@ -1095,32 +1101,37 @@ def image_proxy(): and int(content_length) > maximum_size ): return 'Max size', 400 - if resp.status_code == 304: - return '', resp.status_code - if resp.status_code != 200: - logger.debug( - 'image-proxy: wrong response code: {0}'.format( - resp.status_code)) + logger.debug('image-proxy: wrong response code: %i', resp.status_code) if resp.status_code >= 400: return '', resp.status_code return '', 400 - if not resp.headers.get('content-type', '').startswith('image/'): - logger.debug( - 'image-proxy: wrong content-type: {0}'.format( - resp.headers.get('content-type'))) + if not resp.headers.get('Content-Type', '').startswith('image/'): + logger.debug('image-proxy: wrong content-type: %s', resp.headers.get('Content-Type', '')) return '', 400 + forward_resp = True + except httpx.HTTPError: + logger.exception('HTTP error') + return '', 400 + finally: + if resp and not forward_resp: + # the code is about to return an HTTP 400 error to the browser + # we make sure to close the response between searxng and the HTTP server + try: + resp.close() + except httpx.HTTPError: + logger.exception('HTTP error on closing') + + try: headers = dict_subset( resp.headers, - {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'} + {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'} ) - total_length = 0 - def forward_chunk(): - nonlocal total_length + total_length = 0 for chunk in stream: total_length += len(chunk) if total_length > maximum_size: