[fix] image_proxy: always close the httpx respone

previously, when the content type was not an image and some other error,
the httpx response was not closed
This commit is contained in:
Alexandre Flament 2021-08-14 19:36:30 +02:00
parent df15c655f7
commit 43fcaa642a
2 changed files with 52 additions and 20 deletions

View File

@ -5,6 +5,7 @@
import asyncio import asyncio
import threading import threading
import concurrent.futures import concurrent.futures
from types import MethodType
from timeit import default_timer from timeit import default_timer
import httpx import httpx
@ -161,6 +162,7 @@ def patch(url, data=None, **kwargs):
def delete(url, **kwargs): def delete(url, **kwargs):
return request('delete', url, **kwargs) return request('delete', url, **kwargs)
async def stream_chunk_to_queue(network, queue, method, url, **kwargs): async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
try: try:
async with network.stream(method, url, **kwargs) as response: async with network.stream(method, url, **kwargs) as response:
@ -170,12 +172,22 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
async for chunk in response.aiter_raw(65536): async for chunk in response.aiter_raw(65536):
if len(chunk) > 0: if len(chunk) > 0:
queue.put(chunk) queue.put(chunk)
except httpx.ResponseClosed as e:
# the response was closed
pass
except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e: except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e:
queue.put(e) queue.put(e)
finally: finally:
queue.put(None) queue.put(None)
def _close_response_method(self):
asyncio.run_coroutine_threadsafe(
self.aclose(),
get_loop()
)
def stream(method, url, **kwargs): def stream(method, url, **kwargs):
"""Replace httpx.stream. """Replace httpx.stream.
@ -193,10 +205,19 @@ def stream(method, url, **kwargs):
stream_chunk_to_queue(get_network(), queue, method, url, **kwargs), stream_chunk_to_queue(get_network(), queue, method, url, **kwargs),
get_loop() get_loop()
) )
# yield response
response = queue.get()
if isinstance(response, Exception):
raise response
response.close = MethodType(_close_response_method, response)
yield response
# yield chunks
chunk_or_exception = queue.get() chunk_or_exception = queue.get()
while chunk_or_exception is not None: while chunk_or_exception is not None:
if isinstance(chunk_or_exception, Exception): if isinstance(chunk_or_exception, Exception):
raise chunk_or_exception raise chunk_or_exception
yield chunk_or_exception yield chunk_or_exception
chunk_or_exception = queue.get() chunk_or_exception = queue.get()
return future.result() future.result()

View File

@ -1065,7 +1065,7 @@ def _is_selected_language_supported(engine, preferences): # pylint: disable=red
@app.route('/image_proxy', methods=['GET']) @app.route('/image_proxy', methods=['GET'])
def image_proxy(): def image_proxy():
# pylint: disable=too-many-return-statements # pylint: disable=too-many-return-statements, too-many-branches
url = request.args.get('url') url = request.args.get('url')
if not url: if not url:
@ -1076,14 +1076,20 @@ def image_proxy():
return '', 400 return '', 400
maximum_size = 5 * 1024 * 1024 maximum_size = 5 * 1024 * 1024
forward_resp = False
resp = None
try: try:
headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) request_headers = {
headers['User-Agent'] = gen_useragent() 'User-Agent': gen_useragent(),
'Accept': 'image/webp,*/*',
'Accept-Encoding': 'gzip, deflate',
'Sec-GPC': '1',
'DNT': '1',
}
stream = http_stream( stream = http_stream(
method = 'GET', method = 'GET',
url = url, url = url,
headers = headers, headers = request_headers,
timeout = settings['outgoing']['request_timeout'], timeout = settings['outgoing']['request_timeout'],
allow_redirects = True, allow_redirects = True,
max_redirects = 20 max_redirects = 20
@ -1095,32 +1101,37 @@ def image_proxy():
and int(content_length) > maximum_size ): and int(content_length) > maximum_size ):
return 'Max size', 400 return 'Max size', 400
if resp.status_code == 304:
return '', resp.status_code
if resp.status_code != 200: if resp.status_code != 200:
logger.debug( logger.debug('image-proxy: wrong response code: %i', resp.status_code)
'image-proxy: wrong response code: {0}'.format(
resp.status_code))
if resp.status_code >= 400: if resp.status_code >= 400:
return '', resp.status_code return '', resp.status_code
return '', 400 return '', 400
if not resp.headers.get('content-type', '').startswith('image/'): if not resp.headers.get('Content-Type', '').startswith('image/'):
logger.debug( logger.debug('image-proxy: wrong content-type: %s', resp.headers.get('Content-Type', ''))
'image-proxy: wrong content-type: {0}'.format(
resp.headers.get('content-type')))
return '', 400 return '', 400
forward_resp = True
except httpx.HTTPError:
logger.exception('HTTP error')
return '', 400
finally:
if resp and not forward_resp:
# the code is about to return an HTTP 400 error to the browser
# we make sure to close the response between searxng and the HTTP server
try:
resp.close()
except httpx.HTTPError:
logger.exception('HTTP error on closing')
try:
headers = dict_subset( headers = dict_subset(
resp.headers, resp.headers,
{'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'} {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'}
) )
total_length = 0
def forward_chunk(): def forward_chunk():
nonlocal total_length total_length = 0
for chunk in stream: for chunk in stream:
total_length += len(chunk) total_length += len(chunk)
if total_length > maximum_size: if total_length > maximum_size: