Merge pull request #798 from dalf/searchpy4

[mod] add a search_one_request_safe function wrapper
pull/1/head
Adam Tauber 8 years ago committed by GitHub
commit eaa0fb8102

@@ -20,6 +20,7 @@ import threading
from thread import start_new_thread from thread import start_new_thread
from time import time from time import time
from uuid import uuid4 from uuid import uuid4
import requests.exceptions
import searx.poolrequests as requests_lib import searx.poolrequests as requests_lib
from searx.engines import ( from searx.engines import (
categories, engines categories, engines
@@ -37,109 +38,117 @@ number_of_searches = 0
def send_http_request(engine, request_params, timeout_limit):
    """Send the HTTP request described by ``request_params`` with the
    engine's pool and return the response.

    Raises requests.exceptions.Timeout when the total search duration
    already exceeded ``timeout_limit`` (plus a small fixed overhead) once
    the response arrived, so the caller skips parsing and accounts the
    timeout instead.
    """
    # for page_load_time stats
    time_before_request = time()

    # create a dictionary which contains all
    # information about the request
    request_args = dict(
        headers=request_params['headers'],
        cookies=request_params['cookies'],
        timeout=timeout_limit,
        verify=request_params['verify']
    )

    # specific type of request (GET or POST)
    if request_params['method'] == 'GET':
        req = requests_lib.get
    else:
        req = requests_lib.post
        request_args['data'] = request_params['data']

    # send the request
    response = req(request_params['url'], **request_args)

    # is there a timeout (no parsing in this case)
    timeout_overhead = 0.2  # seconds
    search_duration = time() - request_params['started']
    if search_duration > timeout_limit + timeout_overhead:
        # fully qualified name: only ``import requests.exceptions`` is in
        # scope at the top of this file, so a bare ``Timeout`` would raise
        # NameError instead of the intended timeout
        raise requests.exceptions.Timeout(response=response)

    with threading.RLock():
        # no error : reset the suspend variables
        engine.continuous_errors = 0
        engine.suspend_end_time = 0
        # update stats with current page-load-time,
        # only the HTTP request is measured
        engine.stats['page_load_time'] += time() - time_before_request
        engine.stats['page_load_count'] += 1

    # everything is ok : return the response
    return response
def search_one_request(engine, query, request_params, timeout_limit):
    """Run one query on one engine: build the request parameters, send the
    HTTP request and parse the response.

    Returns the result list produced by ``engine.response``, or an empty
    list when the engine produced no URL to request.
    """
    # update request parameters dependent on
    # search-engine (contained in engines folder)
    engine.request(query, request_params)

    # ignoring empty urls — ``not url`` covers both None and '' in one
    # check, so the previous separate ``is None`` test is unnecessary
    if not request_params['url']:
        return []

    # send request
    response = send_http_request(engine, request_params, timeout_limit)

    # parse the response
    response.search_params = request_params
    return engine.response(response)
def search_one_request_safe(engine_name, query, request_params, result_container, timeout_limit):
    """Exception-safe wrapper around search_one_request.

    Sends the query to one engine, pushes the parsed results into
    ``result_container`` and updates the engine statistics; any exception
    is caught, logged and turned into engine error/suspend bookkeeping.

    Returns True on success, False when an exception occurred.
    """
    start_time = time()
    engine = engines[engine_name]

    try:
        # send requests and parse the results
        search_results = search_one_request(engine, query, request_params, timeout_limit)

        # add results
        for result in search_results:
            result['engine'] = engine_name
        result_container.extend(engine_name, search_results)

        # update engine time when there is no exception
        with threading.RLock():
            engine.stats['engine_time'] += time() - start_time
            engine.stats['engine_time_count'] += 1

        return True

    except Exception as e:
        search_duration = time() - start_time

        # keep stats mutation under the lock, consistent with the
        # success path above
        with threading.RLock():
            engine.stats['errors'] += 1

        requests_exception = False

        if isinstance(e, requests.exceptions.Timeout):
            # requests timeout (connect or read)
            logger.error("engine {0} : HTTP requests timeout"
                         "(search duration : {1} s, timeout: {2} s) : {3}"
                         .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
            requests_exception = True
        elif isinstance(e, requests.exceptions.RequestException):
            # other requests exception — 'elif' is required here: Timeout
            # is itself a RequestException, a plain 'if' would log a
            # timeout twice and mis-attach the 'else' branch
            logger.exception("engine {0} : requests exception"
                             "(search duration : {1} s, timeout: {2} s) : {3}"
                             .format(engine_name, search_duration, timeout_limit, e))
            requests_exception = True
        else:
            # other errors (typically raised while parsing the response)
            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))

        # suspend the engine for a while only when the failure came from
        # the HTTP layer, not from result parsing
        if requests_exception:
            with threading.RLock():
                engine.continuous_errors += 1
                engine.suspend_end_time = time() + min(60, engine.continuous_errors)

        return False
def search_multiple_requests(requests, result_container, timeout_limit): def search_multiple_requests(requests, result_container, timeout_limit):
@@ -148,7 +157,7 @@ def search_multiple_requests(requests, result_container, timeout_limit):
for engine_name, query, request_params in requests: for engine_name, query, request_params in requests:
th = threading.Thread( th = threading.Thread(
target=search_one_request, target=search_one_request_safe,
args=(engine_name, query, request_params, result_container, timeout_limit), args=(engine_name, query, request_params, result_container, timeout_limit),
name=search_id, name=search_id,
) )

Loading…
Cancel
Save