[fix] fetch extra search param of gigablast - fixes #1293

This commit is contained in:
Adam Tauber 2019-12-21 20:51:30 +01:00
parent 8850036ded
commit e5305f886c

View File

@ -14,6 +14,7 @@ import random
from json import loads from json import loads
from time import time from time import time
from lxml.html import fromstring from lxml.html import fromstring
from searx.poolrequests import get
from searx.url_utils import urlencode from searx.url_utils import urlencode
from searx.utils import eval_xpath from searx.utils import eval_xpath
@ -31,13 +32,9 @@ search_string = 'search?{query}'\
'&c=main'\ '&c=main'\
'&s={offset}'\ '&s={offset}'\
'&format=json'\ '&format=json'\
'&qh=0'\ '&langcountry={lang}'\
'&qlang={lang}'\
'&ff={safesearch}'\ '&ff={safesearch}'\
'&rxiec={rxieu}'\ '&rand={rxikd}'
'&ulse={ulse}'\
'&rand={rxikd}'\
'&dbez={dbez}'
# specific xpath variables # specific xpath variables
results_xpath = '//response//result' results_xpath = '//response//result'
url_xpath = './/url' url_xpath = './/url'
@ -46,9 +43,26 @@ content_xpath = './/sum'
supported_languages_url = 'https://gigablast.com/search?&rxikd=1' supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
# gigablast requires a random extra parameter which can be extracted from
# the source code of the search page; parse_extra_param() stores it here
# and request() appends it to the search URL
extra_param = ''
def parse_extra_param(text):
    """Extract gigablast's extra query parameter from a search page source.

    Scans ``text`` line by line for lines starting with ``var url=`` or
    ``url=url+``, concatenates the first single-quoted fragment of each such
    line, and stores whatever follows the last ``&`` of the concatenation in
    the module-level ``extra_param``.

    Lines that match one of the prefixes but contain no single-quoted
    fragment are skipped instead of raising ``IndexError``.  The global is
    assigned only once, after parsing has finished, so it never holds a
    half-built value.

    :param text: source of a gigablast search page
    :returns: None; the result is stored in the global ``extra_param``
    """
    global extra_param
    fragments = []
    for line in text.splitlines():
        if not line.startswith(('var url=', 'url=url+')):
            continue
        pieces = line.split("'")
        # pieces[1] is the text between the first pair of single quotes;
        # a line without any quote yields a single piece and is ignored
        if len(pieces) > 1:
            fragments.append(pieces[1])
    # only the last '&'-separated component is the extra parameter
    extra_param = ''.join(fragments).split('&')[-1]
def init(engine_settings=None):
    """Fetch a gigablast search page and parse the extra parameter from it.

    ``engine_settings`` is accepted but not used by this function.
    """
    search_page = get('http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10')
    parse_extra_param(search_page.text)
# do search-request # do search-request
def request(query, params): def request(query, params):
print("EXTRAPARAM:", extra_param)
offset = (params['pageno'] - 1) * number_of_results offset = (params['pageno'] - 1) * number_of_results
if params['language'] == 'all': if params['language'] == 'all':
@ -67,14 +81,11 @@ def request(query, params):
search_path = search_string.format(query=urlencode({'q': query}), search_path = search_string.format(query=urlencode({'q': query}),
offset=offset, offset=offset,
number_of_results=number_of_results, number_of_results=number_of_results,
rxikd=int(time() * 1000),
rxieu=random.randint(1000000000, 9999999999),
ulse=random.randint(100000000, 999999999),
lang=language, lang=language,
safesearch=safesearch, rxikd=int(time() * 1000),
dbez=random.randint(100000000, 999999999)) safesearch=safesearch)
params['url'] = base_url + search_path params['url'] = base_url + search_path + '&' + extra_param
return params return params
@ -84,7 +95,11 @@ def response(resp):
results = [] results = []
# parse results # parse results
response_json = loads(resp.text) try:
response_json = loads(resp.text)
except:
parse_extra_param(resp.text)
return results
for result in response_json['results']: for result in response_json['results']:
# append result # append result