From 28286cf3f2308113bf440fb6e7cf326c6ed07889 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 27 Mar 2021 15:29:00 +0100 Subject: [PATCH] [fix] update seznam engine to be compatible with the new website --- searx/engines/seznam.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py index 1df92a845..faceb0550 100644 --- a/searx/engines/seznam.py +++ b/searx/engines/seznam.py @@ -7,7 +7,12 @@ from urllib.parse import urlencode, urlparse from lxml import html from searx.poolrequests import get from searx.exceptions import SearxEngineAccessDeniedException -from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +from searx.utils import ( + extract_text, + eval_xpath_list, + eval_xpath_getindex, + eval_xpath, +) # about about = { @@ -26,7 +31,10 @@ def request(query, params): response_index = get(base_url, headers=params['headers'], raise_for_httperror=True) dom = html.fromstring(response_index.text) - url_params = {'q': query} + url_params = { + 'q': query, + 'oq': query, + } for e in eval_xpath_list(dom, '//input[@type="hidden"]'): name = e.get('name') value = e.get('value') @@ -45,20 +53,15 @@ def response(resp): results = [] dom = html.fromstring(resp.content.decode()) - for result_element in eval_xpath_list(dom, '//div[@id="searchpage-root"]//div[@data-dot="results"]/div'): - dot_data = eval_xpath_getindex(result_element, './div/div[@data-dot-data]/@data-dot-data', 0, default=None) - if dot_data is None: - title_element = eval_xpath_getindex(result_element, './/h3/a', 0) - results.append({ - 'url': title_element.get('href'), - 'title': extract_text(title_element), - 'content': extract_text(eval_xpath_getindex(title_element, '../../div[2]', 0)), - }) - elif dot_data == '{"reporter_name":"hint/related/relates"}': - suggestions_element = eval_xpath_getindex(result_element, - './div/div[@data-dot="main-box"]', 0, default=None) - if suggestions_element is not None: - for suggestion in eval_xpath_list(suggestions_element, './/ul/li'): - results.append({'suggestion': extract_text(suggestion)}) + for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'): + result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "Result")]', 0, default=None) + if result_data is None: + continue + title_element = eval_xpath_getindex(result_element, './/h3/a', 0) + results.append({ + 'url': title_element.get('href'), + 'title': extract_text(title_element), + 'content': extract_text(eval_xpath(result_data, './/p[@class="Result-description"]')), + }) return results