From b62851559b235b3fa7e833749f8d10597b7de6f2 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 28 Sep 2021 19:02:57 +0200 Subject: [PATCH] [mod] replace old stackoverflow engine by Stack Exchange API v2.3 Signed-off-by: Markus Heiser --- searx/engines/stackoverflow.py | 64 ---------------------------------- searx/settings.yml | 4 ++- 2 files changed, 3 insertions(+), 65 deletions(-) delete mode 100644 searx/engines/stackoverflow.py diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py deleted file mode 100644 index 8fc2cdb3a..000000000 --- a/searx/engines/stackoverflow.py +++ /dev/null @@ -1,64 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -""" - Stackoverflow (IT) -""" - -from urllib.parse import urlencode, urljoin -from lxml import html -from searx.utils import extract_text -from searx.exceptions import SearxEngineCaptchaException - -# about -about = { - "website": 'https://stackoverflow.com/', - "wikidata_id": 'Q549037', - "official_api_documentation": 'https://api.stackexchange.com/docs', - "use_official_api": False, - "require_api_key": False, - "results": 'HTML', -} - -# engine dependent config -categories = ['it'] -paging = True - -# search-url -url = 'https://stackoverflow.com/' -search_url = url + 'search?{query}&page={pageno}' - -# specific xpath variables -results_xpath = '//div[contains(@class,"question-summary")]' -link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a' -content_xpath = './/div[@class="excerpt"]' - - -# do search-request -def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno']) - - return params - - -# get response from search-request -def response(resp): - if resp.url.path.startswith('/nocaptcha'): - raise SearxEngineCaptchaException() - - results = [] - - dom = html.fromstring(resp.text) - - # parse results - for result in dom.xpath(results_xpath): - link = result.xpath(link_xpath)[0] - href = urljoin(url, link.attrib.get('href')) - title = extract_text(link) - content = extract_text(result.xpath(content_xpath)) - - # append result - results.append({'url': href, - 'title': title, - 'content': content}) - - # return results - return results diff --git a/searx/settings.yml b/searx/settings.yml index 3fefb62fc..a56028774 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1125,8 +1125,10 @@ engines: shortcut: sc - name: stackoverflow - engine: stackoverflow + engine: stackexchange shortcut: st + api_site: 'stackoverflow' + categories: it - name: searchcode code engine: searchcode_code