From fb32425d784bbc8e2a958ed3062888b75cab230d Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Thu, 9 May 2024 10:40:55 +0200 Subject: [PATCH] [mod] yacy engine: pick base_url randomly from a list of instances Inspired by post [1] in the discussion we had while yacy.searchlab.eu was broken. [1] https://github.com/searxng/searxng/issues/3428#issuecomment-2101080101 Signed-off-by: Markus Heiser --- searx/engines/yacy.py | 61 +++++++++++++++++++++++++++---------------- searx/settings.yml | 7 +++-- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 3a0414787..8f0718038 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -22,20 +22,26 @@ The engine has the following (additional) settings: - :py:obj:`search_mode` - :py:obj:`search_type` +The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by +all yacy engines. + .. code:: yaml - name: yacy engine: yacy categories: general search_type: text - base_url: https://yacy.searchlab.eu shortcut: ya + base_url: + - https://yacy.searchlab.eu + - https://search.lomig.me + - https://yacy.ecosys.eu + - https://search.webproject.link - name: yacy images engine: yacy categories: images search_type: image - base_url: https://yacy.searchlab.eu shortcut: yai disabled: true @@ -45,6 +51,9 @@ Implementations """ # pylint: disable=fixme +from __future__ import annotations + +import random from json import loads from urllib.parse import urlencode from dateutil import parser @@ -87,15 +96,10 @@ search_type = 'text' ``video`` are not yet implemented (Pull-Requests are welcome). """ -# search-url -base_url = 'https://yacy.searchlab.eu' -search_url = ( - '/yacysearch.json?{query}' - '&startRecord={offset}' - '&maximumRecords={limit}' - '&contentdom={search_type}' - '&resource={resource}' -) +base_url: list | str = 'https://yacy.searchlab.eu' +"""The value is an URL or a list of URLs. In the latter case instance will be +selected randomly. 
+""" def init(_): @@ -108,23 +112,34 @@ def init(_): raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types)) +def _base_url() -> str: + from searx.engines import engines # pylint: disable=import-outside-toplevel + + url = engines['yacy'].base_url # type: ignore + if isinstance(url, list): + url = random.choice(url) + return url + + def request(query, params): + offset = (params['pageno'] - 1) * number_of_results - - params['url'] = base_url + search_url.format( - query=urlencode({'query': query}), - offset=offset, - limit=number_of_results, - search_type=search_type, - resource=search_mode, - ) - - if http_digest_auth_user and http_digest_auth_pass: - params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass) + args = { + 'query': query, + 'startRecord': offset, + 'maximumRecords': number_of_results, + 'contentdom': search_type, + 'resource': search_mode, + } # add language tag if specified if params['language'] != 'all': - params['url'] += '&lr=lang_' + params['language'].split('-')[0] + args['lr'] = 'lang_' + params['language'].split('-')[0] + + params["url"] = f"{_base_url()}/yacysearch.json?{urlencode(args)}" + + if http_digest_auth_user and http_digest_auth_pass: + params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass) return params diff --git a/searx/settings.yml b/searx/settings.yml index 8f1df1801..77f4b8339 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -2082,7 +2082,11 @@ engines: engine: yacy categories: general search_type: text - base_url: https://yacy.searchlab.eu + base_url: + - https://yacy.searchlab.eu + - https://search.lomig.me + - https://yacy.ecosys.eu + - https://search.webproject.link shortcut: ya disabled: true # required if you aren't using HTTPS for your local yacy instance @@ -2095,7 +2099,6 @@ engines: engine: yacy categories: images search_type: image - base_url: https://yacy.searchlab.eu shortcut: yai disabled: true