mirror of https://github.com/searxng/searxng
[mod] replace engines_languages.json by engines_traits.json
Implementations of the *traits* of the engines. Engine's traits are fetched from the origin engine and stored in a JSON file in the *data folder*. Most often traits are languages and region codes and their mapping from SearXNG's representation to the representation in the origin search engine. To load traits from the persistence:: searx.enginelib.traits.EngineTraitsMap.from_data() For new traits new properties can be added to the class:: searx.enginelib.traits.EngineTraits .. hint:: Implementation is downward compatible to the deprecated *supported_languages method* from the vintage implementation. The vintage code is tagged as *deprecated* and can be removed when all engines have been ported to the *traits method*. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>pull/2269/head
parent
64fea2f9cb
commit
6e5f22e558
@ -0,0 +1,17 @@
|
||||
.. _searx.enginelib:
|
||||
|
||||
============
|
||||
Engine model
|
||||
============
|
||||
|
||||
.. automodule:: searx.enginelib
|
||||
:members:
|
||||
|
||||
.. _searx.enginelib.traits:
|
||||
|
||||
=============
|
||||
Engine traits
|
||||
=============
|
||||
|
||||
.. automodule:: searx.enginelib.traits
|
||||
:members:
|
@ -1,8 +1,8 @@
|
||||
.. _load_engines:
|
||||
.. _searx.engines:
|
||||
|
||||
============
|
||||
Load Engines
|
||||
============
|
||||
=================
|
||||
SearXNG's engines
|
||||
=================
|
||||
|
||||
.. automodule:: searx.engines
|
||||
:members:
|
||||
|
@ -0,0 +1,47 @@
|
||||
.. _searx.search.processors:
|
||||
|
||||
=================
|
||||
Search processors
|
||||
=================
|
||||
|
||||
.. contents:: Contents
|
||||
:depth: 2
|
||||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
|
||||
Abstract processor class
|
||||
========================
|
||||
|
||||
.. automodule:: searx.search.processors.abstract
|
||||
:members:
|
||||
|
||||
Offline processor
|
||||
=================
|
||||
|
||||
.. automodule:: searx.search.processors.offline
|
||||
:members:
|
||||
|
||||
Online processor
|
||||
================
|
||||
|
||||
.. automodule:: searx.search.processors.online
|
||||
:members:
|
||||
|
||||
Online currency processor
|
||||
=========================
|
||||
|
||||
.. automodule:: searx.search.processors.online_currency
|
||||
:members:
|
||||
|
||||
Online Dictionary processor
|
||||
===========================
|
||||
|
||||
.. automodule:: searx.search.processors.online_dictionary
|
||||
:members:
|
||||
|
||||
Online URL search processor
|
||||
===========================
|
||||
|
||||
.. automodule:: searx.search.processors.online_url_search
|
||||
:members:
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,143 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Engine related implementations
|
||||
|
||||
.. note::
|
||||
|
||||
The long term goal is to modularize all relevant implementations to the
|
||||
engines here in this Python package. In addition to improved modularization,
|
||||
this will also be necessary in part because the probability of circular
|
||||
imports will increase due to the increased typification of implementations in
|
||||
the future.
|
||||
|
||||
ToDo:
|
||||
|
||||
- move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`.
|
||||
"""
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import Union, Dict, List, Callable, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from searx.enginelib import traits
|
||||
|
||||
|
||||
class Engine:  # pylint: disable=too-few-public-methods
    """Type-hinting model of an engine instance built from the YAML settings.

    For the configuration side see :ref:`general engine configuration`.

    .. hint::

       This class is currently never initialized and only used for type hinting.
    """

    # ------------------------------------------------------------------
    # Common options in the engine module
    # ------------------------------------------------------------------

    engine_type: str
    """Type of the engine (:origin:`searx/search/processors`)."""

    paging: bool
    """Engine supports multiple pages."""

    time_range_support: bool
    """Engine supports search time range."""

    safesearch: bool
    """Engine supports SafeSearch."""

    language_support: bool
    """Engine supports languages (locales) search."""

    language: str
    """When ``language: ...`` is set for an engine in the YAML settings, the
    engine supports only this one language:

    .. code:: yaml

      - name: google french
        engine: google
        language: fr
    """

    region: str
    """When ``region: ...`` is set for an engine in the YAML settings, the
    engine supports only this one region:

    .. code:: yaml

      - name: google belgium
        engine: google
        region: fr-BE
    """

    fetch_traits: Callable
    """Function to fetch engine's traits from origin."""

    traits: traits.EngineTraits
    """Traits of the engine."""

    # ------------------------------------------------------------------
    # settings.yml
    # ------------------------------------------------------------------

    categories: List[str]
    """Tabs, in which the engine is working."""

    name: str
    """Name that will be used across SearXNG to define this engine.  In
    settings, on the result page .."""

    engine: str
    """Name of the python file used to handle requests and responses to and
    from this search engine (file name from :origin:`searx/engines` without
    ``.py``)."""

    enable_http: bool
    """Enable HTTP (by default only HTTPS is enabled)."""

    shortcut: str
    """Code used to execute bang requests (``!foo``)."""

    timeout: float
    """Specific timeout for search-engine."""

    display_error_messages: bool
    """Display error messages on the web UI."""

    proxies: dict
    """Set proxies for a specific engine (YAML):

    .. code:: yaml

      proxies :
        http:  socks5://proxy:port
        https: socks5://proxy:port
    """

    disabled: bool
    """Disable the engine by default without deleting it.  The user is still
    allowed to manually activate it in the settings."""

    inactive: bool
    """Remove the engine from the settings (*disabled & removed*)."""

    about: dict
    """Additional fields describing the engine.

    .. code:: yaml

      about:
         website: https://example.com
         wikidata_id: Q306656
         official_api_documentation: https://example.com/api-doc
         use_official_api: true
         require_api_key: true
         results: HTML
    """

    # ------------------------------------------------------------------
    # deprecated properties
    # ------------------------------------------------------------------

    _fetch_supported_languages: Callable  # deprecated use fetch_traits
    supported_languages: Union[List[str], Dict[str, str]]  # deprecated use traits
    language_aliases: Dict[str, str]  # deprecated not needed when using traits
    supported_languages_url: str  # deprecated not needed when using traits
|
@ -0,0 +1,387 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Engine's traits are fetched from the origin engines and stored in a JSON file
|
||||
in the *data folder*. Most often traits are languages and region codes and
|
||||
their mapping from SearXNG's representation to the representation in the origin
|
||||
search engine. For new traits new properties can be added to the class
|
||||
:py:class:`EngineTraits`.
|
||||
|
||||
To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
|
||||
used.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import dataclasses
|
||||
from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING
|
||||
from typing_extensions import Literal, Self
|
||||
|
||||
from babel.localedata import locale_identifiers
|
||||
|
||||
from searx import locales
|
||||
from searx.data import data_dir, ENGINE_TRAITS
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from . import Engine
|
||||
|
||||
|
||||
class EngineTraitsEncoder(json.JSONEncoder):
    """JSON encoder that is able to serialize :class:`EngineTraits` objects,
    see :class:`json.JSONEncoder`."""

    def default(self, o):
        """Return the attribute dictionary of a :class:`EngineTraits` object.

        Any other object is delegated to the default implementation of
        :class:`json.JSONEncoder`.
        """
        if not isinstance(o, EngineTraits):
            return super().default(o)
        return vars(o)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class EngineTraits:
    """The class is intended to be instantiated for each engine."""

    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a region to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       regions = {
           'fr-BE' : <engine's region name>,
       }

       for key, engine_region in regions.items():
          searxng_region = babel.Locale.parse(key, sep='-')
          ...
    """

    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a language to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       languages = {
           'ca' : <engine's language name>,
       }

       for key, engine_lang in languages.items():
          searxng_lang = babel.Locale.parse(key)
          ...
    """

    all_locale: Optional[str] = None
    """To which locale value SearXNG's ``all`` language is mapped (shown as
    "Default language").
    """

    data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1'
    """Data type, default is 'traits_v1' for vintage use 'supported_languages'.

    .. hint::

       For the transition period until the *fetch* functions of all the engines
       are converted there will be the data_type 'supported_languages', which
       maps the old logic unchanged 1:1.

       Instances of data_type 'supported_languages' do not implement methods
       like ``self.get_language(..)`` and ``self.get_region(..)``
    """

    custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
    """A place to store engine's custom traits, not related to the SearXNG core."""

    def get_language(self, searxng_locale: str, default=None):
        """Return engine's language string that *best fits* to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default language

        The *best fits* rules are implemented in
        :py:obj:`locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.languages, default=default)

    def get_region(self, searxng_locale: str, default=None):
        """Return engine's region string that best fits to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default region

        The *best fits* rules are implemented in
        :py:obj:`locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.regions, default=default)

    def is_locale_supported(self, searxng_locale: str) -> bool:
        """A *locale* (SearXNG's internal representation) is considered to be
        supported by the engine if the *region* or the *language* is supported
        by the engine.  For verification the functions
        :py:func:`self.get_region` and :py:func:`self.get_language` are used.

        :param searxng_locale: SearXNG's internal representation of locale
        :raises TypeError: if :py:obj:`EngineTraits.data_type` is unknown
        """
        if self.data_type == 'traits_v1':
            return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))

        if self.data_type == 'supported_languages':  # vintage / deprecated
            # pylint: disable=import-outside-toplevel
            from searx.utils import match_language

            if searxng_locale == 'all':
                return True

            # ``language_aliases`` is not a dataclass field, it is only
            # attached by ``_set_supported_languages``.  Instances which have
            # not been passed through that method (e.g. loaded by
            # ``EngineTraitsMap.from_data``) do not have the attribute; fall
            # back to "no aliases" instead of raising an AttributeError.
            aliases = getattr(self, 'language_aliases', {})
            x = match_language(searxng_locale, self.supported_languages, aliases, None)
            return bool(x)

        raise TypeError('engine traits of type %s is unknown' % self.data_type)

    def copy(self):
        """Create a copy of the dataclass object.

        Only the dataclass fields are copied (see :py:func:`dataclasses.asdict`);
        attributes attached dynamically to the instance are not carried over.
        """
        return EngineTraits(**dataclasses.asdict(self))

    @classmethod
    def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
        """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
        and set properties from the origin engine in the object ``engine_traits``.  If
        function does not exist, ``None`` is returned.
        """

        fetch_traits = getattr(engine, 'fetch_traits', None)
        engine_traits = None

        if fetch_traits:
            engine_traits = cls()
            fetch_traits(engine_traits)
        return engine_traits

    def set_traits(self, engine: Engine):
        """Set traits from self object in a :py:obj:`.Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        :raises TypeError: if :py:obj:`EngineTraits.data_type` is unknown
        """

        if self.data_type == 'traits_v1':
            self._set_traits_v1(engine)

        elif self.data_type == 'supported_languages':  # vintage / deprecated
            self._set_supported_languages(engine)

        else:
            raise TypeError('engine traits of type %s is unknown' % self.data_type)

    def _set_traits_v1(self, engine: Engine):
        # For an engine, when there is `language: ...` in the YAML settings the engine
        # does support only this one language (region)::
        #
        #   - name: google italian
        #     engine: google
        #     language: it
        #     region: it-IT

        # work on a copy, the traits stored in the map must stay untouched
        traits = self.copy()

        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"

        languages = traits.languages
        if hasattr(engine, 'language'):
            if engine.language not in languages:
                raise ValueError(_msg % (engine.name, 'language', engine.language))
            traits.languages = {engine.language: languages[engine.language]}

        regions = traits.regions
        if hasattr(engine, 'region'):
            if engine.region not in regions:
                raise ValueError(_msg % (engine.name, 'region', engine.region))
            traits.regions = {engine.region: regions[engine.region]}

        engine.language_support = bool(traits.languages or traits.regions)

        # set the copied & modified traits in engine's namespace
        engine.traits = traits

    # -------------------------------------------------------------------------
    # The code below is deprecated and can hopefully be deleted one day
    # -------------------------------------------------------------------------

    supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict)
    """deprecated: does not work for engines that do support languages based on a
    region.  With this type it is not guaranteed that the key values can be
    parsed by :py:obj:`babel.Locale.parse`!
    """

    # NOTE: ``language_aliases`` is intentionally not a dataclass field, it is
    # attached to the copied instance in ``_set_supported_languages``.

    BABEL_LANGS = [
        lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
        for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
    ]

    @classmethod  # vintage / deprecated
    def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]:
        """DEPRECATED: Calls a function ``_fetch_supported_languages`` from engine's
        namespace to fetch languages from the origin engine.  If function does
        not exist, ``None`` is returned.
        """

        # pylint: disable=import-outside-toplevel
        from searx import network
        from searx.utils import gen_useragent

        fetch_languages = getattr(engine, '_fetch_supported_languages', None)
        if fetch_languages is None:
            return None

        # The headers has been moved here from commit 9b6ffed06: Some engines (at
        # least bing and startpage) return a different result list of supported
        # languages depending on the IP location where the HTTP request comes from.
        # The IP based results (from bing) can be avoided by setting a
        # 'Accept-Language' in the HTTP request.

        headers = {
            'User-Agent': gen_useragent(),
            'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
        }
        resp = network.get(engine.supported_languages_url, headers=headers)
        supported_languages = fetch_languages(resp)
        if isinstance(supported_languages, list):
            supported_languages.sort()

        engine_traits = cls()
        engine_traits.data_type = 'supported_languages'
        engine_traits.supported_languages = supported_languages
        return engine_traits

    def _set_supported_languages(self, engine: Engine):  # vintage / deprecated
        # work on a copy, the traits stored in the map must stay untouched
        traits = self.copy()

        # pylint: disable=import-outside-toplevel
        from searx.utils import match_language

        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"

        if hasattr(engine, 'language'):
            if engine.language not in self.supported_languages:
                raise ValueError(_msg % (engine.name, 'language', engine.language))

            if isinstance(self.supported_languages, dict):
                traits.supported_languages = {engine.language: self.supported_languages[engine.language]}
            else:
                traits.supported_languages = [engine.language]

        engine.language_support = bool(traits.supported_languages)
        engine.supported_languages = traits.supported_languages

        # find custom aliases for non standard language codes
        traits.language_aliases = {}  # pylint: disable=attribute-defined-outside-init

        for engine_lang in getattr(engine, 'language_aliases', {}):
            iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None)
            if (
                iso_lang
                and iso_lang != engine_lang
                and not engine_lang.startswith(iso_lang)
                and iso_lang not in self.supported_languages
            ):
                traits.language_aliases[iso_lang] = engine_lang

        engine.language_aliases = traits.language_aliases

        # set the copied & modified traits in engine's namespace
        engine.traits = traits
|
||||
|
||||
|
||||
class EngineTraitsMap(Dict[str, EngineTraits]):
    """A python dictionary to map :class:`EngineTraits` by engine name."""

    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
    """File with persistence of the :py:obj:`EngineTraitsMap`."""

    def save_data(self):
        """Store EngineTraitsMap in the file :py:obj:`self.ENGINE_TRAITS_FILE`"""
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as traits_file:
            json.dump(self, traits_file, indent=2, sort_keys=True, cls=EngineTraitsEncoder)

    @classmethod
    def from_data(cls) -> Self:
        """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
        traits_map = cls()
        for engine_name, kwargs in ENGINE_TRAITS.items():
            traits_map[engine_name] = EngineTraits(**kwargs)
        return traits_map

    @classmethod
    def fetch_traits(cls, log: Callable) -> Self:
        """Fetch the traits of all loaded engines (in alphabetical order of the
        engine names) and return them as a new :class:`EngineTraitsMap`.

        :param log: callable that is called with one message string per
          fetched trait summary.
        """
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel

        traits_map = cls()

        for engine_name in sorted(engines.engines):
            engine = engines.engines[engine_name]

            traits = EngineTraits.fetch_traits(engine)
            if traits is not None:
                log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
                log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
                traits_map[engine_name] = traits

            # vintage / deprecated
            vintage_traits = EngineTraits.fetch_supported_languages(engine)
            if vintage_traits is None:
                continue

            log(
                "%-20s: %s supported_languages (deprecated)"
                % (engine_name, len(vintage_traits.supported_languages))
            )
            if traits is None:
                # engine implements only the deprecated fetch function
                traits_map[engine_name] = vintage_traits
            else:
                # engine implements both, merge the vintage languages into the traits
                traits.supported_languages = vintage_traits.supported_languages
                traits_map[engine_name] = traits

        return traits_map

    def set_traits(self, engine: Engine):
        """Set traits in a :py:obj:`Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """

        if engine.name in self:
            engine_traits = self[engine.name]

        elif engine.engine in self:
            # The key of the dictionary traits_map is the *engine name*
            # configured in settings.yml.  When multiple engines are configured
            # in settings.yml to use the same origin engine (python module)
            # these additional engines can use the languages from the origin
            # engine.  For this use the configured ``engine: ...`` from
            # settings.yml
            engine_traits = self[engine.engine]

        else:
            # no persisted traits for this engine, use empty default traits
            engine_traits = EngineTraits(data_type='traits_v1')

        engine_traits.set_traits(engine)
|
Loading…
Reference in New Issue