Compare commits

...

5 Commits

Author SHA1 Message Date
Bnyro 383d873597 [fix] unit converter plugin: can't be disabled in settings 1 month ago
Markus Heiser fb32425d78 [mod] yacy engine: pick base_url randomly from a list of instances
Inspired by post [1] in the discussion we had, while yacy.searchlab.eu was
broken.

[1] https://github.com/searxng/searxng/issues/3428#issuecomment-2101080101

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
1 month ago
Bnyro 72be98e12f [feat] plugins: new calculator plugin 1 month ago
Markus Heiser 742303d030 [mod] improve unit converter plugin
- l10n support: parse and format decimal numbers by babel
- ability to add additional units
- improved unit detection (symbols are not unique)
- support for alias units (0,010C to F --> 32,018 °F)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
1 month ago
Markus Heiser 63cf80aae5 [fix] docutils dependencies (docutils>=0.21.2)
Another trip into the hell of dependencies: docutils tends to put major changes
in minor patches: the executables have been renamed / e.g.

     rst2html.py --> rst2html

so we have to use docutils at least from version 0.21.2, but this version of
docutils is only supported by myst-parser from version 3.0.1 on.

Additionally, docutils decided to drop python 3.8 in version 0.21 [1]

Further, linuxdoc needed an update to cope with docutils 0.21 [2]

[1] https://docutils.sourceforge.io/RELEASE-NOTES.html#release-0-21-2024-04-09
[2] https://github.com/return42/linuxdoc/pull/36

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
1 month ago

@ -0,0 +1,9 @@
.. _unit converter plugin:
=====================
Unit converter plugin
=====================
.. automodule:: searx.plugins.unit_converter
:members:

@ -14,9 +14,12 @@ sphinx-tabs==3.4.5
sphinxcontrib-programoutput==0.17 sphinxcontrib-programoutput==0.17
sphinx-autobuild==2021.3.14 sphinx-autobuild==2021.3.14
sphinx-notfound-page==1.0.0 sphinx-notfound-page==1.0.0
myst-parser==2.0.0 myst-parser==3.0.1
linuxdoc==20231020 linuxdoc==20240509
aiounittest==1.4.2 aiounittest==1.4.2
yamllint==1.35.1 yamllint==1.35.1
wlc==1.14 wlc==1.14
coloredlogs==15.0.1 coloredlogs==15.0.1
docutils<=0.21; python_version == '3.8'
docutils>=0.21.2; python_version > '3.8'

@ -22,20 +22,26 @@ The engine has the following (additional) settings:
- :py:obj:`search_mode` - :py:obj:`search_mode`
- :py:obj:`search_type` - :py:obj:`search_type`
The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by
all yacy engines.
.. code:: yaml .. code:: yaml
- name: yacy - name: yacy
engine: yacy engine: yacy
categories: general categories: general
search_type: text search_type: text
base_url: https://yacy.searchlab.eu
shortcut: ya shortcut: ya
base_url:
- https://yacy.searchlab.eu
- https://search.lomig.me
- https://yacy.ecosys.eu
- https://search.webproject.link
- name: yacy images - name: yacy images
engine: yacy engine: yacy
categories: images categories: images
search_type: image search_type: image
base_url: https://yacy.searchlab.eu
shortcut: yai shortcut: yai
disabled: true disabled: true
@ -45,6 +51,9 @@ Implementations
""" """
# pylint: disable=fixme # pylint: disable=fixme
from __future__ import annotations
import random
from json import loads from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
from dateutil import parser from dateutil import parser
@ -87,15 +96,10 @@ search_type = 'text'
``video`` are not yet implemented (Pull-Requests are welcome). ``video`` are not yet implemented (Pull-Requests are welcome).
""" """
# search-url base_url: list | str = 'https://yacy.searchlab.eu'
base_url = 'https://yacy.searchlab.eu' """The value is an URL or a list of URLs. In the latter case instance will be
search_url = ( selected randomly.
'/yacysearch.json?{query}' """
'&startRecord={offset}'
'&maximumRecords={limit}'
'&contentdom={search_type}'
'&resource={resource}'
)
def init(_): def init(_):
@ -108,24 +112,35 @@ def init(_):
raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types)) raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types))
def _base_url() -> str:
    """Return the base URL of the YaCy instance to query.

    The value is read from the ``base_url`` attribute of the engine registered
    under the name ``yacy``.  A plain value is returned unchanged; if the
    setting is a list, one of its entries is picked at random on every call.
    """
    from searx.engines import engines  # pylint: disable=import-outside-toplevel

    configured = engines['yacy'].base_url  # type: ignore
    if not isinstance(configured, list):
        return configured
    return random.choice(configured)
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * number_of_results offset = (params['pageno'] - 1) * number_of_results
args = {
'query': query,
'startRecord': offset,
'maximumRecords': number_of_results,
'contentdom': search_type,
'resource': search_mode,
}
params['url'] = base_url + search_url.format( # add language tag if specified
query=urlencode({'query': query}), if params['language'] != 'all':
offset=offset, args['lr'] = 'lang_' + params['language'].split('-')[0]
limit=number_of_results,
search_type=search_type, params["url"] = f"{_base_url()}/yacysearch.json?{urlencode(args)}"
resource=search_mode,
)
if http_digest_auth_user and http_digest_auth_pass: if http_digest_auth_user and http_digest_auth_pass:
params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass) params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
return params return params

@ -0,0 +1,88 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Calculate mathematical expressions using ack#eval
"""
import ast
import operator
from flask_babel import gettext
from searx import settings
name = "Basic Calculator"
description = gettext("Calculate mathematical expressions via the search bar")
default_on = False
preference_section = 'general'
plugin_id = 'calculator'
operators = {
ast.Add: operator.add,
ast.Sub: operator.sub,
ast.Mult: operator.mul,
ast.Div: operator.truediv,
ast.Pow: operator.pow,
ast.BitXor: operator.xor,
ast.USub: operator.neg,
}
def _eval_expr(expr):
"""
>>> _eval_expr('2^6')
4
>>> _eval_expr('2**6')
64
>>> _eval_expr('1 + 2*3**(4^5) / (6 + -7)')
-5.0
"""
return _eval(ast.parse(expr, mode='eval').body)
def _eval(node):
if isinstance(node, ast.Constant) and isinstance(node.value, int):
return node.value
if isinstance(node, ast.BinOp):
return operators[type(node.op)](_eval(node.left), _eval(node.right))
if isinstance(node, ast.UnaryOp):
return operators[type(node.op)](_eval(node.operand))
raise TypeError(node)
def post_search(_request, search):
    """Plugin hook: answer the query if it is a simple arithmetic expression.

    When the query evaluates to a value, the answer ``"<query> = <result>"``
    is stored in ``search.result_container.answers['calculate']``.  Queries
    that can not be evaluated are silently ignored.

    :param _request: unused
    :param search: the search whose ``search_query`` is inspected and whose
        ``result_container`` receives the answer
    :return: always ``True``
    """
    # don't run on public instances due to possible attack surfaces
    if settings['server']['public_instance']:
        return True

    # only show the result of the expression on the first page
    if search.search_query.pageno > 1:
        return True

    query = search.search_query.query
    # in order to avoid DoS attacks with long expressions, ignore long expressions
    if len(query) > 100:
        return True

    # replace commonly used math operators with their proper Python operator
    query = query.replace("x", "*").replace(":", "/")

    # only numbers and math operators are accepted
    if any(str.isalpha(c) for c in query):
        return True

    # in python, powers are calculated via **
    query_py_formatted = query.replace("^", "**")

    try:
        result = str(_eval_expr(query_py_formatted))
    except (TypeError, SyntaxError, ArithmeticError, KeyError, ValueError):
        # KeyError: operator missing in the operator table (e.g. ``5 % 2``)
        # ValueError: e.g. the result is an integer with too many digits to
        # be converted to a string (Python >= 3.11)
        return True

    # a query that is just a number is not worth an answer
    if result != query:
        search.result_container.answers['calculate'] = {'answer': f"{query} = {result}"}
    return True
def is_allowed():
    """Return ``True`` unless the instance is configured as a public instance
    (``server.public_instance`` in the settings)."""
    public_instance = settings['server']['public_instance']
    return not public_instance

@ -1,58 +1,245 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Calculate mathematical expressions using ack#eval """A plugin for converting measured values from one unit to another unit (a
unit converter).
The plugin looks up the symbols (given in the query term) in a list of
converters, each converter is one item in the list (compare
:py:obj:`ADDITIONAL_UNITS`). If the symbols are ambiguous, the matching units
of measurement are evaluated. The weighting in the evaluation results from the
sorting of the :py:obj:`list of unit converters<symbol_to_si>`.
Enable in ``settings.yml``:
.. code:: yaml
enabled_plugins:
..
- 'Unit converter plugin'
""" """
from flask_babel import gettext import re
import babel.numbers
from flask_babel import gettext, get_locale
from searx import data
from searx.data import WIKIDATA_UNITS
name = "Unit converter plugin" name = "Unit converter plugin"
description = gettext("Convert between units") description = gettext("Convert between units")
default_on = True default_on = True
plugin_id = "unit_converter"
preference_section = "general"
CONVERT_KEYWORDS = ["in", "to", "as"] CONVERT_KEYWORDS = ["in", "to", "as"]
# inspired from https://stackoverflow.com/a/42475086
RE_MEASURE = r'''
(?P<sign>[-+]?) # +/- or nothing for positive
(\s*) # separator: white space or nothing
(?P<number>[\d\.,]*) # number: 1,000.00 (en) or 1.000,00 (de)
(?P<E>[eE][-+]?\d+)? # scientific notation: e(+/-)2 (*10^2)
(\s*) # separator: white space or nothing
(?P<unit>\S+) # unit of measure
'''
ADDITIONAL_UNITS = [
{
"si_name": "Q11579",
"symbol": "°C",
"to_si": lambda val: val + 273.15,
"from_si": lambda val: val - 273.15,
},
{
"si_name": "Q11579",
"symbol": "°F",
"to_si": lambda val: (val + 459.67) * 5 / 9,
"from_si": lambda val: (val * 9 / 5) - 459.67,
},
]
"""Additional items to convert from a measure unit to a SI unit (vice versa).
.. code:: python
{
"si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
"symbol": "°C", # symbol of the measure unit
"to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
"from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
},
{
"si_name": "Q11573",
"symbol": "mi",
"to_si": 1609.344, # convert measure value (val) to SI unit
"from_si": 1 / 1609.344 # convert SI value (val) measure unit
},
The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
or a callable_ (val in / converted value returned).
.. _callable: https://docs.python.org/3/glossary.html#term-callable
"""
ALIAS_SYMBOLS = {
'°C': ('C',),
'°F': ('F',),
'mi': ('L',),
}
"""Alias symbols for known unit of measure symbols / by example::
'°C': ('C', ...), # list of alias symbols for °C (Q69362731)
'°F': ('F', ...), # list of alias symbols for °F (Q99490479)
'mi': ('L',), # list of alias symbols for mi (Q253276)
"""
SYMBOL_TO_SI = []
def symbol_to_si():
    """Generates a list of tuples, each tuple is a measure unit and the fields
    in the tuple are:

    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)

    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')

    2. ``from_si``: factor (or callable) to get the measure value from the SI
       value (e.g. SI 100m is equal to 100mi divided by 1609.344)

    3. ``to_si``: factor (or callable) to get the SI value from the measure
       value (e.g. 1mi is equal to SI 1m multiplied by 1609.344)

    4. Symbol of the origin unit: identical to field 0, except for alias
       items where it is the symbol the alias stands for

    The returned list is sorted, the first items are created from
    ``WIKIDATA_UNITS``, the second group of items is built from
    :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.

    If you search this list for a symbol, then a match with a symbol from
    Wikidata has the highest weighting (first hit in the list), followed by the
    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.

    The list is built on the first call and cached in :py:obj:`SYMBOL_TO_SI`.
    """

    global SYMBOL_TO_SI  # pylint: disable=global-statement

    if SYMBOL_TO_SI:
        return SYMBOL_TO_SI

    # filter out units which can't be normalized to a SI unit and filter out
    # units without a symbol / arcsecond does not have a symbol
    # https://www.wikidata.org/wiki/Q829073

    for item in data.WIKIDATA_UNITS.values():
        if item['to_si_factor'] and item['symbol']:
            SYMBOL_TO_SI.append(
                (
                    item['symbol'],
                    item['si_name'],
                    # ``to_si_factor`` is the multiplier that turns a measure
                    # value into the SI value, its reciprocal converts back
                    1 / item['to_si_factor'],  # from_si
                    item['to_si_factor'],  # to_si
                    item['symbol'],
                )
            )

    for item in ADDITIONAL_UNITS:
        SYMBOL_TO_SI.append(
            (
                item['symbol'],
                item['si_name'],
                item['from_si'],
                item['to_si'],
                item['symbol'],
            )
        )

    # aliases inherit the converters of the unit they stand for
    alias_items = []
    for item in SYMBOL_TO_SI:
        for alias in ALIAS_SYMBOLS.get(item[0], ()):
            alias_items.append(
                (
                    alias,
                    item[1],
                    item[2],  # from_si
                    item[3],  # to_si
                    item[0],  # origin unit
                )
            )
    SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
    return SYMBOL_TO_SI
def _parse_text_and_convert(search, from_query, to_query):
    """Convert the measure in ``from_query`` into the unit ``to_query``.

    On success the formatted result is stored in
    ``search.result_container.answers['conversion']``.  If the measure can not
    be parsed or no pair of units with a common SI unit is found, nothing is
    stored.

    :param search: the search whose ``result_container`` receives the answer
    :param from_query: measured value including its unit (e.g. ``10 km``)
    :param to_query: symbol of the target unit (e.g. ``mi``)
    """
    # pylint: disable=too-many-branches, too-many-locals

    if not (from_query and to_query):
        return

    measured = re.match(RE_MEASURE, from_query, re.VERBOSE)
    # The conditions have to be chained by ``and``: a tuple ``(a, b)`` is
    # always truthy and would let a failed match or an empty number pass.
    if not (measured and measured.group('number') and measured.group('unit')):
        return

    # Symbols are not unique, if there are several hits for the from-unit, then
    # the correct one must be determined by comparing it with the to-unit
    # https://github.com/searxng/searxng/pull/3378#issuecomment-2080974863

    # first: collecting possible units

    source_list, target_list = [], []

    for symbol, si_name, from_si, to_si, orig_symbol in symbol_to_si():

        if symbol == measured.group('unit'):
            source_list.append((si_name, to_si))
        if symbol == to_query:
            target_list.append((si_name, from_si, orig_symbol))

    if not (source_list and target_list):
        return

    source_to_si = target_from_si = target_symbol = None

    # second: find the right unit by comparing list of from-units with list of to-units
    # NOTE(review): every matching pair overwrites the previous one, so the
    # last matching pair is the one that is used -- confirm this is intended.

    for source in source_list:
        for target in target_list:
            if source[0] == target[0]:  # compare si_name
                source_to_si = source[1]
                target_from_si = target[1]
                target_symbol = target[2]

    if not (source_to_si and target_from_si):
        return

    _locale = get_locale() or 'en_US'

    value = measured.group('sign') + measured.group('number') + (measured.group('E') or '')
    try:
        value = babel.numbers.parse_decimal(value, locale=_locale)
    except babel.numbers.NumberFormatError:
        # the number pattern also matches malformed input like ``1.2.3``
        return

    # convert value to SI unit

    if isinstance(source_to_si, (float, int)):
        value = float(value) * source_to_si
    else:
        value = source_to_si(float(value))

    # convert value from SI unit to target unit

    if isinstance(target_from_si, (float, int)):
        value = float(value) * target_from_si
    else:
        value = target_from_si(float(value))

    if measured.group('E'):
        # when incoming notation is scientific, outgoing notation is scientific
        result = babel.numbers.format_scientific(value, locale=_locale)
    else:
        result = babel.numbers.format_decimal(value, locale=_locale, format='#,##0.##########;-#')

    search.result_container.answers['conversion'] = {'answer': f'{result} {target_symbol}'}
def post_search(_request, search): def post_search(_request, search):
@ -69,8 +256,8 @@ def post_search(_request, search):
for query_part in query_parts: for query_part in query_parts:
for keyword in CONVERT_KEYWORDS: for keyword in CONVERT_KEYWORDS:
if query_part == keyword: if query_part == keyword:
keyword_split = query.split(keyword, 1) from_query, to_query = query.split(keyword, 1)
_parse_text_and_convert(search, keyword_split) _parse_text_and_convert(search, from_query.strip(), to_query.strip())
return True return True
return True return True

@ -220,6 +220,7 @@ outgoing:
# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy # - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy
# # these plugins are disabled if nothing is configured .. # # these plugins are disabled if nothing is configured ..
# - 'Hostname replace' # see hostname_replace configuration below # - 'Hostname replace' # see hostname_replace configuration below
# - 'Basic Calculator'
# - 'Open Access DOI rewrite' # - 'Open Access DOI rewrite'
# - 'Tor check plugin' # - 'Tor check plugin'
# # Read the docs before activate: auto-detection of the language could be # # Read the docs before activate: auto-detection of the language could be
@ -2081,7 +2082,11 @@ engines:
engine: yacy engine: yacy
categories: general categories: general
search_type: text search_type: text
base_url: https://yacy.searchlab.eu base_url:
- https://yacy.searchlab.eu
- https://search.lomig.me
- https://yacy.ecosys.eu
- https://search.webproject.link
shortcut: ya shortcut: ya
disabled: true disabled: true
# required if you aren't using HTTPS for your local yacy instance # required if you aren't using HTTPS for your local yacy instance
@ -2094,7 +2099,6 @@ engines:
engine: yacy engine: yacy
categories: images categories: images
search_type: image search_type: image
base_url: https://yacy.searchlab.eu
shortcut: yai shortcut: yai
disabled: true disabled: true

@ -38,7 +38,7 @@
{%- macro plugin_preferences(section) -%} {%- macro plugin_preferences(section) -%}
{%- for plugin in plugins -%} {%- for plugin in plugins -%}
{%- if plugin.preference_section == section -%} {%- if plugin.preference_section == section and (plugin.is_allowed() if plugin.is_allowed else True) -%}
<fieldset>{{- '' -}} <fieldset>{{- '' -}}
<legend>{{ _(plugin.name) }}</legend>{{- '' -}} <legend>{{ _(plugin.name) }}</legend>{{- '' -}}
<div class="value"> <div class="value">

@ -89,10 +89,17 @@ test.robot() {
dump_return $? dump_return $?
} }
test.rst() { test.rst() {
build_msg TEST "[reST markup] ${RST_FILES[*]}" build_msg TEST "[reST markup] ${RST_FILES[*]}"
local rst2html=rst2html
if [ "3.8" == "$(python -c 'import sys; print(".".join([str(x) for x in sys.version_info[:2]]))')" ]; then
rst2html=rst2html.py
fi
for rst in "${RST_FILES[@]}"; do for rst in "${RST_FILES[@]}"; do
pyenv.cmd rst2html.py --halt error "$rst" > /dev/null || die 42 "fix issue in $rst" pyenv.cmd "${rst2html}" --halt error "$rst" > /dev/null || die 42 "fix issue in $rst"
done done
} }

Loading…
Cancel
Save