From 46efb2f36d327e618c5cd6af702b96fa9143fc27 Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Sun, 7 Apr 2024 16:17:11 +0200
Subject: [PATCH] [feat] plugins: new unit converter plugin

---
 searx/engines/duckduckgo_definitions.py       |   5 +-
 searx/plugins/unit_converter.py               | 126 ++++++++++++++++++
 searxng_extra/update/update_wikidata_units.py |  24 +++-
 3 files changed, 150 insertions(+), 5 deletions(-)
 create mode 100644 searx/plugins/unit_converter.py

diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index e8146cc3e..59caed8ce 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -238,7 +238,10 @@ def unit_to_str(unit):
         for prefix in WIKIDATA_PREFIX:
             if unit.startswith(prefix):
                 wikidata_entity = unit[len(prefix) :]
-                return WIKIDATA_UNITS.get(wikidata_entity, unit)
+                real_unit = WIKIDATA_UNITS.get(wikidata_entity)
+                if real_unit is None:
+                    return unit
+                return real_unit['symbol']
     return unit
 
 
diff --git a/searx/plugins/unit_converter.py b/searx/plugins/unit_converter.py
new file mode 100644
index 000000000..dd515aa72
--- /dev/null
+++ b/searx/plugins/unit_converter.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Convert between units of measurement.
+
+Answers queries such as ``10 km in m`` or ``2.5 h to min`` by looking up the
+unit symbols and their SI conversion factors in ``searx.data.WIKIDATA_UNITS``.
+"""
+
+from flask_babel import gettext
+
+from searx.data import WIKIDATA_UNITS
+
+name = "Unit converter plugin"
+description = gettext("Convert between units")
+default_on = True
+
+CONVERT_KEYWORDS = ["in", "to", "as"]
+
+
+def _convert(from_value, source_si_factor, target_si_factor):
+    """Convert ``from_value`` by going through the SI unit shared by both units."""
+    return from_value * source_si_factor / target_si_factor
+
+
+def _split_value_and_unit(text):
+    """Split ``text`` (e.g. ``"10.5 km"``) into the pair ``("10.5", "km")``.
+
+    The value is the leading run of sign, dot and digit characters.  As soon as
+    any other non-space character shows up, everything that follows belongs to
+    the unit symbol -- so multi-digit values are read completely and symbols
+    that contain digits or dashes stay intact.  Spaces are dropped.
+    """
+    from_value = ""
+    from_unit_key = ""
+
+    read_alpha = False
+    for c in text:
+        if c == " ":
+            continue
+        if not read_alpha and (c in ("-", ".") or c.isdigit()):
+            from_value += c
+        else:
+            read_alpha = True
+            from_unit_key += c
+
+    return from_value, from_unit_key
+
+
+def _parse_text_and_convert(search, splitted_query):
+    """Convert ``[<value> <unit>, <unit>]`` and store the result as an answer.
+
+    Returns ``True`` when an answer has been added to the result container and
+    ``False`` when ``splitted_query`` is not a conversion that can be computed:
+    no numeric value, unknown unit symbols, units that do not share the same SI
+    unit, or units without a usable SI conversion factor.
+    """
+    if len(splitted_query) != 2 or splitted_query[0].strip() == "" or splitted_query[1].strip() == "":
+        return False
+
+    from_value, from_unit_key = _split_value_and_unit(splitted_query[0])
+    to_unit_key = splitted_query[1].strip()
+
+    try:
+        value = float(from_value)
+    except ValueError:
+        # no value at all ("km in m") or not a number ("-", "1.2.3")
+        return False
+
+    from_unit = None
+    to_unit = None
+
+    for unit in WIKIDATA_UNITS.values():
+        if unit['symbol'] == from_unit_key:
+            from_unit = unit
+
+        if unit['symbol'] == to_unit_key:
+            to_unit = unit
+
+        if from_unit and to_unit:
+            break
+
+    if from_unit is None or to_unit is None:
+        return False
+
+    # units without SI data carry ``None`` here; two ``None`` must not be
+    # mistaken for "same SI unit"
+    si_name = from_unit.get('si_name')
+    if si_name is None or to_unit.get('si_name') != si_name:
+        return False
+
+    source_si_factor = from_unit.get('to_si_factor')
+    target_si_factor = to_unit.get('to_si_factor')
+    if not source_si_factor or not target_si_factor:
+        # a missing or zero factor can't be used for a conversion
+        return False
+
+    result = _convert(value, source_si_factor, target_si_factor)
+    search.result_container.answers['conversion'] = {'answer': f"{result:g} {to_unit['symbol']}"}
+    return True
+
+
+def post_search(_request, search):
+    """Add a unit conversion answer to the first result page, if the query asks for one.
+
+    Always returns ``True``.
+    """
+    # only convert between units on the first page
+    if search.search_query.pageno > 1:
+        return True
+
+    query_parts = search.search_query.query.split(" ")
+
+    if len(query_parts) < 3:
+        return True
+
+    for index, query_part in enumerate(query_parts):
+        if query_part not in CONVERT_KEYWORDS:
+            continue
+        # Split at the keyword *token*: splitting the raw query at the first
+        # occurrence of the keyword text would cut "10 min in s" inside "min".
+        # Keep trying later keywords, a keyword can also be a unit symbol
+        # ("10 in to cm").
+        keyword_split = [" ".join(query_parts[:index]), " ".join(query_parts[index + 1 :])]
+        if _parse_text_and_convert(search, keyword_split):
+            break
+
+    return True
diff --git a/searxng_extra/update/update_wikidata_units.py b/searxng_extra/update/update_wikidata_units.py
index a1a3731fc..f384df749 100755
--- a/searxng_extra/update/update_wikidata_units.py
+++ b/searxng_extra/update/update_wikidata_units.py
@@ -29,31 +29,47 @@ set_loggers(wikidata, 'wikidata')
 # * https://www.wikidata.org/wiki/Help:Ranking
 # * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
 # * https://w.wiki/32BT
+# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
 # see the result for https://www.wikidata.org/wiki/Q11582
 # there are multiple symbols the same rank
 SARQL_REQUEST = """
-SELECT DISTINCT ?item ?symbol
+SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
 WHERE
 {
   ?item wdt:P31/wdt:P279 wd:Q47574 .
   ?item p:P5061 ?symbolP .
   ?symbolP ps:P5061 ?symbol ;
            wikibase:rank ?rank .
+  OPTIONAL {
+    ?item p:P2370 ?tosistmt .
+    ?tosistmt psv:P2370 ?tosinode .
+    ?tosinode wikibase:quantityAmount ?tosi .
+    ?tosinode wikibase:quantityUnit ?tosiUnit .
+  }
   FILTER(LANG(?symbol) = "en").
 }
 ORDER BY ?item DESC(?rank) ?symbol
 """
 
+# entity IRIs in the query results use the canonical http:// scheme
+_wikidata_url = "http://www.wikidata.org/entity/"
+
 
 def get_data():
     results = collections.OrderedDict()
     response = wikidata.send_wikidata_query(SARQL_REQUEST)
     for unit in response['results']['bindings']:
-        name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '')
-        unit = unit['symbol']['value']
+        name = unit['item']['value'].replace(_wikidata_url, '')
+        symbol = unit['symbol']['value']
+        si_name = unit.get('tosiUnit', {}).get('value', '').replace(_wikidata_url, '')
+        to_si_factor = unit.get('tosi', {}).get('value', '')
         if name not in results:
             # ignore duplicate: always use the first one
-            results[name] = unit
+            results[name] = {
+                'symbol': symbol,
+                'si_name': si_name if si_name else None,
+                'to_si_factor': float(to_si_factor) if to_si_factor else None,
+            }
     return results
 
 