From a7fe22770a830cafa4d74a7d5e6ae848c18a9f75 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 2 Oct 2022 23:52:11 +0200 Subject: [PATCH] [mod] Peertube: re-engineered & upgrade to data_type: traits_v1 - fetch_traits(): Fetch languages from peertube's search-index source code. [mod] Include migration of the request method from 'supported_languages' to 'traits' (EngineTraits) object. [fix] old supported_languages_url is no longer valid since the sources have been moved to a different path. - fixed code to pass pylint - request(): complete re-implementation based on the API docs [1] - response(): complete re-implementation, adds several fields missed before - add source code documentation [1] https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos Signed-off-by: Markus Heiser --- docs/src/searx.engines.peertube.rst | 19 +++ searx/data/engine_traits.json | 51 +++---- searx/engines/peertube.py | 199 +++++++++++++++++++++------- searx/settings.yml | 5 +- 4 files changed, 197 insertions(+), 77 deletions(-) create mode 100644 docs/src/searx.engines.peertube.rst diff --git a/docs/src/searx.engines.peertube.rst b/docs/src/searx.engines.peertube.rst new file mode 100644 index 000000000..757d9c742 --- /dev/null +++ b/docs/src/searx.engines.peertube.rst @@ -0,0 +1,19 @@ +.. _peertube engines: + +================ +Peertube Engines +================ + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + + +.. _peertube video engine: + +Peertube Video +============== + +.. 
automodule:: searx.engines.peertube + :members: diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index cb84a5c90..658b7582f 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -1468,31 +1468,32 @@ "peertube": { "all_locale": null, "custom": {}, - "data_type": "supported_languages", - "languages": {}, + "data_type": "traits_v1", + "languages": { + "ca": "ca", + "cs": "cs", + "de": "de", + "el": "el", + "en": "en", + "eo": "eo", + "es": "es", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gd": "gd", + "it": "it", + "ja": "ja", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ru": "ru", + "sv": "sv", + "zh": "zh", + "zh_Hans": "zh", + "zh_Hant": "zh" + }, "regions": {}, - "supported_languages": [ - "ca", - "cs", - "de", - "el", - "en", - "eo", - "es", - "eu", - "fi", - "fr", - "gd", - "it", - "ja", - "nl", - "oc", - "pl", - "pt", - "ru", - "sv", - "zh" - ] + "supported_languages": {} }, "qwant": { "all_locale": null, @@ -4531,4 +4532,4 @@ "zh_cht" ] } -} \ No newline at end of file +} diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index 345c2f991..87b386d7a 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -1,18 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - peertube (Videos) +# lint: pylint +"""Peertube and :py:obj:`SepiaSearch ` do share +(more or less) the same REST API and the schema of the JSON result is identical. 
+ """ -from json import loads -from datetime import datetime +import re from urllib.parse import urlencode -from searx.utils import html_to_text +from datetime import datetime +from dateutil.parser import parse +from dateutil.relativedelta import relativedelta + +import babel + +from searx import network +from searx.locales import language_tag +from searx.utils import html_to_text +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits -# about about = { + # pylint: disable=line-too-long "website": 'https://joinpeertube.org', "wikidata_id": 'Q50938515', - "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html', + "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos', "use_official_api": True, "require_api_key": False, "results": 'JSON', @@ -22,66 +34,155 @@ about = { categories = ["videos"] paging = True base_url = "https://peer.tube" -supported_languages_url = 'https://peer.tube/api/v1/videos/languages' +"""Base URL of the Peertube instance. 
A list of instances is available at: + +- https://instances.joinpeertube.org/instances +""" + +time_range_support = True +time_range_table = { + 'day': relativedelta(), + 'week': relativedelta(weeks=-1), + 'month': relativedelta(months=-1), + 'year': relativedelta(years=-1), +} + +safesearch = True +safesearch_table = {0: 'both', 1: 'false', 2: 'false'} + + +def minute_to_hm(minute): + if isinstance(minute, int): + return "%d:%02d" % (divmod(minute, 60)) + return None -# do search-request def request(query, params): - sanitized_url = base_url.rstrip("/") - pageno = (params["pageno"] - 1) * 15 - search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}" - query_dict = {"search": query} - language = params["language"].split("-")[0] - if "all" != language and language in supported_languages: - query_dict["languageOneOf"] = language - params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno) + """Assemble request for the Peertube API""" + + if not query: + return False + + # eng_region = traits.get_region(params['searxng_locale'], 'en_US') + eng_lang = traits.get_language(params['searxng_locale'], None) + + params['url'] = ( + base_url.rstrip("/") + + "/api/v1/search/videos?" 
+ + urlencode( + { + 'search': query, + 'searchTarget': 'search-index', # Vidiversum + 'resultType': 'videos', + 'start': (params['pageno'] - 1) * 10, + 'count': 10, + # -createdAt: sort by date descending / createdAt: date ascending + 'sort': '-match', # sort by *match descending* + 'nsfw': safesearch_table[params['safesearch']], + } + ) + ) + + if eng_lang is not None: + params['url'] += '&languageOneOf[]=' + eng_lang + params['url'] += '&boostLanguages[]=' + eng_lang + + if params['time_range'] in time_range_table: + time = datetime.now().date() + time_range_table[params['time_range']] + params['url'] += '&startDate=' + time.isoformat() + return params -def _get_offset_from_pageno(pageno): - return (pageno - 1) * 15 + 1 - - -# get response from search-request def response(resp): - sanitized_url = base_url.rstrip("/") + return video_response(resp) + + +def video_response(resp): + """Parse video response from SepiaSearch and Peertube instances.""" results = [] - search_res = loads(resp.text) + json_data = resp.json() - # return empty array if there are no results - if "data" not in search_res: + if 'data' not in json_data: return [] - # parse results - for res in search_res["data"]: - title = res["name"] - url = sanitized_url + "/videos/watch/" + res["uuid"] - description = res["description"] - if description: - content = html_to_text(res["description"]) - else: - content = "" - thumbnail = sanitized_url + res["thumbnailPath"] - publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + for result in json_data['data']: + metadata = [ + x + for x in [ + result.get('channel', {}).get('displayName'), + result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'), + ', '.join(result.get('tags', [])), + ] + if x + ] results.append( { - "template": "videos.html", - "url": url, - "title": title, - "content": content, - "publishedDate": publishedDate, - "iframe_src": sanitized_url + res["embedPath"], - "thumbnail": thumbnail, + 
'url': result['url'], + 'title': result['name'], + 'content': html_to_text(result.get('description') or ''), + 'author': result.get('account', {}).get('displayName'), + 'length': minute_to_hm(result.get('duration')), + 'template': 'videos.html', + 'publishedDate': parse(result['publishedAt']), + 'iframe_src': result.get('embedUrl'), + 'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'), + 'metadata': ' | '.join(metadata), } ) - # return results return results -def _fetch_supported_languages(resp): - videolanguages = resp.json() - peertube_languages = list(videolanguages.keys()) - return peertube_languages +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from peertube's search-index source code. + + See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_ + + .. _8ed5c729 - Refactor and redesign client: + https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729 + .. _videoLanguages: + https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291 + """ + + resp = network.get( + 'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue', + # the response from search-index repository is very slow + timeout=60, + ) + + if not resp.ok: + print("ERROR: response from peertube is not OK.") + return + + js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) + if not js_lang: + print("ERROR: can't determine languages from peertube") + return + + for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)): + try: + eng_tag = lang.group(1) + if eng_tag == 'oc': + # Occitan is not known by babel, its closest relative is Catalan + # but 'ca' is already in the list of engine_traits.languages --> + # 'oc' will be ignored. 
+ continue + + sxng_tag = language_tag(babel.Locale.parse(eng_tag)) + + except babel.UnknownLocaleError: + print("ERROR: %s is unknown by babel" % eng_tag) + continue + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.languages[sxng_tag] = eng_tag + + engine_traits.languages['zh_Hans'] = 'zh' + engine_traits.languages['zh_Hant'] = 'zh' diff --git a/searx/settings.yml b/searx/settings.yml index 841457b5e..e9bc61057 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1758,9 +1758,8 @@ engines: engine: peertube shortcut: ptb paging: true - # https://instances.joinpeertube.org/instances - base_url: https://peertube.biz/ - # base_url: https://tube.tardis.world/ + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com categories: videos disabled: true timeout: 6.0