From 10edcbe3c22716cf49a62b74a393ef346fac8aea Mon Sep 17 00:00:00 2001
From: Jay
Date: Thu, 7 Jul 2022 20:42:41 +0100
Subject: [PATCH] [mod] Add engine for Emojipedia

Emojipedia is an emoji reference website which documents the meaning and
common usage of emoji characters in the Unicode Standard. It has been owned by
Zedge since 2021. Emojipedia is a voting member of The Unicode Consortium.[1]

Cherry picked from @james-still [2][3] and slightly modified to fit SearXNG's
quality gates.

[1] https://en.wikipedia.org/wiki/Emojipedia
[2] https://github.com/james-still/searx/commit/2fc01eb20f8de5f9cac492dcdfb817a6f0636580
[3] https://github.com/searx/searx/pull/3278
---
Reviewer note: response() now tolerates list items without a description or a
link and resolves hrefs with urljoin(); the blob id on the "index" line below
was not regenerated for these edits.

 searx/engines/emojipedia.py | 81 +++++++++++++++++++++++++++++++++++++
 searx/settings.yml          |  6 ++++
 2 files changed, 87 insertions(+)
 create mode 100644 searx/engines/emojipedia.py

diff --git a/searx/engines/emojipedia.py b/searx/engines/emojipedia.py
new file mode 100644
index 000000000..b89267c0d
--- /dev/null
+++ b/searx/engines/emojipedia.py
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Emojipedia
+
+Emojipedia is an emoji reference website which documents the meaning and
+common usage of emoji characters in the Unicode Standard. It has been owned by
+Zedge since 2021. Emojipedia is a voting member of The Unicode Consortium.[1]
+
+[1] https://en.wikipedia.org/wiki/Emojipedia
+"""
+
+from urllib.parse import urlencode, urljoin
+from lxml import html
+
+from searx.utils import (
+    eval_xpath_list,
+    eval_xpath_getindex,
+    extract_text,
+)
+
+about = {
+    "website": 'https://emojipedia.org',
+    "wikidata_id": 'Q22908129',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+categories = []
+paging = False
+time_range_support = False
+
+base_url = 'https://emojipedia.org'
+search_url = base_url + '/search/?{query}'
+
+
+def request(query, params):
+    """Store the Emojipedia search URL for ``query`` in ``params['url']``."""
+    params['url'] = search_url.format(
+        query=urlencode({'q': query}),
+    )
+    return params
+
+
+def response(resp):
+    """Extract search results from the HTML of an Emojipedia result page.
+
+    Returns a list of dicts with the keys ``url``, ``title`` and ``content``.
+    List items without a link are skipped and a missing description yields an
+    empty ``content``, so one odd item does not discard the whole response.
+    """
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    for result in eval_xpath_list(dom, "/html/body/div[2]/div[1]/ol/li"):
+
+        desc = eval_xpath_getindex(result, './/p', 0, default=None)
+        extracted_desc = extract_text(desc) if desc is not None else ''
+
+        # a list item whose description says 'No results found.' ends the list
+        if 'No results found.' in extracted_desc:
+            break
+
+        link = eval_xpath_getindex(result, './/h2/a', 0, default=None)
+        href = link.attrib.get('href') if link is not None else None
+        if not href:
+            # nothing to link to
+            continue
+
+        # urljoin copes with relative as well as absolute href values
+        url = urljoin(base_url, href)
+        title = extract_text(link)
+        content = extracted_desc
+
+        res = {'url': url, 'title': title, 'content': content}
+
+        results.append(res)
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 7bbdda85e..8bd2d1277 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -542,6 +542,12 @@ engines:
     timeout: 3.0
     disabled: true
 
+  - name: emojipedia
+    engine: emojipedia
+    timeout: 4.0
+    shortcut: em
+    disabled: true
+
   - name: tineye
     engine: tineye
     shortcut: tin