From 20f4538e1369cb4933c579dceccc56439b9bf7a6 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 4 Mar 2022 22:00:59 +0100 Subject: [PATCH] [fix] engine: Semantic Scholar (Science) // rework & fix Closes: https://github.com/searxng/searxng/issues/939 Signed-off-by: Markus Heiser --- searx/engines/semantic_scholar.py | 53 +++++++++++++++++++++++++------ searx/settings.yml | 7 ---- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py index 5d9d1a8e9..bda731047 100644 --- a/searx/engines/semantic_scholar.py +++ b/searx/engines/semantic_scholar.py @@ -1,12 +1,23 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Semantic Scholar (Science) +# lint: pylint +"""Semantic Scholar (Science) """ from json import dumps, loads +from datetime import datetime +about = { + "website": 'https://www.semanticscholar.org/', + "wikidata_id": 'Q22908627', + "official_api_documentation": 'https://api.semanticscholar.org/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} +paging = True search_url = 'https://www.semanticscholar.org/api/1/search' +paper_url = 'https://www.semanticscholar.org/paper' def request(query, params): @@ -34,13 +45,37 @@ def request(query, params): def response(resp): res = loads(resp.text) results = [] + for result in res['results']: - results.append( - { - 'url': result['primaryPaperLink']['url'], - 'title': result['title']['text'], - 'content': result['paperAbstractTruncated'], - } - ) + item = {} + metadata = [] + + url = result.get('primaryPaperLink', {}).get('url') + if not url and result.get('links'): + url = result.get('links')[0] + if not url: + alternatePaperLinks = result.get('alternatePaperLinks') + if alternatePaperLinks: + url = alternatePaperLinks[0].get('url') + if not url: + url = paper_url + '/%s' % result['id'] + + item['url'] = url + + item['title'] = result['title']['text'] + item['content'] = result['paperAbstract']['text'] + + metadata = result.get('fieldsOfStudy') or [] + venue = result.get('venue', {}).get('text') + if venue: + metadata.append(venue) + if metadata: + item['metadata'] = ', '.join(metadata) + + pubDate = result.get('pubDate') + if pubDate: + item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d") + + results.append(item) return results diff --git a/searx/settings.yml b/searx/settings.yml index 66ceecf0c..b4ff5453c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1205,13 +1205,6 @@ engines: disabled: true shortcut: se categories: science - about: - website: https://www.semanticscholar.org/ - wikidata_id: Q22908627 - official_api_documentation: https://api.semanticscholar.org/ - use_official_api: false - require_api_key: false - results: JSON # Spotify needs API credentials # - name: spotify