From d20bba6dc74ded16556acf2a404d01ec47455ca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Wed, 1 Nov 2017 14:20:47 +0100 Subject: [PATCH] minor fixes of pubmed engine Closes #1045 --- searx/engines/pubmed.py | 17 +++++++---------- searx/settings.yml | 2 +- searx/url_utils.py | 2 -- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index abb59d2ed..6451f1467 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -11,9 +11,11 @@ More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/ """ +from flask_babel import gettext from lxml import etree from datetime import datetime -from searx.url_utils import urlencode, urlopen +from searx.url_utils import urlencode +from searx.poolrequests import get categories = ['science'] @@ -46,12 +48,7 @@ def response(resp): pubmed_retrieve_api_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'\ + 'db=pubmed&retmode=xml&id={pmids_string}' - # handle Python2 vs Python3 management of bytes and strings - try: - pmids_results = etree.XML(resp.text.encode('utf-8')) - except AttributeError: - pmids_results = etree.XML(resp.text) - + pmids_results = etree.XML(resp.content) pmids = pmids_results.xpath('//eSearchResult/IdList/Id') pmids_string = '' @@ -62,7 +59,7 @@ def response(resp): retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args) - search_results_xml = urlopen(retrieve_url_encoded).read() + search_results_xml = get(retrieve_url_encoded).content search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation') for entry in search_results: @@ -74,12 +71,12 @@ def response(resp): try: content = entry.xpath('.//Abstract/AbstractText')[0].text except: - content = 'No abstract is available for this publication.' + content = gettext('No abstract is available for this publication.') # If a doi is available, add it to the snipppet try: doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text - content = 'DOI: ' + doi + ' Abstract: ' + content + content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content) except: pass diff --git a/searx/settings.yml b/searx/settings.yml index 8ec5173f5..067a842e4 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -464,7 +464,7 @@ engines: engine : pubmed shortcut : pub categories: science - oa_first : false + timeout : 3.0 - name : qwant engine : qwant diff --git a/searx/url_utils.py b/searx/url_utils.py index 5e9e29190..dcafc3ba8 100644 --- a/searx/url_utils.py +++ b/searx/url_utils.py @@ -3,7 +3,6 @@ from sys import version_info if version_info[0] == 2: from urllib import quote, quote_plus, unquote, urlencode from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult - from urllib2 import urlopen else: from urllib.parse import ( parse_qs, @@ -17,7 +16,6 @@ else: urlunparse, ParseResult ) - from urllib.request import urlopen __export__ = (parse_qs,