minor fixes of pubmed engine

Closes #1045
pull/1/head
Noémi Ványi 7 years ago
parent df0d915806
commit d20bba6dc7

@ -11,9 +11,11 @@
More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/ More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/
""" """
from flask_babel import gettext
from lxml import etree from lxml import etree
from datetime import datetime from datetime import datetime
from searx.url_utils import urlencode, urlopen from searx.url_utils import urlencode
from searx.poolrequests import get
categories = ['science'] categories = ['science']
@ -46,12 +48,7 @@ def response(resp):
pubmed_retrieve_api_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'\ pubmed_retrieve_api_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'\
+ 'db=pubmed&retmode=xml&id={pmids_string}' + 'db=pubmed&retmode=xml&id={pmids_string}'
# handle Python2 vs Python3 management of bytes and strings pmids_results = etree.XML(resp.content)
try:
pmids_results = etree.XML(resp.text.encode('utf-8'))
except AttributeError:
pmids_results = etree.XML(resp.text)
pmids = pmids_results.xpath('//eSearchResult/IdList/Id') pmids = pmids_results.xpath('//eSearchResult/IdList/Id')
pmids_string = '' pmids_string = ''
@ -62,7 +59,7 @@ def response(resp):
retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args) retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args)
search_results_xml = urlopen(retrieve_url_encoded).read() search_results_xml = get(retrieve_url_encoded).content
search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation') search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation')
for entry in search_results: for entry in search_results:
@ -74,12 +71,12 @@ def response(resp):
try: try:
content = entry.xpath('.//Abstract/AbstractText')[0].text content = entry.xpath('.//Abstract/AbstractText')[0].text
except: except:
content = 'No abstract is available for this publication.' content = gettext('No abstract is available for this publication.')
# If a doi is available, add it to the snippet # If a doi is available, add it to the snippet
try: try:
doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text
content = 'DOI: ' + doi + ' Abstract: ' + content content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content)
except: except:
pass pass

@ -464,7 +464,7 @@ engines:
engine : pubmed engine : pubmed
shortcut : pub shortcut : pub
categories: science categories: science
oa_first : false timeout : 3.0
- name : qwant - name : qwant
engine : qwant engine : qwant

@ -3,7 +3,6 @@ from sys import version_info
if version_info[0] == 2: if version_info[0] == 2:
from urllib import quote, quote_plus, unquote, urlencode from urllib import quote, quote_plus, unquote, urlencode
from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult
from urllib2 import urlopen
else: else:
from urllib.parse import ( from urllib.parse import (
parse_qs, parse_qs,
@ -17,7 +16,6 @@ else:
urlunparse, urlunparse,
ParseResult ParseResult
) )
from urllib.request import urlopen
__export__ = (parse_qs, __export__ = (parse_qs,

Loading…
Cancel
Save