Merge pull request #609 from LuccoJ/betterwolfram

Improving Wolfram Alpha search hit content
This commit is contained in:
Adam Tauber 2016-09-11 00:29:05 +02:00 committed by GitHub
commit 8f48c518aa
4 changed files with 32 additions and 12 deletions

View File

@ -18,10 +18,10 @@ api_key = '' # defined in settings.yml
# xpath variables # xpath variables
failure_xpath = '/queryresult[attribute::success="false"]' failure_xpath = '/queryresult[attribute::success="false"]'
answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext' input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
pods_xpath = '//pod' pods_xpath = '//pod'
subpods_xpath = './subpod' subpods_xpath = './subpod'
pod_primary_xpath = './@primary'
pod_id_xpath = './@id' pod_id_xpath = './@id'
pod_title_xpath = './@title' pod_title_xpath = './@title'
plaintext_xpath = './plaintext' plaintext_xpath = './plaintext'
@ -75,13 +75,15 @@ def response(resp):
try: try:
infobox_title = search_results.xpath(input_xpath)[0].text infobox_title = search_results.xpath(input_xpath)[0].text
except: except:
infobox_title = None infobox_title = ""
pods = search_results.xpath(pods_xpath) pods = search_results.xpath(pods_xpath)
result_chunks = [] result_chunks = []
result_content = ""
for pod in pods: for pod in pods:
pod_id = pod.xpath(pod_id_xpath)[0] pod_id = pod.xpath(pod_id_xpath)[0]
pod_title = pod.xpath(pod_title_xpath)[0] pod_title = pod.xpath(pod_title_xpath)[0]
pod_is_result = pod.xpath(pod_primary_xpath)
subpods = pod.xpath(subpods_xpath) subpods = pod.xpath(subpods_xpath)
if not subpods: if not subpods:
@ -94,6 +96,10 @@ def response(resp):
if content and pod_id not in image_pods: if content and pod_id not in image_pods:
if pod_is_result or not result_content:
if pod_id != "Input":
result_content = "%s: %s" % (pod_title, content)
# if no input pod was found, title is first plaintext pod # if no input pod was found, title is first plaintext pod
if not infobox_title: if not infobox_title:
infobox_title = content infobox_title = content
@ -109,6 +115,8 @@ def response(resp):
if not result_chunks: if not result_chunks:
return [] return []
title = "Wolfram|Alpha (%s)" % infobox_title
# append infobox # append infobox
results.append({'infobox': infobox_title, results.append({'infobox': infobox_title,
'attributes': result_chunks, 'attributes': result_chunks,
@ -116,7 +124,7 @@ def response(resp):
# append link to site # append link to site
results.append({'url': resp.request.headers['Referer'].decode('utf8'), results.append({'url': resp.request.headers['Referer'].decode('utf8'),
'title': 'Wolfram|Alpha', 'title': title,
'content': infobox_title}) 'content': result_content})
return results return results

View File

@ -8,9 +8,11 @@
# @stable no # @stable no
# @parse url, infobox # @parse url, infobox
from cgi import escape
from json import loads from json import loads
from time import time from time import time
from urllib import urlencode from urllib import urlencode
from lxml.etree import XML
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get
@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\
referer_url = url + 'input/?{query}' referer_url = url + 'input/?{query}'
token = {'value': '', token = {'value': '',
'last_updated': 0} 'last_updated': None}
# pods to display as image in infobox # pods to display as image in infobox
# these pods do return plaintext, but they look better and are more useful as images # these pods do return plaintext, but they look better and are more useful as images
@ -80,10 +82,12 @@ def response(resp):
# TODO handle resp_json['queryresult']['assumptions'] # TODO handle resp_json['queryresult']['assumptions']
result_chunks = [] result_chunks = []
infobox_title = None infobox_title = ""
result_content = ""
for pod in resp_json['queryresult']['pods']: for pod in resp_json['queryresult']['pods']:
pod_id = pod.get('id', '') pod_id = pod.get('id', '')
pod_title = pod.get('title', '') pod_title = pod.get('title', '')
pod_is_result = pod.get('primary', None)
if 'subpods' not in pod: if 'subpods' not in pod:
continue continue
@ -97,6 +101,10 @@ def response(resp):
if subpod['plaintext'] != '(requires interactivity)': if subpod['plaintext'] != '(requires interactivity)':
result_chunks.append({'label': pod_title, 'value': subpod['plaintext']}) result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
if pod_is_result or not result_content:
if pod_id != "Input":
result_content = pod_title + ': ' + subpod['plaintext']
elif 'img' in subpod: elif 'img' in subpod:
result_chunks.append({'label': pod_title, 'image': subpod['img']}) result_chunks.append({'label': pod_title, 'image': subpod['img']})
@ -108,7 +116,7 @@ def response(resp):
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
results.append({'url': resp.request.headers['Referer'].decode('utf8'), results.append({'url': resp.request.headers['Referer'].decode('utf8'),
'title': 'Wolfram|Alpha', 'title': 'Wolfram|Alpha (' + infobox_title + ')',
'content': infobox_title}) 'content': result_content})
return results return results

View File

@ -103,7 +103,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual(referer_url, results[0]['urls'][0]['url'])
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
self.assertEqual(referer_url, results[1]['url']) self.assertEqual(referer_url, results[1]['url'])
self.assertEqual('Wolfram|Alpha', results[1]['title']) self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
self.assertIn('result_plaintext', results[1]['content'])
# test calc # test calc
xml = """<?xml version='1.0' encoding='UTF-8'?> xml = """<?xml version='1.0' encoding='UTF-8'?>
@ -161,4 +162,5 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual(referer_url, results[0]['urls'][0]['url'])
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
self.assertEqual(referer_url, results[1]['url']) self.assertEqual(referer_url, results[1]['url'])
self.assertEqual('Wolfram|Alpha', results[1]['title']) self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
self.assertIn('integral_plaintext', results[1]['content'])

View File

@ -140,7 +140,8 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual(referer_url, results[0]['urls'][0]['url'])
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
self.assertEqual(referer_url, results[1]['url']) self.assertEqual(referer_url, results[1]['url'])
self.assertEqual('Wolfram|Alpha', results[1]['title']) self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
self.assertIn('result_plaintext', results[1]['content'])
# test calc # test calc
json = r""" json = r"""
@ -219,4 +220,5 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual(referer_url, results[0]['urls'][0]['url'])
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
self.assertEqual(referer_url, results[1]['url']) self.assertEqual(referer_url, results[1]['url'])
self.assertEqual('Wolfram|Alpha', results[1]['title']) self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
self.assertIn('integral_plaintext', results[1]['content'])