From 09ee2aa69dbd4815e0e1e1de53f3571972e04903 Mon Sep 17 00:00:00 2001 From: marc Date: Wed, 6 Jul 2016 17:29:40 -0500 Subject: [PATCH 1/6] [fix] Result text in Wolfram|Alpha (#607) --- searx/engines/wolframalpha_api.py | 10 ++++++++-- searx/engines/wolframalpha_noapi.py | 9 +++++++-- tests/unit/engines/test_wolframalpha_api.py | 6 ++++-- tests/unit/engines/test_wolframalpha_noapi.py | 6 ++++-- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 4526c825f..0e38051d1 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -22,6 +22,7 @@ answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext' pods_xpath = '//pod' subpods_xpath = './subpod' +pod_primary_xpath = './@primary' pod_id_xpath = './@id' pod_title_xpath = './@title' plaintext_xpath = './plaintext' @@ -78,10 +79,12 @@ def response(resp): infobox_title = None pods = search_results.xpath(pods_xpath) + result = "" result_chunks = [] for pod in pods: pod_id = pod.xpath(pod_id_xpath)[0] pod_title = pod.xpath(pod_title_xpath)[0] + pod_is_result = pod.xpath(pod_primary_xpath) subpods = pod.xpath(subpods_xpath) if not subpods: @@ -94,6 +97,9 @@ def response(resp): if content and pod_id not in image_pods: + if pod_is_result: + result = content + # if no input pod was found, title is first plaintext pod if not infobox_title: infobox_title = content @@ -116,7 +122,7 @@ def response(resp): # append link to site results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': 'Wolfram|Alpha', - 'content': infobox_title}) + 'title': infobox_title + ' - Wolfram|Alpha', + 'content': result}) return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 3a8180f04..80a510e3a 100644 --- a/searx/engines/wolframalpha_noapi.py +++ 
b/searx/engines/wolframalpha_noapi.py @@ -81,9 +81,11 @@ def response(resp): # TODO handle resp_json['queryresult']['assumptions'] result_chunks = [] infobox_title = None + result = "" for pod in resp_json['queryresult']['pods']: pod_id = pod.get('id', '') pod_title = pod.get('title', '') + pod_is_result = pod.get('primary', None) if 'subpods' not in pod: continue @@ -97,6 +99,9 @@ def response(resp): if subpod['plaintext'] != '(requires interactivity)': result_chunks.append({'label': pod_title, 'value': subpod['plaintext']}) + if pod_is_result: + result = subpod['plaintext'] + elif 'img' in subpod: result_chunks.append({'label': pod_title, 'image': subpod['img']}) @@ -108,7 +113,7 @@ def response(resp): 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': 'Wolfram|Alpha', - 'content': infobox_title}) + 'title': infobox_title + ' - Wolfram|Alpha', + 'content': result}) return results diff --git a/tests/unit/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py index 76404e192..f5b3538a3 100644 --- a/tests/unit/engines/test_wolframalpha_api.py +++ b/tests/unit/engines/test_wolframalpha_api.py @@ -103,7 +103,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('Wolfram|Alpha', results[1]['title']) + self.assertEqual('input_plaintext - Wolfram|Alpha', results[1]['title']) + self.assertIn('result_plaintext', results[1]['content']) # test calc xml = """ @@ -161,4 +162,5 @@ class TestWolframAlphaAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('Wolfram|Alpha', 
results[1]['title']) + self.assertEqual('integral_plaintext - Wolfram|Alpha', results[1]['title']) + self.assertIn('integral_plaintext', results[1]['content']) diff --git a/tests/unit/engines/test_wolframalpha_noapi.py b/tests/unit/engines/test_wolframalpha_noapi.py index a8f73470e..b3688a5e8 100644 --- a/tests/unit/engines/test_wolframalpha_noapi.py +++ b/tests/unit/engines/test_wolframalpha_noapi.py @@ -140,7 +140,8 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('Wolfram|Alpha', results[1]['title']) + self.assertEqual('input_plaintext - Wolfram|Alpha', results[1]['title']) + self.assertIn('result_plaintext', results[1]['content']) # test calc json = r""" @@ -219,4 +220,5 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('Wolfram|Alpha', results[1]['title']) + self.assertEqual('integral_plaintext - Wolfram|Alpha', results[1]['title']) + self.assertIn('integral_plaintext', results[1]['content']) From a8907224a1c433b1227fd707e9bb2524dd405109 Mon Sep 17 00:00:00 2001 From: "Lorenzo J. 
Lucchini" Date: Thu, 7 Jul 2016 00:33:03 +0200 Subject: [PATCH 2/6] Improving Wolfram Alpha search hit content Making WA search hits contain - the (parsed) input inside the "title" instead of just "Wolfram|Alpha", to better match other hit titles and to confirm correct parsing of input to the user - the first output field that contains any text (skipping ones that are only pictures; this is usually the most meaningful "result" field) instead of the raw input as the "content", making it additionally possible to obtain WA computations from JSON API calls --- searx/engines/wolframalpha_api.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 0e38051d1..e743c8f56 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -18,7 +18,6 @@ api_key = '' # defined in settings.yml # xpath variables failure_xpath = '/queryresult[attribute::success="false"]' -answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext' pods_xpath = '//pod' subpods_xpath = './subpod' @@ -76,11 +75,11 @@ def response(resp): try: infobox_title = search_results.xpath(input_xpath)[0].text except: - infobox_title = None + infobox_title = "" pods = search_results.xpath(pods_xpath) - result = "" result_chunks = [] + result_content = "" for pod in pods: pod_id = pod.xpath(pod_id_xpath)[0] pod_title = pod.xpath(pod_title_xpath)[0] @@ -97,8 +96,9 @@ def response(resp): if content and pod_id not in image_pods: - if pod_is_result: - result = content + if pod_is_result or not result_content: + if pod_id != "Input": + result_content = "%s: %s" % (pod_title, content) # if no input pod was found, title is first plaintext pod if not infobox_title: @@ -115,6 +115,8 @@ def response(resp): if not result_chunks: return [] + title = "Wolfram|Alpha (%s)" % infobox_title + # append infobox 
results.append({'infobox': infobox_title, 'attributes': result_chunks, @@ -122,7 +124,7 @@ def response(resp): # append link to site results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': infobox_title + ' - Wolfram|Alpha', - 'content': result}) + 'title': title, + 'content': result_content}) return results From 05206f86e31402d2bc6010c814446cb9ac114085 Mon Sep 17 00:00:00 2001 From: "Lorenzo J. Lucchini" Date: Thu, 7 Jul 2016 00:41:59 +0200 Subject: [PATCH 3/6] Adding Urban Dictionary xpath engine --- searx/settings.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/searx/settings.yml b/searx/settings.yml index 308a0bd45..2f1e8583b 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -494,6 +494,14 @@ engines: engine : wolframalpha_noapi timeout: 6.0 categories : science + + - name : urbandictionary + engine : xpath + search_url : http://www.urbandictionary.com/define.php?term={query} + url_xpath : //*[@class="word"]/@href + title_xpath : //*[@class="word"] + content_xpath : //*[@class="meaning"] + shortcut : ud - name : dictzone engine : dictzone From e145fdb86d0cd9dd8421ed63b3635f4bebcafa74 Mon Sep 17 00:00:00 2001 From: firebovine Date: Thu, 7 Jul 2016 19:41:33 -0400 Subject: [PATCH 4/6] #607 - noapi fix --- searx/engines/wolframalpha_noapi.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 80a510e3a..e318d93e6 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -8,9 +8,11 @@ # @stable no # @parse url, infobox +from cgi import escape from json import loads from time import time from urllib import urlencode +from lxml.etree import XML from searx.poolrequests import get as http_get @@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\ referer_url = url + 'input/?{query}' token = {'value': '', - 'last_updated': 0} + 'last_updated': None} # pods to display as image 
in infobox # this pods do return a plaintext, but they look better and are more useful as images @@ -80,8 +82,8 @@ def response(resp): # TODO handle resp_json['queryresult']['assumptions'] result_chunks = [] - infobox_title = None - result = "" + infobox_title = "" + result_content = "" for pod in resp_json['queryresult']['pods']: pod_id = pod.get('id', '') pod_title = pod.get('title', '') @@ -99,8 +101,9 @@ def response(resp): if subpod['plaintext'] != '(requires interactivity)': result_chunks.append({'label': pod_title, 'value': subpod['plaintext']}) - if pod_is_result: - result = subpod['plaintext'] + if pod_is_result or not result_content: + if pod_id != "Input": + result_content = pod_title + ': ' + subpod['plaintext'] elif 'img' in subpod: result_chunks.append({'label': pod_title, 'image': subpod['img']}) @@ -113,7 +116,7 @@ def response(resp): 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': infobox_title + ' - Wolfram|Alpha', - 'content': result}) + 'title': 'Wolfram|Alpha (' + infobox_title + ')', + 'content': result_content}) return results From 2bc42b378d9ed197c4b94db3371c26106c648d03 Mon Sep 17 00:00:00 2001 From: "Lorenzo J. 
Lucchini" Date: Fri, 8 Jul 2016 03:03:57 +0200 Subject: [PATCH 5/6] Remove unrelated change that mistakenly ended up in this branch --- searx/settings.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/searx/settings.yml b/searx/settings.yml index 2f1e8583b..308a0bd45 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -494,14 +494,6 @@ engines: engine : wolframalpha_noapi timeout: 6.0 categories : science - - - name : urbandictionary - engine : xpath - search_url : http://www.urbandictionary.com/define.php?term={query} - url_xpath : //*[@class="word"]/@href - title_xpath : //*[@class="word"] - content_xpath : //*[@class="meaning"] - shortcut : ud - name : dictzone engine : dictzone From f67db1e107ec4a9c211771a584fa3d7e20c914d8 Mon Sep 17 00:00:00 2001 From: firebovine Date: Sat, 10 Sep 2016 18:04:24 -0400 Subject: [PATCH 6/6] fix tests to reflect: Wolfram|Alpha (input) response --- tests/unit/engines/test_wolframalpha_api.py | 4 ++-- tests/unit/engines/test_wolframalpha_noapi.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py index f5b3538a3..64a64ceb3 100644 --- a/tests/unit/engines/test_wolframalpha_api.py +++ b/tests/unit/engines/test_wolframalpha_api.py @@ -103,7 +103,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('input_plaintext - Wolfram|Alpha', results[1]['title']) + self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title']) self.assertIn('result_plaintext', results[1]['content']) # test calc @@ -162,5 +162,5 @@ class TestWolframAlphaAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, 
results[1]['url']) - self.assertEqual('integral_plaintext - Wolfram|Alpha', results[1]['title']) + self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title']) self.assertIn('integral_plaintext', results[1]['content']) diff --git a/tests/unit/engines/test_wolframalpha_noapi.py b/tests/unit/engines/test_wolframalpha_noapi.py index b3688a5e8..982edd9f2 100644 --- a/tests/unit/engines/test_wolframalpha_noapi.py +++ b/tests/unit/engines/test_wolframalpha_noapi.py @@ -140,7 +140,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('input_plaintext - Wolfram|Alpha', results[1]['title']) + self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title']) self.assertIn('result_plaintext', results[1]['content']) # test calc @@ -220,5 +220,5 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('integral_plaintext - Wolfram|Alpha', results[1]['title']) + self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title']) self.assertIn('integral_plaintext', results[1]['content'])