From 75b276f408487db8fecc6eab7abd6126323a7efe Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sun, 20 May 2018 18:10:22 -0500 Subject: [PATCH 1/5] fix bing "garbage" results (issue #1275) --- searx/engines/bing.py | 5 ++++- searx/utils.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/searx/engines/bing.py b/searx/engines/bing.py index c6d41782b..2da40619d 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -16,7 +16,7 @@ from lxml import html from searx.engines.xpath import extract_text from searx.url_utils import urlencode -from searx.utils import match_language +from searx.utils import match_language, gen_useragent # engine dependent config categories = ['general'] @@ -43,6 +43,9 @@ def request(query, params): offset=offset) params['url'] = base_url + search_path + + params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64') + return params diff --git a/searx/utils.py b/searx/utils.py index 77c392909..eccbaaf19 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -57,9 +57,9 @@ blocked_tags = ('script', 'style') -def gen_useragent(): +def gen_useragent(os=None): # TODO - return ua.format(os=choice(ua_os), version=choice(ua_versions)) + return ua.format(os=os or choice(ua_os), version=choice(ua_versions)) def searx_useragent(): From c7000cd1df6d8f9aaa787515a6eca16f8a083715 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sat, 23 Jun 2018 16:24:06 -0500 Subject: [PATCH 2/5] [fix] update user agent versions this fixes duckduckgo error response --- searx/utils.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/searx/utils.py b/searx/utils.py index 1ec45ed6e..3b2e39919 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -39,14 +39,15 @@ else: logger = logger.getChild('utils') -ua_versions = ('40.0', - '41.0', - '42.0', - '43.0', - '44.0', - '45.0', - '46.0', - '47.0') +ua_versions = ('52.8.1', + '53.0', + '54.0', + '55.0', + '56.0', + '57.0', + '58.0', + '59.0', + '60.0.2') ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64', From acaef6600e34159d2edb7bf0ef6b5f34471136e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Bourrel?= Date: Thu, 5 Jul 2018 10:11:45 +0200 Subject: [PATCH 3/5] Update path to wikidata image --- searx/engines/wikidata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index fe53609c1..ffc1c8d0f 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -54,7 +54,7 @@ value_xpath = './/div[contains(@class,"wikibase-statementview-mainsnak")]'\ + '/*/div[contains(@class,"wikibase-snakview-value")]' language_fallback_xpath = '//sup[contains(@class,"wb-language-fallback-indicator")]' calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]' - +media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a' def request(query, params): params['url'] = url_search.format( @@ -313,7 +313,7 @@ def add_image(result): for property_id in property_ids: image = result.xpath(property_xpath.replace('{propertyid}', property_id)) if image: - image_name = image[0].xpath(value_xpath) + image_name = image[0].xpath(media_xpath) image_src = url_image.replace('{filename}', extract_text(image_name[0])) return image_src From 7a474db61bd9ba9a08111758b058f81cb5175db4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Bourrel?= Date: Fri, 6 Jul 2018 10:31:01 +0200 Subject: [PATCH 4/5] Fix formatting --- searx/engines/wikidata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index ffc1c8d0f..c315b30da 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -56,6 +56,7 @@ language_fallback_xpath = '//sup[contains(@class,"wb-language-fallback-indicator calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]' media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a' + def request(query, params): params['url'] = url_search.format( query=urlencode({'search': query})) From 0a37f909900f3d4a04c963430c93977d2c96f520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Bourrel?= Date: Fri, 6 Jul 2018 11:15:43 +0200 Subject: [PATCH 5/5] Fix wikidata tests with updated path to media --- tests/unit/engines/test_wikidata.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py index 545ef9ed8..62a409781 100644 --- a/tests/unit/engines/test_wikidata.py +++ b/tests/unit/engines/test_wikidata.py @@ -123,9 +123,10 @@ class TestWikidataEngine(SearxTestCase):
- - image.png - +
+ image.png +
2,687 × 3,356; 1.22 MB +
@@ -156,9 +157,10 @@ class TestWikidataEngine(SearxTestCase):
- - icon.png - +
+ icon.png +
671 × 671; 18 KB
+
@@ -179,9 +181,10 @@ class TestWikidataEngine(SearxTestCase):
- - logo.png - +
+ logo.png +
170 × 170; 1 KB +