From 44c9216c497862293318a48ad5c39f373cee95e6 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 25 Jan 2015 20:04:44 +0100 Subject: [PATCH 01/28] Sanitize extract_text --- searx/engines/xpath.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 72120304e..1a599dc0a 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -28,13 +28,13 @@ def extract_text(xpath_results): result = '' for e in xpath_results: result = result + extract_text(e) - return result + return result.strip() elif type(xpath_results) in [_ElementStringResult, _ElementUnicodeResult]: # it's a string return ''.join(xpath_results) else: # it's a element - return html_to_text(xpath_results.text_content()) + return html_to_text(xpath_results.text_content()).strip() def extract_url(xpath_results, search_url): From 525af2a031b787e22c3e310e61bfcd5fd1737bca Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 25 Jan 2015 20:14:37 +0100 Subject: [PATCH 02/28] Add bing in the test units --- .gitignore | 1 + searx/engines/bing.py | 9 ++-- searx/tests/engines/test_bing.py | 90 ++++++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 4 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 searx/tests/engines/test_bing.py diff --git a/.gitignore b/.gitignore index 08cf582aa..3268f8320 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ local/ parts/ searx.egg-info/ var/ +node_modules/ diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 5de461cfe..f9c323d05 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -14,6 +14,7 @@ from urllib import urlencode from cgi import escape from lxml import html +from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] @@ -55,8 +56,8 @@ def response(resp): for result in dom.xpath('//div[@class="sa_cc"]'): link = result.xpath('.//h3/a')[0] url = link.attrib.get('href') - title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath('.//p//text()'))) + title = extract_text(link) + content = escape(extract_text(result.xpath('.//p'))) # append result results.append({'url': url, @@ -71,8 +72,8 @@ def response(resp): for result in dom.xpath('//li[@class="b_algo"]'): link = result.xpath('.//h2/a')[0] url = link.attrib.get('href') - title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath('.//p//text()'))) + title = extract_text(link) + content = escape(extract_text(result.xpath('.//p'))) # append result results.append({'url': url, diff --git a/searx/tests/engines/test_bing.py b/searx/tests/engines/test_bing.py new file mode 100644 index 000000000..52a049f01 --- /dev/null +++ b/searx/tests/engines/test_bing.py @@ -0,0 +1,90 @@ +from collections import defaultdict +import mock +from searx.engines import bing +from searx.testing import SearxTestCase + + +class TestBingEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = bing.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('bing.com' in params['url']) + self.assertTrue('SRCHHPGUSR' in params['cookies']) + self.assertTrue('fr' in params['cookies']['SRCHHPGUSR']) + + dicto['language'] = 'all' + params = bing.request(query, dicto) + self.assertTrue('SRCHHPGUSR' in params['cookies']) + self.assertTrue('en' in params['cookies']['SRCHHPGUSR']) + + def test_response(self): + self.assertRaises(AttributeError, bing.response, None) + self.assertRaises(AttributeError, bing.response, []) + self.assertRaises(AttributeError, bing.response, '') + self.assertRaises(AttributeError, bing.response, '[]') + + response = mock.Mock(content='') + self.assertEqual(bing.response(response), []) + + response = mock.Mock(content='') + self.assertEqual(bing.response(response), []) + + html = """ +
+
+ +
this.meta.com + + + + +
+

This should be the content.

+
+
+ """ + response = mock.Mock(content=html) + results = bing.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This should be the title') + self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') + self.assertEqual(results[0]['content'], 'This should be the content.') + + html = """ +
  • +
    + +
    this.meta.com + + + + +
    +

    This should be the content.

    +
    +
  • + """ + response = mock.Mock(content=html) + results = bing.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This should be the title') + self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') + self.assertEqual(results[0]['content'], 'This should be the content.') diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 1ffdbe529..970131b48 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,2 +1,3 @@ +from searx.tests.engines.test_bing import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_github import * # noqa From 0f52cc75424b4b376b7b950801c9a91d4e24e282 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 25 Jan 2015 22:12:34 +0100 Subject: [PATCH 03/28] Deezer's unit test --- searx/tests/engines/test_deezer.py | 56 ++++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 2 files changed, 57 insertions(+) create mode 100644 searx/tests/engines/test_deezer.py diff --git a/searx/tests/engines/test_deezer.py b/searx/tests/engines/test_deezer.py new file mode 100644 index 000000000..e0b81e3d6 --- /dev/null +++ b/searx/tests/engines/test_deezer.py @@ -0,0 +1,56 @@ +from collections import defaultdict +import mock +from searx.engines import deezer +from searx.testing import SearxTestCase + + +class TestDeezerEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = deezer.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('deezer.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, deezer.response, None) + self.assertRaises(AttributeError, deezer.response, []) + self.assertRaises(AttributeError, deezer.response, '') + self.assertRaises(AttributeError, deezer.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(deezer.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(deezer.response(response), []) + + json = """ + {"data":[ + {"id":100, "title":"Title of track", + "link":"http:\/\/www.deezer.com\/track\/1094042","duration":232, + "artist":{"id":200,"name":"Artist Name", + "link":"http:\/\/www.deezer.com\/artist\/1217","type":"artist"}, + "album":{"id":118106,"title":"Album Title","type":"album"},"type":"track"} + ]} + """ + response = mock.Mock(text=json) + results = deezer.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title of track') + self.assertEqual(results[0]['url'], 'http://www.deezer.com/track/1094042') + self.assertEqual(results[0]['content'], 'Artist Name • Album Title • Title of track') + + json = """ + {"data":[ + {"id":200,"name":"Artist Name", + "link":"http:\/\/www.deezer.com\/artist\/1217","type":"artist"} + ]} + """ + response = mock.Mock(text=json) + results = deezer.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 970131b48..45c9d7e28 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,3 +1,4 @@ from searx.tests.engines.test_bing import * # noqa +from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_github import * # noqa From 192f255e13e3a38cd572b08a2aff4b6117ff0960 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 25 Jan 2015 22:33:02 +0100 Subject: [PATCH 04/28] Mixcloud's unit test --- searx/tests/engines/test_deezer.py | 1 + searx/tests/engines/test_mixcloud.py | 67 ++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 69 insertions(+) create mode 100644 searx/tests/engines/test_mixcloud.py diff --git a/searx/tests/engines/test_deezer.py b/searx/tests/engines/test_deezer.py index e0b81e3d6..c8c2c90f2 100644 --- a/searx/tests/engines/test_deezer.py +++ b/searx/tests/engines/test_deezer.py @@ -43,6 +43,7 @@ class TestDeezerEngine(SearxTestCase): self.assertEqual(results[0]['title'], 'Title of track') self.assertEqual(results[0]['url'], 'http://www.deezer.com/track/1094042') self.assertEqual(results[0]['content'], 'Artist Name • Album Title • Title of track') + self.assertTrue('100' in results[0]['embedded']) json = """ {"data":[ diff --git a/searx/tests/engines/test_mixcloud.py b/searx/tests/engines/test_mixcloud.py new file mode 100644 index 000000000..a2ea47cf9 --- /dev/null +++ b/searx/tests/engines/test_mixcloud.py @@ -0,0 +1,67 @@ +from collections import defaultdict +import mock +from searx.engines import mixcloud +from searx.testing import SearxTestCase + + +class TestMixcloudEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = mixcloud.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('mixcloud.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, mixcloud.response, None) + self.assertRaises(AttributeError, mixcloud.response, []) + self.assertRaises(AttributeError, mixcloud.response, '') + self.assertRaises(AttributeError, mixcloud.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(mixcloud.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(mixcloud.response(response), []) + + json = """ + {"data":[ + { + "user": { + "url": "http://www.mixcloud.com/user/", + "username": "user", + "name": "User", + "key": "/user/" + }, + "key": "/user/this-is-the-url/", + "created_time": "2014-11-14T13:30:02Z", + "audio_length": 3728, + "slug": "this-is-the-url", + "name": "Title of track", + "url": "http://www.mixcloud.com/user/this-is-the-url/", + "updated_time": "2014-11-14T13:14:10Z" + } + ]} + """ + response = mock.Mock(text=json) + results = mixcloud.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title of track') + self.assertEqual(results[0]['url'], 'http://www.mixcloud.com/user/this-is-the-url/') + self.assertEqual(results[0]['content'], 'User') + self.assertTrue('http://www.mixcloud.com/user/this-is-the-url/' in results[0]['embedded']) + + json = """ + {"toto":[ + {"id":200,"name":"Artist Name", + "link":"http:\/\/www.mixcloud.com\/artist\/1217","type":"artist"} + ]} + """ + response = mock.Mock(text=json) + results = mixcloud.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 45c9d7e28..b42b1b89c 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -2,3 +2,4 @@ from searx.tests.engines.test_bing import * # noqa from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_github import * # noqa +from searx.tests.engines.test_mixcloud import * # noqa From 8f040e30adbbd615155a5075bec28ccadff10eff Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 26 Jan 2015 17:36:10 +0100 Subject: [PATCH 05/28] Flickr's test unit --- searx/tests/engines/test_flickr.py | 142 +++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 2 files changed, 143 insertions(+) create mode 100644 searx/tests/engines/test_flickr.py diff --git a/searx/tests/engines/test_flickr.py b/searx/tests/engines/test_flickr.py new file mode 100644 index 000000000..8b39e922f --- /dev/null +++ b/searx/tests/engines/test_flickr.py @@ -0,0 +1,142 @@ +from collections import defaultdict +import mock +from searx.engines import flickr +from searx.testing import SearxTestCase + + +class TestFlickrEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = flickr.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('flickr.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, flickr.response, None) + self.assertRaises(AttributeError, flickr.response, []) + self.assertRaises(AttributeError, flickr.response, '') + self.assertRaises(AttributeError, flickr.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(flickr.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(flickr.response(response), []) + + json = """ + { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", + "photo": [ + { "id": "15751017054", "owner": "66847915@N08", + "secret": "69c22afc40", "server": "7285", "farm": 8, + "title": "Photo title", "ispublic": 1, + "isfriend": 0, "isfamily": 0, + "description": { "_content": "Description" }, + "ownername": "Owner", + "url_o": "https:\/\/farm8.staticflickr.com\/7285\/15751017054_9178e0f963_o.jpg", + "height_o": "2100", "width_o": "2653", + "url_n": "https:\/\/farm8.staticflickr.com\/7285\/15751017054_69c22afc40_n.jpg", + "height_n": "253", "width_n": "320", + "url_z": "https:\/\/farm8.staticflickr.com\/7285\/15751017054_69c22afc40_z.jpg", + "height_z": "507", "width_z": "640" } + ] }, "stat": "ok" } + """ + response = mock.Mock(text=json) + results = flickr.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Photo title') + self.assertEqual(results[0]['url'], 'https://www.flickr.com/photos/66847915@N08/15751017054') + self.assertTrue('o.jpg' in results[0]['img_src']) + self.assertTrue('n.jpg' in results[0]['thumbnail_src']) + self.assertTrue('Owner' in results[0]['content']) + self.assertTrue('Description' in results[0]['content']) + + json = """ + { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", + "photo": [ + { "id": "15751017054", "owner": "66847915@N08", + "secret": "69c22afc40", "server": "7285", "farm": 8, + "title": "Photo title", "ispublic": 1, + "isfriend": 0, "isfamily": 0, + "description": { "_content": "Description" }, + "ownername": "Owner", + "url_z": "https:\/\/farm8.staticflickr.com\/7285\/15751017054_69c22afc40_z.jpg", + "height_z": "507", "width_z": "640" } + ] }, "stat": "ok" } + """ + response = mock.Mock(text=json) + results = flickr.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Photo title') + self.assertEqual(results[0]['url'], 'https://www.flickr.com/photos/66847915@N08/15751017054') + self.assertTrue('z.jpg' in results[0]['img_src']) + self.assertTrue('z.jpg' in results[0]['thumbnail_src']) + self.assertTrue('Owner' in results[0]['content']) + self.assertTrue('Description' in results[0]['content']) + + json = """ + { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", + "photo": [ + { "id": "15751017054", "owner": "66847915@N08", + "secret": "69c22afc40", "server": "7285", "farm": 8, + "title": "Photo title", "ispublic": 1, + "isfriend": 0, "isfamily": 0, + "description": { "_content": "Description" }, + "ownername": "Owner", + "url_o": "https:\/\/farm8.staticflickr.com\/7285\/15751017054_9178e0f963_o.jpg", + "height_o": "2100", "width_o": "2653" } + ] }, "stat": "ok" } + """ + response = mock.Mock(text=json) + results = flickr.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Photo title') + self.assertEqual(results[0]['url'], 'https://www.flickr.com/photos/66847915@N08/15751017054') + self.assertTrue('o.jpg' in results[0]['img_src']) + self.assertTrue('o.jpg' in results[0]['thumbnail_src']) + self.assertTrue('Owner' in results[0]['content']) + self.assertTrue('Description' in results[0]['content']) + + json = """ + { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", + "photo": [ + { "id": "15751017054", "owner": "66847915@N08", + "secret": "69c22afc40", "server": "7285", "farm": 8, + "title": "Photo title", "ispublic": 1, + "isfriend": 0, "isfamily": 0, + "description": { "_content": "Description" }, + "ownername": "Owner", + "url_n": "https:\/\/farm8.staticflickr.com\/7285\/15751017054_69c22afc40_n.jpg", + "height_n": "253", "width_n": "320" } + ] }, "stat": "ok" } + """ + response = mock.Mock(text=json) + results = flickr.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", + "toto": [] }, "stat": "ok" } + """ + response = mock.Mock(text=json) + results = flickr.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + {"toto":[ + {"id":200,"name":"Artist Name", + "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"} + ]} + """ + response = mock.Mock(text=json) + results = flickr.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index b42b1b89c..35280a329 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,5 +1,6 @@ from searx.tests.engines.test_bing import * # noqa from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_dummy import * # noqa +from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_github import * # noqa from searx.tests.engines.test_mixcloud import * # noqa From 4dba3739fb3b98572cbd51adab226376b5844105 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 26 Jan 2015 18:24:08 +0100 Subject: [PATCH 06/28] Youtube's unit test --- searx/engines/youtube.py | 4 +- searx/tests/engines/test_youtube.py | 204 ++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 207 insertions(+), 2 deletions(-) create mode 100644 searx/tests/engines/test_youtube.py diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py index 59f07c574..1375538a8 100644 --- a/searx/engines/youtube.py +++ b/searx/engines/youtube.py @@ -57,7 +57,7 @@ def response(resp): url = [x['href'] for x in result['link'] if x['type'] == 'text/html'] if not url: - return + continue # remove tracking url = url[0].replace('feature=youtube_gdata', '') @@ -73,7 +73,7 @@ def response(resp): pubdate = result['published']['$t'] publishedDate = parser.parse(pubdate) - if result['media$group']['media$thumbnail']: + if 'media$thumbnail' in result['media$group']: thumbnail = result['media$group']['media$thumbnail'][0]['url'] content = result['content']['$t'] diff --git a/searx/tests/engines/test_youtube.py b/searx/tests/engines/test_youtube.py new file mode 100644 index 000000000..434305228 --- /dev/null +++ b/searx/tests/engines/test_youtube.py @@ -0,0 +1,204 @@ +from collections import defaultdict +import mock +from searx.engines import youtube +from searx.testing import SearxTestCase + + +class TestYoutubeEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = youtube.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('youtube.com' in params['url']) + self.assertTrue('fr' in params['url']) + + dicto['language'] = 'all' + params = youtube.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, youtube.response, None) + self.assertRaises(AttributeError, youtube.response, []) + self.assertRaises(AttributeError, youtube.response, '') + self.assertRaises(AttributeError, youtube.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(youtube.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(youtube.response(response), []) + + json = """ + {"feed":{"entry":[{ + "id":{"$t":"http://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM"}, + "published":{"$t":"2015-01-23T21:25:00.000Z"}, + "updated":{"$t":"2015-01-26T14:38:15.000Z"}, + "title":{"$t":"Title", + "type":"text"},"content":{"$t":"Description","type":"text"}, + "link":[{"rel":"alternate","type":"text/html", + "href":"https://www.youtube.com/watch?v=DIVZCPfAOeM&feature=youtube_gdata"}, + {"rel":"http://gdata.youtube.com/schemas/2007#video.related", + "type":"application/atom+xml", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM/related"}, + {"rel":"http://gdata.youtube.com/schemas/2007#mobile","type":"text/html", + "href":"https://m.youtube.com/details?v=DIVZCPfAOeM"}, + {"rel":"self","type":"application/atom+xml", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM"}], + "author":[{"name":{"$t":"Cauet"}, + "uri":{"$t":"https://gdata.youtube.com/feeds/api/users/cauetofficiel"} }], + "gd$comments":{"gd$feedLink":{"rel":"http://gdata.youtube.com/schemas/2007#comments", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM/comments", + "countHint":8} }, + "media$group":{"media$category":[{"$t":"Comedy","label":"Comedy", + "scheme":"http://gdata.youtube.com/schemas/2007/categories.cat"}], + "media$content":[{"url":"https://www.youtube.com/v/DIVZCPfAOeM?version=3&f=videos&app=youtube_gdata", + "type":"application/x-shockwave-flash","medium":"video", + "isDefault":"true","expression":"full","duration":354,"yt$format":5}, + {"url":"rtsp://r1---sn-cg07luel.c.youtube.com/CiILENy73wIaGQnjOcD3CFmFDBMYDSANFEgGUgZ2aWRlb3MM/0/0/0/video.3gp", + "type":"video/3gpp","medium":"video","expression":"full","duration":354, + "yt$format":1}, + {"url":"rtsp://r1---sn-cg07luel.c.youtube.com/CiILENy73wIaGQnjOcD3CFmFDBMYESARFEgGUgZ2aWRlb3MM/0/0/0/video.3gp", + "type":"video/3gpp","medium":"video","expression":"full","duration":354,"yt$format":6}], + "media$description":{"$t":"Desc","type":"plain"}, + "media$keywords":{}, + "media$player":[{"url":"https://www.youtube.com/watch?v=DIVZCPfAOeM&feature=youtube_gdata_player"}], + "media$thumbnail":[{"url":"https://i.ytimg.com/vi/DIVZCPfAOeM/0.jpg", + "height":360,"width":480,"time":"00:02:57"}, + {"url":"https://i.ytimg.com/vi/DIVZCPfAOeM/1.jpg","height":90,"width":120,"time":"00:01:28.500"}, + {"url":"https://i.ytimg.com/vi/DIVZCPfAOeM/2.jpg","height":90,"width":120,"time":"00:02:57"}, + {"url":"https://i.ytimg.com/vi/DIVZCPfAOeM/3.jpg","height":90,"width":120,"time":"00:04:25.500"}], + "media$title":{"$t":"Title","type":"plain"}, + "yt$duration":{"seconds":"354"} }, + "gd$rating":{"average":4.932159,"max":5,"min":1,"numRaters":1533, + "rel":"http://schemas.google.com/g/2005#overall"}, + "yt$statistics":{"favoriteCount":"0","viewCount":"92464"} } + ] + } + } + """ + response = mock.Mock(text=json) + results = youtube.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM') + self.assertEqual(results[0]['content'], 'Description') + self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/0.jpg') + self.assertTrue('DIVZCPfAOeM' in results[0]['embedded']) + + json = """ + {"feed":{"entry":[{ + "id":{"$t":"http://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM"}, + "published":{"$t":"2015-01-23T21:25:00.000Z"}, + "updated":{"$t":"2015-01-26T14:38:15.000Z"}, + "title":{"$t":"Title", + "type":"text"},"content":{"$t":"Description","type":"text"}, + "link":[{"rel":"http://gdata.youtube.com/schemas/2007#video.related", + "type":"application/atom+xml", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM/related"}, + {"rel":"self","type":"application/atom+xml", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM"}], + "author":[{"name":{"$t":"Cauet"}, + "uri":{"$t":"https://gdata.youtube.com/feeds/api/users/cauetofficiel"} }], + "gd$comments":{"gd$feedLink":{"rel":"http://gdata.youtube.com/schemas/2007#comments", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM/comments", + "countHint":8} }, + "media$group":{"media$category":[{"$t":"Comedy","label":"Comedy", + "scheme":"http://gdata.youtube.com/schemas/2007/categories.cat"}], + "media$content":[{"url":"https://www.youtube.com/v/DIVZCPfAOeM?version=3&f=videos&app=youtube_gdata", + "type":"application/x-shockwave-flash","medium":"video", + "isDefault":"true","expression":"full","duration":354,"yt$format":5}, + {"url":"rtsp://r1---sn-cg07luel.c.youtube.com/CiILENy73wIaGQnjOcD3CFmFDBMYDSANFEgGUgZ2aWRlb3MM/0/0/0/video.3gp", + "type":"video/3gpp","medium":"video","expression":"full","duration":354, + "yt$format":1}, + {"url":"rtsp://r1---sn-cg07luel.c.youtube.com/CiILENy73wIaGQnjOcD3CFmFDBMYESARFEgGUgZ2aWRlb3MM/0/0/0/video.3gp", + "type":"video/3gpp","medium":"video","expression":"full","duration":354,"yt$format":6}], + "media$description":{"$t":"Desc","type":"plain"}, + "media$keywords":{}, + "media$player":[{"url":"https://www.youtube.com/watch?v=DIVZCPfAOeM&feature=youtube_gdata_player"}], + "media$thumbnail":[{"url":"https://i.ytimg.com/vi/DIVZCPfAOeM/0.jpg", + "height":360,"width":480,"time":"00:02:57"}, + {"url":"https://i.ytimg.com/vi/DIVZCPfAOeM/1.jpg","height":90,"width":120,"time":"00:01:28.500"}, + {"url":"https://i.ytimg.com/vi/DIVZCPfAOeM/2.jpg","height":90,"width":120,"time":"00:02:57"}, + {"url":"https://i.ytimg.com/vi/DIVZCPfAOeM/3.jpg","height":90,"width":120,"time":"00:04:25.500"}], + "media$title":{"$t":"Title","type":"plain"}, + "yt$duration":{"seconds":"354"} }, + "gd$rating":{"average":4.932159,"max":5,"min":1,"numRaters":1533, + "rel":"http://schemas.google.com/g/2005#overall"}, + "yt$statistics":{"favoriteCount":"0","viewCount":"92464"} } + ] + } + } + """ + response = mock.Mock(text=json) + results = youtube.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + {"feed":{"entry":[{ + "id":{"$t":"http://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM"}, + "published":{"$t":"2015-01-23T21:25:00.000Z"}, + "updated":{"$t":"2015-01-26T14:38:15.000Z"}, + "title":{"$t":"Title", + "type":"text"},"content":{"$t":"Description","type":"text"}, + "link":[{"rel":"alternate","type":"text/html", + "href":"https://www.youtube.com/watch?v=DIVZCPfAOeM"}, + {"rel":"http://gdata.youtube.com/schemas/2007#video.related", + "type":"application/atom+xml", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM/related"}, + {"rel":"http://gdata.youtube.com/schemas/2007#mobile","type":"text/html", + "href":"https://m.youtube.com/details?v=DIVZCPfAOeM"}, + {"rel":"self","type":"application/atom+xml", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM"}], + "author":[{"name":{"$t":"Cauet"}, + "uri":{"$t":"https://gdata.youtube.com/feeds/api/users/cauetofficiel"} }], + "gd$comments":{"gd$feedLink":{"rel":"http://gdata.youtube.com/schemas/2007#comments", + "href":"https://gdata.youtube.com/feeds/api/videos/DIVZCPfAOeM/comments", + "countHint":8} }, + "media$group":{"media$category":[{"$t":"Comedy","label":"Comedy", + "scheme":"http://gdata.youtube.com/schemas/2007/categories.cat"}], + "media$content":[{"url":"https://www.youtube.com/v/DIVZCPfAOeM?version=3&f=videos&app=youtube_gdata", + "type":"application/x-shockwave-flash","medium":"video", + "isDefault":"true","expression":"full","duration":354,"yt$format":5}, + {"url":"rtsp://r1---sn-cg07luel.c.youtube.com/CiILENy73wIaGQnjOcD3CFmFDBMYDSANFEgGUgZ2aWRlb3MM/0/0/0/video.3gp", + "type":"video/3gpp","medium":"video","expression":"full","duration":354, + "yt$format":1}, + {"url":"rtsp://r1---sn-cg07luel.c.youtube.com/CiILENy73wIaGQnjOcD3CFmFDBMYESARFEgGUgZ2aWRlb3MM/0/0/0/video.3gp", + "type":"video/3gpp","medium":"video","expression":"full","duration":354,"yt$format":6}], + "media$description":{"$t":"Desc","type":"plain"}, + "media$keywords":{}, + "media$player":[{"url":"https://www.youtube.com/watch?v=DIVZCPfAOeM&feature=youtube_gdata_player"}], + "media$title":{"$t":"Title","type":"plain"}, + "yt$duration":{"seconds":"354"} }, + "gd$rating":{"average":4.932159,"max":5,"min":1,"numRaters":1533, + "rel":"http://schemas.google.com/g/2005#overall"}, + "yt$statistics":{"favoriteCount":"0","viewCount":"92464"} } + ] + } + } + """ + response = mock.Mock(text=json) + results = youtube.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM') + self.assertEqual(results[0]['content'], 'Description') + self.assertEqual(results[0]['thumbnail'], '') + self.assertTrue('DIVZCPfAOeM' in results[0]['embedded']) + + json = """ + {"toto":{"entry":[] + } + } + """ + response = mock.Mock(text=json) + results = youtube.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 35280a329..b99c30070 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -4,3 +4,4 @@ from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_github import * # noqa from searx.tests.engines.test_mixcloud import * # noqa +from searx.tests.engines.test_youtube import * # noqa From cfe81d741cdd2517c4587071e4afbdd0adb923bd Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 27 Jan 2015 20:03:33 +0100 Subject: [PATCH 07/28] A bit of utils unit tests --- searx/tests/test_utils.py | 22 ++++++++++++++++++++++ searx/utils.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/searx/tests/test_utils.py b/searx/tests/test_utils.py index 817fd4372..abe411c2b 100644 --- a/searx/tests/test_utils.py +++ b/searx/tests/test_utils.py @@ -10,6 +10,11 @@ class TestUtils(SearxTestCase): self.assertIsNotNone(utils.gen_useragent()) self.assertTrue(utils.gen_useragent().startswith('Mozilla')) + def test_searx_useragent(self): + self.assertIsInstance(utils.searx_useragent(), str) + self.assertIsNotNone(utils.searx_useragent()) + self.assertTrue(utils.searx_useragent().startswith('searx')) + def test_highlight_content(self): self.assertEqual(utils.highlight_content(0, None), None) self.assertEqual(utils.highlight_content(None, None), None) @@ -29,6 +34,23 @@ class TestUtils(SearxTestCase): query = 'a test' self.assertEqual(utils.highlight_content(content, query), content) + def test_html_to_text(self): + html = """ + + """ + self.assertIsInstance(utils.html_to_text(html), unicode) + self.assertIsNotNone(utils.html_to_text(html)) + self.assertEqual(utils.html_to_text(html), "Test text") + class TestHTMLTextExtractor(SearxTestCase): diff --git a/searx/utils.py b/searx/utils.py index f15f8a4bc..59d4b85be 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -115,7 +115,7 @@ class HTMLTextExtractor(HTMLParser): self.result.append(name) def get_text(self): - return u''.join(self.result) + return u''.join(self.result).strip() def html_to_text(html): From eca5de73a7f38958d3ba14930b42aaa5a5fbf989 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 27 Jan 2015 22:37:11 +0100 Subject: [PATCH 08/28] Searchcode code's test unit --- searx/tests/engines/test_searchcode_code.py | 75 +++++++++++++++++++++ searx/tests/test_engines.py | 1 + 2 files changed, 76 insertions(+) create mode 100644 searx/tests/engines/test_searchcode_code.py diff --git a/searx/tests/engines/test_searchcode_code.py b/searx/tests/engines/test_searchcode_code.py new file mode 100644 index 000000000..c0ac2025c --- /dev/null +++ b/searx/tests/engines/test_searchcode_code.py @@ -0,0 +1,75 @@ +from collections import defaultdict +import mock +from searx.engines import searchcode_code +from searx.testing import SearxTestCase + + +class TestSearchcodeCodeEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = searchcode_code.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('searchcode.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, searchcode_code.response, None) + self.assertRaises(AttributeError, searchcode_code.response, []) + self.assertRaises(AttributeError, searchcode_code.response, '') + self.assertRaises(AttributeError, searchcode_code.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(searchcode_code.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(searchcode_code.response(response), []) + + json = """ + { + "matchterm": "test", + "previouspage": null, + "searchterm": "test", + "query": "test", + "total": 1000, + "page": 0, + "nextpage": 1, + "results": [ + { + "repo": "https://repo", + "linescount": 1044, + "location": "/tests", + "name": "Name", + "url": "https://url", + "md5hash": "ecac6e479edd2b9406c9e08603cec655", + "lines": { + "1": "// Test 011", + "2": "// Source: " + }, + "id": 51223527, + "filename": "File.CPP" + } + ] + } + """ + response = mock.Mock(text=json) + results = searchcode_code.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Name - File.CPP') + self.assertEqual(results[0]['url'], 'https://url') + self.assertEqual(results[0]['repository'], 'https://repo') + self.assertEqual(results[0]['code_language'], 'cpp') + + json = """ + {"toto":[ + {"id":200,"name":"Artist Name", + "link":"http:\/\/www.searchcode_code.com\/artist\/1217","type":"artist"} + ]} + """ + response = mock.Mock(text=json) + results = searchcode_code.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index b99c30070..e7648bc08 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -4,4 +4,5 @@ from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_github import * # noqa from searx.tests.engines.test_mixcloud import * # noqa +from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_youtube import * # noqa From 0f81aa8410623e790f4ad01b32e2b37f6258356a Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 27 Jan 2015 22:38:56 +0100 Subject: [PATCH 09/28] Searchcode doc's test unit --- searx/tests/engines/test_searchcode_doc.py | 73 ++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 2 files changed, 74 insertions(+) create mode 100644 searx/tests/engines/test_searchcode_doc.py diff --git a/searx/tests/engines/test_searchcode_doc.py b/searx/tests/engines/test_searchcode_doc.py new file mode 100644 index 000000000..b9dcf380b --- /dev/null +++ b/searx/tests/engines/test_searchcode_doc.py @@ -0,0 +1,73 @@ +from collections import defaultdict +import mock +from searx.engines import searchcode_doc +from searx.testing import SearxTestCase + + +class TestSearchcodeDocEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = searchcode_doc.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('searchcode.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, searchcode_doc.response, None) + self.assertRaises(AttributeError, searchcode_doc.response, []) + self.assertRaises(AttributeError, searchcode_doc.response, '') + self.assertRaises(AttributeError, searchcode_doc.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(searchcode_doc.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(searchcode_doc.response(response), []) + + json = """ + { + "matchterm": "test", + "previouspage": null, + "searchterm": "test", + "query": "test", + "total": 60, + "page": 0, + "nextpage": 1, + "results": [ + { + "synopsis": "Synopsis", + "displayname": null, + "name": "test", + "url": "http://url", + "type": "Type", + "icon": null, + "namespace": "Namespace", + "description": "Description" + } + ] + } + """ + response = mock.Mock(text=json) + results = searchcode_doc.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], '[Type] Namespace test') + self.assertEqual(results[0]['url'], 'http://url') + self.assertIn('Synopsis', results[0]['content']) + self.assertIn('Type', results[0]['content']) + self.assertIn('test', results[0]['content']) + self.assertIn('Description', results[0]['content']) + + json = """ + {"toto":[ + {"id":200,"name":"Artist Name", + "link":"http:\/\/www.searchcode_doc.com\/artist\/1217","type":"artist"} + ]} + """ + response = mock.Mock(text=json) + results = searchcode_doc.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index e7648bc08..f46e3dc2a 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -5,4 +5,5 @@ from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_github import * # noqa from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa +from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_youtube import * # noqa From 3282e62ff92f1c2158cb169d2a21a5988766450c Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 27 Jan 2015 22:39:25 +0100 Subject: [PATCH 10/28] Searchcode engines corrections --- searx/engines/searchcode_code.py | 2 +- searx/engines/searchcode_doc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 655818da2..f276697b1 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -42,7 +42,7 @@ def response(resp): search_results = loads(resp.text) # parse results - for result in search_results['results']: + for result in search_results.get('results', []): href = result['url'] title = "" + result['name'] + " - " + result['filename'] repo = result['repo'] diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py index b5b7159be..76da8d752 100644 --- a/searx/engines/searchcode_doc.py +++ b/searx/engines/searchcode_doc.py @@ -35,7 +35,7 @@ def response(resp): search_results = loads(resp.text) # parse results - for result in search_results['results']: + for result in search_results.get('results', []): href = result['url'] title = "[" + result['type'] + "] " +\ result['namespace'] +\ From 92368a410749a4a057b476eb10c524f0fc133a0b Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 27 Jan 2015 23:20:57 +0100 Subject: [PATCH 11/28] Dailymotion's unit test --- searx/tests/engines/test_dailymotion.py | 74 +++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 2 files changed, 75 insertions(+) create mode 100644 searx/tests/engines/test_dailymotion.py diff --git a/searx/tests/engines/test_dailymotion.py b/searx/tests/engines/test_dailymotion.py new file mode 100644 index 000000000..4c31ff5d5 --- /dev/null +++ b/searx/tests/engines/test_dailymotion.py @@ -0,0 +1,74 @@ +from collections import defaultdict +import mock +from searx.engines import dailymotion +from searx.testing import SearxTestCase + + +class TestDailymotionEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = dailymotion.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('dailymotion.com' in params['url']) + self.assertTrue('fr' in params['url']) + + dicto['language'] = 'all' + params = dailymotion.request(query, dicto) + self.assertTrue('en' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, dailymotion.response, None) + self.assertRaises(AttributeError, dailymotion.response, []) + self.assertRaises(AttributeError, dailymotion.response, '') + self.assertRaises(AttributeError, dailymotion.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(dailymotion.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(dailymotion.response(response), []) + + json = """ + { + "page": 1, + "limit": 5, + "explicit": false, + "total": 289487, + "has_more": true, + "list": [ + { + "created_time": 1422173451, + "title": "Title", + "description": "Description", + "duration": 81, + "url": "http://www.url", + "thumbnail_360_url": "http://thumbnail", + "id": "x2fit7q" + } + ] + } + """ + response = mock.Mock(text=json) + results = dailymotion.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url') + self.assertEqual(results[0]['content'], 'Description') + self.assertIn('x2fit7q', results[0]['embedded']) + + json = """ + {"toto":[ + {"id":200,"name":"Artist Name", + "link":"http:\/\/www.dailymotion.com\/artist\/1217","type":"artist"} + ]} + """ + response = mock.Mock(text=json) + results = dailymotion.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index f46e3dc2a..64d220bcd 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,4 +1,5 @@ from searx.tests.engines.test_bing import * # noqa +from searx.tests.engines.test_dailymotion import * # noqa from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa From 1d255061c7422045ef912a471500513832e0319f Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Thu, 29 Jan 2015 00:26:12 +0100 Subject: [PATCH 12/28] Digg's unit test --- searx/engines/digg.py | 2 +- searx/tests/engines/test_digg.py | 57 ++++++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 searx/tests/engines/test_digg.py diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 8c457d6b9..1b5f2c8e4 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -44,7 +44,7 @@ def response(resp): search_result = loads(resp.text) - if search_result['html'] == '': + if 'html' not in search_result or search_result['html'] == '': return results dom = html.fromstring(search_result['html']) diff --git a/searx/tests/engines/test_digg.py b/searx/tests/engines/test_digg.py new file mode 100644 index 000000000..7e9006c0d --- /dev/null +++ b/searx/tests/engines/test_digg.py @@ -0,0 +1,57 @@ +from collections import defaultdict +import mock +from searx.engines import digg +from searx.testing import SearxTestCase + + +class TestDiggEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = digg.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('digg.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, digg.response, None) + self.assertRaises(AttributeError, digg.response, []) + self.assertRaises(AttributeError, digg.response, '') + self.assertRaises(AttributeError, digg.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(digg.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(digg.response(response), []) + + json = """ + { + "status": "ok", + "num": 10, + "next_position": 20, + "html": "" + } + """ + response = mock.Mock(text=json) + results = digg.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title of article') + self.assertEqual(results[0]['url'], 'http://url.of.link') + self.assertEqual(results[0]['thumbnail'], 'http://url.of.image.jpeg') + self.assertEqual(results[0]['content'], '') + + json = """ + { + "status": "error", + "num": 10, + "next_position": 20 + } + """ + response = mock.Mock(text=json) + results = digg.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 64d220bcd..309e83f16 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,6 +1,7 @@ from searx.tests.engines.test_bing import * # noqa from searx.tests.engines.test_dailymotion import * # noqa from searx.tests.engines.test_deezer import * # noqa +from searx.tests.engines.test_digg import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_github import * # noqa From d4957045513d6fb32dcffbc7ea87483479a8cb6e Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Thu, 29 Jan 2015 01:13:33 +0100 Subject: [PATCH 13/28] Deviant Art's unit test --- searx/engines/deviantart.py | 7 +- searx/tests/engines/test_deviantart.py | 118 +++++++++++++++++++++++++ searx/tests/engines/test_digg.py | 46 +++++++++- searx/tests/test_engines.py | 1 + 4 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 searx/tests/engines/test_deviantart.py diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index 6284cf598..4198e8c76 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -14,6 +14,7 @@ from urllib import urlencode from urlparse import urljoin from lxml import html import re +from searx.engines.xpath import extract_text # engine dependent config categories = ['images'] @@ -50,9 +51,9 @@ def response(resp): for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'): link = result.xpath('.//a[contains(@class, "thumb")]')[0] url = urljoin(base_url, link.attrib.get('href')) - title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') # noqa - title = ''.join(title_links[0].xpath('.//text()')) - thumbnail_src = link.xpath('.//img')[0].attrib['src'] + title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') + title = extract_text(title_links[0]) + thumbnail_src = link.xpath('.//img')[0].attrib.get('src') img_src = regex.sub('/', thumbnail_src) # append result diff --git a/searx/tests/engines/test_deviantart.py b/searx/tests/engines/test_deviantart.py new file mode 100644 index 000000000..9cf68d0b8 --- /dev/null +++ b/searx/tests/engines/test_deviantart.py @@ -0,0 +1,118 @@ +from collections import defaultdict +import mock +from searx.engines import deviantart +from searx.testing import SearxTestCase + + +class TestDeviantartEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = deviantart.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('deviantart.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, deviantart.response, None) + self.assertRaises(AttributeError, deviantart.response, []) + self.assertRaises(AttributeError, deviantart.response, '') + self.assertRaises(AttributeError, deviantart.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(deviantart.response(response), []) + + response = mock.Mock(status_code=302) + self.assertEqual(deviantart.response(response), []) + + html = """ +
    + + + + + + + Test + + + + + + + Title of image + + + + 5 years ago + + in Animation + + + + More Like This + + + +
    + """ + response = mock.Mock(text=html) + results = deviantart.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title of image') + self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url') + self.assertNotIn('content', results[0]) + self.assertEqual(results[0]['thumbnail_src'], 'http://url.of.thumbnail') + + html = """ + + + + + + Test + + + + + + + Title of image + + + + 5 years ago + + in Animation + + + + More Like This + + + """ + response = mock.Mock(text=html) + results = deviantart.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_digg.py b/searx/tests/engines/test_digg.py index 7e9006c0d..6e7c9cc99 100644 --- a/searx/tests/engines/test_digg.py +++ b/searx/tests/engines/test_digg.py @@ -32,9 +32,53 @@ class TestDiggEngine(SearxTestCase): "status": "ok", "num": 10, "next_position": 20, - "html": "" + "html": "" } """ + json = json.replace('\r\n', '').replace('\n', '').replace('\r', '') response = mock.Mock(text=json) results = digg.response(response) self.assertEqual(type(results), list) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 309e83f16..561b436ff 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,6 +1,7 @@ from searx.tests.engines.test_bing import * # noqa from searx.tests.engines.test_dailymotion import * # noqa from searx.tests.engines.test_deezer import * # noqa +from searx.tests.engines.test_deviantart import * # noqa from searx.tests.engines.test_digg import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa From dad0434f34f04ada2b4b0961bbb714e25c752677 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Thu, 29 Jan 2015 20:15:52 +0100 Subject: [PATCH 14/28] Bing images' unit test --- searx/engines/bing_images.py | 5 +- searx/tests/engines/test_bing_images.py | 268 ++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 273 insertions(+), 1 deletion(-) create mode 100644 searx/tests/engines/test_bing_images.py diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 9ae498427..9d1c22f5a 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -33,7 +33,10 @@ def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 # required for cookie - language = 'en-US' + if params['language'] == 'all': + language = 'en-US' + else: + language = params['language'].replace('_', '-') search_path = search_string.format( query=urlencode({'q': query}), diff --git a/searx/tests/engines/test_bing_images.py b/searx/tests/engines/test_bing_images.py new file mode 100644 index 000000000..59c134623 --- /dev/null +++ b/searx/tests/engines/test_bing_images.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import bing_images +from searx.testing import SearxTestCase + + +class TestBingImagesEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = bing_images.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('bing.com' in params['url']) + self.assertTrue('SRCHHPGUSR' in params['cookies']) + self.assertTrue('fr' in params['cookies']['SRCHHPGUSR']) + + dicto['language'] = 'all' + params = bing_images.request(query, dicto) + self.assertIn('SRCHHPGUSR', params['cookies']) + self.assertIn('en', params['cookies']['SRCHHPGUSR']) + + def test_response(self): + self.assertRaises(AttributeError, bing_images.response, None) + self.assertRaises(AttributeError, bing_images.response, []) + self.assertRaises(AttributeError, bing_images.response, '') + self.assertRaises(AttributeError, bing_images.response, '[]') + + response = mock.Mock(content='') + self.assertEqual(bing_images.response(response), []) + + response = mock.Mock(content='') + self.assertEqual(bing_images.response(response), []) + + html = """ +
    + + + +
    + """ + html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') + response = mock.Mock(content=html) + results = bing_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Test Query') + self.assertEqual(results[0]['url'], 'http://www.page.url/') + self.assertEqual(results[0]['content'], '') + self.assertEqual(results[0]['thumbnail_src'], 'http://ts1.mm.bing.net/th?id=HN.608003696942779811') + self.assertEqual(results[0]['img_src'], 'http://test.url/Test%20Query.jpg') + + html = """ + + + + """ + response = mock.Mock(content=html) + results = bing_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + html = """ +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    +
    + + + +
    + """ + html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') + response = mock.Mock(content=html) + results = bing_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 10) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 561b436ff..fab911d13 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,4 +1,5 @@ from searx.tests.engines.test_bing import * # noqa +from searx.tests.engines.test_bing_images import * # noqa from searx.tests.engines.test_dailymotion import * # noqa from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_deviantart import * # noqa From efde2c21c8656ad21b24980b516ddbbf2e209523 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Thu, 29 Jan 2015 20:56:57 +0100 Subject: [PATCH 15/28] Bing news' unit test I have no idea why coverage tell 97% and 2 misses in branches. If anyone has an idea... --- searx/engines/bing_news.py | 29 ++-- searx/tests/engines/test_bing_news.py | 236 ++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 249 insertions(+), 17 deletions(-) create mode 100644 searx/tests/engines/test_bing_news.py diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 789a23b89..182bd36b5 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -15,6 +15,7 @@ from lxml import html from datetime import datetime, timedelta from dateutil import parser import re +from searx.engines.xpath import extract_text # engine dependent config categories = ['news'] @@ -42,6 +43,7 @@ def request(query, params): params['cookies']['_FP'] = "ui=en-US" params['url'] = base_url + search_path + return params @@ -55,44 +57,37 @@ def response(resp): for result in dom.xpath('//div[@class="sn_r"]'): link = result.xpath('.//div[@class="newstitle"]/a')[0] url = link.attrib.get('href') - title = ' '.join(link.xpath('.//text()')) - contentXPath = result.xpath('.//div[@class="sn_txt"]/div' - '//span[@class="sn_snip"]//text()') + title = extract_text(link) + contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]') if contentXPath is not None: - content = escape(' '.join(contentXPath)) + content = escape(extract_text(contentXPath)) # parse publishedDate publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div' '//span[contains(@class,"sn_ST")]' - '//span[contains(@class,"sn_tm")]' - '//text()') + '//span[contains(@class,"sn_tm")]') + if publishedDateXPath is not None: - publishedDate = escape(' '.join(publishedDateXPath)) + publishedDate = escape(extract_text(publishedDateXPath)) if re.match("^[0-9]+ minute(s|) ago$", publishedDate): timeNumbers = re.findall(r'\d+', publishedDate) - publishedDate = datetime.now()\ - - timedelta(minutes=int(timeNumbers[0])) + publishedDate = datetime.now() - timedelta(minutes=int(timeNumbers[0])) elif re.match("^[0-9]+ hour(s|) ago$", publishedDate): timeNumbers = re.findall(r'\d+', publishedDate) - publishedDate = datetime.now()\ - - timedelta(hours=int(timeNumbers[0])) - elif re.match("^[0-9]+ hour(s|)," - " [0-9]+ minute(s|) ago$", publishedDate): + publishedDate = datetime.now() - timedelta(hours=int(timeNumbers[0])) + elif re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate): timeNumbers = re.findall(r'\d+', publishedDate) publishedDate = datetime.now()\ - timedelta(hours=int(timeNumbers[0]))\ - timedelta(minutes=int(timeNumbers[1])) elif re.match("^[0-9]+ day(s|) ago$", publishedDate): timeNumbers = re.findall(r'\d+', publishedDate) - publishedDate = datetime.now()\ - - timedelta(days=int(timeNumbers[0])) + publishedDate = datetime.now() - timedelta(days=int(timeNumbers[0])) else: try: - # FIXME use params['language'] to parse either mm/dd or dd/mm publishedDate = parser.parse(publishedDate, dayfirst=False) except TypeError: - # FIXME publishedDate = datetime.now() # append result diff --git a/searx/tests/engines/test_bing_news.py b/searx/tests/engines/test_bing_news.py new file mode 100644 index 000000000..f22b80e87 --- /dev/null +++ b/searx/tests/engines/test_bing_news.py @@ -0,0 +1,236 @@ +from collections import defaultdict +import mock +from searx.engines import bing_news +from searx.testing import SearxTestCase + + +class TestBingNewsEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = bing_news.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('bing.com', params['url']) + self.assertIn('fr', params['url']) + self.assertIn('_FP', params['cookies']) + self.assertIn('en', params['cookies']['_FP']) + + dicto['language'] = 'all' + params = bing_news.request(query, dicto) + self.assertIn('en', params['url']) + self.assertIn('_FP', params['cookies']) + self.assertIn('en', params['cookies']['_FP']) + + def test_response(self): + self.assertRaises(AttributeError, bing_news.response, None) + self.assertRaises(AttributeError, bing_news.response, []) + self.assertRaises(AttributeError, bing_news.response, '') + self.assertRaises(AttributeError, bing_news.response, '[]') + + response = mock.Mock(content='') + self.assertEqual(bing_news.response(response), []) + + response = mock.Mock(content='') + self.assertEqual(bing_news.response(response), []) + + html = """ +
    + +
    + + + +
    +
    +
    + Article Content + + metronews.fr +  · + 44 minutes ago + +
    +
    +
    + """ + response = mock.Mock(content=html) + results = bing_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://url.of.article/') + self.assertEqual(results[0]['content'], 'Article Content') + + html = """ +
    + +
    + + + +
    +
    +
    + Article Content + + metronews.fr +  · + 44 minutes ago + +
    +
    +
    +
    + +
    + + + +
    +
    +
    + Article Content + + metronews.fr +  · + 3 hours, 44 minutes ago + +
    +
    +
    +
    + +
    + + + +
    +
    +
    + Article Content + + metronews.fr +  · + 44 hours ago + +
    +
    +
    +
    + +
    + + + +
    +
    +
    + Article Content + + metronews.fr +  · + 2 days ago + +
    +
    +
    +
    + +
    + + + +
    +
    +
    + Article Content + + metronews.fr +  · + 27/01/2015 + +
    +
    +
    +
    + +
    + + + +
    +
    +
    + Article Content + + metronews.fr +  · + Il y a 3 heures + +
    +
    +
    + """ + response = mock.Mock(content=html) + results = bing_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 6) + + html = """ + +
    + + + +
    +
    +
    + Article Content + + metronews.fr +  · + 44 minutes ago + +
    +
    + """ + response = mock.Mock(content=html) + results = bing_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index fab911d13..bfdd1de4c 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,5 +1,6 @@ from searx.tests.engines.test_bing import * # noqa from searx.tests.engines.test_bing_images import * # noqa +from searx.tests.engines.test_bing_news import * # noqa from searx.tests.engines.test_dailymotion import * # noqa from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_deviantart import * # noqa From 5761d6f0ab071bdae05ecef1966dd3e4cbec6eee Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Thu, 29 Jan 2015 21:19:59 +0100 Subject: [PATCH 16/28] Bing news engine corrections XPath *never* return None. (I found the HTML report of coverage) --- searx/engines/bing_news.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 182bd36b5..e6adb2644 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -59,16 +59,14 @@ def response(resp): url = link.attrib.get('href') title = extract_text(link) contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]') - if contentXPath is not None: - content = escape(extract_text(contentXPath)) + content = escape(extract_text(contentXPath)) # parse publishedDate publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div' '//span[contains(@class,"sn_ST")]' '//span[contains(@class,"sn_tm")]') - if publishedDateXPath is not None: - publishedDate = escape(extract_text(publishedDateXPath)) + publishedDate = escape(extract_text(publishedDateXPath)) if re.match("^[0-9]+ minute(s|) ago$", publishedDate): timeNumbers = re.findall(r'\d+', publishedDate) From a3d444ab85dbb85dc3200c686ec3323dbb7008cb Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Fri, 30 Jan 2015 19:52:44 +0100 Subject: [PATCH 17/28] BTDigg's unit test --- searx/engines/btdigg.py | 11 +- searx/tests/engines/test_btdigg.py | 384 +++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 388 insertions(+), 8 deletions(-) create mode 100644 searx/tests/engines/test_btdigg.py diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 973ede9ac..944250628 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -23,11 +23,6 @@ paging = True url = 'https://btdigg.org' search_url = url + '/search?q={search_term}&p={pageno}' -# specific xpath variables -magnet_xpath = './/a[@title="Torrent magnet link"]' -torrent_xpath = './/a[@title="Download torrent file"]' -content_xpath = './/span[@class="font11px lightgrey block"]' - # do search-request def request(query, params): @@ -52,8 +47,8 @@ def response(resp): # parse results for result in search_res: link = result.xpath('.//td[@class="torrent_name"]//a')[0] - href = urljoin(url, link.attrib['href']) - title = escape(extract_text(link.xpath('.//text()'))) + href = urljoin(url, link.attrib.get('href')) + title = escape(extract_text(link)) content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0])) content = "
    ".join(content.split("\n")) @@ -81,7 +76,7 @@ def response(resp): filesize = int(filesize * 1024 * 1024 * 1024) elif filesize_multiplier == 'MB': filesize = int(filesize * 1024 * 1024) - elif filesize_multiplier == 'kb': + elif filesize_multiplier == 'KB': filesize = int(filesize * 1024) except: filesize = None diff --git a/searx/tests/engines/test_btdigg.py b/searx/tests/engines/test_btdigg.py new file mode 100644 index 000000000..4947b71da --- /dev/null +++ b/searx/tests/engines/test_btdigg.py @@ -0,0 +1,384 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import btdigg +from searx.testing import SearxTestCase + + +class TestBtdiggEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = btdigg.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('btdigg.org', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, btdigg.response, None) + self.assertRaises(AttributeError, btdigg.response, []) + self.assertRaises(AttributeError, btdigg.response, '') + self.assertRaises(AttributeError, btdigg.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(btdigg.response(response), []) + + html = """ +
    + + + + + +
    1 + + + + +
    + Should be the title +
    + + + + + + + + + + + +
    + [magnet] + + [cloud] + + Taille: + 8 B + + Fichiers: + 710 + + Téléchargements: + 5 + + Temps: + 417.8 jours + + Dernière mise Ã  jour: + 5.3 jours + + Faux: + Aucun +
    +
    +                            Content
    +                        
    +
    +
    + """ + response = mock.Mock(text=html) + results = btdigg.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Should be the title') + self.assertEqual(results[0]['url'], 'https://btdigg.org/url') + self.assertEqual(results[0]['content'], 'Content') + self.assertEqual(results[0]['seed'], 5) + self.assertEqual(results[0]['leech'], 0) + self.assertEqual(results[0]['filesize'], 8) + self.assertEqual(results[0]['files'], 710) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:magnet&dn=Test') + + html = """ +
    + +
    +
    + """ + response = mock.Mock(text=html) + results = btdigg.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + html = """ +
    + + + + + + + + + + + + + + + + + + + + + +
    1 + + + + +
    + Should be the title +
    + + + + + + + + + + + +
    + [magnet] + + [cloud] + + Taille: + 1 KB + + Fichiers: + 710 + + Téléchargements: + 5 + + Temps: + 417.8 jours + + Dernière mise Ã  jour: + 5.3 jours + + Faux: + Aucun +
    +
    +                            Content
    +                        
    +
    1 + + + + +
    + Should be the title +
    + + + + + + + + + + + +
    + [magnet] + + [cloud] + + Taille: + 1 MB + + Fichiers: + a + + Téléchargements: + 4 + + Temps: + 417.8 jours + + Dernière mise Ã  jour: + 5.3 jours + + Faux: + Aucun +
    +
    +                            Content
    +                        
    +
    1 + + + + +
    + Should be the title +
    + + + + + + + + + + + +
    + [magnet] + + [cloud] + + Taille: + 1 GB + + Fichiers: + 710 + + Téléchargements: + 3 + + Temps: + 417.8 jours + + Dernière mise Ã  jour: + 5.3 jours + + Faux: + Aucun +
    +
    +                            Content
    +                        
    +
    1 + + + + +
    + Should be the title +
    + + + + + + + + + + + +
    + [magnet] + + [cloud] + + Taille: + 1 TB + + Fichiers: + 710 + + Téléchargements: + 2 + + Temps: + 417.8 jours + + Dernière mise Ã  jour: + 5.3 jours + + Faux: + Aucun +
    +
    +                            Content
    +                        
    +
    1 + + + + +
    + Should be the title +
    + + + + + + + + + + + +
    + [magnet] + + [cloud] + + Taille: + a TB + + Fichiers: + 710 + + Téléchargements: + z + + Temps: + 417.8 jours + + Dernière mise Ã  jour: + 5.3 jours + + Faux: + Aucun +
    +
    +                            Content
    +                        
    +
    +
    + """ + response = mock.Mock(text=html) + results = btdigg.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 5) + self.assertEqual(results[0]['title'], 'Should be the title') + self.assertEqual(results[0]['url'], 'https://btdigg.org/url') + self.assertEqual(results[0]['content'], 'Content') + self.assertEqual(results[0]['seed'], 5) + self.assertEqual(results[0]['leech'], 0) + self.assertEqual(results[0]['files'], 710) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:magnet&dn=Test') + self.assertEqual(results[0]['filesize'], 1024) + self.assertEqual(results[1]['filesize'], 1048576) + self.assertEqual(results[2]['filesize'], 1073741824) + self.assertEqual(results[3]['filesize'], 1099511627776) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index bfdd1de4c..b07444e42 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -1,6 +1,7 @@ from searx.tests.engines.test_bing import * # noqa from searx.tests.engines.test_bing_images import * # noqa from searx.tests.engines.test_bing_news import * # noqa +from searx.tests.engines.test_btdigg import * # noqa from searx.tests.engines.test_dailymotion import * # noqa from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_deviantart import * # noqa From 52a57ee045e02844a8f650a9d3ae30e0092d86cd Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Fri, 30 Jan 2015 21:00:49 +0100 Subject: [PATCH 18/28] Replace every bunch of whitespaces with only one space in HTML text --- searx/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/searx/utils.py b/searx/utils.py index 59d4b85be..ef221ef8e 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -119,6 +119,8 @@ class HTMLTextExtractor(HTMLParser): def html_to_text(html): + html = html.replace('\n', ' ') + html = ' '.join(html.split()) s = HTMLTextExtractor() s.feed(html) return s.get_text() From 8ea749d6ec0b711c516f3dbdb34a1bd17ae7d945 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Fri, 30 Jan 2015 21:02:17 +0100 Subject: [PATCH 19/28] Kickass' unit test --- searx/engines/kickass.py | 8 +- searx/tests/engines/test_kickass.py | 398 ++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 403 insertions(+), 4 deletions(-) create mode 100644 searx/tests/engines/test_kickass.py diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index ac349283d..8b89e1f47 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -13,6 +13,7 @@ from cgi import escape from urllib import quote from lxml import html from operator import itemgetter +from searx.engines.xpath import extract_text # engine dependent config categories = ['videos', 'music', 'files'] @@ -56,9 +57,8 @@ def response(resp): for result in search_res[1:]: link = result.xpath('.//a[@class="cellMainLink"]')[0] href = urljoin(url, link.attrib['href']) - title = ' '.join(link.xpath('.//text()')) - content = escape(html.tostring(result.xpath(content_xpath)[0], - method="text")) + title = extract_text(link) + content = escape(extract_text(result.xpath(content_xpath))) seed = result.xpath('.//td[contains(@class, "green")]/text()')[0] leech = result.xpath('.//td[contains(@class, "red")]/text()')[0] filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0] @@ -88,7 +88,7 @@ def response(resp): filesize = int(filesize * 1024 * 1024 * 1024) elif filesize_multiplier == 'MB': filesize = int(filesize * 1024 * 1024) - elif filesize_multiplier == 'kb': + elif filesize_multiplier == 'KB': filesize = int(filesize * 1024) except: filesize = None diff --git a/searx/tests/engines/test_kickass.py b/searx/tests/engines/test_kickass.py new file mode 100644 index 000000000..3c20a97e7 --- /dev/null +++ b/searx/tests/engines/test_kickass.py @@ -0,0 +1,398 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import kickass +from searx.testing import SearxTestCase + + +class TestKickassEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = kickass.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('kickass.so', params['url']) + self.assertIn('verify', params) + self.assertFalse(params['verify']) + + def test_response(self): + self.assertRaises(AttributeError, kickass.response, None) + self.assertRaises(AttributeError, kickass.response, []) + self.assertRaises(AttributeError, kickass.response, '') + self.assertRaises(AttributeError, kickass.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(kickass.response(response), []) + + html = """ + + + + + + + + + + + + + + + + + +
    torrent name + size + + files + + age + + seed + + leech +
    + +
    + + +
    + + This should be the title + + + Posted by + riri in + + Other > Unsorted + + +
    +
    449 bytes42 years101
    + """ + response = mock.Mock(text=html) + results = kickass.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This should be the title') + self.assertEqual(results[0]['url'], 'https://kickass.so/url.html') + self.assertEqual(results[0]['content'], 'Posted by riri in Other > Unsorted') + self.assertEqual(results[0]['seed'], 10) + self.assertEqual(results[0]['leech'], 1) + self.assertEqual(results[0]['filesize'], 449) + self.assertEqual(results[0]['files'], 4) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETURL&dn=test') + self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/53917.torrent?title=test') + + html = """ + + + + + + + + + +
    torrent name + size + + files + + age + + seed + + leech +
    + """ + response = mock.Mock(text=html) + results = kickass.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + html = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    torrent name + size + + files + + age + + seed + + leech +
    + +
    + + +
    + + This should be the title + + + Posted by + riri in + + Other > Unsorted + + +
    +
    1 KB42 years101
    + +
    + + +
    + + This should be the title + + + Posted by + riri in + + Other > Unsorted + + +
    +
    1 MB42 years91
    + +
    + + +
    + + This should be the title + + + Posted by + riri in + + Other > Unsorted + + +
    +
    1 GB42 years81
    + +
    + + +
    + + This should be the title + + + Posted by + riri in + + Other > Unsorted + + +
    +
    1 TB42 years71
    + +
    + + +
    + + This should be the title + + + Posted by + riri in + + Other > Unsorted + + +
    +
    z bytesr2 yearsat
    + """ + response = mock.Mock(text=html) + results = kickass.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 5) + self.assertEqual(results[0]['title'], 'This should be the title') + self.assertEqual(results[0]['url'], 'https://kickass.so/url.html') + self.assertEqual(results[0]['content'], 'Posted by riri in Other > Unsorted') + self.assertEqual(results[0]['seed'], 10) + self.assertEqual(results[0]['leech'], 1) + self.assertEqual(results[0]['files'], 4) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETURL&dn=test') + self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/53917.torrent?title=test') + self.assertEqual(results[0]['filesize'], 1024) + self.assertEqual(results[1]['filesize'], 1048576) + self.assertEqual(results[2]['filesize'], 1073741824) + self.assertEqual(results[3]['filesize'], 1099511627776) + self.assertEqual(results[4]['seed'], 0) + self.assertEqual(results[4]['leech'], 0) + self.assertEqual(results[4]['files'], None) + self.assertEqual(results[4]['filesize'], None) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index b07444e42..66f8fbff7 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -9,6 +9,7 @@ from searx.tests.engines.test_digg import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_github import * # noqa +from searx.tests.engines.test_kickass import * # noqa from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa From d5b8005ee10054b5260f57c1800ddebfa03c39cf Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 31 Jan 2015 16:16:30 +0100 Subject: [PATCH 20/28] Google images' unit test --- searx/engines/google_images.py | 6 +- searx/tests/engines/test_google_images.py | 108 ++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 searx/tests/engines/test_google_images.py diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index cc62a4fd2..092ae6639 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -18,7 +18,7 @@ paging = True # search-url url = 'https://ajax.googleapis.com/' -search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}' # noqa +search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}' # do search-request @@ -45,14 +45,14 @@ def response(resp): for result in search_res['responseData']['results']: href = result['originalContextUrl'] title = result['title'] - if not result['url']: + if 'url' not in result: continue thumbnail_src = result['tbUrl'] # append result results.append({'url': href, 'title': title, - 'content': '', + 'content': result['content'], 'thumbnail_src': thumbnail_src, 'img_src': unquote(result['url']), 'template': 'images.html'}) diff --git a/searx/tests/engines/test_google_images.py b/searx/tests/engines/test_google_images.py new file mode 100644 index 000000000..6870ff52f --- /dev/null +++ b/searx/tests/engines/test_google_images.py @@ -0,0 +1,108 @@ +from collections import defaultdict +import mock +from searx.engines import google_images +from searx.testing import SearxTestCase + + +class TestGoogleImagesEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = google_images.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('googleapis.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, google_images.response, None) + self.assertRaises(AttributeError, google_images.response, []) + self.assertRaises(AttributeError, google_images.response, '') + self.assertRaises(AttributeError, google_images.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(google_images.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(google_images.response(response), []) + + json = """ + { + "responseData": { + "results": [ + { + "GsearchResultClass": "GimageSearch", + "width": "400", + "height": "400", + "imageId": "ANd9GcQbYb9FJuAbG_hT4i8FeC0O0x-P--EHdzgRIF9ao97nHLl7C2mREn6qTQ", + "tbWidth": "124", + "tbHeight": "124", + "unescapedUrl": "http://unescaped.url.jpg", + "url": "http://image.url.jpg", + "visibleUrl": "insolitebuzz.fr", + "title": "This is the title", + "titleNoFormatting": "Petit test sympa qui rend fou tout le monde ! A faire", + "originalContextUrl": "http://this.is.the.url", + "content": "test", + "contentNoFormatting": "test", + "tbUrl": "http://thumbnail.url" + } + ] + }, + "responseDetails": null, + "responseStatus": 200 + } + """ + response = mock.Mock(text=json) + results = google_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url') + self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.url') + self.assertEqual(results[0]['img_src'], 'http://image.url.jpg') + self.assertEqual(results[0]['content'], 'test') + + json = """ + { + "responseData": { + "results": [ + { + "GsearchResultClass": "GimageSearch", + "width": "400", + "height": "400", + "imageId": "ANd9GcQbYb9FJuAbG_hT4i8FeC0O0x-P--EHdzgRIF9ao97nHLl7C2mREn6qTQ", + "tbWidth": "124", + "tbHeight": "124", + "unescapedUrl": "http://unescaped.url.jpg", + "visibleUrl": "insolitebuzz.fr", + "title": "This is the title", + "titleNoFormatting": "Petit test sympa qui rend fou tout le monde ! A faire", + "originalContextUrl": "http://this.is.the.url", + "content": "test", + "contentNoFormatting": "test", + "tbUrl": "http://thumbnail.url" + } + ] + }, + "responseDetails": null, + "responseStatus": 200 + } + """ + response = mock.Mock(text=json) + results = google_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "responseData": {}, + "responseDetails": null, + "responseStatus": 200 + } + """ + response = mock.Mock(text=json) + results = google_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 66f8fbff7..e609f9a5c 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -9,6 +9,7 @@ from searx.tests.engines.test_digg import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_github import * # noqa +from searx.tests.engines.test_google_images import * # noqa from searx.tests.engines.test_kickass import * # noqa from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa From b7dc1fb9d572d53d04c0120d96c76a20a418cc94 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 31 Jan 2015 16:38:03 +0100 Subject: [PATCH 21/28] Google news' unit test --- searx/engines/google_news.py | 6 +- searx/tests/engines/test_google_news.py | 136 ++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 searx/tests/engines/test_google_news.py diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index eb114f9c9..3e4371b99 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -20,7 +20,7 @@ language_support = True # engine dependent config url = 'https://ajax.googleapis.com/' -search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa +search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={lang}' # do search-request @@ -33,7 +33,7 @@ def request(query, params): params['url'] = search_url.format(offset=offset, query=urlencode({'q': query}), - language=language) + lang=language) return params @@ -52,6 +52,8 @@ def response(resp): for result in search_res['responseData']['results']: # parse publishedDate publishedDate = parser.parse(result['publishedDate']) + if 'url' not in result: + continue # append result results.append({'url': result['unescapedUrl'], diff --git a/searx/tests/engines/test_google_news.py b/searx/tests/engines/test_google_news.py new file mode 100644 index 000000000..31d674121 --- /dev/null +++ b/searx/tests/engines/test_google_news.py @@ -0,0 +1,136 @@ +from collections import defaultdict +import mock +from searx.engines import google_news +from searx.testing import SearxTestCase + + +class TestGoogleNewsEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = google_news.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('googleapis.com', params['url']) + self.assertIn('fr', params['url']) + + dicto['language'] = 'all' + params = google_news.request(query, dicto) + self.assertIn('url', params) + self.assertIn('en', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, google_news.response, None) + self.assertRaises(AttributeError, google_news.response, []) + self.assertRaises(AttributeError, google_news.response, '') + self.assertRaises(AttributeError, google_news.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(google_news.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(google_news.response(response), []) + + json = """ + { + "responseData": { + "results": [ + { + "GsearchResultClass": "GnewsSearch", + "clusterUrl": "http://news.google.com/news/story?ncl=d2d3t1LMDpNIj2MPPhdTT0ycN4sWM&hl=fr&ned=fr", + "content": "This is the content", + "unescapedUrl": "http://this.is.the.url", + "url": "http://this.is.the.url", + "title": "This is the title", + "titleNoFormatting": "This is the title", + "location": "", + "publisher": "Jeux Actu", + "publishedDate": "Fri, 30 Jan 2015 11:00:25 -0800", + "signedRedirectUrl": "http://news.google.com/", + "language": "fr", + "image": { + "url": "http://i.jeuxactus.com/datas/jeux/d/y/dying-light/vu/dying-light-54cc080b568fb.jpg", + "tbUrl": "http://t1.gstatic.com/images?q=tbn:ANd9GcSF4yYrs9Ycw23DGiOSAZ-5SEPXYwG3LNs", + "originalContextUrl": "http://www.jeuxactu.com/test-dying-light-sur-ps4-97208.htm", + "publisher": "Jeux Actu", + "tbWidth": 80, + "tbHeight": 30 + }, + "relatedStories": [ + { + "unescapedUrl": "http://www.jeuxvideo.com/test/415823/dying-light.htm", + "url": "http%3A%2F%2Fwww.jeuxvideo.com%2Ftest%2F415823%2Fdying-light.htm", + "title": "Test du jeu Dying Light - jeuxvideo.com", + "titleNoFormatting": "Test du jeu Dying Light - jeuxvideo.com", + "location": "", + "publisher": "JeuxVideo.com", + "publishedDate": "Fri, 30 Jan 2015 08:52:30 -0800", + "signedRedirectUrl": "http://news.google.com/news/url?sa=T&", + "language": "fr" + } + ] + } + ] + }, + "responseDetails": null, + "responseStatus": 200 + } + """ + response = mock.Mock(text=json) + results = google_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url') + self.assertEqual(results[0]['content'], 'This is the content') + + json = """ + { + "responseData": { + "results": [ + { + "GsearchResultClass": "GnewsSearch", + "clusterUrl": "http://news.google.com/news/story?ncl=d2d3t1LMDpNIj2MPPhdTT0ycN4sWM&hl=fr&ned=fr", + "content": "This is the content", + "unescapedUrl": "http://this.is.the.url", + "title": "This is the title", + "titleNoFormatting": "This is the title", + "location": "", + "publisher": "Jeux Actu", + "publishedDate": "Fri, 30 Jan 2015 11:00:25 -0800", + "signedRedirectUrl": "http://news.google.com/news/", + "language": "fr", + "image": { + "url": "http://i.jeuxactus.com/datas/jeux/d/y/dying-light/vu/dying-light-54cc080b568fb.jpg", + "tbUrl": "http://t1.gstatic.com/images?q=tbn:b_6f-OSAZ-5SEPXYwG3LNs", + "originalContextUrl": "http://www.jeuxactu.com/test-dying-light-sur-ps4-97208.htm", + "publisher": "Jeux Actu", + "tbWidth": 80, + "tbHeight": 30 + } + } + ] + }, + "responseDetails": null, + "responseStatus": 200 + } + """ + response = mock.Mock(text=json) + results = google_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "responseData": {}, + "responseDetails": null, + "responseStatus": 200 + } + """ + response = mock.Mock(text=json) + results = google_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index e609f9a5c..00ac8ffdf 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -10,6 +10,7 @@ from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_github import * # noqa from searx.tests.engines.test_google_images import * # noqa +from searx.tests.engines.test_google_news import * # noqa from searx.tests.engines.test_kickass import * # noqa from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa From 787fee6a09f5569f67e7bddaf73d52e159c0431c Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 31 Jan 2015 17:10:37 +0100 Subject: [PATCH 22/28] Soundcloud's unit test --- searx/tests/engines/test_soundcloud.py | 192 +++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 2 files changed, 193 insertions(+) create mode 100644 searx/tests/engines/test_soundcloud.py diff --git a/searx/tests/engines/test_soundcloud.py b/searx/tests/engines/test_soundcloud.py new file mode 100644 index 000000000..85495dc57 --- /dev/null +++ b/searx/tests/engines/test_soundcloud.py @@ -0,0 +1,192 @@ +from collections import defaultdict +import mock +from searx.engines import soundcloud +from searx.testing import SearxTestCase +from urllib import quote_plus + + +class TestSoundcloudEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = soundcloud.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('soundcloud.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, soundcloud.response, None) + self.assertRaises(AttributeError, soundcloud.response, []) + self.assertRaises(AttributeError, soundcloud.response, '') + self.assertRaises(AttributeError, soundcloud.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(soundcloud.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(soundcloud.response(response), []) + + json = """ + { + "collection": [ + { + "kind": "track", + "id": 159723640, + "created_at": "2014/07/22 00:51:21 +0000", + "user_id": 2976616, + "duration": 303780, + "commentable": true, + "state": "finished", + "original_content_size": 13236349, + "last_modified": "2015/01/31 15:14:50 +0000", + "sharing": "public", + "tag_list": "seekae flume", + "permalink": "seekae-test-recognise-flume-re-work", + "streamable": true, + "embeddable_by": "all", + "downloadable": true, + "purchase_url": "http://www.facebook.com/seekaemusic", + "label_id": null, + "purchase_title": "Seekae", + "genre": "freedownload", + "title": "This is the title", + "description": "This is the content", + "label_name": "Future Classic", + "release": "", + "track_type": "remix", + "key_signature": "", + "isrc": "", + "video_url": null, + "bpm": null, + "release_year": 2014, + "release_month": 7, + "release_day": 22, + "original_format": "mp3", + "license": "all-rights-reserved", + "uri": "https://api.soundcloud.com/tracks/159723640", + "user": { + "id": 2976616, + "kind": "user", + "permalink": "flume", + "username": "Flume", + "last_modified": "2014/11/24 19:21:29 +0000", + "uri": "https://api.soundcloud.com/users/2976616", + "permalink_url": "http://soundcloud.com/flume", + "avatar_url": "https://i1.sndcdn.com/avatars-000044475439-4zi7ii-large.jpg" + }, + "permalink_url": "http://soundcloud.com/this.is.the.url", + "artwork_url": "https://i1.sndcdn.com/artworks-000085857162-xdxy5c-large.jpg", + "waveform_url": "https://w1.sndcdn.com/DWrL1lAN8BkP_m.png", + "stream_url": "https://api.soundcloud.com/tracks/159723640/stream", + "download_url": "https://api.soundcloud.com/tracks/159723640/download", + "playback_count": 2190687, + "download_count": 54856, + "favoritings_count": 49061, + "comment_count": 826, + "likes_count": 49061, + "reposts_count": 15910, + "attachments_uri": "https://api.soundcloud.com/tracks/159723640/attachments", + "policy": "ALLOW" + } + ], + "total_results": 375750, + "next_href": "https://api.soundcloud.com/search?&q=test", + "tx_id": "" + } + """ + response = mock.Mock(text=json) + results = soundcloud.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://soundcloud.com/this.is.the.url') + self.assertEqual(results[0]['content'], 'This is the content') + self.assertIn(quote_plus('https://api.soundcloud.com/tracks/159723640'), results[0]['embedded']) + + json = """ + { + "collection": [ + { + "kind": "user", + "id": 159723640, + "created_at": "2014/07/22 00:51:21 +0000", + "user_id": 2976616, + "duration": 303780, + "commentable": true, + "state": "finished", + "original_content_size": 13236349, + "last_modified": "2015/01/31 15:14:50 +0000", + "sharing": "public", + "tag_list": "seekae flume", + "permalink": "seekae-test-recognise-flume-re-work", + "streamable": true, + "embeddable_by": "all", + "downloadable": true, + "purchase_url": "http://www.facebook.com/seekaemusic", + "label_id": null, + "purchase_title": "Seekae", + "genre": "freedownload", + "title": "This is the title", + "description": "This is the content", + "label_name": "Future Classic", + "release": "", + "track_type": "remix", + "key_signature": "", + "isrc": "", + "video_url": null, + "bpm": null, + "release_year": 2014, + "release_month": 7, + "release_day": 22, + "original_format": "mp3", + "license": "all-rights-reserved", + "uri": "https://api.soundcloud.com/tracks/159723640", + "user": { + "id": 2976616, + "kind": "user", + "permalink": "flume", + "username": "Flume", + "last_modified": "2014/11/24 19:21:29 +0000", + "uri": "https://api.soundcloud.com/users/2976616", + "permalink_url": "http://soundcloud.com/flume", + "avatar_url": "https://i1.sndcdn.com/avatars-000044475439-4zi7ii-large.jpg" + }, + "permalink_url": "http://soundcloud.com/this.is.the.url", + "artwork_url": "https://i1.sndcdn.com/artworks-000085857162-xdxy5c-large.jpg", + "waveform_url": "https://w1.sndcdn.com/DWrL1lAN8BkP_m.png", + "stream_url": "https://api.soundcloud.com/tracks/159723640/stream", + "download_url": "https://api.soundcloud.com/tracks/159723640/download", + "playback_count": 2190687, + "download_count": 54856, + "favoritings_count": 49061, + "comment_count": 826, + "likes_count": 49061, + "reposts_count": 15910, + "attachments_uri": "https://api.soundcloud.com/tracks/159723640/attachments", + "policy": "ALLOW" + } + ], + "total_results": 375750, + "next_href": "https://api.soundcloud.com/search?&q=test", + "tx_id": "" + } + """ + response = mock.Mock(text=json) + results = soundcloud.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "collection": [], + "total_results": 375750, + "next_href": "https://api.soundcloud.com/search?&q=test", + "tx_id": "" + } + """ + response = mock.Mock(text=json) + results = soundcloud.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 00ac8ffdf..4ed1a9bba 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -15,4 +15,5 @@ from searx.tests.engines.test_kickass import * # noqa from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa +from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_youtube import * # noqa From d20ddf9da147647710127385a3ee95ff273d4fea Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 31 Jan 2015 17:29:22 +0100 Subject: [PATCH 23/28] Stackoverflow's unit test --- searx/engines/stackoverflow.py | 8 +- searx/tests/engines/test_stackoverflow.py | 106 ++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 searx/tests/engines/test_stackoverflow.py diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index dcbb1890c..78dba9f68 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -12,6 +12,7 @@ from urlparse import urljoin from cgi import escape from urllib import urlencode from lxml import html +from searx.engines.xpath import extract_text # engine dependent config categories = ['it'] @@ -24,8 +25,7 @@ search_url = url+'search?{query}&page={pageno}' # specific xpath variables results_xpath = '//div[contains(@class,"question-summary")]' link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a' -title_xpath = './/text()' -content_xpath = './/div[@class="excerpt"]//text()' +content_xpath = './/div[@class="excerpt"]' # do search-request @@ -46,8 +46,8 @@ def response(resp): for result in dom.xpath(results_xpath): link = result.xpath(link_xpath)[0] href = urljoin(url, link.attrib.get('href')) - title = escape(' '.join(link.xpath(title_xpath))) - content = escape(' '.join(result.xpath(content_xpath))) + title = escape(extract_text(link)) + content = escape(extract_text(result.xpath(content_xpath))) # append result results.append({'url': href, diff --git a/searx/tests/engines/test_stackoverflow.py b/searx/tests/engines/test_stackoverflow.py new file mode 100644 index 000000000..e69bafb4c --- /dev/null +++ b/searx/tests/engines/test_stackoverflow.py @@ -0,0 +1,106 @@ +from collections import defaultdict +import mock +from searx.engines import stackoverflow +from searx.testing import SearxTestCase + + +class TestStackoverflowEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = stackoverflow.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('stackoverflow.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, stackoverflow.response, None) + self.assertRaises(AttributeError, stackoverflow.response, []) + self.assertRaises(AttributeError, stackoverflow.response, '') + self.assertRaises(AttributeError, stackoverflow.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(stackoverflow.response(response), []) + + html = """ +
    +
    +
    +
    +
    +
    + 2583 +
    votes
    +
    +
    +
    +
    +
    + +
    + This is the content +
    +
    +
    +
    + answered nov 23 '09 by + hallski +
    +
    +
    + """ + response = mock.Mock(text=html) + results = stackoverflow.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://stackoverflow.com/questions/this.is.the.url') + self.assertEqual(results[0]['content'], 'This is the content') + + html = """ +
    +
    +
    +
    +
    + 2583 +
    votes
    +
    +
    +
    +
    +
    + +
    + This is the content +
    +
    +
    +
    + answered nov 23 '09 by + hallski +
    +
    + """ + response = mock.Mock(text=html) + results = stackoverflow.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 4ed1a9bba..31ad9cd4e 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -16,4 +16,5 @@ from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa +from searx.tests.engines.test_stackoverflow import * # noqa from searx.tests.engines.test_youtube import * # noqa From 04fa31b7f4d45182fa4ced6d6e23fd9ec4960d2e Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 31 Jan 2015 19:49:54 +0100 Subject: [PATCH 24/28] Vimeo's unit test --- searx/engines/vimeo.py | 3 +- searx/tests/engines/test_vimeo.py | 84 +++++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 searx/tests/engines/test_vimeo.py diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index 39033c591..7577d12e1 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -59,8 +59,7 @@ def response(resp): url = base_url + videoid title = p.unescape(extract_text(result.xpath(title_xpath))) thumbnail = extract_text(result.xpath(content_xpath)[0]) - publishedDate = parser.parse(extract_text( - result.xpath(publishedDate_xpath)[0])) + publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0])) embedded = embedded_url.format(videoid=videoid) # append result diff --git a/searx/tests/engines/test_vimeo.py b/searx/tests/engines/test_vimeo.py new file mode 100644 index 000000000..24b3ad897 --- /dev/null +++ b/searx/tests/engines/test_vimeo.py @@ -0,0 +1,84 @@ +from collections import defaultdict +import mock +from searx.engines import vimeo +from searx.testing import SearxTestCase + + +class TestVimeoEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = vimeo.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('vimeo.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, vimeo.response, None) + self.assertRaises(AttributeError, vimeo.response, []) + self.assertRaises(AttributeError, vimeo.response, '') + self.assertRaises(AttributeError, vimeo.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(vimeo.response(response), []) + + html = """ + + """ + response = mock.Mock(text=html) + results = vimeo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://vimeo.com/videoid') + self.assertEqual(results[0]['content'], '') + self.assertEqual(results[0]['thumbnail'], 'http://image.url.webp') + self.assertIn('/videoid', results[0]['embedded']) + + html = """ +
      +
    1. + + +
      +

      + This is the title +

      +

      + +

      +
      +
      +
    2. +
    + """ + response = mock.Mock(text=html) + results = vimeo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 31ad9cd4e..27acc067a 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -17,4 +17,5 @@ from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa +from searx.tests.engines.test_vimeo import * # noqa from searx.tests.engines.test_youtube import * # noqa From f18807955beceb86a99963feedee8355f31c481c Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 31 Jan 2015 22:05:13 +0100 Subject: [PATCH 25/28] [mod] python importable engine names --- searx/engines/{flickr-noapi.py => flickr_noapi.py} | 0 searx/engines/{500px.py => www500px.py} | 0 searx/settings.yml | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename searx/engines/{flickr-noapi.py => flickr_noapi.py} (100%) rename searx/engines/{500px.py => www500px.py} (100%) diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr_noapi.py similarity index 100% rename from searx/engines/flickr-noapi.py rename to searx/engines/flickr_noapi.py diff --git a/searx/engines/500px.py b/searx/engines/www500px.py similarity index 100% rename from searx/engines/500px.py rename to searx/engines/www500px.py diff --git a/searx/settings.yml b/searx/settings.yml index 8f63203c9..b0a2853c7 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -80,7 +80,7 @@ engines: # shortcut : fc - name : 500px - engine : 500px + engine : www500px shortcut : px - name : flickr @@ -91,7 +91,7 @@ engines: # engine : flickr # api_key: 'apikey' # required! # Or you can use the html non-stable engine, activated by default - engine : flickr-noapi + engine : flickr_noapi - name : general-file engine : generalfile From 8cf2ee57216b4dffc419e1762ff1fe4dfd30e227 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 1 Feb 2015 13:43:10 +0100 Subject: [PATCH 26/28] 500px unit test --- searx/engines/www500px.py | 11 ++-- searx/tests/engines/test_www500px.py | 83 ++++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 searx/tests/engines/test_www500px.py diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py index f25678c24..99dba4abf 100644 --- a/searx/engines/www500px.py +++ b/searx/engines/www500px.py @@ -15,6 +15,7 @@ from urllib import urlencode from urlparse import urljoin from lxml import html import re +from searx.engines.xpath import extract_text # engine dependent config categories = ['images'] @@ -22,7 +23,7 @@ paging = True # search-url base_url = 'https://500px.com' -search_url = base_url+'/search?search?page={pageno}&type=photos&{query}' +search_url = base_url + '/search?search?page={pageno}&type=photos&{query}' # do search-request @@ -44,11 +45,11 @@ def response(resp): for result in dom.xpath('//div[@class="photo"]'): link = result.xpath('.//a')[0] url = urljoin(base_url, link.attrib.get('href')) - title = result.xpath('.//div[@class="title"]//text()')[0] - thumbnail_src = link.xpath('.//img')[0].attrib['src'] + title = extract_text(result.xpath('.//div[@class="title"]')) + thumbnail_src = link.xpath('.//img')[0].attrib.get('src') # To have a bigger thumbnail, uncomment the next line - #thumbnail_src = regex.sub('4.jpg', thumbnail_src) - content = result.xpath('.//div[@class="info"]//text()')[0] + # thumbnail_src = regex.sub('4.jpg', thumbnail_src) + content = extract_text(result.xpath('.//div[@class="info"]')) img_src = regex.sub('2048.jpg', thumbnail_src) # append result diff --git a/searx/tests/engines/test_www500px.py b/searx/tests/engines/test_www500px.py new file mode 100644 index 000000000..8df15b945 --- /dev/null +++ b/searx/tests/engines/test_www500px.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import www500px +from searx.testing import SearxTestCase + + +class TestWww500pxImagesEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = www500px.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('500px.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, www500px.response, None) + self.assertRaises(AttributeError, www500px.response, []) + self.assertRaises(AttributeError, www500px.response, '') + self.assertRaises(AttributeError, www500px.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(www500px.response(response), []) + + html = """ + + """ + response = mock.Mock(text=html) + results = www500px.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://500px.com/this.should.be.the.url') + self.assertEqual(results[0]['content'], 'This is the content') + self.assertEqual(results[0]['thumbnail_src'], 'https://image.url/3.jpg?v=0') + self.assertEqual(results[0]['img_src'], 'https://image.url/2048.jpg') + + html = """ + + + + + """ + response = mock.Mock(text=html) + results = www500px.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 27acc067a..94f479dae 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -18,4 +18,5 @@ from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa from searx.tests.engines.test_vimeo import * # noqa +from searx.tests.engines.test_www500px import * # noqa from searx.tests.engines.test_youtube import * # noqa From c6535dd65ebf110d00d633db1170f35cf60b8df0 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 1 Feb 2015 14:31:04 +0100 Subject: [PATCH 27/28] Flickr Noapi unit test --- searx/engines/flickr_noapi.py | 2 +- searx/tests/engines/test_flickr_noapi.py | 442 +++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 3 files changed, 444 insertions(+), 1 deletion(-) create mode 100644 searx/tests/engines/test_flickr_noapi.py diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 66c6f4027..73dff44c4 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -17,7 +17,7 @@ import re categories = ['images'] url = 'https://secure.flickr.com/' -search_url = url+'search/?{query}&page={page}' +search_url = url + 'search/?{query}&page={page}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') diff --git a/searx/tests/engines/test_flickr_noapi.py b/searx/tests/engines/test_flickr_noapi.py new file mode 100644 index 000000000..a1de3a5e4 --- /dev/null +++ b/searx/tests/engines/test_flickr_noapi.py @@ -0,0 +1,442 @@ +from collections import defaultdict +import mock +from searx.engines import flickr_noapi +from searx.testing import SearxTestCase + + +class TestFlickrNoapiEngine(SearxTestCase): + + def test_build_flickr_url(self): + url = flickr_noapi.build_flickr_url("uid", "pid") + self.assertIn("uid", url) + self.assertIn("pid", url) + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = flickr_noapi.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('flickr.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, flickr_noapi.response, None) + self.assertRaises(AttributeError, flickr_noapi.response, []) + self.assertRaises(AttributeError, flickr_noapi.response, '') + self.assertRaises(AttributeError, flickr_noapi.response, '[]') + + response = mock.Mock(text='"search-photos-models","photos":{},"totalItems":') + self.assertEqual(flickr_noapi.response(response), []) + + response = mock.Mock(text='search-photos-models","photos":{"data": []},"totalItems":') + self.assertEqual(flickr_noapi.response(response), []) + + json = """ + "search-photos-models","photos": + { + "_data": [ + { + "_flickrModelRegistry": "photo-models", + "title": "This is the title", + "sizes": { + "c": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_c.jpg", + "width": 541, + "height": 800, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_c.jpg", + "key": "c" + }, + "h": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_761d32237a_h.jpg", + "width": 1081, + "height": 1600, + "url": "//c4.staticflickr.com/8/7246/14001294434_761d32237a_h.jpg", + "key": "h" + }, + "k": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_f145a2c11a_k.jpg", + "width": 1383, + "height": 2048, + "url": "//c4.staticflickr.com/8/7246/14001294434_f145a2c11a_k.jpg", + "key": "k" + }, + "l": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_b.jpg", + "width": 692, + "height": 1024, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_b.jpg", + "key": "l" + }, + "m": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777.jpg", + "width": 338, + "height": 500, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777.jpg", + "key": "m" + }, + "n": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_n.jpg", + "width": 216, + "height": 320, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_n.jpg", + "key": "n" + }, + "q": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_q.jpg", + "width": 150, + "height": 150, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_q.jpg", + "key": "q" + }, + "s": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_m.jpg", + "width": 162, + "height": 240, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_m.jpg", + "key": "s" + }, + "sq": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_s.jpg", + "width": 75, + "height": 75, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_s.jpg", + "key": "sq" + }, + "t": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_t.jpg", + "width": 68, + "height": 100, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_t.jpg", + "key": "t" + }, + "z": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_z.jpg", + "width": 433, + "height": 640, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_z.jpg", + "key": "z" + } + }, + "canComment": false, + "rotation": 0, + "owner": { + "_flickrModelRegistry": "person-models", + "pathAlias": "klink692", + "username": "Owner", + "buddyicon": { + "retina": null, + "large": null, + "medium": null, + "small": null, + "default": "//c1.staticflickr.com/9/8108/buddyicons/59729010@N00.jpg?1361642376#59729010@N00" + }, + "isPro": true, + "id": "59729010@N00" + }, + "engagement": { + "_flickrModelRegistry": "photo-engagement-models", + "ownerNsid": "59729010@N00", + "faveCount": 21, + "commentCount": 14, + "viewCount": 10160, + "id": "14001294434" + }, + "description": "Description", + "isHD": false, + "secret": "410f653777", + "canAddMeta": false, + "license": 0, + "oWidth": 1803, + "oHeight": 2669, + "safetyLevel": 0, + "id": "14001294434" + } + ], + "fetchedStart": true, + "fetchedEnd": false, + "totalItems": "4386039" + },"totalItems": + """ + json = json.replace('\r\n', '').replace('\n', '').replace('\r', '') + response = mock.Mock(text=json) + results = flickr_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://www.flickr.com/photos/59729010@N00/14001294434') + self.assertIn('k.jpg', results[0]['img_src']) + self.assertIn('n.jpg', results[0]['thumbnail_src']) + self.assertIn('Owner', results[0]['content']) + self.assertIn('Description', results[0]['content']) + + json = """ + "search-photos-models","photos": + { + "_data": [ + { + "_flickrModelRegistry": "photo-models", + "title": "This is the title", + "sizes": { + "z": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_z.jpg", + "width": 433, + "height": 640, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_z.jpg", + "key": "z" + } + }, + "canComment": false, + "rotation": 0, + "owner": { + "_flickrModelRegistry": "person-models", + "pathAlias": "klink692", + "username": "Owner", + "buddyicon": { + "retina": null, + "large": null, + "medium": null, + "small": null, + "default": "//c1.staticflickr.com/9/8108/buddyicons/59729010@N00.jpg?1361642376#59729010@N00" + }, + "isPro": true, + "id": "59729010@N00" + }, + "engagement": { + "_flickrModelRegistry": "photo-engagement-models", + "ownerNsid": "59729010@N00", + "faveCount": 21, + "commentCount": 14, + "viewCount": 10160, + "id": "14001294434" + }, + "description": "Description", + "isHD": false, + "secret": "410f653777", + "canAddMeta": false, + "license": 0, + "oWidth": 1803, + "oHeight": 2669, + "safetyLevel": 0, + "id": "14001294434" + } + ], + "fetchedStart": true, + "fetchedEnd": false, + "totalItems": "4386039" + },"totalItems": + """ + response = mock.Mock(text=json) + results = flickr_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://www.flickr.com/photos/59729010@N00/14001294434') + self.assertIn('z.jpg', results[0]['img_src']) + self.assertIn('z.jpg', results[0]['thumbnail_src']) + self.assertIn('Owner', results[0]['content']) + self.assertIn('Description', results[0]['content']) + + json = """ + "search-photos-models","photos": + { + "_data": [ + { + "_flickrModelRegistry": "photo-models", + "title": "This is the title", + "sizes": { + "o": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_o.jpg", + "width": 433, + "height": 640, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_o.jpg", + "key": "o" + } + }, + "canComment": false, + "rotation": 0, + "owner": { + "_flickrModelRegistry": "person-models", + "pathAlias": "klink692", + "username": "Owner", + "buddyicon": { + "retina": null, + "large": null, + "medium": null, + "small": null, + "default": "//c1.staticflickr.com/9/8108/buddyicons/59729010@N00.jpg?1361642376#59729010@N00" + }, + "isPro": true, + "id": "59729010@N00" + }, + "engagement": { + "_flickrModelRegistry": "photo-engagement-models", + "ownerNsid": "59729010@N00", + "faveCount": 21, + "commentCount": 14, + "viewCount": 10160, + "id": "14001294434" + }, + "isHD": false, + "secret": "410f653777", + "canAddMeta": false, + "license": 0, + "oWidth": 1803, + "oHeight": 2669, + "safetyLevel": 0, + "id": "14001294434" + } + ], + "fetchedStart": true, + "fetchedEnd": false, + "totalItems": "4386039" + },"totalItems": + """ + response = mock.Mock(text=json) + results = flickr_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://www.flickr.com/photos/59729010@N00/14001294434') + self.assertIn('o.jpg', results[0]['img_src']) + self.assertIn('o.jpg', results[0]['thumbnail_src']) + self.assertIn('Owner', results[0]['content']) + + json = """ + "search-photos-models","photos": + { + "_data": [ + { + "_flickrModelRegistry": "photo-models", + "title": "This is the title", + "sizes": { + }, + "canComment": false, + "rotation": 0, + "owner": { + "_flickrModelRegistry": "person-models", + "pathAlias": "klink692", + "username": "Owner", + "buddyicon": { + "retina": null, + "large": null, + "medium": null, + "small": null, + "default": "//c1.staticflickr.com/9/8108/buddyicons/59729010@N00.jpg?1361642376#59729010@N00" + }, + "isPro": true, + "id": "59729010@N00" + }, + "engagement": { + "_flickrModelRegistry": "photo-engagement-models", + "ownerNsid": "59729010@N00", + "faveCount": 21, + "commentCount": 14, + "viewCount": 10160, + "id": "14001294434" + }, + "description": "Description", + "isHD": false, + "secret": "410f653777", + "canAddMeta": false, + "license": 0, + "oWidth": 1803, + "oHeight": 2669, + "safetyLevel": 0, + "id": "14001294434" + } + ], + "fetchedStart": true, + "fetchedEnd": false, + "totalItems": "4386039" + },"totalItems": + """ + response = mock.Mock(text=json) + results = flickr_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + "search-photos-models","photos": + { + "_data": [null], + "fetchedStart": true, + "fetchedEnd": false, + "totalItems": "4386039" + },"totalItems": + """ + response = mock.Mock(text=json) + results = flickr_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + "search-photos-models","photos": + { + "_data": [ + { + "_flickrModelRegistry": "photo-models", + "title": "This is the title", + "sizes": { + "o": { + "displayUrl": "//farm8.staticflickr.com/7246/14001294434_410f653777_o.jpg", + "width": 433, + "height": 640, + "url": "//c4.staticflickr.com/8/7246/14001294434_410f653777_o.jpg", + "key": "o" + } + }, + "canComment": false, + "rotation": 0, + "owner": { + "_flickrModelRegistry": "person-models", + "pathAlias": "klink692", + "username": "Owner", + "buddyicon": { + "retina": null, + "large": null, + "medium": null, + "small": null, + "default": "//c1.staticflickr.com/9/8108/buddyicons/59729010@N00.jpg?1361642376#59729010@N00" + }, + "isPro": true + }, + "engagement": { + "_flickrModelRegistry": "photo-engagement-models", + "ownerNsid": "59729010@N00", + "faveCount": 21, + "commentCount": 14, + "viewCount": 10160, + "id": "14001294434" + }, + "description": "Description", + "isHD": false, + "secret": "410f653777", + "canAddMeta": false, + "license": 0, + "oWidth": 1803, + "oHeight": 2669, + "safetyLevel": 0, + "id": "14001294434" + } + ], + "fetchedStart": true, + "fetchedEnd": false, + "totalItems": "4386039" + },"totalItems": + """ + response = mock.Mock(text=json) + results = flickr_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + {"toto":[ + {"id":200,"name":"Artist Name", + "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"} + ]} + """ + response = mock.Mock(text=json) + results = flickr_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 94f479dae..ed5ee42d8 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -8,6 +8,7 @@ from searx.tests.engines.test_deviantart import * # noqa from searx.tests.engines.test_digg import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa +from searx.tests.engines.test_flickr_noapi import * # noqa from searx.tests.engines.test_github import * # noqa from searx.tests.engines.test_google_images import * # noqa from searx.tests.engines.test_google_news import * # noqa From 5a16077455ef9e821a2b5f5f7e975be8a37ce83d Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 1 Feb 2015 15:23:26 +0100 Subject: [PATCH 28/28] PirateBay unit test + reactivation in Settings --- searx/engines/piratebay.py | 12 ++- searx/settings.yml | 6 +- searx/tests/engines/test_piratebay.py | 137 ++++++++++++++++++++++++++ searx/tests/test_engines.py | 1 + 4 files changed, 149 insertions(+), 7 deletions(-) create mode 100644 searx/tests/engines/test_piratebay.py diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index f6144faa2..207df276c 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -13,6 +13,7 @@ from cgi import escape from urllib import quote from lxml import html from operator import itemgetter +from searx.engines.xpath import extract_text # engine dependent config categories = ['videos', 'music', 'files'] @@ -29,7 +30,8 @@ search_types = {'files': '0', # specific xpath variables magnet_xpath = './/a[@title="Download this torrent using magnet"]' -content_xpath = './/font[@class="detDesc"]//text()' +torrent_xpath = './/a[@title="Download this torrent"]' +content_xpath = './/font[@class="detDesc"]' # do search-request @@ -59,8 +61,8 @@ def response(resp): for result in search_res[1:]: link = result.xpath('.//div[@class="detName"]//a')[0] href = urljoin(url, link.attrib.get('href')) - title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath(content_xpath))) + title = extract_text(link) + content = escape(extract_text(result.xpath(content_xpath))) seed, leech = result.xpath('.//td[@align="right"]/text()')[:2] # convert seed to int if possible @@ -76,6 +78,7 @@ def response(resp): leech = 0 magnetlink = result.xpath(magnet_xpath)[0] + torrentfile = result.xpath(torrent_xpath)[0] # append result results.append({'url': href, @@ -83,7 +86,8 @@ def response(resp): 'content': content, 'seed': seed, 'leech': leech, - 'magnetlink': magnetlink.attrib['href'], + 'magnetlink': magnetlink.attrib.get('href'), + 'torrentfile': torrentfile.attrib.get('href'), 'template': 'torrent.html'}) # return results sorted by seeder diff --git a/searx/settings.yml b/searx/settings.yml index b0a2853c7..2c9441c34 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -152,9 +152,9 @@ engines: engine : photon shortcut : ph -# - name : piratebay -# engine : piratebay -# shortcut : tpb + - name : piratebay + engine : piratebay + shortcut : tpb - name : kickass engine : kickass diff --git a/searx/tests/engines/test_piratebay.py b/searx/tests/engines/test_piratebay.py new file mode 100644 index 000000000..7207c408a --- /dev/null +++ b/searx/tests/engines/test_piratebay.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import piratebay +from searx.testing import SearxTestCase + + +class TestPiratebayEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['category'] = 'Toto' + params = piratebay.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('piratebay.cr', params['url']) + self.assertIn('0', params['url']) + + dicto['category'] = 'music' + params = piratebay.request(query, dicto) + self.assertIn('100', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, piratebay.response, None) + self.assertRaises(AttributeError, piratebay.response, []) + self.assertRaises(AttributeError, piratebay.response, '') + self.assertRaises(AttributeError, piratebay.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(piratebay.response(response), []) + + html = """ + + + + + + + + + +
    +
    + Anime
    + (Anime) +
    +
    + + + Magnet link + + + Download + + + VIP + + + + This is the content and should be OK + + 13334
    + """ + response = mock.Mock(text=html) + results = piratebay.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link') + self.assertEqual(results[0]['content'], 'This is the content and should be OK') + self.assertEqual(results[0]['seed'], 13) + self.assertEqual(results[0]['leech'], 334) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK') + self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent') + + html = """ + + + + + + + + + +
    +
    + Anime
    + (Anime) +
    +
    + + + Magnet link + + + Download + + + VIP + + + + This is the content and should be OK + + sd
    + """ + response = mock.Mock(text=html) + results = piratebay.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link') + self.assertEqual(results[0]['content'], 'This is the content and should be OK') + self.assertEqual(results[0]['seed'], 0) + self.assertEqual(results[0]['leech'], 0) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK') + self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent') + + html = """ + +
    + """ + response = mock.Mock(text=html) + results = piratebay.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index ed5ee42d8..e66f7db28 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -14,6 +14,7 @@ from searx.tests.engines.test_google_images import * # noqa from searx.tests.engines.test_google_news import * # noqa from searx.tests.engines.test_kickass import * # noqa from searx.tests.engines.test_mixcloud import * # noqa +from searx.tests.engines.test_piratebay import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa