From f965c978222cf48e8dd4b7dd6c9a28ccca9bc62f Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sun, 31 May 2015 00:25:59 +0200 Subject: [PATCH 1/4] Adds two engines : Youtube with or without API The API needs an API_KEY The NOAPI doesn't have the published dates. --- searx/engines/youtube_api.py | 83 ++++++++++++++++ searx/engines/youtube_noapi.py | 72 ++++++++++++++ searx/settings.yml | 7 +- searx/tests/engines/test_youtube_api.py | 111 ++++++++++++++++++++++ searx/tests/engines/test_youtube_noapi.py | 103 ++++++++++++++++++++ searx/tests/test_engines.py | 2 + 6 files changed, 377 insertions(+), 1 deletion(-) create mode 100644 searx/engines/youtube_api.py create mode 100644 searx/engines/youtube_noapi.py create mode 100644 searx/tests/engines/test_youtube_api.py create mode 100644 searx/tests/engines/test_youtube_noapi.py diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py new file mode 100644 index 000000000..8fd939a25 --- /dev/null +++ b/searx/engines/youtube_api.py @@ -0,0 +1,83 @@ +# Youtube (Videos) +# +# @website https://www.youtube.com/ +# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title, content, publishedDate, thumbnail, embedded + +from json import loads +from urllib import urlencode +from dateutil import parser + +# engine dependent config +categories = ['videos', 'music'] +paging = False +language_support = True +api_key = None + +# search-url +base_url = 'https://www.googleapis.com/youtube/v3/search' +search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}' + +embedded_url = '' + +base_youtube_url = 'https://www.youtube.com/watch?v=' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'q': query}), + api_key=api_key) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0] + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'items' not in search_results: + return [] + + # parse results + for result in search_results['items']: + videoid = result['id']['videoId'] + + title = result['snippet']['title'] + content = '' + thumbnail = '' + + pubdate = result['snippet']['publishedAt'] + publishedDate = parser.parse(pubdate) + + thumbnail = result['snippet']['thumbnails']['high']['url'] + + content = result['snippet']['description'] + + url = base_youtube_url + videoid + + embedded = embedded_url.format(videoid=videoid) + + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail}) + + # return results + return results diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py new file mode 100644 index 000000000..f78e43f0f --- /dev/null +++ b/searx/engines/youtube_noapi.py @@ -0,0 +1,72 @@ +# Youtube (Videos) +# +# @website https://www.youtube.com/ +# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) +# +# @using-api no +# @results HTML +# @stable no +# @parse url, title, content, publishedDate, thumbnail, embedded + +from urllib import quote_plus +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['videos', 'music'] +paging = True +language_support = False + +# search-url +base_url = 'https://www.youtube.com/results' +search_url = base_url + '?search_query={query}&page={page}' + +embedded_url = '' + +base_youtube_url = 'https://www.youtube.com/watch?v=' + +# specific xpath variables +results_xpath = "//ol/li/div[contains(@class, 'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]" +url_xpath = './/h3/a/@href' +title_xpath = './/div[@class="yt-lockup-content"]/h3/a' +content_xpath = './/div[@class="yt-lockup-content"]/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=quote_plus(query), + page=params['pageno']) + + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + # parse results + for result in dom.xpath(results_xpath): + videoid = result.xpath('@data-context-item-id')[0] + + url = base_youtube_url + videoid + thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg' + + title = extract_text(result.xpath(title_xpath)[0]) + content = extract_text(result.xpath(content_xpath)[0]) + + embedded = embedded_url.format(videoid=videoid) + + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'embedded': embedded, + 'thumbnail': thumbnail}) + + # return results + return results diff --git a/searx/settings.yml b/searx/settings.yml index d35b1378a..519ea8be1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -242,8 +242,13 @@ engines: shortcut : yhn - name : youtube - engine : youtube shortcut : yt + # You can use the engine using the official stable API, but you need an API key + # See : https://console.developers.google.com/project + # engine : youtube_api + # api_key: 'apikey' # required! + # Or you can use the html non-stable engine, activated by default + engine : youtube_noapi - name : dailymotion engine : dailymotion diff --git a/searx/tests/engines/test_youtube_api.py b/searx/tests/engines/test_youtube_api.py new file mode 100644 index 000000000..0d4d478c3 --- /dev/null +++ b/searx/tests/engines/test_youtube_api.py @@ -0,0 +1,111 @@ +from collections import defaultdict +import mock +from searx.engines import youtube_api +from searx.testing import SearxTestCase + + +class TestYoutubeAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = youtube_api.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertIn('googleapis.com', params['url']) + self.assertIn('youtube', params['url']) + self.assertIn('fr', params['url']) + + dicto['language'] = 'all' + params = youtube_api.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, youtube_api.response, None) + self.assertRaises(AttributeError, youtube_api.response, []) + self.assertRaises(AttributeError, youtube_api.response, '') + self.assertRaises(AttributeError, youtube_api.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(youtube_api.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(youtube_api.response(response), []) + + json = """ + { + "kind": "youtube#searchListResponse", + "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME", + "nextPageToken": "CAUQAA", + "pageInfo": { + "totalResults": 1000000, + "resultsPerPage": 20 + }, + "items": [ + { + "kind": "youtube#searchResult", + "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/IbLO64BMhbHIgWLwLw7MDYe7Hs4", + "id": { + "kind": "youtube#video", + "videoId": "DIVZCPfAOeM" + }, + "snippet": { + "publishedAt": "2015-05-29T22:41:04.000Z", + "channelId": "UCNodmx1ERIjKqvcJLtdzH5Q", + "title": "Title", + "description": "Description", + "thumbnails": { + "default": { + "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/default.jpg" + }, + "medium": { + "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg" + }, + "high": { + "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg" + } + }, + "channelTitle": "MinecraftUniverse", + "liveBroadcastContent": "none" + } + } + ] + } + """ + response = mock.Mock(text=json) + results = youtube_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM') + self.assertEqual(results[0]['content'], 'Description') + self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg') + self.assertTrue('DIVZCPfAOeM' in results[0]['embedded']) + + json = """ + { + "kind": "youtube#searchListResponse", + "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME", + "nextPageToken": "CAUQAA", + "pageInfo": { + "totalResults": 1000000, + "resultsPerPage": 20 + } + } + """ + response = mock.Mock(text=json) + results = youtube_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + {"toto":{"entry":[] + } + } + """ + response = mock.Mock(text=json) + results = youtube_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_youtube_noapi.py b/searx/tests/engines/test_youtube_noapi.py new file mode 100644 index 000000000..b715ed2f1 --- /dev/null +++ b/searx/tests/engines/test_youtube_noapi.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import youtube_noapi +from searx.testing import SearxTestCase + + +class TestYoutubeNoAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = youtube_noapi.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('youtube.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, youtube_noapi.response, None) + self.assertRaises(AttributeError, youtube_noapi.response, []) + self.assertRaises(AttributeError, youtube_noapi.response, '') + self.assertRaises(AttributeError, youtube_noapi.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(youtube_noapi.response(response), []) + + html = """ +
    +
  1. +
    +
    + + + +
    +
    +

    + + Title + + - Durée : 11:35. +

    +
    +
      +
    • il y a 20 heures
    • +
    • 8 424 vues
    • +
    +
    +
    + Description +
    +
    +
      +
    • + Nouveauté +
    • +
    • HD
    • +
    +
    +
    +
    +
    +
    +
    +
    +
    +
  2. +
+ """ + response = mock.Mock(text=html) + results = youtube_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM') + self.assertEqual(results[0]['content'], 'Description') + self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg') + self.assertTrue('DIVZCPfAOeM' in results[0]['embedded']) + + html = """ +
    +
  1. +
  2. +
+ """ + response = mock.Mock(text=html) + results = youtube_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 5770458f3..d0a4de4b8 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -39,4 +39,6 @@ from searx.tests.engines.test_www500px import * # noqa from searx.tests.engines.test_yacy import * # noqa from searx.tests.engines.test_yahoo import * # noqa from searx.tests.engines.test_youtube import * # noqa +from searx.tests.engines.test_youtube_api import * # noqa +from searx.tests.engines.test_youtube_noapi import * # noqa from searx.tests.engines.test_yahoo_news import * # noqa From 884eeb8541e0a4cf3d65c2a17e1c2f788cab7fb1 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 1 Jun 2015 00:00:32 +0200 Subject: [PATCH 2/4] New Qwant engines - Web - Images - News - Social media --- searx/engines/qwant.py | 66 +++++++++++ searx/engines/qwant_images.py | 70 +++++++++++ searx/engines/qwant_news.py | 69 +++++++++++ searx/engines/qwant_social.py | 69 +++++++++++ searx/settings.yml | 16 +++ searx/tests/engines/test_qwant.py | 137 +++++++++++++++++++++ searx/tests/engines/test_qwant_images.py | 145 +++++++++++++++++++++++ searx/tests/engines/test_qwant_news.py | 137 +++++++++++++++++++++ searx/tests/engines/test_qwant_social.py | 140 ++++++++++++++++++++++ searx/tests/test_engines.py | 4 + 10 files changed, 853 insertions(+) create mode 100644 searx/engines/qwant.py create mode 100644 searx/engines/qwant_images.py create mode 100644 searx/engines/qwant_news.py create mode 100644 searx/engines/qwant_social.py create mode 100644 searx/tests/engines/test_qwant.py create mode 100644 searx/tests/engines/test_qwant_images.py create mode 100644 searx/tests/engines/test_qwant_news.py create mode 100644 searx/tests/engines/test_qwant_social.py diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py new file mode 100644 index 000000000..91c12a19e --- /dev/null +++ b/searx/engines/qwant.py @@ -0,0 +1,66 @@ +""" + Qwant (Web) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads + +# engine dependent config +categories = ['general'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/web?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + + # append result + results.append({'title': title, + 'content': content, + 'url': res_url}) + + # return results + return results diff --git a/searx/engines/qwant_images.py b/searx/engines/qwant_images.py new file mode 100644 index 000000000..1c1753389 --- /dev/null +++ b/searx/engines/qwant_images.py @@ -0,0 +1,70 @@ +""" + Qwant (Images) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads + +# engine dependent config +categories = ['images'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/images?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + thumbnail_src = result['thumbnail'] + img_src = result['media'] + + # append result + results.append({'template': 'images.html', + 'url': res_url, + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'img_src': img_src}) + + # return results + return results diff --git a/searx/engines/qwant_news.py b/searx/engines/qwant_news.py new file mode 100644 index 000000000..c4d5be5d3 --- /dev/null +++ b/searx/engines/qwant_news.py @@ -0,0 +1,69 @@ +""" + Qwant (News) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads +from datetime import datetime + +# engine dependent config +categories = ['news'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/news?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + published_date = datetime.fromtimestamp(result['date'], None) + + # append result + results.append({'url': res_url, + 'title': title, + 'publishedDate': published_date, + 'content': content}) + + # return results + return results diff --git a/searx/engines/qwant_social.py b/searx/engines/qwant_social.py new file mode 100644 index 000000000..474dfac02 --- /dev/null +++ b/searx/engines/qwant_social.py @@ -0,0 +1,69 @@ +""" + Qwant (social media) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads +from datetime import datetime + +# engine dependent config +categories = ['social media'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/social?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + published_date = datetime.fromtimestamp(result['date'], None) + + # append result + results.append({'url': res_url, + 'title': title, + 'content': content, + 'publishedDate': published_date}) + + # return results + return results diff --git a/searx/settings.yml b/searx/settings.yml index 519ea8be1..7f8229732 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -168,6 +168,22 @@ engines: engine : piratebay shortcut : tpb + - name : qwant + engine : qwant + shortcut : qw + + - name : qwant images + engine : qwant_images + shortcut : qwi + + - name : qwant news + engine : qwant_news + shortcut : qwn + + - name : qwant social + engine : qwant_social + shortcut : qws + - name : kickass engine : kickass shortcut : ka diff --git a/searx/tests/engines/test_qwant.py b/searx/tests/engines/test_qwant.py new file mode 100644 index 000000000..9aa1c7c56 --- /dev/null +++ b/searx/tests/engines/test_qwant.py @@ -0,0 +1,137 @@ +from collections import defaultdict +import mock +from searx.engines import qwant +from searx.testing import SearxTestCase + + +class TestQwantEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = qwant.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('qwant.com', params['url']) + self.assertIn('fr_fr', params['url']) + + dicto['language'] = 'all' + params = qwant.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, qwant.response, None) + self.assertRaises(AttributeError, qwant.response, []) + self.assertRaises(AttributeError, qwant.response, '') + self.assertRaises(AttributeError, qwant.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(qwant.response(response), []) + + response = mock.Mock(text='{"data": {}}') + self.assertEqual(qwant.response(response), []) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": "", + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success" + } + """ + response = mock.Mock(text=json) + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_images.py b/searx/tests/engines/test_qwant_images.py new file mode 100644 index 000000000..bf89f1b01 --- /dev/null +++ b/searx/tests/engines/test_qwant_images.py @@ -0,0 +1,145 @@ +from collections import defaultdict +import mock +from searx.engines import qwant_images +from searx.testing import SearxTestCase + + +class TestQwantImagesEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = qwant_images.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('qwant.com', params['url']) + self.assertIn('fr_fr', params['url']) + + dicto['language'] = 'all' + params = qwant_images.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, qwant_images.response, None) + self.assertRaises(AttributeError, qwant_images.response, []) + self.assertRaises(AttributeError, qwant_images.response, '') + self.assertRaises(AttributeError, qwant_images.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(qwant_images.response(response), []) + + response = mock.Mock(text='{"data": {}}') + self.assertEqual(qwant_images.response(response), []) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "type": "image", + "media": "http://www.url.xyz/fullimage.jpg", + "desc": "", + "thumbnail": "http://www.url.xyz/thumbnail.jpg", + "thumb_width": 365, + "thumb_height": 230, + "width": "365", + "height": "230", + "size": "187.7KB", + "url": "http://www.url.xyz", + "_id": "0ffd93fb26f3e192a6020af8fc16fbb1", + "media_fullsize": "http://www.proxy/fullimage.jpg", + "count": 0 + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], '') + self.assertEqual(results[0]['thumbnail_src'], 'http://www.url.xyz/thumbnail.jpg') + self.assertEqual(results[0]['img_src'], 'http://www.url.xyz/fullimage.jpg') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success" + } + """ + response = mock.Mock(text=json) + results = qwant_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_news.py b/searx/tests/engines/test_qwant_news.py new file mode 100644 index 000000000..17cdd3cc1 --- /dev/null +++ b/searx/tests/engines/test_qwant_news.py @@ -0,0 +1,137 @@ +from collections import defaultdict +import mock +from searx.engines import qwant_news +from searx.testing import SearxTestCase + + +class TestQwantNewsEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = qwant_news.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('qwant.com', params['url']) + self.assertIn('fr_fr', params['url']) + + dicto['language'] = 'all' + params = qwant_news.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, qwant_news.response, None) + self.assertRaises(AttributeError, qwant_news.response, []) + self.assertRaises(AttributeError, qwant_news.response, '') + self.assertRaises(AttributeError, qwant_news.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(qwant_news.response(response), []) + + response = mock.Mock(text='{"data": {}}') + self.assertEqual(qwant_news.response(response), []) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": 1433065411, + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success" + } + """ + response = mock.Mock(text=json) + results = qwant_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_social.py b/searx/tests/engines/test_qwant_social.py new file mode 100644 index 000000000..6e87e9898 --- /dev/null +++ b/searx/tests/engines/test_qwant_social.py @@ -0,0 +1,140 @@ +from collections import defaultdict +import mock +from searx.engines import qwant_social +from searx.testing import SearxTestCase + + +class TestQwantSocialEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = qwant_social.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('qwant.com', params['url']) + self.assertIn('fr_fr', params['url']) + + dicto['language'] = 'all' + params = qwant_social.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, qwant_social.response, None) + self.assertRaises(AttributeError, qwant_social.response, []) + self.assertRaises(AttributeError, qwant_social.response, '') + self.assertRaises(AttributeError, qwant_social.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(qwant_social.response(response), []) + + response = mock.Mock(text='{"data": {}}') + self.assertEqual(qwant_social.response(response), []) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "_id": "dc0b3f24c93684c7d7f1b0a4c2d9f1b0", + "__index": 32, + "title": "Title", + "img": "img", + "desc": "Description", + "date": 1432643480, + "type": "twitter", + "card": "XXX", + "post": "603176590856556545", + "url": "http://www.url.xyz", + "userUrl": "https://twitter.com/XXX" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_social.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_social.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_social.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success" + } + """ + response = mock.Mock(text=json) + results = qwant_social.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index d0a4de4b8..4f3088adb 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -25,6 +25,10 @@ from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_openstreetmap import * # noqa from searx.tests.engines.test_photon import * # noqa from searx.tests.engines.test_piratebay import * # noqa +from searx.tests.engines.test_qwant import * # noqa +from searx.tests.engines.test_qwant_images import * # noqa +from searx.tests.engines.test_qwant_news import * # noqa +from searx.tests.engines.test_qwant_social import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa From f05087b93ac1ebef3bdacd353524bac0d8041832 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 2 Jun 2015 20:36:58 +0200 Subject: [PATCH 3/4] Refactor Use only one engine for the four search from Qwant --- searx/engines/qwant.py | 38 +++-- searx/engines/qwant_images.py | 70 --------- searx/engines/qwant_news.py | 69 --------- searx/engines/qwant_social.py | 69 --------- searx/settings.yml | 14 +- searx/tests/engines/test_qwant.py | 176 +++++++++++++++++++++++ searx/tests/engines/test_qwant_images.py | 145 ------------------- searx/tests/engines/test_qwant_news.py | 137 ------------------ searx/tests/engines/test_qwant_social.py | 140 ------------------ searx/tests/test_engines.py | 3 - 10 files changed, 217 insertions(+), 644 deletions(-) delete mode 100644 searx/engines/qwant_images.py delete mode 100644 searx/engines/qwant_news.py delete mode 100644 searx/engines/qwant_social.py delete mode 100644 searx/tests/engines/test_qwant_images.py delete mode 100644 searx/tests/engines/test_qwant_news.py delete mode 100644 searx/tests/engines/test_qwant_social.py diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 91c12a19e..38bafb043 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -1,5 +1,5 @@ """ - Qwant (Web) + Qwant (Web, Images, News, Social) @website https://qwant.com/ @provide-api not officially (https://api.qwant.com/api/search/) @@ -12,21 +12,25 @@ from urllib import urlencode from json import loads +from datetime import datetime # engine dependent config -categories = ['general'] +categories = None paging = True language_support = True +search_url_keyword = None + # search-url -url = 'https://api.qwant.com/api/search/web?count=10&offset={offset}&f=&{query}' +url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}' # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 - params['url'] = url.format(query=urlencode({'q': query}), + params['url'] = url.format(keyword=search_url_keyword, + query=urlencode({'q': query}), offset=offset) # add language tag if specified @@ -57,10 +61,28 @@ def response(resp): res_url = result['url'] content = result['desc'] - # append result - results.append({'title': title, - 'content': content, - 'url': res_url}) + if search_url_keyword == 'web': + results.append({'title': title, + 'content': content, + 'url': res_url}) + + elif search_url_keyword == 'images': + thumbnail_src = result['thumbnail'] + img_src = result['media'] + results.append({'template': 'images.html', + 'url': res_url, + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'img_src': img_src}) + + elif search_url_keyword == 'news' or search_url_keyword == 'social': + published_date = datetime.fromtimestamp(result['date'], None) + + results.append({'url': res_url, + 'title': title, + 'publishedDate': published_date, + 'content': content}) # return results return results diff --git a/searx/engines/qwant_images.py b/searx/engines/qwant_images.py deleted file mode 100644 index 1c1753389..000000000 --- a/searx/engines/qwant_images.py +++ /dev/null @@ -1,70 +0,0 @@ -""" - Qwant (Images) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads - -# engine dependent config -categories = ['images'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/images?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - thumbnail_src = result['thumbnail'] - img_src = result['media'] - - # append result - results.append({'template': 'images.html', - 'url': res_url, - 'title': title, - 'content': '', - 'thumbnail_src': thumbnail_src, - 'img_src': img_src}) - - # return results - return results diff --git a/searx/engines/qwant_news.py b/searx/engines/qwant_news.py deleted file mode 100644 index c4d5be5d3..000000000 --- a/searx/engines/qwant_news.py +++ /dev/null @@ -1,69 +0,0 @@ -""" - Qwant (News) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads -from datetime import datetime - -# engine dependent config -categories = ['news'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/news?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - content = result['desc'] - published_date = datetime.fromtimestamp(result['date'], None) - - # append result - results.append({'url': res_url, - 'title': title, - 'publishedDate': published_date, - 'content': content}) - - # return results - return results diff --git a/searx/engines/qwant_social.py b/searx/engines/qwant_social.py deleted file mode 100644 index 474dfac02..000000000 --- a/searx/engines/qwant_social.py +++ /dev/null @@ -1,69 +0,0 @@ -""" - Qwant (social media) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads -from datetime import datetime - -# engine dependent config -categories = ['social media'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/social?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - content = result['desc'] - published_date = datetime.fromtimestamp(result['date'], None) - - # append result - results.append({'url': res_url, - 'title': title, - 'content': content, - 'publishedDate': published_date}) - - # return results - return results diff --git a/searx/settings.yml b/searx/settings.yml index 7f8229732..c84b810e1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -171,18 +171,26 @@ engines: - name : qwant engine : qwant shortcut : qw + search_url_keyword : web + categories : general - name : qwant images - engine : qwant_images + engine : qwant shortcut : qwi + search_url_keyword : images + categories : images - name : qwant news - engine : qwant_news + engine : qwant shortcut : qwn + search_url_keyword : news + categories : news - name : qwant social - engine : qwant_social + engine : qwant shortcut : qws + search_url_keyword : social + categories : social media - name : kickass engine : kickass diff --git a/searx/tests/engines/test_qwant.py b/searx/tests/engines/test_qwant.py index 9aa1c7c56..6da4745b1 100644 --- a/searx/tests/engines/test_qwant.py +++ b/searx/tests/engines/test_qwant.py @@ -68,6 +68,7 @@ class TestQwantEngine(SearxTestCase): } """ response = mock.Mock(text=json) + qwant.search_url_keyword = 'web' results = qwant.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -75,6 +76,181 @@ class TestQwantEngine(SearxTestCase): self.assertEqual(results[0]['url'], 'http://www.url.xyz') self.assertEqual(results[0]['content'], 'Description') + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "media": "http://image.jpg", + "desc": "", + "thumbnail": "http://thumbnail.jpg", + "date": "", + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + qwant.search_url_keyword = 'images' + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], '') + self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.jpg') + self.assertEqual(results[0]['img_src'], 'http://image.jpg') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": 1433260920, + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + qwant.search_url_keyword = 'news' + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + self.assertIn('publishedDate', results[0]) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": 1433260920, + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + qwant.search_url_keyword = 'social' + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + self.assertIn('publishedDate', results[0]) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": 1433260920, + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + qwant.search_url_keyword = '' + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + json = """ { "status": "success", diff --git a/searx/tests/engines/test_qwant_images.py b/searx/tests/engines/test_qwant_images.py deleted file mode 100644 index bf89f1b01..000000000 --- a/searx/tests/engines/test_qwant_images.py +++ /dev/null @@ -1,145 +0,0 @@ -from collections import defaultdict -import mock -from searx.engines import qwant_images -from searx.testing import SearxTestCase - - -class TestQwantImagesEngine(SearxTestCase): - - def test_request(self): - query = 'test_query' - dicto = defaultdict(dict) - dicto['pageno'] = 0 - dicto['language'] = 'fr_FR' - params = qwant_images.request(query, dicto) - self.assertIn('url', params) - self.assertIn(query, params['url']) - self.assertIn('qwant.com', params['url']) - self.assertIn('fr_fr', params['url']) - - dicto['language'] = 'all' - params = qwant_images.request(query, dicto) - self.assertFalse('fr' in params['url']) - - def test_response(self): - self.assertRaises(AttributeError, qwant_images.response, None) - self.assertRaises(AttributeError, qwant_images.response, []) - self.assertRaises(AttributeError, qwant_images.response, '') - self.assertRaises(AttributeError, qwant_images.response, '[]') - - response = mock.Mock(text='{}') - self.assertEqual(qwant_images.response(response), []) - - response = mock.Mock(text='{"data": {}}') - self.assertEqual(qwant_images.response(response), []) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "items": [ - { - "title": "Title", - "type": "image", - "media": "http://www.url.xyz/fullimage.jpg", - "desc": "", - "thumbnail": "http://www.url.xyz/thumbnail.jpg", - "thumb_width": 365, - "thumb_height": 230, - "width": "365", - "height": "230", - "size": "187.7KB", - "url": "http://www.url.xyz", - "_id": "0ffd93fb26f3e192a6020af8fc16fbb1", - "media_fullsize": "http://www.proxy/fullimage.jpg", - "count": 0 - } - ], - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'Title') - self.assertEqual(results[0]['url'], 'http://www.url.xyz') - self.assertEqual(results[0]['content'], '') - self.assertEqual(results[0]['thumbnail_src'], 'http://www.url.xyz/thumbnail.jpg') - self.assertEqual(results[0]['img_src'], 'http://www.url.xyz/fullimage.jpg') - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success" - } - """ - response = mock.Mock(text=json) - results = qwant_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_news.py b/searx/tests/engines/test_qwant_news.py deleted file mode 100644 index 17cdd3cc1..000000000 --- a/searx/tests/engines/test_qwant_news.py +++ /dev/null @@ -1,137 +0,0 @@ -from collections import defaultdict -import mock -from searx.engines import qwant_news -from searx.testing import SearxTestCase - - -class TestQwantNewsEngine(SearxTestCase): - - def test_request(self): - query = 'test_query' - dicto = defaultdict(dict) - dicto['pageno'] = 0 - dicto['language'] = 'fr_FR' - params = qwant_news.request(query, dicto) - self.assertIn('url', params) - self.assertIn(query, params['url']) - self.assertIn('qwant.com', params['url']) - self.assertIn('fr_fr', params['url']) - - dicto['language'] = 'all' - params = qwant_news.request(query, dicto) - self.assertFalse('fr' in params['url']) - - def test_response(self): - self.assertRaises(AttributeError, qwant_news.response, None) - self.assertRaises(AttributeError, qwant_news.response, []) - self.assertRaises(AttributeError, qwant_news.response, '') - self.assertRaises(AttributeError, qwant_news.response, '[]') - - response = mock.Mock(text='{}') - self.assertEqual(qwant_news.response(response), []) - - response = mock.Mock(text='{"data": {}}') - self.assertEqual(qwant_news.response(response), []) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "items": [ - { - "title": "Title", - "score": 9999, - "url": "http://www.url.xyz", - "source": "...", - "desc": "Description", - "date": 1433065411, - "_id": "db0aadd62c2a8565567ffc382f5c61fa", - "favicon": "https://s.qwant.com/fav.ico" - } - ], - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_news.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'Title') - self.assertEqual(results[0]['url'], 'http://www.url.xyz') - self.assertEqual(results[0]['content'], 'Description') - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_news.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_news.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success" - } - """ - response = mock.Mock(text=json) - results = qwant_news.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_social.py b/searx/tests/engines/test_qwant_social.py deleted file mode 100644 index 6e87e9898..000000000 --- a/searx/tests/engines/test_qwant_social.py +++ /dev/null @@ -1,140 +0,0 @@ -from collections import defaultdict -import mock -from searx.engines import qwant_social -from searx.testing import SearxTestCase - - -class TestQwantSocialEngine(SearxTestCase): - - def test_request(self): - query = 'test_query' - dicto = defaultdict(dict) - dicto['pageno'] = 0 - dicto['language'] = 'fr_FR' - params = qwant_social.request(query, dicto) - self.assertIn('url', params) - self.assertIn(query, params['url']) - self.assertIn('qwant.com', params['url']) - self.assertIn('fr_fr', params['url']) - - dicto['language'] = 'all' - params = qwant_social.request(query, dicto) - self.assertFalse('fr' in params['url']) - - def test_response(self): - self.assertRaises(AttributeError, qwant_social.response, None) - self.assertRaises(AttributeError, qwant_social.response, []) - self.assertRaises(AttributeError, qwant_social.response, '') - self.assertRaises(AttributeError, qwant_social.response, '[]') - - response = mock.Mock(text='{}') - self.assertEqual(qwant_social.response(response), []) - - response = mock.Mock(text='{"data": {}}') - self.assertEqual(qwant_social.response(response), []) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "items": [ - { - "_id": "dc0b3f24c93684c7d7f1b0a4c2d9f1b0", - "__index": 32, - "title": "Title", - "img": "img", - "desc": "Description", - "date": 1432643480, - "type": "twitter", - "card": "XXX", - "post": "603176590856556545", - "url": "http://www.url.xyz", - "userUrl": "https://twitter.com/XXX" - } - ], - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_social.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'Title') - self.assertEqual(results[0]['url'], 'http://www.url.xyz') - self.assertEqual(results[0]['content'], 'Description') - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_social.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_social.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success" - } - """ - response = mock.Mock(text=json) - results = qwant_social.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 4f3088adb..166184c4d 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -26,9 +26,6 @@ from searx.tests.engines.test_openstreetmap import * # noqa from searx.tests.engines.test_photon import * # noqa from searx.tests.engines.test_piratebay import * # noqa from searx.tests.engines.test_qwant import * # noqa -from searx.tests.engines.test_qwant_images import * # noqa -from searx.tests.engines.test_qwant_news import * # noqa -from searx.tests.engines.test_qwant_social import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa From e0774c849c48373c7a49515d5d769c5868596494 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 2 Jun 2015 22:11:47 +0200 Subject: [PATCH 4/4] Removed the keywords from the settings in qwant engine --- searx/engines/qwant.py | 24 +++++++++++++++++------- searx/settings.yml | 4 ---- searx/tests/engines/test_qwant.py | 14 +++++++++----- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 38bafb043..872bd4e95 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -19,7 +19,10 @@ categories = None paging = True language_support = True -search_url_keyword = None +category_to_keyword = {'general': 'web', + 'images': 'images', + 'news': 'news', + 'social media': 'social'} # search-url url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}' @@ -29,9 +32,15 @@ url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{q def request(query, params): offset = (params['pageno'] - 1) * 10 - params['url'] = url.format(keyword=search_url_keyword, - query=urlencode({'q': query}), - offset=offset) + if categories[0] and categories[0] in category_to_keyword: + + params['url'] = url.format(keyword=category_to_keyword[categories[0]], + query=urlencode({'q': query}), + offset=offset) + else: + params['url'] = url.format(keyword='web', + query=urlencode({'q': query}), + offset=offset) # add language tag if specified if params['language'] != 'all': @@ -61,12 +70,12 @@ def response(resp): res_url = result['url'] content = result['desc'] - if search_url_keyword == 'web': + if category_to_keyword.get(categories[0], '') == 'web': results.append({'title': title, 'content': content, 'url': res_url}) - elif search_url_keyword == 'images': + elif category_to_keyword.get(categories[0], '') == 'images': thumbnail_src = result['thumbnail'] img_src = result['media'] results.append({'template': 'images.html', @@ -76,7 +85,8 @@ def response(resp): 'thumbnail_src': thumbnail_src, 'img_src': img_src}) - elif search_url_keyword == 'news' or search_url_keyword == 'social': + elif (category_to_keyword.get(categories[0], '') == 'news' or + category_to_keyword.get(categories[0], '') == 'social'): published_date = datetime.fromtimestamp(result['date'], None) results.append({'url': res_url, diff --git a/searx/settings.yml b/searx/settings.yml index c84b810e1..1c2b01869 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -171,25 +171,21 @@ engines: - name : qwant engine : qwant shortcut : qw - search_url_keyword : web categories : general - name : qwant images engine : qwant shortcut : qwi - search_url_keyword : images categories : images - name : qwant news engine : qwant shortcut : qwn - search_url_keyword : news categories : news - name : qwant social engine : qwant shortcut : qws - search_url_keyword : social categories : social media - name : kickass diff --git a/searx/tests/engines/test_qwant.py b/searx/tests/engines/test_qwant.py index 6da4745b1..7d79d13d8 100644 --- a/searx/tests/engines/test_qwant.py +++ b/searx/tests/engines/test_qwant.py @@ -11,15 +11,19 @@ class TestQwantEngine(SearxTestCase): dicto = defaultdict(dict) dicto['pageno'] = 0 dicto['language'] = 'fr_FR' + qwant.categories = [''] params = qwant.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) + self.assertIn('web', params['url']) self.assertIn('qwant.com', params['url']) self.assertIn('fr_fr', params['url']) dicto['language'] = 'all' + qwant.categories = ['news'] params = qwant.request(query, dicto) self.assertFalse('fr' in params['url']) + self.assertIn('news', params['url']) def test_response(self): self.assertRaises(AttributeError, qwant.response, None) @@ -68,7 +72,7 @@ class TestQwantEngine(SearxTestCase): } """ response = mock.Mock(text=json) - qwant.search_url_keyword = 'web' + qwant.categories = ['general'] results = qwant.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -113,7 +117,7 @@ class TestQwantEngine(SearxTestCase): } """ response = mock.Mock(text=json) - qwant.search_url_keyword = 'images' + qwant.categories = ['images'] results = qwant.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -158,7 +162,7 @@ class TestQwantEngine(SearxTestCase): } """ response = mock.Mock(text=json) - qwant.search_url_keyword = 'news' + qwant.categories = ['news'] results = qwant.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -202,7 +206,7 @@ class TestQwantEngine(SearxTestCase): } """ response = mock.Mock(text=json) - qwant.search_url_keyword = 'social' + qwant.categories = ['social media'] results = qwant.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -246,7 +250,7 @@ class TestQwantEngine(SearxTestCase): } """ response = mock.Mock(text=json) - qwant.search_url_keyword = '' + qwant.categories = [''] results = qwant.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0)