From cd7849ffc82dc319e25ed80cae03dc021e5c7263 Mon Sep 17 00:00:00 2001 From: Denis Wernert Date: Tue, 2 Oct 2018 15:08:43 +0200 Subject: [PATCH 1/6] Adds the Unsplash image engine --- searx/engines/unsplash.py | 39 +++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 5 +++++ 2 files changed, 44 insertions(+) create mode 100644 searx/engines/unsplash.py diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py new file mode 100644 index 000000000..03db97788 --- /dev/null +++ b/searx/engines/unsplash.py @@ -0,0 +1,39 @@ +""" + Unsplash + + @website https://unsplash.com + @provide-api yes (https://unsplash.com/developers) + + @using-api no + @results JSON (using search portal's infiniscroll API) + @stable no (JSON format could change any time) + @parse url, title, img_src, thumbnail_src +""" + +from searx.url_utils import urlencode +from json import loads + +url = 'https://unsplash.com/' +search_url = url + 'napi/search/photos?' +categories = ['images'] +page_size = 20 +paging = True + + +def request(query, params): + params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size}) + return params + + +def response(resp): + results = [] + json_data = loads(resp.text) + + for result in json_data['results']: + results.append({'template': 'images.html', + 'url': result['links']['html'], + 'thumbnail_src': result['urls']['thumb'], + 'img_src': result['urls']['full'], + 'title': result['description'], + 'content': ''}) + return results diff --git a/searx/settings.yml b/searx/settings.yml index 6e1b5fb74..b48fada8e 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -642,6 +642,11 @@ engines: # content_xpath : //*[@class="meaning"] # shortcut : ud + - name : unsplash + engine : unsplash + disabled: True + shortcut : us + - name : yahoo engine : yahoo shortcut : yh From 72d063d27d9277b12efe2ec16fe1c95ef651c2a2 Mon Sep 17 00:00:00 2001 From: Denis Wernert Date: Mon, 8 Oct 2018 14:01:35 +0200 Subject: [PATCH 2/6] Uses the raw url for the image result, rather than the full size result. --- searx/engines/unsplash.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 03db97788..04a943297 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -29,11 +29,12 @@ def response(resp): results = [] json_data = loads(resp.text) - for result in json_data['results']: - results.append({'template': 'images.html', - 'url': result['links']['html'], - 'thumbnail_src': result['urls']['thumb'], - 'img_src': result['urls']['full'], - 'title': result['description'], - 'content': ''}) + if 'results' in json_data: + for result in json_data['results']: + results.append({'template': 'images.html', + 'url': result['links']['html'], + 'thumbnail_src': result['urls']['thumb'], + 'img_src': result['urls']['raw'], + 'title': result['description'], + 'content': ''}) return results From ee07a5e75005deb5d846f404f350aefd96b11b2d Mon Sep 17 00:00:00 2001 From: Denis Wernert Date: Mon, 8 Oct 2018 14:01:55 +0200 Subject: [PATCH 3/6] Adds a unit test for the unsplash engine --- tests/unit/engines/test_unsplash.py | 38 ++++ tests/unit/engines/unsplash_fixture.json | 241 +++++++++++++++++++++++ 2 files changed, 279 insertions(+) create mode 100644 tests/unit/engines/test_unsplash.py create mode 100644 tests/unit/engines/unsplash_fixture.json diff --git a/tests/unit/engines/test_unsplash.py b/tests/unit/engines/test_unsplash.py new file mode 100644 index 000000000..cb9e683c4 --- /dev/null +++ b/tests/unit/engines/test_unsplash.py @@ -0,0 +1,38 @@ +from collections import defaultdict +import mock +from searx.testing import SearxTestCase +from searx.engines import unsplash + + +class TestUnsplashEngine(SearxTestCase): + def test_request(self): + query = 'penguin' + _dict = defaultdict(dict) + _dict['pageno'] = 1 + params = unsplash.request(query, _dict) + + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + + def test_response(self): + resp = mock.Mock(text='{}') + result = unsplash.response(resp) + self.assertEqual([], result) + + resp.text = '{"results": []}' + result = unsplash.response(resp) + self.assertEqual([], result) + + # Sourced from https://unsplash.com/napi/search/photos?query=penguin&xp=&per_page=20&page=2 + with open('./tests/unit/engines/unsplash_fixture.json') as fixture: + resp.text = fixture.read() + + result = unsplash.response(resp) + self.assertEqual(len(result), 2) + self.assertEqual(result[0]['title'], 'low angle photography of swimming penguin') + self.assertEqual(result[0]['url'], 'https://unsplash.com/photos/FY8d721UO_4') + self.assertEqual(result[0]['thumbnail_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&q=80\ +&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=a9b9e56e63efc6f4611a87ce7e9a48f8') + self.assertEqual(result[0]['img_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5\ +&ixid=eyJhcHBfaWQiOjEyMDd9&s=095c5fc319c5a77c705f49ad63e0f195') + self.assertEqual(result[0]['content'], '') diff --git a/tests/unit/engines/unsplash_fixture.json b/tests/unit/engines/unsplash_fixture.json new file mode 100644 index 000000000..4c8db2a2c --- /dev/null +++ b/tests/unit/engines/unsplash_fixture.json @@ -0,0 +1,241 @@ +{ + "total": 2, + "total_pages": 1, + "results": [ + { + "id": "FY8d721UO_4", + "created_at": "2018-04-12T14:20:35-04:00", + "updated_at": "2018-08-28T20:58:33-04:00", + "width": 3891, + "height": 5829, + "color": "#152C33", + "description": "low angle photography of swimming penguin", + "urls": { + "raw": "https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&ixid=eyJhcHBfaWQiOjEyMDd9&s=095c5fc319c5a77c705f49ad63e0f195", + "full": "https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&q=85&fm=jpg&crop=entropy&cs=srgb&ixid=eyJhcHBfaWQiOjEyMDd9&s=74be977849c173d6929636d491a760c3", + "regular": "https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&q=80&fm=jpg&crop=entropy&cs=tinysrgb&w=1080&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=ad65df26970bd010085f0ca25434de33", + "small": "https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&q=80&fm=jpg&crop=entropy&cs=tinysrgb&w=400&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=5d2edfd073c31eb8ee7b305222bdc5a2", + "thumb": "https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&q=80&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=a9b9e56e63efc6f4611a87ce7e9a48f8" + }, + "links": { + "self": "https://api.unsplash.com/photos/FY8d721UO_4", + "html": "https://unsplash.com/photos/FY8d721UO_4", + "download": "https://unsplash.com/photos/FY8d721UO_4/download", + "download_location": "https://api.unsplash.com/photos/FY8d721UO_4/download" + }, + "categories": [], + "sponsored": false, + "likes": 31, + "liked_by_user": false, + "current_user_collections": [], + "slug": null, + "user": { + "id": "N4gE4mrG8lE", + "updated_at": "2018-10-03T02:51:19-04:00", + "username": "gaspanik", + "name": "Masaaki Komori", + "first_name": "Masaaki", + "last_name": "Komori", + "twitter_username": "cipher", + "portfolio_url": "https://www.instagram.com/cipher/", + "bio": null, + "location": "Tokyo, JAPAN", + "links": { + "self": "https://api.unsplash.com/users/gaspanik", + "html": "https://unsplash.com/@gaspanik", + "photos": "https://api.unsplash.com/users/gaspanik/photos", + "likes": "https://api.unsplash.com/users/gaspanik/likes", + "portfolio": "https://api.unsplash.com/users/gaspanik/portfolio", + "following": "https://api.unsplash.com/users/gaspanik/following", + "followers": "https://api.unsplash.com/users/gaspanik/followers" + }, + "profile_image": { + "small": "https://images.unsplash.com/profile-fb-1502270358-e7c86c1011ce.jpg?ixlib=rb-0.3.5&q=80&fm=jpg&crop=faces&cs=tinysrgb&fit=crop&h=32&w=32&s=9fe12f6d177bd6fdbd56d233a80c01a3", + "medium": "https://images.unsplash.com/profile-fb-1502270358-e7c86c1011ce.jpg?ixlib=rb-0.3.5&q=80&fm=jpg&crop=faces&cs=tinysrgb&fit=crop&h=64&w=64&s=6ad7d156b62e438ae9dc794cba712fff", + "large": "https://images.unsplash.com/profile-fb-1502270358-e7c86c1011ce.jpg?ixlib=rb-0.3.5&q=80&fm=jpg&crop=faces&cs=tinysrgb&fit=crop&h=128&w=128&s=13a08a2e72e7d11632410e92bd3a9406" + }, + "instagram_username": "cipher", + "total_collections": 0, + "total_likes": 406, + "total_photos": 196 + }, + "tags": [ + { + "title": "animal" + }, + { + "title": "water" + }, + { + "title": "swim" + }, + { + "title": "aquarium" + }, + { + "title": "wallpaper" + }, + { + "title": "blue" + }, + { + "title": "sealife" + }, + { + "title": "wildlife" + }, + { + "title": "bird" + }, + { + "title": "deep sea" + }, + { + "title": "fish" + }, + { + "title": "water life" + } + ], + "photo_tags": [ + { + "title": "animal" + }, + { + "title": "water" + }, + { + "title": "swim" + }, + { + "title": "aquarium" + }, + { + "title": "wallpaper" + } + ] + }, + { + "id": "ayKyc01xLWA", + "created_at": "2018-02-16T23:14:31-05:00", + "updated_at": "2018-08-28T20:48:27-04:00", + "width": 4928, + "height": 3264, + "color": "#161618", + "description": "black and white penguins on ice field", + "urls": { + "raw": "https://images.unsplash.com/photo-1518840801558-9770b4a34eeb?ixlib=rb-0.3.5&ixid=eyJhcHBfaWQiOjEyMDd9&s=4e107a2bc49ab561ba6272eea2ec725d", + "full": "https://images.unsplash.com/photo-1518840801558-9770b4a34eeb?ixlib=rb-0.3.5&q=85&fm=jpg&crop=entropy&cs=srgb&ixid=eyJhcHBfaWQiOjEyMDd9&s=f9b1e4d4572ab44efb2cf3d601d2b4d9", + "regular": "https://images.unsplash.com/photo-1518840801558-9770b4a34eeb?ixlib=rb-0.3.5&q=80&fm=jpg&crop=entropy&cs=tinysrgb&w=1080&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=4430cedb63841f1fe055d5005316cc96", + "small": "https://images.unsplash.com/photo-1518840801558-9770b4a34eeb?ixlib=rb-0.3.5&q=80&fm=jpg&crop=entropy&cs=tinysrgb&w=400&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=ee73c7af22ce445d408e240821ce07af", + "thumb": "https://images.unsplash.com/photo-1518840801558-9770b4a34eeb?ixlib=rb-0.3.5&q=80&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=934302390d383cad8c571905e3a80bac" + }, + "links": { + "self": "https://api.unsplash.com/photos/ayKyc01xLWA", + "html": "https://unsplash.com/photos/ayKyc01xLWA", + "download": "https://unsplash.com/photos/ayKyc01xLWA/download", + "download_location": "https://api.unsplash.com/photos/ayKyc01xLWA/download" + }, + "categories": [], + "sponsored": false, + "likes": 37, + "liked_by_user": false, + "current_user_collections": [], + "slug": null, + "user": { + "id": "tRb_KGw60Xk", + "updated_at": "2018-09-20T11:51:54-04:00", + "username": "ghost_cat", + "name": "Danielle Barnes", + "first_name": "Danielle", + "last_name": "Barnes", + "twitter_username": null, + "portfolio_url": null, + "bio": null, + "location": null, + "links": { + "self": "https://api.unsplash.com/users/ghost_cat", + "html": "https://unsplash.com/@ghost_cat", + "photos": "https://api.unsplash.com/users/ghost_cat/photos", + "likes": "https://api.unsplash.com/users/ghost_cat/likes", + "portfolio": "https://api.unsplash.com/users/ghost_cat/portfolio", + "following": "https://api.unsplash.com/users/ghost_cat/following", + "followers": "https://api.unsplash.com/users/ghost_cat/followers" + }, + "profile_image": { + "small": "https://images.unsplash.com/profile-fb-1508491082-ae77f53e9ac3.jpg?ixlib=rb-0.3.5&q=80&fm=jpg&crop=faces&cs=tinysrgb&fit=crop&h=32&w=32&s=751bf6a557763648d52ffd7e60e79436", + "medium": "https://images.unsplash.com/profile-fb-1508491082-ae77f53e9ac3.jpg?ixlib=rb-0.3.5&q=80&fm=jpg&crop=faces&cs=tinysrgb&fit=crop&h=64&w=64&s=e46cd1c8713035f045130e1b093b981e", + "large": "https://images.unsplash.com/profile-fb-1508491082-ae77f53e9ac3.jpg?ixlib=rb-0.3.5&q=80&fm=jpg&crop=faces&cs=tinysrgb&fit=crop&h=128&w=128&s=352eabcf107c3ce95fe51a18485f116b" + }, + "instagram_username": null, + "total_collections": 0, + "total_likes": 0, + "total_photos": 21 + }, + "tags": [ + { + "title": "ice" + }, + { + "title": "bird" + }, + { + "title": "ice field" + }, + { + "title": "iceberg" + }, + { + "title": "snow" + }, + { + "title": "frozen" + }, + { + "title": "animal" + }, + { + "title": "wildlife" + }, + { + "title": "wild" + }, + { + "title": "antarctica" + }, + { + "title": "sunshine" + }, + { + "title": "daylight" + }, + { + "title": "wilderness" + }, + { + "title": "south pole" + }, + { + "title": "flock" + } + ], + "photo_tags": [ + { + "title": "ice" + }, + { + "title": "bird" + }, + { + "title": "ice field" + }, + { + "title": "iceberg" + }, + { + "title": "snow" + } + ] + } + ] +} \ No newline at end of file From b9ada93b3ade2b4268bdc898e2c67b156b4dba92 Mon Sep 17 00:00:00 2001 From: Denis Wernert Date: Mon, 8 Oct 2018 14:56:20 +0200 Subject: [PATCH 4/6] Removes what looks like tracking parameters --- searx/engines/unsplash.py | 20 ++++++++++++++++---- tests/unit/engines/test_unsplash.py | 6 +++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 04a943297..2e8d6fdfc 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -10,7 +10,7 @@ @parse url, title, img_src, thumbnail_src """ -from searx.url_utils import urlencode +from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl from json import loads url = 'https://unsplash.com/' @@ -20,6 +20,18 @@ page_size = 20 paging = True +def clean_url(url): + parsed = urlparse(url) + query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']] + + return urlunparse((parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + urlencode(query), + parsed.fragment)) + + def request(query, params): params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size}) return params @@ -32,9 +44,9 @@ def response(resp): if 'results' in json_data: for result in json_data['results']: results.append({'template': 'images.html', - 'url': result['links']['html'], - 'thumbnail_src': result['urls']['thumb'], - 'img_src': result['urls']['raw'], + 'url': clean_url(result['links']['html']), + 'thumbnail_src': clean_url(result['urls']['thumb']), + 'img_src': clean_url(result['urls']['raw']), 'title': result['description'], 'content': ''}) return results diff --git a/tests/unit/engines/test_unsplash.py b/tests/unit/engines/test_unsplash.py index cb9e683c4..4501de906 100644 --- a/tests/unit/engines/test_unsplash.py +++ b/tests/unit/engines/test_unsplash.py @@ -32,7 +32,7 @@ class TestUnsplashEngine(SearxTestCase): self.assertEqual(result[0]['title'], 'low angle photography of swimming penguin') self.assertEqual(result[0]['url'], 'https://unsplash.com/photos/FY8d721UO_4') self.assertEqual(result[0]['thumbnail_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&q=80\ -&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=a9b9e56e63efc6f4611a87ce7e9a48f8') - self.assertEqual(result[0]['img_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5\ -&ixid=eyJhcHBfaWQiOjEyMDd9&s=095c5fc319c5a77c705f49ad63e0f195') +&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max') + self.assertEqual(result[0]['img_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c\ +?ixlib=rb-0.3.5') self.assertEqual(result[0]['content'], '') From c1af891707a62228fd6fe5a6f3a9961a360a5173 Mon Sep 17 00:00:00 2001 From: pelag0s Date: Sat, 12 Jan 2019 11:06:44 +0100 Subject: [PATCH 5/6] docker: allow configuring http proxy via env variables --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 1680c7bb2..51ecf9bd3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,9 @@ RUN adduser -D -h /usr/local/searx -s /bin/sh searx searx \ && echo 'sed -i "s|base_url : False|base_url : $BASE_URL|g" searx/settings.yml' >> run.sh \ && echo 'sed -i "s/image_proxy : False/image_proxy : $IMAGE_PROXY/g" searx/settings.yml' >> run.sh \ && echo 'sed -i "s/ultrasecretkey/`openssl rand -hex 16`/g" searx/settings.yml' >> run.sh \ + && echo 'if [ -n "$HTTP_PROXY_URL" ] || [ -n "$HTTPS_PROXY_URL" ]; then' >> run.sh \ + && echo ' sed -i "s~^# proxies :~ proxies:\\n http: ${HTTP_PROXY_URL}\\n https: ${HTTPS_PROXY_URL}\\n~" searx/settings.yml' >> run.sh \ + && echo 'fi' >> run.sh \ && echo 'python searx/webapp.py' >> run.sh \ && chmod +x run.sh From 2061c59ca1f857ae9b9744c2b873c09f18eae7a6 Mon Sep 17 00:00:00 2001 From: pelag0s Date: Mon, 14 Jan 2019 09:24:57 +0100 Subject: [PATCH 6/6] Define http proxy variables in Dockerfile before using them --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 51ecf9bd3..95e21813f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM alpine:3.8 LABEL maintainer="searx " LABEL description="A privacy-respecting, hackable metasearch engine." -ENV BASE_URL=False IMAGE_PROXY=False +ENV BASE_URL=False IMAGE_PROXY=False HTTP_PROXY_URL= HTTPS_PROXY_URL= EXPOSE 8888 WORKDIR /usr/local/searx CMD ["/sbin/tini","--","/usr/local/searx/run.sh"]