Merge pull request #746 from kvch/moar-time-range-support

Support time range search in more engines
dependabot/pip/master/sphinx-6.1.3
Adam Tauber 8 years ago committed by GitHub
commit e23c8f954b

@ -24,11 +24,16 @@ import re
categories = ['images'] categories = ['images']
paging = True paging = True
safesearch = True safesearch = True
time_range_support = True
# search-url # search-url
base_url = 'https://www.bing.com/' base_url = 'https://www.bing.com/'
search_string = 'images/search?{query}&count=10&first={offset}' search_string = 'images/search?{query}&count=10&first={offset}'
time_range_string = '&qft=+filterui:age-lt{interval}'
thumb_url = "https://www.bing.com/th?id={ihk}" thumb_url = "https://www.bing.com/th?id={ihk}"
time_range_dict = {'day': '1440',
'week': '10080',
'month': '43200'}
# safesearch definitions # safesearch definitions
safesearch_types = {2: 'STRICT', safesearch_types = {2: 'STRICT',
@ -58,6 +63,8 @@ def request(query, params):
'&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
params['url'] = base_url + search_path params['url'] = base_url + search_path
if params['time_range'] in time_range_dict:
params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
return params return params

@ -22,10 +22,15 @@ from searx.utils import list_get
categories = ['news'] categories = ['news']
paging = True paging = True
language_support = True language_support = True
time_range_support = True
# search-url # search-url
base_url = 'https://www.bing.com/' base_url = 'https://www.bing.com/'
search_string = 'news/search?{query}&first={offset}&format=RSS' search_string = 'news/search?{query}&first={offset}&format=RSS'
search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS'
time_range_dict = {'day': '7',
'week': '8',
'month': '9'}
# remove click # remove click
@ -46,6 +51,19 @@ def image_url_cleanup(url_string):
return url_string return url_string
def _get_url(query, language, offset, time_range):
    """Build the full bing-news search URL.

    When *time_range* is one of the supported ranges ('day'/'week'/'month'),
    the interval-filtered search string is used; otherwise the plain one.
    """
    # The query-string part is identical for both templates — encode it once.
    query_args = urlencode({'q': query, 'setmkt': language})
    if time_range not in time_range_dict:
        return base_url + search_string.format(query=query_args, offset=offset)
    return base_url + search_string_with_time.format(
        query=query_args,
        offset=offset,
        interval=time_range_dict[time_range])
# do search-request # do search-request
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1 offset = (params['pageno'] - 1) * 10 + 1
@ -55,11 +73,7 @@ def request(query, params):
else: else:
language = params['language'].replace('_', '-') language = params['language'].replace('_', '-')
search_path = search_string.format( params['url'] = _get_url(query, language, offset, params['time_range'])
query=urlencode({'q': query, 'setmkt': language}),
offset=offset)
params['url'] = base_url + search_path
return params return params

@ -14,6 +14,7 @@
from urllib import urlencode from urllib import urlencode
from json import loads from json import loads
from time import time
import re import re
from searx.engines import logger from searx.engines import logger
@ -24,21 +25,31 @@ categories = ['images']
url = 'https://www.flickr.com/' url = 'https://www.flickr.com/'
search_url = url + 'search?{query}&page={page}' search_url = url + 'search?{query}&page={page}'
time_range_url = '&min_upload_date={start}&max_upload_date={end}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
paging = True paging = True
time_range_support = True
time_range_dict = {'day': 60 * 60 * 24,
'week': 60 * 60 * 24 * 7,
'month': 60 * 60 * 24 * 7 * 4}
def build_flickr_url(user_id, photo_id): def build_flickr_url(user_id, photo_id):
return photo_url.format(userid=user_id, photoid=photo_id) return photo_url.format(userid=user_id, photoid=photo_id)
def request(query, params): def _get_time_range_url(time_range):
params['url'] = search_url.format(query=urlencode({'text': query}), if time_range in time_range_dict:
page=params['pageno']) return time_range_url.format(start=int(time()) - time_range_dict[time_range], end=int(time()))
return ''
def request(query, params):
params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno'])
+ _get_time_range_url(params['time_range']))
return params return params

@ -17,10 +17,15 @@ from searx.utils import list_get
categories = ['videos', 'music'] categories = ['videos', 'music']
paging = True paging = True
language_support = False language_support = False
time_range_support = True
# search-url # search-url
base_url = 'https://www.youtube.com/results' base_url = 'https://www.youtube.com/results'
search_url = base_url + '?search_query={query}&page={page}' search_url = base_url + '?search_query={query}&page={page}'
time_range_url = '&sp=EgII{time_range}%253D%253D'
time_range_dict = {'day': 'Ag',
'week': 'Aw',
'month': 'BA'}
embedded_url = '<iframe width="540" height="304" ' +\ embedded_url = '<iframe width="540" height="304" ' +\
'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\ 'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
@ -47,6 +52,8 @@ def extract_text_from_dom(result, xpath):
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=quote_plus(query), params['url'] = search_url.format(query=quote_plus(query),
page=params['pageno']) page=params['pageno'])
if params['time_range'] in time_range_dict:
params['url'] += time_range_url.format(time_range=time_range_dict[params['time_range']])
return params return params

@ -158,6 +158,7 @@
<th>{{ _("Engine name") }}</th> <th>{{ _("Engine name") }}</th>
<th>{{ _("Shortcut") }}</th> <th>{{ _("Shortcut") }}</th>
<th>{{ _("SafeSearch") }}</th> <th>{{ _("SafeSearch") }}</th>
<th>{{ _("Time range") }}</th>
<th>{{ _("Avg. time") }}</th> <th>{{ _("Avg. time") }}</th>
<th>{{ _("Max time") }}</th> <th>{{ _("Max time") }}</th>
{% else %} {% else %}
@ -179,6 +180,7 @@
<th>{{ search_engine.name }}</th> <th>{{ search_engine.name }}</th>
<td>{{ shortcuts[search_engine.name] }}</td> <td>{{ shortcuts[search_engine.name] }}</td>
<td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td> <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
{% else %} {% else %}

@ -13,6 +13,7 @@ class TestBingImagesEngine(SearxTestCase):
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['language'] = 'fr_FR' dicto['language'] = 'fr_FR'
dicto['safesearch'] = 1 dicto['safesearch'] = 1
dicto['time_range'] = ''
params = bing_images.request(query, dicto) params = bing_images.request(query, dicto)
self.assertTrue('url' in params) self.assertTrue('url' in params)
self.assertTrue(query in params['url']) self.assertTrue(query in params['url'])

@ -12,6 +12,7 @@ class TestBingNewsEngine(SearxTestCase):
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['language'] = 'fr_FR' dicto['language'] = 'fr_FR'
dicto['time_range'] = ''
params = bing_news.request(query, dicto) params = bing_news.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])

@ -15,6 +15,7 @@ class TestFlickrNoapiEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['time_range'] = ''
params = flickr_noapi.request(query, dicto) params = flickr_noapi.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])

@ -11,6 +11,7 @@ class TestYoutubeNoAPIEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 0 dicto['pageno'] = 0
dicto['time_range'] = ''
params = youtube_noapi.request(query, dicto) params = youtube_noapi.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])

Loading…
Cancel
Save