Added better multilingual support, updated filter

Results page now includes a method for switching to "All Languages" from
whichever language is specified as the primary in the config (see #74).

Also removes the non-Whoogle links from the page footer, leaving only
the page navigation controls.

Added support for the date range filter on the results page, though I'd
still recommend using the ":past <unit>" query instead.
pull/86/head
Ben Busby 4 years ago
parent 6ec65f8754
commit 4324fcd8f8

@ -116,14 +116,11 @@ class Filter:
for script in soup('script'):
script.decompose()
# Remove google's language/time config
st_card = soup.find('div', id='st-card')
if st_card:
st_card.decompose()
footer = soup.find('div', id='sfooter')
# Update default footer and header
footer = soup.find('footer')
if footer:
footer.decompose()
# Remove divs that have multiple links beyond just page navigation
[_.decompose() for _ in footer.find_all('div', recursive=False) if len(_.find_all('a', href=True)) > 2]
header = soup.find('header')
if header:
@ -144,12 +141,12 @@ class Filter:
return
question_divs = [_ for _ in self.main_divs.find_all('div', recursive=False) if len(_.find_all('h2')) > 0]
for x in question_divs:
questions = [_ for _ in x.find_all('div', recursive=True) if _.text.endswith('?')]
for question_div in question_divs:
questions = [_ for _ in question_div.find_all('div', recursive=True) if _.text.endswith('?')]
for question in questions:
question['style'] = 'padding: 10px; font-style: italic;'
def update_element_src(self, element, mimetype):
def update_element_src(self, element, mime):
element_src = element['src']
if element_src.startswith('//'):
element_src = 'https:' + element_src
@ -163,7 +160,7 @@ class Filter:
return
element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \
'&type=' + urlparse.quote(mimetype)
'&type=' + urlparse.quote(mime)
# TODO: Non-mobile image results link to website instead of image
# if not self.mobile:
# img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser'))

@ -12,7 +12,7 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
# Valid query params
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source']
def gen_user_agent(is_mobile):
@ -28,11 +28,22 @@ def gen_user_agent(is_mobile):
def gen_query(query, args, config, near_city=None):
param_dict = {key: '' for key in VALID_PARAMS}
# Use :past(hour/day/week/month/year) if available
# example search "new restaurants :past month"
if ':past' in query:
sub_lang = ''
if ':past' in query and 'tbs' not in args:
time_range = str.strip(query.split(':past', 1)[-1])
param_dict['tbs'] = '&tbs=qdr:' + str.lower(time_range[0])
param_dict['tbs'] = '&tbs=' + ('qdr:' + str.lower(time_range[0]))
elif 'tbs' in args:
result_tbs = args.get('tbs')
param_dict['tbs'] = '&tbs=' + result_tbs
# Occasionally the 'tbs' param provided by google also contains a field for 'lr', but formatted
# strangely. This is a (admittedly not very elegant) solution for this.
# Ex/ &tbs=qdr:h,lr:lang_1pl --> the lr param needs to be extracted and have the "1" digit removed in this case
sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:len(sub_lang[0])] if len(sub_lang) > 0 else ''
# Ensure search query is parsable
query = urlparse.quote(query)
@ -49,13 +60,20 @@ def gen_query(query, args, config, near_city=None):
if near_city:
param_dict['near'] = '&near=' + urlparse.quote(near_city)
# Set language for results (lr) and interface (hl)
param_dict['lr'] = '&lr=' + config.lang + '&hl=' + config.lang.replace('lang_', '')
# Set language for results (lr) if source isn't set, otherwise use the result
# language param provided by google (but with the strange digit(s) removed)
if 'source' in args:
param_dict['source'] = '&source=' + args.get('source')
param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else ''
else:
param_dict['lr'] = '&lr=' + config.lang
param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
param_dict['hl'] = '&hl=' + config.lang.replace('lang_', '')
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
for val in param_dict.values():
if not val or val is None:
if not val:
continue
query += val

@ -87,6 +87,7 @@ def after_request_func(response):
for key in session_list:
session.pop(key)
response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
return response

Loading…
Cancel
Save