@ -30,9 +30,11 @@ class Filter:
return page
def clean ( self , soup ) :
# Remove all ads
main_divs = soup . find ( ' div ' , { ' id ' : ' main ' } )
if main_divs is not None :
def remove_ads ( ) :
main_divs = soup . find ( ' div ' , { ' id ' : ' main ' } )
if main_divs is None :
return
result_divs = main_divs . findAll ( ' div ' , recursive = False )
# Only ads/sponsored content use classes in the list of result divs
@ -40,78 +42,92 @@ class Filter:
for div in ad_divs :
div . decompose ( )
# Remove unnecessary button(s)
for button in soup . find_all ( ' button ' ) :
button . decompose ( )
# Remove svg logos
for svg in soup . find_all ( ' svg ' ) :
svg . decompose ( )
# Update logo
logo = soup . find ( ' a ' , { ' class ' : ' l ' } )
if logo and self . mobile :
logo [ ' style ' ] = ' display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px; '
# Fix search bar length on mobile
try :
search_bar = soup . find ( ' header ' ) . find ( ' form ' ) . find ( ' div ' )
search_bar [ ' style ' ] = ' width: 100 % ; '
except AttributeError :
pass
# Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup . find_all ( ' a ' , href = True ) :
href = a [ ' href ' ]
if ' /advanced_search ' in href :
a . decompose ( )
continue
if ' url?q= ' in href :
# Strip unneeded arguments
result_link = urlparse . urlparse ( href )
result_link = parse_qs ( result_link . query ) [ ' q ' ] [ 0 ]
parsed_link = urlparse . urlparse ( result_link )
link_args = parse_qs ( parsed_link . query )
safe_args = { }
for arg in link_args . keys ( ) :
if arg in SKIP_ARGS :
continue
safe_args [ arg ] = link_args [ arg ]
# Remove original link query and replace with filtered args
result_link = result_link . replace ( parsed_link . query , ' ' )
if len ( safe_args ) > 1 :
result_link = result_link + urlparse . urlencode ( safe_args )
else :
result_link = result_link . replace ( ' ? ' , ' ' )
a [ ' href ' ] = result_link
# Add no-js option
if self . nojs :
nojs_link = soup . new_tag ( ' a ' )
nojs_link [ ' href ' ] = ' /window?location= ' + result_link
nojs_link [ ' style ' ] = ' display:block;width:100 % ; '
nojs_link . string = ' NoJS Link: ' + nojs_link [ ' href ' ]
a . append ( BeautifulSoup ( ' <br><hr><br> ' , ' html.parser ' ) )
a . append ( nojs_link )
# Set up dark mode if active
if self . dark :
soup . find ( ' html ' ) [ ' style ' ] = ' scrollbar-color: #333 #111; '
for input_element in soup . findAll ( ' input ' ) :
input_element [ ' style ' ] = ' color:#fff; '
# Ensure no extra scripts passed through
try :
for script in soup ( ' script ' ) :
script . decompose ( )
soup . find ( ' div ' , id = ' sfooter ' ) . decompose ( )
except Exception :
pass
def sync_images():
    """Rewrite every image source to go through the ``/tmp`` image path.

    Protocol-relative sources (``//host/path``) first get an explicit
    ``https:`` scheme; the resulting source is then appended to
    ``/tmp?image_url=``. Works in place on the ``soup`` of the enclosing
    scope and returns nothing.
    """
    # src=True skips <img> tags that carry no src attribute; indexing
    # those would raise KeyError and abort the whole clean-up pass.
    for img in soup.find_all('img', src=True):
        src = img['src']
        if src.startswith('//'):
            src = 'https:' + src
        # NOTE(review): src is appended unescaped, so an '&' in the image
        # URL may be read as a separate query argument - confirm against
        # the handler that serves /tmp.
        img['src'] = '/tmp?image_url=' + src
def update_styling():
    """Apply cosmetic adjustments to the result page in place.

    Removes ``<button>`` and ``<svg>`` elements, restyles the logo link
    when ``self.mobile`` is set, stretches the header search bar to full
    width, and applies dark-mode styling when ``self.dark`` is set.
    Elements that are absent from the page are skipped. Operates on the
    ``soup`` of the enclosing scope and returns nothing.
    """
    # Remove unnecessary button(s). Kept as a separate pass from the svg
    # removal so that svgs nested inside a button are already gone before
    # the second lookup runs.
    for button in soup.find_all('button'):
        button.decompose()

    # Remove svg logos
    for svg in soup.find_all('svg'):
        svg.decompose()

    # Update logo
    logo = soup.find('a', {'class': 'l'})
    if logo is not None and self.mobile:
        logo['style'] = (
            'display:flex; justify-content:center; align-items:center; '
            'color:#685e79; font-size:18px;'
        )

    # Fix search bar length on mobile. Each lookup may come back empty;
    # checking step by step also covers a form without a <div>, where
    # assigning to None would raise TypeError rather than AttributeError.
    header = soup.find('header')
    form = header.find('form') if header is not None else None
    search_bar = form.find('div') if form is not None else None
    if search_bar is not None:
        search_bar['style'] = 'width: 100%;'

    # Set up dark mode if active
    if self.dark:
        html = soup.find('html')
        if html is not None:  # fragments may have no <html> element
            html['style'] = 'scrollbar-color: #333 #111;'
        for input_element in soup.find_all('input'):
            input_element['style'] = 'color:#fff;'
def update_links():
    """Point result anchors directly at their destination, in place.

    For every ``<a href=...>`` in the enclosing scope's ``soup``:

    * anchors whose href contains ``/advanced_search`` are removed;
    * anchors whose href contains ``url?q=`` are rewritten to the URL
      carried in the ``q`` parameter, with every query argument listed in
      ``SKIP_ARGS`` dropped from that destination;
    * when ``self.nojs`` is set, a second ``/window?location=...`` link
      is appended inside the rewritten anchor.

    Anchors matching neither pattern, and redirect links with a blank
    ``q`` value, are left untouched.
    """
    # Replace hrefs with only the intended destination (no "utm" type tags)
    for a in soup.find_all('a', href=True):
        href = a['href']
        if '/advanced_search' in href:
            a.decompose()
            continue

        if 'url?q=' not in href:
            continue

        # parse_qs omits blank values, so a bare "url?q=" yields no target;
        # skip it instead of failing on the missing key.
        targets = parse_qs(urlparse.urlparse(href).query).get('q')
        if not targets:
            continue

        # Strip unneeded arguments
        parsed_link = urlparse.urlparse(targets[0])
        safe_args = {
            arg: values
            for arg, values in parse_qs(parsed_link.query).items()
            if arg not in SKIP_ARGS
        }

        # Rebuild the URL from its parsed components so that only the
        # query part changes and it stays ahead of any fragment. A single
        # remaining argument is kept, and doseq=True is required because
        # parse_qs returns each value as a list.
        filtered_query = urlparse.urlencode(safe_args, doseq=True)
        result_link = parsed_link._replace(query=filtered_query).geturl()
        a['href'] = result_link

        # Add no-js option
        if self.nojs:
            nojs_link = soup.new_tag('a')
            # NOTE(review): result_link is appended unescaped, so its own
            # query arguments may be read as arguments of /window -
            # confirm against the handler that serves /window.
            nojs_link['href'] = '/window?location=' + result_link
            nojs_link['style'] = 'display:block;width:100%;'
            nojs_link.string = 'NoJS Link: ' + nojs_link['href']
            a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
            a.append(nojs_link)
# Ensure no extra scripts passed through
try :
for script in soup ( ' script ' ) :
script . decompose ( )
soup . find ( ' div ' , id = ' sfooter ' ) . decompose ( )
except Exception :
pass
remove_ads ( )
sync_images ( )
update_styling ( )
update_links ( )
return soup