diff --git a/app/filter.py b/app/filter.py index acdd570..6708368 100644 --- a/app/filter.py +++ b/app/filter.py @@ -30,9 +30,11 @@ class Filter: return page def clean(self, soup): - # Remove all ads - main_divs = soup.find('div', {'id': 'main'}) - if main_divs is not None: + def remove_ads(): + main_divs = soup.find('div', {'id': 'main'}) + if main_divs is None: + return + result_divs = main_divs.findAll('div', recursive=False) # Only ads/sponsored content use classes in the list of result divs @@ -40,78 +42,92 @@ class Filter: for div in ad_divs: div.decompose() - # Remove unnecessary button(s) - for button in soup.find_all('button'): - button.decompose() - - # Remove svg logos - for svg in soup.find_all('svg'): - svg.decompose() - - # Update logo - logo = soup.find('a', {'class': 'l'}) - if logo and self.mobile: - logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px;' - - # Fix search bar length on mobile - try: - search_bar = soup.find('header').find('form').find('div') - search_bar['style'] = 'width: 100%;' - except AttributeError: - pass - - # Replace hrefs with only the intended destination (no "utm" type tags) - for a in soup.find_all('a', href=True): - href = a['href'] - if '/advanced_search' in href: - a.decompose() - continue - - if 'url?q=' in href: - # Strip unneeded arguments - result_link = urlparse.urlparse(href) - result_link = parse_qs(result_link.query)['q'][0] - - parsed_link = urlparse.urlparse(result_link) - link_args = parse_qs(parsed_link.query) - safe_args = {} - - for arg in link_args.keys(): - if arg in SKIP_ARGS: - continue - - safe_args[arg] = link_args[arg] - - # Remove original link query and replace with filtered args - result_link = result_link.replace(parsed_link.query, '') - if len(safe_args) > 1: - result_link = result_link + urlparse.urlencode(safe_args) - else: - result_link = result_link.replace('?', '') - - a['href'] = result_link - - # Add no-js option - if self.nojs: - nojs_link = soup.new_tag('a') - nojs_link['href'] = '/window?location=' + result_link - nojs_link['style'] = 'display:block;width:100%;' - nojs_link.string = 'NoJS Link: ' + nojs_link['href'] - a.append(BeautifulSoup('


', 'html.parser')) - a.append(nojs_link) - - # Set up dark mode if active - if self.dark: - soup.find('html')['style'] = 'scrollbar-color: #333 #111;' - for input_element in soup.findAll('input'): - input_element['style'] = 'color:#fff;' - - # Ensure no extra scripts passed through - try: - for script in soup('script'): - script.decompose() - soup.find('div', id='sfooter').decompose() - except Exception: - pass - + def sync_images(): + for img in soup.find_all('img'): + if img['src'].startswith('//'): + img['src'] = 'https:' + img['src'] + + img['src'] = '/tmp?image_url=' + img['src'] + + def update_styling(): + # Remove unnecessary button(s) + for button in soup.find_all('button'): + button.decompose() + + # Remove svg logos + for svg in soup.find_all('svg'): + svg.decompose() + + # Update logo + logo = soup.find('a', {'class': 'l'}) + if logo and self.mobile: + logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; ' \ + 'font-size:18px; ' + + # Fix search bar length on mobile + try: + search_bar = soup.find('header').find('form').find('div') + search_bar['style'] = 'width: 100%;' + except AttributeError: + pass + + # Set up dark mode if active + if self.dark: + soup.find('html')['style'] = 'scrollbar-color: #333 #111;' + for input_element in soup.findAll('input'): + input_element['style'] = 'color:#fff;' + + def update_links(): + # Replace hrefs with only the intended destination (no "utm" type tags) + for a in soup.find_all('a', href=True): + href = a['href'] + if '/advanced_search' in href: + a.decompose() + continue + + if 'url?q=' in href: + # Strip unneeded arguments + result_link = urlparse.urlparse(href) + result_link = parse_qs(result_link.query)['q'][0] + + parsed_link = urlparse.urlparse(result_link) + link_args = parse_qs(parsed_link.query) + safe_args = {} + + for arg in link_args.keys(): + if arg in SKIP_ARGS: + continue + + safe_args[arg] = link_args[arg] + + # Remove original link query and replace with filtered args + result_link = result_link.replace(parsed_link.query, '') + if len(safe_args) > 1: + result_link = result_link + urlparse.urlencode(safe_args) + else: + result_link = result_link.replace('?', '') + + a['href'] = result_link + + # Add no-js option + if self.nojs: + nojs_link = soup.new_tag('a') + nojs_link['href'] = '/window?location=' + result_link + nojs_link['style'] = 'display:block;width:100%;' + nojs_link.string = 'NoJS Link: ' + nojs_link['href'] + a.append(BeautifulSoup('


', 'html.parser')) + a.append(nojs_link) + + # Ensure no extra scripts passed through + try: + for script in soup('script'): + script.decompose() + soup.find('div', id='sfooter').decompose() + except Exception: + pass + + remove_ads() + sync_images() + update_styling() + update_links() return soup diff --git a/app/request.py b/app/request.py index 6325def..bb3be7a 100644 --- a/app/request.py +++ b/app/request.py @@ -1,5 +1,4 @@ from app import rhyme -from app.filter import Filter from io import BytesIO import pycurl import urllib.parse as urlparse @@ -60,7 +59,7 @@ class Request: def __getitem__(self, name): return getattr(self, name) - def send(self, base_url=SEARCH_URL, query=''): + def send(self, base_url=SEARCH_URL, query='', return_bytes=False): response_header = [] b_obj = BytesIO() @@ -73,4 +72,7 @@ class Request: crl.perform() crl.close() - return b_obj.getvalue().decode('utf-8', 'ignore') + if return_bytes: + return b_obj.getvalue() + else: + return b_obj.getvalue().decode('utf-8', 'ignore') diff --git a/app/routes.py b/app/routes.py index a2a8603..db66656 100644 --- a/app/routes.py +++ b/app/routes.py @@ -2,7 +2,8 @@ from app import app from app.filter import Filter from app.request import Request, gen_query from bs4 import BeautifulSoup -from flask import g, make_response, request, redirect, render_template +from flask import g, make_response, request, redirect, render_template, send_file +import io import json import os import urllib.parse as urlparse @@ -18,6 +19,11 @@ def before_request_func(): g.user_request = Request(request.headers.get('User-Agent')) +# @app.after_request +# def after_request(response): +# return response + + @app.route('/', methods=['GET']) def index(): bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff' @@ -87,6 +93,21 @@ def imgres(): return redirect(request.args.get('imgurl')) +@app.route('/tmp') +def tmp(): + file_data = g.user_request.send(base_url=request.args.get('image_url'), return_bytes=True) + tmp_mem = io.BytesIO() + tmp_mem.write(file_data) + tmp_mem.seek(0) + + return send_file( + tmp_mem, + as_attachment=True, + attachment_filename='tmp.png', + mimetype='image/png' + ) + + @app.route('/window') def window(): get_body = g.user_request.send(base_url=request.args.get('location'))