def clean_css(css: str, page_url: str) -> str:
    """Rewrites the URLs in a CSS string so they are proxied by Whoogle.

    Every URL that cssutils reports for the stylesheet is passed through
    get_abs_url together with page_url, and the result is substituted into
    the CSS as a link to the /element endpoint. URLs whose absolute form
    starts with 'data:' are left untouched.

    Args:
        css: The CSS string
        page_url: The URL of the page the CSS belongs to; forwarded to
            get_abs_url for each stylesheet URL (presumably so relative
            URLs can be resolved -- confirm against app.utils.misc)

    Returns:
        str: The filtered CSS, with URLs proxied through Whoogle
    """
    sheet = cssutils.parseString(css)

    # Build the url -> proxied-url table first so that each distinct URL is
    # handled exactly once, no matter how often it appears in the sheet.
    proxied = {}
    for url in cssutils.getUrls(sheet):
        # An empty URL would match at every position of the CSS string
        if not url or url in proxied:
            continue

        abs_url = get_abs_url(url, page_url)
        if abs_url.startswith('data:'):
            continue

        # Percent-encode the target so characters such as '&', '#' or '?'
        # stay inside the "url" query parameter instead of ending it early
        proxied[url] = (
            '/element?type=image/png&url=' + urlparse.quote(abs_url, safe='')
        )

    if not proxied:
        return css

    # Substitute in a single pass: text that was already rewritten is never
    # scanned again, so a URL contained in its own replacement (or in
    # another URL) cannot be proxied twice. Longest URLs are tried first so
    # a shorter URL never claims the prefix of a longer one.
    pattern = re.compile('|'.join(
        re.escape(url) for url in sorted(proxied, key=len, reverse=True)
    ))
    return pattern.sub(lambda match: proxied[match.group(0)], css)
self.update_styling(soup) self.remove_block_tabs(soup) @@ -264,7 +299,7 @@ class Filter: # enabled parent.decompose() - def update_element_src(self, element: Tag, mime: str) -> None: + def update_element_src(self, element: Tag, mime: str, attr='src') -> None: """Encrypts the original src of an element and rewrites the element src to use the "/element?src=" pass-through. @@ -272,10 +307,12 @@ class Filter: None (The soup element is modified directly) """ - src = element['src'] + src = element[attr].split(' ')[0] if src.startswith('//'): src = 'https:' + src + elif src.startswith('data:'): + return if src.startswith(LOGO_URL): # Re-brand with Whoogle logo @@ -287,9 +324,29 @@ class Filter: element['src'] = BLANK_B64 return - element['src'] = f'{Endpoint.element}?url=' + self.encrypt_path( - src, - is_element=True) + '&type=' + urlparse.quote(mime) + element[attr] = f'{self.root_url}/{Endpoint.element}?url=' + ( + self.encrypt_path( + src, + is_element=True + ) + '&type=' + urlparse.quote(mime) + ) + + def update_css(self, soup) -> None: + """Updates URLs used in inline styles to be proxied by Whoogle + using the /element endpoint. + + Returns: + None (The soup element is modified directly) + + """ + # Filter all