Mirror of https://github.com/benbusby/whoogle-search (synced 2024-11-18 09:25:33 +00:00)
9317d9217f
* Expand `/window` endpoint to behave like a proxy

  The `/window` endpoint was previously used as a type of proxy, but only
  for removing JavaScript from the result page. This expands the existing
  functionality to allow users to proxy search result pages (with or
  without JavaScript) through their Whoogle instance.

* Implement filtering of remote content from CSS

* Condense NoJS feature into Anonymous View

  Enabling NoJS now removes JavaScript from the Anonymous View, rather
  than creating a separate option.

* Exclude 'data:' URLs from filter, add translations

  The 'data:' URL must be allowed in results to view certain elements on
  the page, such as stars for review-based results. Add translations for
  the remaining languages.

* Add cssutils to requirements
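
For context, a minimal sketch of how a proxy-style endpoint like the one
described above might look. The route shape, parameter names (`location`,
`nojs`), and filtering logic here are assumptions for illustration only;
the actual /window implementation in the repo differs:

    from flask import Flask, Response, request
    from bs4 import BeautifulSoup
    import requests

    app = Flask(__name__)

    @app.route('/window')
    def window():
        # Fetch the requested page server-side so the client's IP address
        # is never exposed to the upstream site
        target = request.args.get('location', '')   # assumed parameter name
        remove_js = request.args.get('nojs', '0') == '1'  # assumed flag

        page = requests.get(target).text

        if remove_js:
            # Strip <script> tags before returning the page to the client
            soup = BeautifulSoup(page, 'html.parser')
            for script in soup.find_all('script'):
                script.decompose()
            page = str(soup)

        return Response(page, mimetype='text/html')
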
62 lines · 1.8 KiB · Python
from bs4 import BeautifulSoup as bsoup
from flask import Request
import hashlib
import os
from requests import exceptions, get
from urllib.parse import urlparse
def gen_file_hash(path: str, static_file: str) -> str:
    # Hash the file's contents and insert the first 8 hex digits of the
    # md5 digest into its name, so the name changes whenever the
    # contents do
    with open(os.path.join(path, static_file), 'rb') as f:
        file_hash = hashlib.md5(f.read()).hexdigest()[:8]
    filename_split = os.path.splitext(static_file)

    return filename_split[0] + '.' + file_hash + filename_split[-1]
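
# Hypothetical usage (hash value illustrative), presumably for
# cache busting static assets:
#   gen_file_hash('app/static/css', 'search.css')  # -> 'search.1a2b3c4d.css'
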
def read_config_bool(var: str) -> bool:
    # Interpret an environment variable as a boolean: numeric strings are
    # cast through int ('0' -> False, '1' -> True); anything else,
    # including an unset variable, defaults to False
    val = os.getenv(var, '0')
    if val.isdigit():
        return bool(int(val))
    return False
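
# Hypothetical usage (the variable name here is made up):
#   os.environ['WHOOGLE_DARK'] = '1'
#   read_config_bool('WHOOGLE_DARK')  # -> True
#   read_config_bool('UNSET_VAR')     # -> False
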
def get_client_ip(r: Request) -> str:
    # Use the X-Forwarded-For header if present (i.e. when running behind
    # a reverse proxy), otherwise fall back to the direct remote address
    if r.environ.get('HTTP_X_FORWARDED_FOR') is None:
        return r.environ['REMOTE_ADDR']
    return r.environ['HTTP_X_FORWARDED_FOR']
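
# Behavior sketch: behind nginx configured with
# 'proxy_set_header X-Forwarded-For $remote_addr;', the forwarded address
# is returned; when the app is reached directly, REMOTE_ADDR (the socket
# peer) is returned instead.
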
def get_request_url(url: str) -> str:
    # Upgrade the url's scheme to https when the HTTPS_ONLY env var is
    # enabled, replacing only the leading 'http://'
    if read_config_bool('HTTPS_ONLY'):
        return url.replace('http://', 'https://', 1)

    return url
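
# Hypothetical usage:
#   os.environ['HTTPS_ONLY'] = '1'
#   get_request_url('http://localhost:5000/search')
#   # -> 'https://localhost:5000/search'
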
def check_for_update(version_url: str, current: str) -> str:
    # Check for the latest version of Whoogle, returning the newer
    # version string if one is available and '' otherwise
    try:
        update = bsoup(get(version_url).text, 'html.parser')
        latest = update.select_one('[class="Link--primary"]').string[1:]
        current_v = int(''.join(filter(str.isdigit, current)))
        latest_v = int(''.join(filter(str.isdigit, latest)))
        has_update = '' if current_v >= latest_v else latest
    except (exceptions.ConnectionError, AttributeError):
        # Ignore failures, assume the current version is up to date
        has_update = ''

    return has_update
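
# Hypothetical usage, assuming the release page markup contains the
# 'Link--primary' element the selector expects (version numbers
# illustrative):
#   check_for_update(
#       'https://github.com/benbusby/whoogle-search/releases', '0.8.3')
#   # -> '0.8.4' if a newer tag exists, '' otherwise
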
def get_abs_url(url: str, page_url: str) -> str:
    # Creates a valid absolute URL from a partial or relative URL
    if url.startswith('//'):
        # Protocol-relative: default to https
        return f'https:{url}'
    elif url.startswith('/'):
        # Root-relative: prepend the page's scheme and host
        parsed = urlparse(page_url)
        return f'{parsed.scheme}://{parsed.netloc}{url}'
    elif url.startswith('./'):
        # Document-relative: append to the page url
        return f'{page_url}{url[2:]}'
    return url
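
# Hypothetical examples (domains illustrative):
#   get_abs_url('//cdn.example.com/a.png', 'https://example.com/page')
#   # -> 'https://cdn.example.com/a.png'
#   get_abs_url('/favicon.ico', 'https://example.com/page')
#   # -> 'https://example.com/favicon.ico'
#   get_abs_url('./style.css', 'https://example.com/dir/')
#   # -> 'https://example.com/dir/style.css'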