whoogle-search/app/utils/misc.py
Ben Busby 9317d9217f
Support proxying results through Whoogle (aka "anonymous view") (#682)
* Expand `/window` endpoint to behave like a proxy

The `/window` endpoint was previously used as a limited proxy that only
removed JavaScript from the result page. This expands the existing
functionality so that users can proxy search result pages (with or without
JavaScript) through their Whoogle instance.

* Implement filtering of remote content from css

* Condense NoJS feature into Anonymous View

Enabling NoJS now removes JavaScript from the Anonymous View, rather
than creating a separate option.

* Exclude 'data:' URLs from filter, add translations

The 'data:' URL scheme must be allowed in results to render certain
elements on the page, such as stars for review-based results.

Add translations for the remaining languages.

* Add cssutils to requirements
2022-04-13 11:29:07 -06:00
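For context, the "anonymous view" described above amounts to fetching the result page server-side and rewriting it before handing it back to the user, so the destination site never sees the user's own request. Below is a minimal sketch of that idea using the same requests/BeautifulSoup imports this module already relies on; fetch_anonymous_view is a made-up name, and the commit's actual implementation lives in the app's routes and filtering code, not in this file:

from bs4 import BeautifulSoup as bsoup
from requests import get


def fetch_anonymous_view(target_url: str, nojs: bool) -> str:
    # Fetch the result page on behalf of the user, so the destination
    # site only ever sees the Whoogle instance's request
    page = bsoup(get(target_url).text, 'html.parser')

    if nojs:
        # NoJS mode: drop inline and external scripts from the proxied page
        for script in page.find_all('script'):
            script.decompose()

    return str(page)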
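The CSS filtering and the 'data:' exception can be sketched with cssutils, which this commit adds to requirements. This is only an illustrative approximation of the behavior described in the commit message, not the commit's actual filter code, and strip_remote_css_urls is a hypothetical name:

import cssutils


def strip_remote_css_urls(css_text: str) -> str:
    # Blank out any url() reference that would trigger a remote request,
    # but keep inline 'data:' URIs so embedded assets still render
    sheet = cssutils.parseString(css_text)

    def keep_data_only(url: str) -> str:
        return url if url.startswith('data:') else ''

    cssutils.replaceUrls(sheet, keep_data_only)
    return sheet.cssText.decode()

Because 'data:' URIs embed their content directly in the stylesheet, keeping them never triggers a remote request, which is why elements such as review stars can still render in the proxied view.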


from bs4 import BeautifulSoup as bsoup
from flask import Request
import hashlib
import os
from requests import exceptions, get
from urllib.parse import urlparse


def gen_file_hash(path: str, static_file: str) -> str:
    # Append a truncated md5 of the file's contents to its name so that
    # updated static files bypass stale browser caches
    file_contents = open(os.path.join(path, static_file), 'rb').read()
    file_hash = hashlib.md5(file_contents).hexdigest()[:8]
    filename_split = os.path.splitext(static_file)

    return filename_split[0] + '.' + file_hash + filename_split[-1]


def read_config_bool(var: str) -> bool:
    # Interpret a numeric environment variable as a boolean ('0' -> False)
    val = os.getenv(var, '0')
    if val.isdigit():
        return bool(int(val))
    return False


def get_client_ip(r: Request) -> str:
    # Prefer the X-Forwarded-For header when running behind a reverse proxy
    if r.environ.get('HTTP_X_FORWARDED_FOR') is None:
        return r.environ['REMOTE_ADDR']
    else:
        return r.environ['HTTP_X_FORWARDED_FOR']


def get_request_url(url: str) -> str:
    # Note: any non-empty value for HTTPS_ONLY (including '0') is truthy here
    if os.getenv('HTTPS_ONLY', False):
        return url.replace('http://', 'https://', 1)

    return url


def check_for_update(version_url: str, current: str) -> int:
    # Check for the latest version of Whoogle; returns the latest version
    # number if an update is available, otherwise an empty string
    try:
        update = bsoup(get(version_url).text, 'html.parser')
        latest = update.select_one('[class="Link--primary"]').string[1:]
        current = int(''.join(filter(str.isdigit, current)))
        latest = int(''.join(filter(str.isdigit, latest)))
        has_update = '' if current >= latest else latest
    except (exceptions.ConnectionError, AttributeError):
        # Ignore failures, assume current version is up to date
        has_update = ''

    return has_update


def get_abs_url(url, page_url):
    # Creates a valid absolute URL using a partial or relative URL
    if url.startswith('//'):
        return f'https:{url}'
    elif url.startswith('/'):
        return f'{urlparse(page_url).netloc}{url}'
    elif url.startswith('./'):
        return f'{page_url}{url[2:]}'
    return url