import base64
from bs4 import BeautifulSoup as bsoup
from cryptography.fernet import Fernet
from flask import Request
import hashlib
import os
import re
from requests import exceptions, get
from urllib.parse import urlparse

# DuckDuckGo's favicon fetching endpoint
ddg_favicon_site = 'http://icons.duckduckgo.com/ip2'

# 1x1 transparent GIF, decoded from base64 at import time
empty_gif = base64.b64decode(
    'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')

# Small PNG placeholder image, returned when a favicon can't be fetched
placeholder_img = base64.b64decode(
    'iVBORw0KGgoAAAANSUhEUgAAABkAAAAZCAYAAADE6YVjAAABF0lEQVRIS8XWPw9EMBQA8Eok'
    'JBKrMFqMBt//GzAYLTZ/VomExPDu6uLiaPteqVynBn0/75W2Vp7nEIYhe6p1XcespmmAd7Is'
    'M+4URcGiKPogvMMvmIS2eN9MOMKbKWgf54SYgI4vKkTuQKJKSJErkKzUSkQHUs0lilAg7GMh'
    'ISoIA/hYMiKCKIA2soeowCWEMkfHtUmrXLcyGYYBfN9HF8djiaglWzNZlgVs21YisoAUaEXG'
    'cQTP86QIFgi7vyLzPIPjOEIEC7ANQv/4aZrAdd0TUtc1i+MYnSsMWjPp+x6CIPgJVlUVS5KE'
    'DKig/+wnVzM4pnzaGeHd+ENlWbI0TbVLJBtw2uMfP63wc9d2kDCWxi5Q27bsBerSJ9afJbeL'
    'AAAAAElFTkSuQmCC'
)


def fetch_favicon(url: str) -> bytes:
    """Fetches a favicon using DuckDuckGo's favicon retriever

    Args:
        url: The url to fetch the favicon from

    Returns:
        bytes - the favicon bytes, or a placeholder image if one
        was not returned

    """
    domain = urlparse(url).netloc

    response = get(f'{ddg_favicon_site}/{domain}.ico')

    if response.status_code == 200 and len(response.content) > 0:
        # The payload is already in memory, so it can be returned directly
        return response.content

    return placeholder_img
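
# Illustrative usage (hypothetical URL; this performs a real HTTP request):
#   icon = fetch_favicon('https://example.com/some/page')
#   # 'icon' holds the site's favicon bytes on success, or placeholder_img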


def gen_file_hash(path: str, static_file: str) -> str:
    # Hash the file contents so the generated name changes whenever the
    # static file does (basic cache busting)
    with open(os.path.join(path, static_file), 'rb') as f:
        file_hash = hashlib.md5(f.read()).hexdigest()[:8]
    filename_split = os.path.splitext(static_file)

    return filename_split[0] + '.' + file_hash + filename_split[-1]
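
# Illustrative usage (hypothetical paths): 'logo.png' in 'app/static/img'
# becomes something like 'logo.d41d8cd9.png':
#   hashed_name = gen_file_hash('app/static/img', 'logo.png')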


def read_config_bool(var: str, default: bool = False) -> bool:
    val = os.getenv(var, '1' if default else '0')

    # User can specify one of the following values as 'true' inputs (all
    # variants with upper case letters will also work):
    # ('true', 't', '1', 'yes', 'y')
    return val.lower() in ('true', 't', '1', 'yes', 'y')
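
# Illustrative usage (hypothetical variable names):
#   os.environ['WHOOGLE_EXAMPLE_FLAG'] = 'Yes'
#   read_config_bool('WHOOGLE_EXAMPLE_FLAG')        # True ('yes' variant)
#   read_config_bool('WHOOGLE_UNSET_FLAG', True)    # True (default applies)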


def get_client_ip(r: Request) -> str:
    # Prefer the proxy-supplied X-Forwarded-For value when present;
    # fall back to the direct socket address otherwise
    if r.environ.get('HTTP_X_FORWARDED_FOR') is None:
        return r.environ['REMOTE_ADDR']

    return r.environ['HTTP_X_FORWARDED_FOR']
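
# Illustrative usage: inside a Flask view, with 'request' as the active
# flask.Request object:
#   client_ip = get_client_ip(request)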


def get_request_url(url: str) -> str:
    # Parse HTTPS_ONLY as a boolean flag so values such as '0' or 'false'
    # disable the rewrite (any non-empty string would otherwise be truthy)
    if read_config_bool('HTTPS_ONLY'):
        return url.replace('http://', 'https://', 1)

    return url
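
# Illustrative usage (hypothetical URL):
#   get_request_url('http://localhost:5000/search')
#   # -> 'https://localhost:5000/search' if HTTPS_ONLY is enabled,
#   #    otherwise the URL is returned unchanged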


def get_proxy_host_url(r: Request, default: str, root=False) -> str:
    scheme = r.headers.get('X-Forwarded-Proto', 'https')
    http_host = r.headers.get('X-Forwarded-Host')

    full_path = r.full_path if not root else ''
    # Ensure a non-empty path begins with exactly one leading slash
    if full_path and not full_path.startswith('/'):
        full_path = f'/{full_path}'

    if http_host:
        prefix = os.environ.get('WHOOGLE_URL_PREFIX', '')
        if prefix:
            prefix = f'/{re.sub("[^0-9a-zA-Z]+", "", prefix)}'
        return f'{scheme}://{http_host}{prefix}{full_path}'

    return default
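
# Illustrative example (hypothetical header values): a request proxied with
#   X-Forwarded-Proto: https
#   X-Forwarded-Host: whoogle.example.com
# and WHOOGLE_URL_PREFIX='app1' yields 'https://whoogle.example.com/app1'
# plus the request path; without X-Forwarded-Host, 'default' is returned.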


def check_for_update(version_url: str, current: str) -> str:
    # Check for the latest version of Whoogle
    try:
        update = bsoup(get(version_url).text, 'html.parser')
        latest = update.select_one('[class="Link--primary"]').string[1:]
        current = int(''.join(filter(str.isdigit, current)))
        latest = int(''.join(filter(str.isdigit, latest)))
        has_update = '' if current >= latest else str(latest)
    except (exceptions.ConnectionError, AttributeError):
        # Ignore failures, assume current version is up to date
        has_update = ''

    return has_update
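
# Illustrative usage (hypothetical version string; performs a real request):
#   newer = check_for_update(
#       'https://github.com/benbusby/whoogle-search/releases', '0.8.0')
#   # -> '' if up to date, otherwise the newer version's digits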


def get_abs_url(url: str, page_url: str) -> str:
    # Creates a valid absolute URL using a partial or relative URL
    if url.startswith('//'):
        return f'https:{url}'
    elif url.startswith('/'):
        return f'{urlparse(page_url).netloc}{url}'
    elif url.startswith('./'):
        return f'{page_url}{url[2:]}'
    return url
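
# Illustrative conversions (hypothetical URLs):
#   get_abs_url('//cdn.example.com/a.js', 'https://example.com/page/')
#   # -> 'https://cdn.example.com/a.js'
#   get_abs_url('./a.js', 'https://example.com/page/')
#   # -> 'https://example.com/page/a.js'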


def list_to_dict(lst: list) -> dict:
    # Map consecutive pairs of elements to key/value, dropping spaces;
    # stop before the final element of an odd-length list to avoid an
    # IndexError on the value lookup
    if len(lst) < 2:
        return {}
    return {lst[i].replace(' ', ''): lst[i + 1].replace(' ', '')
            for i in range(0, len(lst) - 1, 2)}
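
# Illustrative usage (hypothetical values):
#   list_to_dict(['color', 'blue', 'size ', ' large'])
#   # -> {'color': 'blue', 'size': 'large'}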


def encrypt_string(key: bytes, string: str) -> str:
    cipher_suite = Fernet(key)
    return cipher_suite.encrypt(string.encode()).decode()


def decrypt_string(key: bytes, string: str) -> str:
    cipher_suite = Fernet(key)
    return cipher_suite.decrypt(string.encode()).decode()
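
# Illustrative round trip (key generated on the fly):
#   key = Fernet.generate_key()
#   token = encrypt_string(key, 'secret query')
#   decrypt_string(key, token)  # -> 'secret query'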