Merge pull request #2 from benbusby/main

Implement changes from upstream
Branch: pull/1134/head
Thomas Fournier committed 7 months ago via GitHub
commit 2e2f955f05

@@ -15,10 +15,17 @@ RUN pip install --prefix /install --no-warn-script-location --no-cache-dir -r re
 FROM python:3.11.0a5-alpine
 RUN apk add --update --no-cache tor curl openrc libstdc++
+# git go //for obfs4proxy
 # libcurl4-openssl-dev
 RUN apk -U upgrade
+
+# uncomment to build obfs4proxy
+# RUN git clone https://gitlab.com/yawning/obfs4.git
+# WORKDIR /obfs4
+# RUN go build -o obfs4proxy/obfs4proxy ./obfs4proxy
+# RUN cp ./obfs4proxy/obfs4proxy /usr/bin/obfs4proxy
 ARG DOCKER_USER=whoogle
 ARG DOCKER_USERID=927
 ARG config_dir=/config

@@ -102,7 +102,7 @@ ___
 ### [Fly.io](https://fly.io)
-You will need a **PAID** [Fly.io](https://fly.io) account to deploy Whoogle.
+You will need a [Fly.io](https://fly.io) account to deploy Whoogle. The [free allowances](https://fly.io/docs/about/pricing/#free-allowances) are enough for personal use.
 #### Install the CLI: https://fly.io/docs/hands-on/installing/
@@ -234,6 +234,7 @@ ExecStart=<python_install_dir>/python3 <whoogle_install_dir>/whoogle-search --ho
 ExecStart=<whoogle_repo_dir>/run
 # For example:
 # ExecStart=/var/www/whoogle-search/run
+WorkingDirectory=<whoogle_repo_dir>
 ExecReload=/bin/kill -HUP $MAINPID
 Restart=always
 RestartSec=3
@@ -446,6 +447,7 @@ These environment variables allow setting default config values, but can be over
 | WHOOGLE_CONFIG_STYLE | The custom CSS to use for styling (should be single line) |
 | WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED | Encrypt preferences token, requires preferences key |
 | WHOOGLE_CONFIG_PREFERENCES_KEY | Key to encrypt preferences in URL (REQUIRED to show url) |
+| WHOOGLE_CONFIG_ANON_VIEW | Include the "anonymous view" option for each search result |

 ## Usage
 Same as most search engines, with the exception of filtering by time range.
@@ -654,7 +656,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching
 | [https://search.sethforprivacy.com](https://search.sethforprivacy.com) | 🇩🇪 DE | English | |
 | [https://whoogle.dcs0.hu](https://whoogle.dcs0.hu) | 🇭🇺 HU | Multi-choice | |
 | [https://gowogle.voring.me](https://gowogle.voring.me) | 🇺🇸 US | Multi-choice | |
-| [https://whoogle.privacydev.net](https://whoogle.privacydev.net) | 🇩🇪 DE | English | |
+| [https://whoogle.privacydev.net](https://whoogle.privacydev.net) | 🇫🇷 FR | English | |
 | [https://wg.vern.cc](https://wg.vern.cc) | 🇺🇸 US | English | |
 | [https://whoogle.hxvy0.gq](https://whoogle.hxvy0.gq) | 🇨🇦 CA | Turkish Only | ✅ |
 | [https://whoogle.hostux.net](https://whoogle.hostux.net) | 🇫🇷 FR | Multi-choice | |

@@ -664,6 +666,9 @@ A lot of the app currently piggybacks on Google's existing support for fetching
 | [https://search.rubberverse.xyz](https://search.rubberverse.xyz) | 🇵🇱 PL | English | |
 | [https://whoogle.ftw.lol](https://whoogle.ftw.lol) | 🇩🇪 DE | Multi-choice | |
 | [https://whoogle-search--replitcomreside.repl.co](https://whoogle-search--replitcomreside.repl.co) | 🇺🇸 US | English | |
+| [https://search.notrustverify.ch](https://search.notrustverify.ch) | 🇨🇭 CH | Multi-choice | |
+| [https://whoogle.datura.network](https://whoogle.datura.network) | 🇩🇪 DE | Multi-choice | |
+| [https://whoogle.yepserver.xyz](https://whoogle.yepserver.xyz) | 🇺🇦 UA | Multi-choice | |

 * A checkmark in the "Cloudflare" category here refers to the use of the reverse proxy, [Cloudflare](https://cloudflare.com). The checkmark will not be listed for a site which uses Cloudflare DNS but rather the proxying service which grants Cloudflare the ability to monitor traffic to the website.
@@ -675,7 +680,8 @@ A lot of the app currently piggybacks on Google's existing support for fetching
 | [http://whoglqjdkgt2an4tdepberwqz3hk7tjo4kqgdnuj77rt7nshw2xqhqad.onion](http://whoglqjdkgt2an4tdepberwqz3hk7tjo4kqgdnuj77rt7nshw2xqhqad.onion) | 🇺🇸 US | Multi-choice
 | [http://nuifgsnbb2mcyza74o7illtqmuaqbwu4flam3cdmsrnudwcmkqur37qd.onion](http://nuifgsnbb2mcyza74o7illtqmuaqbwu4flam3cdmsrnudwcmkqur37qd.onion) | 🇩🇪 DE | English
 | [http://whoogle.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion](http://whoogle.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion/) | 🇺🇸 US | English |
-| [http://whoogle.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion](http://whoogle.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion/) | 🇩🇪 DE | English |
+| [http://whoogle.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion](http://whoogle.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion/) | 🇫🇷 FR | English |
+| [http://whoogle.daturab6drmkhyeia4ch5gvfc2f3wgo6bhjrv3pz6n7kxmvoznlkq4yd.onion](http://whoogle.daturab6drmkhyeia4ch5gvfc2f3wgo6bhjrv3pz6n7kxmvoznlkq4yd.onion/) | 🇩🇪 DE | Multi-choice | |

 #### I2P Instances

@@ -3,6 +3,7 @@ from bs4 import BeautifulSoup
 from bs4.element import ResultSet, Tag
 from cryptography.fernet import Fernet
 from flask import render_template
+import html
 import urllib.parse as urlparse
 from urllib.parse import parse_qs
 import re
@@ -44,7 +45,7 @@ def extract_q(q_str: str, href: str) -> str:
     Returns:
         str: The 'q' element of the link, or an empty string
     """
-    return parse_qs(q_str)['q'][0] if ('&q=' in href or '?q=' in href) else ''
+    return parse_qs(q_str, keep_blank_values=True)['q'][0] if ('&q=' in href or '?q=' in href) else ''

 def build_map_url(href: str) -> str:
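The `keep_blank_values` flag matters for empty queries: by default `parse_qs` drops blank parameters entirely, so the `['q'][0]` lookup would raise a `KeyError`. A minimal standalone check:

```python
from urllib.parse import parse_qs

# An empty search ("?q=") has a blank value; by default parse_qs drops the key
assert parse_qs('q=') == {}

# With keep_blank_values=True the key survives, so ['q'][0] returns ''
assert parse_qs('q=', keep_blank_values=True) == {'q': ['']}
```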
@@ -160,14 +161,22 @@ class Filter:
         self.update_styling()
         self.remove_block_tabs()

+        # self.main_divs is only populated for the main page of search results
+        # (i.e. not images/news/etc).
+        if self.main_divs:
+            for div in self.main_divs:
+                self.sanitize_div(div)
+
         for img in [_ for _ in self.soup.find_all('img') if 'src' in _.attrs]:
             self.update_element_src(img, 'image/png')

         for audio in [_ for _ in self.soup.find_all('audio') if 'src' in _.attrs]:
             self.update_element_src(audio, 'audio/mpeg')
+            audio['controls'] = ''

         for link in self.soup.find_all('a', href=True):
             self.update_link(link)
+            self.add_favicon(link)

         if self.config.alts:
             self.site_alt_swap()
@@ -201,6 +210,85 @@ class Filter:
         self.remove_site_blocks(self.soup)
         return self.soup

+    def sanitize_div(self, div) -> None:
+        """Removes escaped script and iframe tags from results
+
+        Returns:
+            None (The soup object is modified directly)
+        """
+        if not div:
+            return
+
+        for d in div.find_all('div', recursive=True):
+            d_text = d.find(text=True, recursive=False)
+
+            # Ensure we're working with tags that contain text content
+            if not d_text or not d.string:
+                continue
+
+            d.string = html.unescape(d_text)
+            div_soup = BeautifulSoup(d.string, 'html.parser')
+
+            # Remove all valid script or iframe tags in the div
+            for script in div_soup.find_all('script'):
+                script.decompose()
+
+            for iframe in div_soup.find_all('iframe'):
+                iframe.decompose()
+
+            d.string = str(div_soup)
+
+    def add_favicon(self, link) -> None:
+        """Adds icons for each returned result, using the result site's favicon
+
+        Returns:
+            None (The soup object is modified directly)
+        """
+        # Skip empty, parentless, or internal links
+        if not link or not link.parent or not link['href'].startswith('http'):
+            return
+
+        parent = link.parent
+        is_result_div = False
+
+        # Check each parent to make sure that the div doesn't already have a
+        # favicon attached, and that the div is a result div
+        while parent:
+            p_cls = parent.attrs.get('class') or []
+            if 'has-favicon' in p_cls or GClasses.scroller_class in p_cls:
+                return
+            elif GClasses.result_class_a not in p_cls:
+                parent = parent.parent
+            else:
+                is_result_div = True
+                break
+
+        if not is_result_div:
+            return
+
+        # Construct the html for inserting the icon into the parent div
+        parsed = urlparse.urlparse(link['href'])
+        favicon = self.encrypt_path(
+            f'{parsed.scheme}://{parsed.netloc}/favicon.ico',
+            is_element=True)
+        src = f'{self.root_url}/{Endpoint.element}?url={favicon}' + \
+            '&type=image/x-icon'
+        html = f'<img class="site-favicon" src="{src}">'
+
+        favicon = BeautifulSoup(html, 'html.parser')
+        link.parent.insert(0, favicon)
+
+        # Update all parents to indicate that a favicon has been attached
+        parent = link.parent
+        while parent:
+            p_cls = parent.get('class') or []
+            p_cls.append('has-favicon')
+            parent['class'] = p_cls
+            parent = parent.parent
+
+            if GClasses.result_class_a in p_cls:
+                break
+
     def remove_site_blocks(self, soup) -> None:
         if not self.config.block or not soup.body:
             return
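To see what `sanitize_div` guards against, here is its unescape-then-strip pattern in isolation (a sketch with a hypothetical escaped payload, not taken from real results):

```python
import html
from bs4 import BeautifulSoup

# Escaped markup as it might appear inside a result div (hypothetical payload)
escaped = 'Result snippet &lt;script&gt;alert(1)&lt;/script&gt; continues here'

# Unescaping turns the entities back into live tags...
soup = BeautifulSoup(html.unescape(escaped), 'html.parser')

# ...which can then be found and removed before the text is re-serialized
for tag in soup.find_all(['script', 'iframe']):
    tag.decompose()

print(str(soup))  # 'Result snippet  continues here'
```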

@@ -14,6 +14,7 @@ class GClasses:
     footer = 'TuS8Ad'
     result_class_a = 'ZINbbc'
     result_class_b = 'luh4td'
+    scroller_class = 'idg8be'

     result_classes = {
         result_class_a: ['Gx5Zad'],

@@ -307,9 +307,8 @@ class Request:
         # view is suppressed correctly
         now = datetime.now()
         cookies = {
-            'CONSENT': 'YES+cb.{:d}{:02d}{:02d}-17-p0.de+F+678'.format(
-                now.year, now.month, now.day
-            )
+            'CONSENT': 'PENDING+987',
+            'SOCS': 'CAESHAgBEhIaAB',
         }

         # Validate Tor conn and request new identity if the last one failed
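The consent view is now suppressed with a static `CONSENT`/`SOCS` cookie pair instead of the old date-stamped `YES+cb...` value. A standalone sketch of sending the same cookies with plain `requests` (whether Google keeps honoring these exact values is outside this code's control):

```python
import requests

# Cookie values copied from the diff above
cookies = {'CONSENT': 'PENDING+987', 'SOCS': 'CAESHAgBEhIaAB'}

resp = requests.get('https://www.google.com/search',
                    params={'q': 'whoogle'},
                    cookies=cookies)
print(resp.status_code)
```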

@@ -4,8 +4,10 @@ import io
 import json
 import os
 import pickle
+import re
 import urllib.parse as urlparse
 import uuid
+import validators
 from datetime import datetime, timedelta
 from functools import wraps

@@ -15,10 +17,11 @@ from app.models.config import Config
 from app.models.endpoint import Endpoint
 from app.request import Request, TorError
 from app.utils.bangs import resolve_bang
-from app.utils.misc import get_proxy_host_url
+from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \
+    fetch_favicon
 from app.filter import Filter
 from app.utils.misc import read_config_bool, get_client_ip, get_request_url, \
-    check_for_update
+    check_for_update, encrypt_string
 from app.utils.widgets import *
 from app.utils.results import bold_search_terms,\
     add_currency_card, check_currency, get_tabs_content

@@ -31,6 +34,7 @@ from requests import exceptions
 from requests.models import PreparedRequest
 from cryptography.fernet import Fernet, InvalidToken
 from cryptography.exceptions import InvalidSignature
+from werkzeug.datastructures import MultiDict

 # Load DDG bang json files only on init
 bang_json = json.load(open(app.config['BANG_FILE'])) or {}
@@ -181,6 +185,7 @@ def before_request_func():
 def after_request_func(resp):
     resp.headers['X-Content-Type-Options'] = 'nosniff'
     resp.headers['X-Frame-Options'] = 'DENY'
+    resp.headers['Cache-Control'] = 'max-age=86400'

     if os.getenv('WHOOGLE_CSP', False):
         resp.headers['Content-Security-Policy'] = app.config['CSP']
@@ -298,6 +303,13 @@ def autocomplete():
 @session_required
 @auth_required
 def search():
+    if request.method == 'POST':
+        # Redirect as a GET request with an encrypted query
+        post_data = MultiDict(request.form)
+        post_data['q'] = encrypt_string(g.session_key, post_data['q'])
+        get_req_str = urlparse.urlencode(post_data)
+        return redirect(url_for('.search') + '?' + get_req_str)
+
     search_util = Search(request, g.user_config, g.session_key)
     query = search_util.new_search_query()
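The POST handler now round-trips the form through a GET redirect with an encrypted query, so the plaintext search never lands in the URL. A self-contained sketch of the same transformation (a throwaway Fernet key stands in for `g.session_key`):

```python
from urllib.parse import urlencode
from cryptography.fernet import Fernet
from werkzeug.datastructures import MultiDict

key = Fernet.generate_key()  # stands in for the per-session key

form = MultiDict({'q': 'my private search', 'tbm': 'isch'})
form['q'] = Fernet(key).encrypt(form['q'].encode()).decode()

# The resulting query string carries only the ciphertext
print('/search?' + urlencode(form))
```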
@@ -420,13 +432,18 @@ def config():
     config_disabled = (
         app.config['CONFIG_DISABLE'] or
        not valid_user_session(session))
+
+    name = ''
+    if 'name' in request.args:
+        name = os.path.normpath(request.args.get('name'))
+        if not re.match(r'^[A-Za-z0-9_.+-]+$', name):
+            return make_response('Invalid config name', 400)
+
     if request.method == 'GET':
         return json.dumps(g.user_config.__dict__)
     elif request.method == 'PUT' and not config_disabled:
-        if 'name' in request.args:
-            config_pkl = os.path.join(
-                app.config['CONFIG_PATH'],
-                request.args.get('name'))
+        if name:
+            config_pkl = os.path.join(app.config['CONFIG_PATH'], name)
             session['config'] = (pickle.load(open(config_pkl, 'rb'))
                                  if os.path.exists(config_pkl)
                                  else session['config'])

@@ -444,7 +461,7 @@ def config():
                 config_data,
                 open(os.path.join(
                     app.config['CONFIG_PATH'],
-                    request.args.get('name')), 'wb'))
+                    name), 'wb'))
             session['config'] = config_data
             return redirect(config_data['url'])
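The `name` check normalizes the path and then whitelists a conservative character set, rejecting separators and traversal sequences before the value reaches `os.path.join`. A quick standalone probe of the same two-step check (hypothetical helper name):

```python
import os
import re

def is_safe_config_name(name: str) -> bool:
    # Mirrors the check in routes.py: normalize, then allow only a
    # conservative character whitelist (no path separators)
    name = os.path.normpath(name)
    return bool(re.match(r'^[A-Za-z0-9_.+-]+$', name))

print(is_safe_config_name('work.conf'))         # True
print(is_safe_config_name('../../etc/passwd'))  # False ('/' is rejected)
```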
@@ -475,8 +492,23 @@ def element():
     src_type = request.args.get('type')

+    # Ensure requested element is from a valid domain
+    domain = urlparse.urlparse(src_url).netloc
+    if not validators.domain(domain):
+        return send_file(io.BytesIO(empty_gif), mimetype='image/gif')
+
     try:
-        file_data = g.user_request.send(base_url=src_url).content
+        response = g.user_request.send(base_url=src_url)
+
+        # Display an empty gif if the requested element couldn't be retrieved
+        if response.status_code != 200 or len(response.content) == 0:
+            if 'favicon' in src_url:
+                favicon = fetch_favicon(src_url)
+                return send_file(io.BytesIO(favicon), mimetype='image/png')
+            else:
+                return send_file(io.BytesIO(empty_gif), mimetype='image/gif')
+
+        file_data = response.content
         tmp_mem = io.BytesIO()
         tmp_mem.write(file_data)
         tmp_mem.seek(0)

@@ -485,8 +517,6 @@ def element():
     except exceptions.RequestException:
         pass

-    empty_gif = base64.b64decode(
-        'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
     return send_file(io.BytesIO(empty_gif), mimetype='image/gif')
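`validators.domain` returns `True` for a syntactically valid hostname and a falsy failure object otherwise, so the guard above short-circuits bogus `url` parameters before any outbound request is made. A minimal check (behavior as of validators 0.22.0, which the updated requirements pin):

```python
import urllib.parse as urlparse
import validators

for src_url in ('https://example.com/favicon.ico', 'https://not a host/x.png'):
    domain = urlparse.urlparse(src_url).netloc
    # True for a well-formed domain; a falsy failure object otherwise
    print(domain, '->', bool(validators.domain(domain)))
```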
@@ -504,6 +534,13 @@ def window():
         root_url=request.url_root,
         config=g.user_config)
     target = urlparse.urlparse(target_url)
+
+    # Ensure requested URL has a valid domain
+    if not validators.domain(target.netloc):
+        return render_template(
+            'error.html',
+            error_message='Invalid location'), 400
+
     host_url = f'{target.scheme}://{target.netloc}'
     get_body = g.user_request.send(base_url=target_url).text

@@ -65,6 +65,26 @@ details summary span {
     text-align: center;
 }

+.site-favicon {
+    float: left;
+    width: 25px;
+    padding-right: 5px;
+}
+
+.sCuL3 {
+    padding-left: 30px;
+}
+
+#flex_text_audio_icon_chunk {
+    display: none;
+}
+
+audio {
+    display: block;
+    margin-right: auto;
+    padding-bottom: 5px;
+}
+
 @media (min-width: 801px) {
     body {
         min-width: 736px !important;

@@ -13,7 +13,7 @@
     },
     "maps": {
         "tbm": null,
-        "href": "https://maps.google.com/maps?q={query}",
+        "href": "https://maps.google.com/maps?q={map_query}",
         "name": "Maps",
         "selected": false
     },

@@ -1,11 +1,52 @@
+import base64
 from bs4 import BeautifulSoup as bsoup
+from cryptography.fernet import Fernet
 from flask import Request
 import hashlib
+import io
 import os
 import re
 from requests import exceptions, get
 from urllib.parse import urlparse

+ddg_favicon_site = 'http://icons.duckduckgo.com/ip2'
+
+empty_gif = base64.b64decode(
+    'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
+
+placeholder_img = base64.b64decode(
+    'iVBORw0KGgoAAAANSUhEUgAAABkAAAAZCAYAAADE6YVjAAABF0lEQVRIS8XWPw9EMBQA8Eok' \
+    'JBKrMFqMBt//GzAYLTZ/VomExPDu6uLiaPteqVynBn0/75W2Vp7nEIYhe6p1XcespmmAd7Is' \
+    'M+4URcGiKPogvMMvmIS2eN9MOMKbKWgf54SYgI4vKkTuQKJKSJErkKzUSkQHUs0lilAg7GMh' \
+    'ISoIA/hYMiKCKIA2soeowCWEMkfHtUmrXLcyGYYBfN9HF8djiaglWzNZlgVs21YisoAUaEXG' \
+    'cQTP86QIFgi7vyLzPIPjOEIEC7ANQv/4aZrAdd0TUtc1i+MYnSsMWjPp+x6CIPgJVlUVS5KE' \
+    'DKig/+wnVzM4pnzaGeHd+ENlWbI0TbVLJBtw2uMfP63wc9d2kDCWxi5Q27bsBerSJ9afJbeL' \
+    'AAAAAElFTkSuQmCC'
+)
+
+
+def fetch_favicon(url: str) -> bytes:
+    """Fetches a favicon using DuckDuckGo's favicon retriever
+
+    Args:
+        url: The url to fetch the favicon from
+
+    Returns:
+        bytes - the favicon bytes, or a placeholder image if one
+        was not returned
+    """
+    domain = urlparse(url).netloc
+    response = get(f'{ddg_favicon_site}/{domain}.ico')
+
+    if response.status_code == 200 and len(response.content) > 0:
+        tmp_mem = io.BytesIO()
+        tmp_mem.write(response.content)
+        tmp_mem.seek(0)
+
+        return tmp_mem.read()
+    else:
+        return placeholder_img
+

 def gen_file_hash(path: str, static_file: str) -> str:
     file_contents = open(os.path.join(path, static_file), 'rb').read()

@@ -86,3 +127,13 @@ def list_to_dict(lst: list) -> dict:
         return {}
     return {lst[i].replace(' ', ''): lst[i+1].replace(' ', '')
             for i in range(0, len(lst), 2)}
+
+
+def encrypt_string(key: bytes, string: str) -> str:
+    cipher_suite = Fernet(key)
+    return cipher_suite.encrypt(string.encode()).decode()
+
+
+def decrypt_string(key: bytes, string: str) -> str:
+    cipher_suite = Fernet(key)
+    return cipher_suite.decrypt(string.encode()).decode()
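A quick exercise of the new helpers, assuming the `app.utils.misc` module is importable and the network is reachable (the DuckDuckGo endpoint may of course change):

```python
from cryptography.fernet import Fernet
from app.utils.misc import encrypt_string, decrypt_string, fetch_favicon

# Encrypt/decrypt roundtrip with a throwaway key
key = Fernet.generate_key()
token = encrypt_string(key, 'my search')
assert decrypt_string(key, token) == 'my search'

# Favicon fetch falls back to placeholder bytes if the request fails
icon = fetch_favicon('https://github.com/benbusby/whoogle-search')
print(f'favicon: {len(icon)} bytes')
```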

@@ -418,6 +418,10 @@ def get_tabs_content(tabs: dict,
     Returns:
         dict: contains the name, the href and if the tab is selected or not
     """
+    map_query = full_query
+    if '-site:' in full_query:
+        block_idx = full_query.index('-site:')
+        map_query = map_query[:block_idx]
     tabs = copy.deepcopy(tabs)
     for tab_id, tab_content in tabs.items():
         # update name to desired language

@@ -433,7 +437,9 @@ def get_tabs_content(tabs: dict,
         if preferences:
             query = f"{query}&preferences={preferences}"

-        tab_content['href'] = tab_content['href'].format(query=query)
+        tab_content['href'] = tab_content['href'].format(
+            query=query,
+            map_query=map_query)

         # update if selected tab (default all tab is selected)
         if tab_content['tbm'] == search_type:
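The slice drops everything from the first `-site:` filter onward, so blocked-site operators never leak into the Maps link (the `{map_query}` placeholder in the tabs json above). In isolation:

```python
full_query = 'coffee shops -site:pinterest.com -site:yelp.com'

map_query = full_query
if '-site:' in full_query:
    map_query = map_query[:full_query.index('-site:')]

print('https://maps.google.com/maps?q=' + map_query.strip())
# https://maps.google.com/maps?q=coffee shops
```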

@@ -109,7 +109,9 @@ class Search:
         self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" +
                                         "($|( *[^a-z0-9] *(((addres|address|adres|" +
                                         "adress)|a)? *$)))", self.query.lower()) else self.widget
-        self.widget = 'calculator' if re.search("calculator|calc|calclator|math", self.query.lower()) else self.widget
+        self.widget = 'calculator' if re.search(
+            r"\bcalculator\b|\bcalc\b|\bcalclator\b|\bmath\b",
+            self.query.lower()) else self.widget
         return self.query

     def generate_response(self) -> str:
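The added `\b` word boundaries stop substring matches from hijacking ordinary queries; the old pattern matched `calc` inside words like `calcium`:

```python
import re

pattern = r"\bcalculator\b|\bcalc\b|\bcalclator\b|\bmath\b"

print(bool(re.search(pattern, 'calc 2+2')))            # True
print(bool(re.search(pattern, 'calcium supplements'))) # False
print(bool(re.search('calculator|calc|calclator|math',
                     'calcium supplements')))          # True (old behavior)
```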

@@ -1,5 +1,10 @@
+from pathlib import Path
 from bs4 import BeautifulSoup

+# root
+BASE_DIR = Path(__file__).parent.parent.parent
+

 def add_ip_card(html_soup: BeautifulSoup, ip: str) -> BeautifulSoup:
     """Adds the client's IP address to the search results
        if query contains keywords

@@ -48,7 +53,8 @@ def add_calculator_card(html_soup: BeautifulSoup) -> BeautifulSoup:
     """
     main_div = html_soup.select_one('#main')
     if main_div:
-        widget_file = open('app/static/widgets/calculator.html')
+        # absolute path
+        widget_file = open(BASE_DIR / 'app/static/widgets/calculator.html', encoding="utf8")
         widget_tag = html_soup.new_tag('div')
         widget_tag['class'] = 'ZINbbc xpd O9g5cc uUPGi'
         widget_tag['id'] = 'calculator-wrapper'

@@ -56,7 +62,7 @@ def add_calculator_card(html_soup: BeautifulSoup) -> BeautifulSoup:
         calculator_text['class'] = 'kCrYT ip-address-div'
         calculator_text.string = 'Calculator'
         calculator_widget = html_soup.new_tag('div')
-        calculator_widget.append(BeautifulSoup(widget_file, 'html.parser'));
+        calculator_widget.append(BeautifulSoup(widget_file, 'html.parser'))
         calculator_widget['class'] = 'kCrYT ip-text-div'
         widget_tag.append(calculator_text)
         widget_tag.append(calculator_widget)

@@ -4,4 +4,4 @@ optional_dev_tag = ''
 if os.getenv('DEV_BUILD'):
     optional_dev_tag = '.dev' + os.getenv('DEV_BUILD')

-__version__ = '0.8.2' + optional_dev_tag
+__version__ = '0.8.3' + optional_dev_tag

@@ -3,7 +3,7 @@ name: whoogle
 description: A self hosted search engine on Kubernetes
 type: application
 version: 0.1.0
-appVersion: 0.8.2
+appVersion: 0.8.3
 icon: https://github.com/benbusby/whoogle-search/raw/main/app/static/img/favicon/favicon-96x96.png

@@ -52,10 +52,20 @@ spec:
             httpGet:
               path: /
               port: http
+            {{- if and .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS }}
+            httpHeaders:
+              - name: Authorization
+                value: Basic {{ b64enc (printf "%s:%s" .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS) }}
+            {{- end }}
           readinessProbe:
             httpGet:
               path: /
               port: http
+            {{- if and .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS }}
+            httpHeaders:
+              - name: Authorization
+                value: Basic {{ b64enc (printf "%s:%s" .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS) }}
+            {{- end }}
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
           {{- with .Values.nodeSelector }}
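The `httpHeaders` blocks let kubelet liveness/readiness probes pass Whoogle's basic auth when `WHOOGLE_USER`/`WHOOGLE_PASS` are set; without them, every probe would receive a 401 and the pod would be marked unhealthy. The header value is just the base64 of `user:pass`, the same thing Helm's `b64enc`/`printf` computes (hypothetical credentials below):

```python
import base64

user, password = 'whoogle', 'hunter2'  # hypothetical credentials
token = base64.b64encode(f'{user}:{password}'.encode()).decode()
print(f'Authorization: Basic {token}')
# Authorization: Basic d2hvb2dsZTpodW50ZXIy
```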

@@ -18,3 +18,6 @@ https://whoogle.no-logs.com
 https://search.rubberverse.xyz
 https://whoogle.ftw.lol
 https://whoogle-search--replitcomreside.repl.co
+https://search.notrustverify.ch
+https://whoogle.datura.network
+https://whoogle.yepserver.xyz

@@ -7,3 +7,6 @@ ExtORPortCookieAuthFileGroupReadable 1
 CacheDirectoryGroupReadable 1
 CookieAuthFile /var/lib/tor/control_auth_cookie
 Log debug-notice file /dev/null
+# UseBridges 1
+# ClientTransportPlugin obfs4 exec /usr/bin/obfs4proxy
+# Bridge obfs4 ip and so on

@@ -2,12 +2,12 @@ attrs==22.2.0
 beautifulsoup4==4.11.2
 brotli==1.0.9
 cachelib==0.10.2
-certifi==2022.12.7
+certifi==2023.7.22
 cffi==1.15.1
 chardet==5.1.0
 click==8.1.3
 cryptography==3.3.2; platform_machine == 'armv7l'
-cryptography==41.0.0; platform_machine != 'armv7l'
+cryptography==41.0.4; platform_machine != 'armv7l'
 cssutils==2.6.0
 defusedxml==0.7.1
 Flask==2.3.2

@@ -29,7 +29,8 @@ python-dateutil==2.8.2
 requests==2.31.0
 soupsieve==2.4
 stem==1.8.1
-urllib3==1.26.14
+urllib3==1.26.17
+validators==0.22.0
 waitress==2.1.2
 wcwidth==0.2.6
 Werkzeug==2.3.3


@@ -29,6 +29,7 @@ else
     python3 -um app \
         --unix-socket "$UNIX_SOCKET"
 else
+    echo "Running on http://${ADDRESS:-0.0.0.0}:${PORT:-"${EXPOSE_PORT:-5000}"}"
     python3 -um app \
         --host "${ADDRESS:-0.0.0.0}" \
         --port "${PORT:-"${EXPOSE_PORT:-5000}"}"

@@ -44,17 +44,11 @@ def test_get_results(client):
 def test_post_results(client):
     rv = client.post(f'/{Endpoint.search}', data=dict(q='test'))
-    assert rv._status_code == 200
-
-    # Depending on the search, there can be more
-    # than 10 result divs
-    results = get_search_results(rv.data)
-    assert len(results) >= 10
-    assert len(results) <= 15
+    assert rv._status_code == 302


 def test_translate_search(client):
-    rv = client.post(f'/{Endpoint.search}', data=dict(q='translate hola'))
+    rv = client.get(f'/{Endpoint.search}?q=translate hola')
     assert rv._status_code == 200

     # Pretty weak test, but better than nothing

@@ -64,7 +58,7 @@ def test_translate_search(client):
 def test_block_results(client):
-    rv = client.post(f'/{Endpoint.search}', data=dict(q='pinterest'))
+    rv = client.get(f'/{Endpoint.search}?q=pinterest')
     assert rv._status_code == 200

     has_pinterest = False

@@ -79,7 +73,7 @@ def test_block_results(client):
     rv = client.post(f'/{Endpoint.config}', data=demo_config)
     assert rv._status_code == 302

-    rv = client.post(f'/{Endpoint.search}', data=dict(q='pinterest'))
+    rv = client.get(f'/{Endpoint.search}?q=pinterest')
     assert rv._status_code == 200

     for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):

@@ -90,7 +84,7 @@ def test_block_results(client):
 def test_view_my_ip(client):
-    rv = client.post(f'/{Endpoint.search}', data=dict(q='my ip address'))
+    rv = client.get(f'/{Endpoint.search}?q=my ip address')
     assert rv._status_code == 200

     # Pretty weak test, but better than nothing

@@ -107,7 +101,7 @@ def test_recent_results(client):
     }

     for time, num_days in times.items():
-        rv = client.post(f'/{Endpoint.search}', data=dict(q='test :' + time))
+        rv = client.get(f'/{Endpoint.search}?q=test :' + time)
         result_divs = get_search_results(rv.data)

         current_date = datetime.now()
