Compare commits

...

5 Commits

Author SHA1 Message Date
Ben Busby 9bfdd88a5e
Fix input cursor position bug on mobile
Refactors how search suggestions are added to the view

Fixes #1133
1 month ago
Ben Busby 37ff61dfac
Fall back to random secret key on permission exception
Fixes #1136
1 month ago
David Shen 80e41e6b44
Attempt to query on error condition and print trace (#1131) 1 month ago
David Shen f18bf07ac3
Fix feeling lucky (#1130)
* Fix feeling lucky, fall through to display results if doesn't work

* Allow lucky bang anywhere

* Update feeling lucky test
1 month ago
David Shen fd20135af0
Add support for custom bangs (#1132)
Add the possibility for user-defined bangs, stored in app/static/bangs. 

These are parsed in alphabetical order, with the DDG bangs parsed first.
1 month ago

4
.gitignore vendored

@ -1,4 +1,5 @@
venv/ venv/
.venv/
.idea/ .idea/
__pycache__/ __pycache__/
*.pyc *.pyc
@ -10,7 +11,8 @@ test/static
flask_session/ flask_session/
app/static/config app/static/config
app/static/custom_config app/static/custom_config
app/static/bangs app/static/bangs/*
!app/static/bangs/00-whoogle.json
# pip stuff # pip stuff
/build/ /build/

@ -35,6 +35,7 @@ Contents
6. [Extra Steps](#extra-steps) 6. [Extra Steps](#extra-steps)
1. [Set Primary Search Engine](#set-whoogle-as-your-primary-search-engine) 1. [Set Primary Search Engine](#set-whoogle-as-your-primary-search-engine)
2. [Custom Redirecting](#custom-redirecting) 2. [Custom Redirecting](#custom-redirecting)
2. [Custom Bangs](#custom-bangs)
3. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only) 3. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only)
4. [Manual HTTPS Enforcement](#https-enforcement) 4. [Manual HTTPS Enforcement](#https-enforcement)
5. [Using with Firefox Containers](#using-with-firefox-containers) 5. [Using with Firefox Containers](#using-with-firefox-containers)
@ -61,6 +62,7 @@ Contents
- Randomly generated User Agent - Randomly generated User Agent
- Easy to install/deploy - Easy to install/deploy
- DDG-style bang (i.e. `!<tag> <query>`) searches - DDG-style bang (i.e. `!<tag> <query>`) searches
- User-defined [custom bangs](#custom-bangs)
- Optional location-based searching (i.e. results near \<city\>) - Optional location-based searching (i.e. results near \<city\>)
- Optional NoJS mode to view search results in a separate window with JavaScript blocked - Optional NoJS mode to view search results in a separate window with JavaScript blocked
@ -539,6 +541,14 @@ WHOOGLE_REDIRECTS="badA.com:goodA.com,badB.com:goodB.com"
NOTE: Do not include "http(s)://" when defining your redirect. NOTE: Do not include "http(s)://" when defining your redirect.
### Custom Bangs
You can create your own custom bangs. By default, bangs are stored in
`app/static/bangs`. See [`00-whoogle.json`](https://github.com/benbusby/whoogle-search/blob/main/app/static/bangs/00-whoogle.json)
for an example. These are parsed in alphabetical order with later files
overriding bangs set in earlier files, with the exception that DDG bangs
(downloaded to `app/static/bangs/bangs.json`) are always parsed first. Thus,
any custom bangs will always override the DDG ones.
### Prevent Downtime (Heroku only) ### Prevent Downtime (Heroku only)
Part of the deal with Heroku's free tier is that you're allocated 550 hours/month (meaning it can't stay active 24/7), and the app is temporarily shut down after 30 minutes of inactivity. Once it becomes inactive, any Whoogle searches will still work, but it'll take an extra 10-15 seconds for the app to come back online before displaying the result, which can be frustrating if you're in a hurry. Part of the deal with Heroku's free tier is that you're allocated 550 hours/month (meaning it can't stay active 24/7), and the app is temporarily shut down after 30 minutes of inactivity. Once it becomes inactive, any Whoogle searches will still work, but it'll take an extra 10-15 seconds for the app to come back online before displaying the result, which can be frustrating if you're in a hurry.

@ -1,7 +1,7 @@
from app.filter import clean_query from app.filter import clean_query
from app.request import send_tor_signal from app.request import send_tor_signal
from app.utils.session import generate_key from app.utils.session import generate_key
from app.utils.bangs import gen_bangs_json from app.utils.bangs import gen_bangs_json, load_all_bangs
from app.utils.misc import gen_file_hash, read_config_bool from app.utils.misc import gen_file_hash, read_config_bool
from base64 import b64encode from base64 import b64encode
from bs4 import MarkupResemblesLocatorWarning from bs4 import MarkupResemblesLocatorWarning
@ -101,7 +101,10 @@ if not os.path.exists(app.config['BUILD_FOLDER']):
# Session values # Session values
app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key') app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key')
if os.path.exists(app_key_path): if os.path.exists(app_key_path):
app.config['SECRET_KEY'] = open(app_key_path, 'r').read() try:
app.config['SECRET_KEY'] = open(app_key_path, 'r').read()
except PermissionError:
app.config['SECRET_KEY'] = str(b64encode(os.urandom(32)))
else: else:
app.config['SECRET_KEY'] = str(b64encode(os.urandom(32))) app.config['SECRET_KEY'] = str(b64encode(os.urandom(32)))
with open(app_key_path, 'w') as key_file: with open(app_key_path, 'w') as key_file:
@ -139,7 +142,9 @@ app.config['CSP'] = 'default-src \'none\';' \
'connect-src \'self\';' 'connect-src \'self\';'
# Generate DDG bang filter # Generate DDG bang filter
generating_bangs = False
if not os.path.exists(app.config['BANG_FILE']): if not os.path.exists(app.config['BANG_FILE']):
generating_bangs = True
json.dump({}, open(app.config['BANG_FILE'], 'w')) json.dump({}, open(app.config['BANG_FILE'], 'w'))
bangs_thread = threading.Thread( bangs_thread = threading.Thread(
target=gen_bangs_json, target=gen_bangs_json,
@ -181,6 +186,11 @@ warnings.simplefilter('ignore', MarkupResemblesLocatorWarning)
from app import routes # noqa from app import routes # noqa
# The gen_bangs_json function takes care of loading bangs, so skip it here if
# it's already being loaded
if not generating_bangs:
load_all_bangs(app.config['BANG_FILE'])
# Disable logging from imported modules # Disable logging from imported modules
logging.config.dictConfig({ logging.config.dictConfig({
'version': 1, 'version': 1,

@ -8,6 +8,8 @@ import re
import urllib.parse as urlparse import urllib.parse as urlparse
import uuid import uuid
import validators import validators
import sys
import traceback
from datetime import datetime, timedelta from datetime import datetime, timedelta
from functools import wraps from functools import wraps
@ -16,7 +18,7 @@ from app import app
from app.models.config import Config from app.models.config import Config
from app.models.endpoint import Endpoint from app.models.endpoint import Endpoint
from app.request import Request, TorError from app.request import Request, TorError
from app.utils.bangs import resolve_bang from app.utils.bangs import suggest_bang, resolve_bang
from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \ from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \
fetch_favicon fetch_favicon
from app.filter import Filter from app.filter import Filter
@ -36,9 +38,6 @@ from cryptography.fernet import Fernet, InvalidToken
from cryptography.exceptions import InvalidSignature from cryptography.exceptions import InvalidSignature
from werkzeug.datastructures import MultiDict from werkzeug.datastructures import MultiDict
# Load DDG bang json files only on init
bang_json = json.load(open(app.config['BANG_FILE'])) or {}
ac_var = 'WHOOGLE_AUTOCOMPLETE' ac_var = 'WHOOGLE_AUTOCOMPLETE'
autocomplete_enabled = os.getenv(ac_var, '1') autocomplete_enabled = os.getenv(ac_var, '1')
@ -130,7 +129,6 @@ def session_required(f):
@app.before_request @app.before_request
def before_request_func(): def before_request_func():
global bang_json
session.permanent = True session.permanent = True
# Check for latest version if needed # Check for latest version if needed
@ -172,15 +170,6 @@ def before_request_func():
g.app_location = g.user_config.url g.app_location = g.user_config.url
# Attempt to reload bangs json if not generated yet
if not bang_json and os.path.getsize(app.config['BANG_FILE']) > 4:
try:
bang_json = json.load(open(app.config['BANG_FILE']))
except json.decoder.JSONDecodeError:
# Ignore decoding error, can occur if file is still
# being written
pass
@app.after_request @app.after_request
def after_request_func(resp): def after_request_func(resp):
@ -282,8 +271,7 @@ def autocomplete():
# Search bangs if the query begins with "!", but not "! " (feeling lucky) # Search bangs if the query begins with "!", but not "! " (feeling lucky)
if q.startswith('!') and len(q) > 1 and not q.startswith('! '): if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if return jsonify([q, suggest_bang(q)])
_.startswith(q)]])
if not q and not request.data: if not q and not request.data:
return jsonify({'?': []}) return jsonify({'?': []})
@ -314,7 +302,7 @@ def search():
search_util = Search(request, g.user_config, g.session_key) search_util = Search(request, g.user_config, g.session_key)
query = search_util.new_search_query() query = search_util.new_search_query()
bang = resolve_bang(query, bang_json) bang = resolve_bang(query)
if bang: if bang:
return redirect(bang) return redirect(bang)
@ -622,6 +610,15 @@ def internal_error(e):
else: else:
query = request.args.get('q') query = request.args.get('q')
# Attempt to parse the query
try:
search_util = Search(request, g.user_config, g.session_key)
query = search_util.new_search_query()
except Exception:
pass
print(traceback.format_exc(), file=sys.stderr)
localization_lang = g.user_config.get_localization_lang() localization_lang = g.user_config.get_localization_lang()
translation = app.config['TRANSLATIONS'][localization_lang] translation = app.config['TRANSLATIONS'][localization_lang]
return render_template( return render_template(

@ -0,0 +1,14 @@
{
"!i": {
"url": "search?q={}&tbm=isch",
"suggestion": "!i (Whoogle Images)"
},
"!v": {
"url": "search?q={}&tbm=vid",
"suggestion": "!v (Whoogle Videos)"
},
"!n": {
"url": "search?q={}&tbm=nws",
"suggestion": "!n (Whoogle News)"
}
}

@ -21,16 +21,6 @@ const handleUserInput = () => {
xhrRequest.send('q=' + searchInput.value); xhrRequest.send('q=' + searchInput.value);
}; };
const closeAllLists = el => {
// Close all autocomplete suggestions
let suggestions = document.getElementsByClassName("autocomplete-items");
for (let i = 0; i < suggestions.length; i++) {
if (el !== suggestions[i] && el !== searchInput) {
suggestions[i].parentNode.removeChild(suggestions[i]);
}
}
};
const removeActive = suggestion => { const removeActive = suggestion => {
// Remove "autocomplete-active" class from previously active suggestion // Remove "autocomplete-active" class from previously active suggestion
for (let i = 0; i < suggestion.length; i++) { for (let i = 0; i < suggestion.length; i++) {
@ -71,7 +61,7 @@ const addActive = (suggestion) => {
const autocompleteInput = (e) => { const autocompleteInput = (e) => {
// Handle navigation between autocomplete suggestions // Handle navigation between autocomplete suggestions
let suggestion = document.getElementById(this.id + "-autocomplete-list"); let suggestion = document.getElementById("autocomplete-list");
if (suggestion) suggestion = suggestion.getElementsByTagName("div"); if (suggestion) suggestion = suggestion.getElementsByTagName("div");
if (e.keyCode === 40) { // down if (e.keyCode === 40) { // down
e.preventDefault(); e.preventDefault();
@ -92,29 +82,28 @@ const autocompleteInput = (e) => {
}; };
const updateAutocompleteList = () => { const updateAutocompleteList = () => {
let autocompleteList, autocompleteItem, i; let autocompleteItem, i;
let val = originalSearch; let val = originalSearch;
closeAllLists();
let autocompleteList = document.getElementById("autocomplete-list");
autocompleteList.innerHTML = "";
if (!val || !autocompleteResults) { if (!val || !autocompleteResults) {
return false; return false;
} }
currentFocus = -1; currentFocus = -1;
autocompleteList = document.createElement("div");
autocompleteList.setAttribute("id", this.id + "-autocomplete-list");
autocompleteList.setAttribute("class", "autocomplete-items");
searchInput.parentNode.appendChild(autocompleteList);
for (i = 0; i < autocompleteResults.length; i++) { for (i = 0; i < autocompleteResults.length; i++) {
if (autocompleteResults[i].substr(0, val.length).toUpperCase() === val.toUpperCase()) { if (autocompleteResults[i].substr(0, val.length).toUpperCase() === val.toUpperCase()) {
autocompleteItem = document.createElement("div"); autocompleteItem = document.createElement("div");
autocompleteItem.setAttribute("class", "autocomplete-item");
autocompleteItem.innerHTML = "<strong>" + autocompleteResults[i].substr(0, val.length) + "</strong>"; autocompleteItem.innerHTML = "<strong>" + autocompleteResults[i].substr(0, val.length) + "</strong>";
autocompleteItem.innerHTML += autocompleteResults[i].substr(val.length); autocompleteItem.innerHTML += autocompleteResults[i].substr(val.length);
autocompleteItem.innerHTML += "<input type=\"hidden\" value=\"" + autocompleteResults[i] + "\">"; autocompleteItem.innerHTML += "<input type=\"hidden\" value=\"" + autocompleteResults[i] + "\">";
autocompleteItem.addEventListener("click", function () { autocompleteItem.addEventListener("click", function () {
searchInput.value = this.getElementsByTagName("input")[0].value; searchInput.value = this.getElementsByTagName("input")[0].value;
closeAllLists(); autocompleteList.innerHTML = "";
document.getElementById("search-form").submit(); document.getElementById("search-form").submit();
}); });
autocompleteList.appendChild(autocompleteItem); autocompleteList.appendChild(autocompleteItem);
@ -123,10 +112,16 @@ const updateAutocompleteList = () => {
}; };
document.addEventListener("DOMContentLoaded", function() { document.addEventListener("DOMContentLoaded", function() {
let autocompleteList = document.createElement("div");
autocompleteList.setAttribute("id", "autocomplete-list");
autocompleteList.setAttribute("class", "autocomplete-items");
searchInput = document.getElementById("search-bar"); searchInput = document.getElementById("search-bar");
searchInput.parentNode.appendChild(autocompleteList);
searchInput.addEventListener("keydown", (event) => autocompleteInput(event)); searchInput.addEventListener("keydown", (event) => autocompleteInput(event));
document.addEventListener("click", function (e) { document.addEventListener("click", function (e) {
closeAllLists(e.target); autocompleteList.innerHTML = "";
}); });
}); });

@ -1,10 +1,58 @@
import json import json
import requests import requests
import urllib.parse as urlparse import urllib.parse as urlparse
import os
import glob
bangs_dict = {}
DDG_BANGS = 'https://duckduckgo.com/bang.js' DDG_BANGS = 'https://duckduckgo.com/bang.js'
def load_all_bangs(ddg_bangs_file: str, ddg_bangs: dict = None):
    """Loads all the bang files in alphabetical order

    The DDG bangs are always applied first, either from ``ddg_bangs`` or
    from ``ddg_bangs_file``. Every other ``*.json`` file in the same
    directory is then merged in sorted path order, so later files override
    bangs set by earlier ones. The merged result is stored, sorted by key,
    in the module-level ``bangs_dict``.

    Args:
        ddg_bangs_file: The str path to the new DDG bangs json file
        ddg_bangs: The dict of ddg bangs. If this is empty or None, it will
                   load the bangs from the file

    Returns:
        None

    Raises:
        json.decoder.JSONDecodeError: If any bang file other than the DDG
            bangs file contains invalid json
    """
    global bangs_dict
    ddg_bangs_file = os.path.normpath(ddg_bangs_file)

    # Skip if bangs are already loaded and no fresh DDG data was passed in,
    # or if the DDG file holds at most 4 bytes (i.e. has no real content yet)
    if (bangs_dict and not ddg_bangs) or os.path.getsize(ddg_bangs_file) <= 4:
        return

    bangs = {}
    bangs_dir = os.path.dirname(ddg_bangs_file)

    # Normalize the paths so they can be compared against ddg_bangs_file
    bang_files = [
        os.path.normpath(f)
        for f in glob.glob(os.path.join(bangs_dir, '*.json'))
    ]

    # Custom files in alphabetical order; the ddg bangs always go first
    bang_files = sorted(f for f in bang_files if f != ddg_bangs_file)

    if ddg_bangs:
        bangs |= ddg_bangs
    else:
        bang_files.insert(0, ddg_bangs_file)

    for bang_file in bang_files:
        try:
            # JSON text is UTF-8; don't depend on the locale default encoding
            with open(bang_file, encoding='utf-8') as f:
                bangs |= json.load(f)
        except json.decoder.JSONDecodeError:
            # Ignore decoding error only for the ddg bangs file, since this
            # can occur if file is still being written. Compare by path
            # rather than list position: when ddg_bangs is supplied, the
            # first file in the list is a custom file, not the ddg one.
            if bang_file != ddg_bangs_file:
                raise

    bangs_dict = dict(sorted(bangs.items()))
def gen_bangs_json(bangs_file: str) -> None: def gen_bangs_json(bangs_file: str) -> None:
"""Generates a json file from the DDG bangs list """Generates a json file from the DDG bangs list
@ -37,22 +85,35 @@ def gen_bangs_json(bangs_file: str) -> None:
json.dump(bangs_data, open(bangs_file, 'w')) json.dump(bangs_data, open(bangs_file, 'w'))
print('* Finished creating ddg bangs json') print('* Finished creating ddg bangs json')
load_all_bangs(bangs_file, bangs_data)
def suggest_bang(query: str) -> list[str]:
    """Suggests bangs for a user's query

    Args:
        query: The search query

    Returns:
        list[str]: A list of bang suggestions, one for every known bang
            that starts with the query, in ``bangs_dict`` order
    """
    suggestions = []
    for bang, entry in bangs_dict.items():
        if not bang.startswith(query):
            continue

        # User-defined bang files may leave out the "suggestion" field (or
        # hold a malformed entry); fall back to the bang itself instead of
        # failing the whole autocomplete request
        if isinstance(entry, dict) and 'suggestion' in entry:
            suggestions.append(entry['suggestion'])
        else:
            suggestions.append(bang)

    return suggestions
def resolve_bang(query: str, bangs_dict: dict) -> str: def resolve_bang(query: str) -> str:
"""Transform's a user's query to a bang search, if an operator is found """Transform's a user's query to a bang search, if an operator is found
Args: Args:
query: The search query query: The search query
bangs_dict: The dict of available bang operators, with corresponding
format string search URLs
(i.e. "!w": "https://en.wikipedia.org...?search={}")
Returns: Returns:
str: A formatted redirect for a bang search, or an empty str if there str: A formatted redirect for a bang search, or an empty str if there
wasn't a match or didn't contain a bang operator wasn't a match or didn't contain a bang operator
""" """
global bangs_dict
#if ! not in query simply return (speed up processing) #if ! not in query simply return (speed up processing)
if '!' not in query: if '!' not in query:

@ -144,12 +144,26 @@ def get_first_link(soup: BeautifulSoup) -> str:
str: A str link to the first result str: A str link to the first result
""" """
first_link = ''
orig_details = []
# Temporarily remove details so we don't grab those links
for details in soup.find_all('details'):
temp_details = soup.new_tag('removed_details')
orig_details.append(details.replace_with(temp_details))
# Replace hrefs with only the intended destination (no "utm" type tags) # Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup.find_all('a', href=True): for a in soup.find_all('a', href=True):
# Return the first search result URL # Return the first search result URL
if 'url?q=' in a['href']: if a['href'].startswith('http://') or a['href'].startswith('https://'):
return filter_link_args(a['href']) first_link = a['href']
return '' break
# Add the details back
for orig_detail, details in zip(orig_details, soup.find_all('removed_details')):
details.replace_with(orig_detail)
return first_link
def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str: def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:

@ -102,9 +102,15 @@ class Search:
except InvalidToken: except InvalidToken:
pass pass
# Strip leading '! ' for "feeling lucky" queries # Strip '!' for "feeling lucky" queries
self.feeling_lucky = q.startswith('! ') if match := re.search("(^|\s)!($|\s)", q):
self.query = q[2:] if self.feeling_lucky else q self.feeling_lucky = True
start, end = match.span()
self.query = " ".join([seg for seg in [q[:start], q[end:]] if seg])
else:
self.feeling_lucky = False
self.query = q
# Check for possible widgets # Check for possible widgets
self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" + self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" +
"($|( *[^a-z0-9] *(((addres|address|adres|" + "($|( *[^a-z0-9] *(((addres|address|adres|" +
@ -161,22 +167,25 @@ class Search:
if g.user_request.tor_valid: if g.user_request.tor_valid:
html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser')) html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser'))
formatted_results = content_filter.clean(html_soup)
if self.feeling_lucky: if self.feeling_lucky:
return get_first_link(html_soup) if lucky_link := get_first_link(formatted_results):
else: return lucky_link
formatted_results = content_filter.clean(html_soup)
# Fall through to regular search if unable to find link
# Append user config to all search links, if available self.feeling_lucky = False
param_str = ''.join('&{}={}'.format(k, v)
for k, v in # Append user config to all search links, if available
self.request_params.to_dict(flat=True).items() param_str = ''.join('&{}={}'.format(k, v)
if self.config.is_safe_key(k)) for k, v in
for link in formatted_results.find_all('a', href=True): self.request_params.to_dict(flat=True).items()
link['rel'] = "nofollow noopener noreferrer" if self.config.is_safe_key(k))
if 'search?' not in link['href'] or link['href'].index( for link in formatted_results.find_all('a', href=True):
'search?') > 1: link['rel'] = "nofollow noopener noreferrer"
continue if 'search?' not in link['href'] or link['href'].index(
link['href'] += param_str 'search?') > 1:
continue
return str(formatted_results) link['href'] += param_str
return str(formatted_results)

@ -17,8 +17,15 @@ def test_search(client):
def test_feeling_lucky(client): def test_feeling_lucky(client):
rv = client.get(f'/{Endpoint.search}?q=!%20test') # Bang at beginning of query
rv = client.get(f'/{Endpoint.search}?q=!%20wikipedia')
assert rv._status_code == 303 assert rv._status_code == 303
assert rv.headers.get('Location').startswith('https://www.wikipedia.org')
# Move bang to end of query
rv = client.get(f'/{Endpoint.search}?q=github%20!')
assert rv._status_code == 303
assert rv.headers.get('Location').startswith('https://github.com')
def test_ddg_bang(client): def test_ddg_bang(client):
@ -48,6 +55,13 @@ def test_ddg_bang(client):
assert rv.headers.get('Location').startswith('https://github.com') assert rv.headers.get('Location').startswith('https://github.com')
def test_custom_bang(client):
    """Searching with the `!i` bang should redirect to a `search?q=` URL."""
    # Bang at beginning of query
    response = client.get(f'/{Endpoint.search}?q=!i%20whoogle')
    redirect_target = response.headers.get('Location')

    assert response._status_code == 302
    assert redirect_target.startswith('search?q=')
def test_config(client): def test_config(client):
rv = client.post(f'/{Endpoint.config}', data=demo_config) rv = client.post(f'/{Endpoint.config}', data=demo_config)
assert rv._status_code == 302 assert rv._status_code == 302

Loading…
Cancel
Save