Compare commits

...

5 Commits

Author SHA1 Message Date
Ben Busby 9bfdd88a5e
Fix input cursor position bug on mobile
Refactors how search suggestions are added to the view

Fixes #1133
3 weeks ago
Ben Busby 37ff61dfac
Fall back to random secret key on permission exception
Fixes #1136
3 weeks ago
David Shen 80e41e6b44
Attempt to query on error condition and print trace (#1131) 3 weeks ago
David Shen f18bf07ac3
Fix feeling lucky (#1130)
* Fix feeling lucky, fall through to display results if doesn't work

* Allow lucky bang anywhere

* Update feeling lucky test
3 weeks ago
David Shen fd20135af0
Add support for custom bangs (#1132)
Add the possibility for user-defined bangs, stored in app/static/bangs. 

These are parsed in alphabetical order, with the DDG bangs parsed first.
3 weeks ago

4
.gitignore vendored

@ -1,4 +1,5 @@
venv/
.venv/
.idea/
__pycache__/
*.pyc
@ -10,7 +11,8 @@ test/static
flask_session/
app/static/config
app/static/custom_config
app/static/bangs
app/static/bangs/*
!app/static/bangs/00-whoogle.json
# pip stuff
/build/

@ -35,6 +35,7 @@ Contents
6. [Extra Steps](#extra-steps)
1. [Set Primary Search Engine](#set-whoogle-as-your-primary-search-engine)
2. [Custom Redirecting](#custom-redirecting)
2. [Custom Bangs](#custom-bangs)
3. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only)
4. [Manual HTTPS Enforcement](#https-enforcement)
5. [Using with Firefox Containers](#using-with-firefox-containers)
@ -61,6 +62,7 @@ Contents
- Randomly generated User Agent
- Easy to install/deploy
- DDG-style bang (i.e. `!<tag> <query>`) searches
- User-defined [custom bangs](#custom-bangs)
- Optional location-based searching (i.e. results near \<city\>)
- Optional NoJS mode to view search results in a separate window with JavaScript blocked
@ -539,6 +541,14 @@ WHOOGLE_REDIRECTS="badA.com:goodA.com,badB.com:goodB.com"
NOTE: Do not include "http(s)://" when defining your redirect.
### Custom Bangs
You can create your own custom bangs. By default, bangs are stored in
`app/static/bangs`. See [`00-whoogle.json`](https://github.com/benbusby/whoogle-search/blob/main/app/static/bangs/00-whoogle.json)
for an example. These are parsed in alphabetical order with later files
overriding bangs set in earlier files, with the exception that DDG bangs
(downloaded to `app/static/bangs/bangs.json`) are always parsed first. Thus,
any custom bangs will always override the DDG ones.
### Prevent Downtime (Heroku only)
Part of the deal with Heroku's free tier is that you're allocated 550 hours/month (meaning it can't stay active 24/7), and the app is temporarily shut down after 30 minutes of inactivity. Once it becomes inactive, any Whoogle searches will still work, but it'll take an extra 10-15 seconds for the app to come back online before displaying the result, which can be frustrating if you're in a hurry.

@ -1,7 +1,7 @@
from app.filter import clean_query
from app.request import send_tor_signal
from app.utils.session import generate_key
from app.utils.bangs import gen_bangs_json
from app.utils.bangs import gen_bangs_json, load_all_bangs
from app.utils.misc import gen_file_hash, read_config_bool
from base64 import b64encode
from bs4 import MarkupResemblesLocatorWarning
@ -101,7 +101,10 @@ if not os.path.exists(app.config['BUILD_FOLDER']):
# Session values
app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key')
if os.path.exists(app_key_path):
app.config['SECRET_KEY'] = open(app_key_path, 'r').read()
try:
app.config['SECRET_KEY'] = open(app_key_path, 'r').read()
except PermissionError:
app.config['SECRET_KEY'] = str(b64encode(os.urandom(32)))
else:
app.config['SECRET_KEY'] = str(b64encode(os.urandom(32)))
with open(app_key_path, 'w') as key_file:
@ -139,7 +142,9 @@ app.config['CSP'] = 'default-src \'none\';' \
'connect-src \'self\';'
# Generate DDG bang filter
generating_bangs = False
if not os.path.exists(app.config['BANG_FILE']):
generating_bangs = True
json.dump({}, open(app.config['BANG_FILE'], 'w'))
bangs_thread = threading.Thread(
target=gen_bangs_json,
@ -181,6 +186,11 @@ warnings.simplefilter('ignore', MarkupResemblesLocatorWarning)
from app import routes # noqa
# The gen_bangs_json function takes care of loading bangs, so skip it here if
# it's already being loaded
if not generating_bangs:
load_all_bangs(app.config['BANG_FILE'])
# Disable logging from imported modules
logging.config.dictConfig({
'version': 1,

@ -8,6 +8,8 @@ import re
import urllib.parse as urlparse
import uuid
import validators
import sys
import traceback
from datetime import datetime, timedelta
from functools import wraps
@ -16,7 +18,7 @@ from app import app
from app.models.config import Config
from app.models.endpoint import Endpoint
from app.request import Request, TorError
from app.utils.bangs import resolve_bang
from app.utils.bangs import suggest_bang, resolve_bang
from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \
fetch_favicon
from app.filter import Filter
@ -36,9 +38,6 @@ from cryptography.fernet import Fernet, InvalidToken
from cryptography.exceptions import InvalidSignature
from werkzeug.datastructures import MultiDict
# Load DDG bang json files only on init
bang_json = json.load(open(app.config['BANG_FILE'])) or {}
ac_var = 'WHOOGLE_AUTOCOMPLETE'
autocomplete_enabled = os.getenv(ac_var, '1')
@ -130,7 +129,6 @@ def session_required(f):
@app.before_request
def before_request_func():
global bang_json
session.permanent = True
# Check for latest version if needed
@ -172,15 +170,6 @@ def before_request_func():
g.app_location = g.user_config.url
# Attempt to reload bangs json if not generated yet
if not bang_json and os.path.getsize(app.config['BANG_FILE']) > 4:
try:
bang_json = json.load(open(app.config['BANG_FILE']))
except json.decoder.JSONDecodeError:
# Ignore decoding error, can occur if file is still
# being written
pass
@app.after_request
def after_request_func(resp):
@ -282,8 +271,7 @@ def autocomplete():
# Search bangs if the query begins with "!", but not "! " (feeling lucky)
if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if
_.startswith(q)]])
return jsonify([q, suggest_bang(q)])
if not q and not request.data:
return jsonify({'?': []})
@ -314,7 +302,7 @@ def search():
search_util = Search(request, g.user_config, g.session_key)
query = search_util.new_search_query()
bang = resolve_bang(query, bang_json)
bang = resolve_bang(query)
if bang:
return redirect(bang)
@ -622,6 +610,15 @@ def internal_error(e):
else:
query = request.args.get('q')
# Attempt to parse the query
try:
search_util = Search(request, g.user_config, g.session_key)
query = search_util.new_search_query()
except Exception:
pass
print(traceback.format_exc(), file=sys.stderr)
localization_lang = g.user_config.get_localization_lang()
translation = app.config['TRANSLATIONS'][localization_lang]
return render_template(

@ -0,0 +1,14 @@
{
"!i": {
"url": "search?q={}&tbm=isch",
"suggestion": "!i (Whoogle Images)"
},
"!v": {
"url": "search?q={}&tbm=vid",
"suggestion": "!v (Whoogle Videos)"
},
"!n": {
"url": "search?q={}&tbm=nws",
"suggestion": "!n (Whoogle News)"
}
}

@ -21,16 +21,6 @@ const handleUserInput = () => {
xhrRequest.send('q=' + searchInput.value);
};
const closeAllLists = el => {
// Close all autocomplete suggestions
let suggestions = document.getElementsByClassName("autocomplete-items");
for (let i = 0; i < suggestions.length; i++) {
if (el !== suggestions[i] && el !== searchInput) {
suggestions[i].parentNode.removeChild(suggestions[i]);
}
}
};
const removeActive = suggestion => {
// Remove "autocomplete-active" class from previously active suggestion
for (let i = 0; i < suggestion.length; i++) {
@ -71,7 +61,7 @@ const addActive = (suggestion) => {
const autocompleteInput = (e) => {
// Handle navigation between autocomplete suggestions
let suggestion = document.getElementById(this.id + "-autocomplete-list");
let suggestion = document.getElementById("autocomplete-list");
if (suggestion) suggestion = suggestion.getElementsByTagName("div");
if (e.keyCode === 40) { // down
e.preventDefault();
@ -92,29 +82,28 @@ const autocompleteInput = (e) => {
};
const updateAutocompleteList = () => {
let autocompleteList, autocompleteItem, i;
let autocompleteItem, i;
let val = originalSearch;
closeAllLists();
let autocompleteList = document.getElementById("autocomplete-list");
autocompleteList.innerHTML = "";
if (!val || !autocompleteResults) {
return false;
}
currentFocus = -1;
autocompleteList = document.createElement("div");
autocompleteList.setAttribute("id", this.id + "-autocomplete-list");
autocompleteList.setAttribute("class", "autocomplete-items");
searchInput.parentNode.appendChild(autocompleteList);
for (i = 0; i < autocompleteResults.length; i++) {
if (autocompleteResults[i].substr(0, val.length).toUpperCase() === val.toUpperCase()) {
autocompleteItem = document.createElement("div");
autocompleteItem.setAttribute("class", "autocomplete-item");
autocompleteItem.innerHTML = "<strong>" + autocompleteResults[i].substr(0, val.length) + "</strong>";
autocompleteItem.innerHTML += autocompleteResults[i].substr(val.length);
autocompleteItem.innerHTML += "<input type=\"hidden\" value=\"" + autocompleteResults[i] + "\">";
autocompleteItem.addEventListener("click", function () {
searchInput.value = this.getElementsByTagName("input")[0].value;
closeAllLists();
autocompleteList.innerHTML = "";
document.getElementById("search-form").submit();
});
autocompleteList.appendChild(autocompleteItem);
@ -123,10 +112,16 @@ const updateAutocompleteList = () => {
};
document.addEventListener("DOMContentLoaded", function() {
let autocompleteList = document.createElement("div");
autocompleteList.setAttribute("id", "autocomplete-list");
autocompleteList.setAttribute("class", "autocomplete-items");
searchInput = document.getElementById("search-bar");
searchInput.parentNode.appendChild(autocompleteList);
searchInput.addEventListener("keydown", (event) => autocompleteInput(event));
document.addEventListener("click", function (e) {
closeAllLists(e.target);
autocompleteList.innerHTML = "";
});
});
});

@ -1,10 +1,58 @@
import json
import requests
import urllib.parse as urlparse
import os
import glob
bangs_dict = {}
DDG_BANGS = 'https://duckduckgo.com/bang.js'
def load_all_bangs(ddg_bangs_file: str, ddg_bangs: dict = {}):
"""Loads all the bang files in alphabetical order
Args:
ddg_bangs_file: The str path to the new DDG bangs json file
ddg_bangs: The dict of ddg bangs. If this is empty, it will load the
bangs from the file
Returns:
None
"""
global bangs_dict
ddg_bangs_file = os.path.normpath(ddg_bangs_file)
if (bangs_dict and not ddg_bangs) or os.path.getsize(ddg_bangs_file) <= 4:
return
bangs = {}
bangs_dir = os.path.dirname(ddg_bangs_file)
bang_files = glob.glob(os.path.join(bangs_dir, '*.json'))
# Normalize the paths
bang_files = [os.path.normpath(f) for f in bang_files]
# Move the ddg bangs file to the beginning
bang_files = sorted([f for f in bang_files if f != ddg_bangs_file])
if ddg_bangs:
bangs |= ddg_bangs
else:
bang_files.insert(0, ddg_bangs_file)
for i, bang_file in enumerate(bang_files):
try:
bangs |= json.load(open(bang_file))
except json.decoder.JSONDecodeError:
# Ignore decoding error only for the ddg bangs file, since this can
# occur if file is still being written
if i != 0:
raise
bangs_dict = dict(sorted(bangs.items()))
def gen_bangs_json(bangs_file: str) -> None:
"""Generates a json file from the DDG bangs list
@ -37,22 +85,35 @@ def gen_bangs_json(bangs_file: str) -> None:
json.dump(bangs_data, open(bangs_file, 'w'))
print('* Finished creating ddg bangs json')
load_all_bangs(bangs_file, bangs_data)
def suggest_bang(query: str) -> list[str]:
"""Suggests bangs for a user's query
Args:
query: The search query
Returns:
list[str]: A list of bang suggestions
"""
global bangs_dict
return [bangs_dict[_]['suggestion'] for _ in bangs_dict if _.startswith(query)]
def resolve_bang(query: str, bangs_dict: dict) -> str:
def resolve_bang(query: str) -> str:
"""Transform's a user's query to a bang search, if an operator is found
Args:
query: The search query
bangs_dict: The dict of available bang operators, with corresponding
format string search URLs
(i.e. "!w": "https://en.wikipedia.org...?search={}")
Returns:
str: A formatted redirect for a bang search, or an empty str if there
wasn't a match or didn't contain a bang operator
"""
global bangs_dict
#if ! not in query simply return (speed up processing)
if '!' not in query:

@ -144,12 +144,26 @@ def get_first_link(soup: BeautifulSoup) -> str:
str: A str link to the first result
"""
first_link = ''
orig_details = []
# Temporarily remove details so we don't grab those links
for details in soup.find_all('details'):
temp_details = soup.new_tag('removed_details')
orig_details.append(details.replace_with(temp_details))
# Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup.find_all('a', href=True):
# Return the first search result URL
if 'url?q=' in a['href']:
return filter_link_args(a['href'])
return ''
if a['href'].startswith('http://') or a['href'].startswith('https://'):
first_link = a['href']
break
# Add the details back
for orig_detail, details in zip(orig_details, soup.find_all('removed_details')):
details.replace_with(orig_detail)
return first_link
def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:

@ -102,9 +102,15 @@ class Search:
except InvalidToken:
pass
# Strip leading '! ' for "feeling lucky" queries
self.feeling_lucky = q.startswith('! ')
self.query = q[2:] if self.feeling_lucky else q
# Strip '!' for "feeling lucky" queries
if match := re.search("(^|\s)!($|\s)", q):
self.feeling_lucky = True
start, end = match.span()
self.query = " ".join([seg for seg in [q[:start], q[end:]] if seg])
else:
self.feeling_lucky = False
self.query = q
# Check for possible widgets
self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" +
"($|( *[^a-z0-9] *(((addres|address|adres|" +
@ -161,22 +167,25 @@ class Search:
if g.user_request.tor_valid:
html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser'))
formatted_results = content_filter.clean(html_soup)
if self.feeling_lucky:
return get_first_link(html_soup)
else:
formatted_results = content_filter.clean(html_soup)
# Append user config to all search links, if available
param_str = ''.join('&{}={}'.format(k, v)
for k, v in
self.request_params.to_dict(flat=True).items()
if self.config.is_safe_key(k))
for link in formatted_results.find_all('a', href=True):
link['rel'] = "nofollow noopener noreferrer"
if 'search?' not in link['href'] or link['href'].index(
'search?') > 1:
continue
link['href'] += param_str
return str(formatted_results)
if lucky_link := get_first_link(formatted_results):
return lucky_link
# Fall through to regular search if unable to find link
self.feeling_lucky = False
# Append user config to all search links, if available
param_str = ''.join('&{}={}'.format(k, v)
for k, v in
self.request_params.to_dict(flat=True).items()
if self.config.is_safe_key(k))
for link in formatted_results.find_all('a', href=True):
link['rel'] = "nofollow noopener noreferrer"
if 'search?' not in link['href'] or link['href'].index(
'search?') > 1:
continue
link['href'] += param_str
return str(formatted_results)

@ -17,8 +17,15 @@ def test_search(client):
def test_feeling_lucky(client):
rv = client.get(f'/{Endpoint.search}?q=!%20test')
# Bang at beginning of query
rv = client.get(f'/{Endpoint.search}?q=!%20wikipedia')
assert rv._status_code == 303
assert rv.headers.get('Location').startswith('https://www.wikipedia.org')
# Move bang to end of query
rv = client.get(f'/{Endpoint.search}?q=github%20!')
assert rv._status_code == 303
assert rv.headers.get('Location').startswith('https://github.com')
def test_ddg_bang(client):
@ -48,6 +55,13 @@ def test_ddg_bang(client):
assert rv.headers.get('Location').startswith('https://github.com')
def test_custom_bang(client):
# Bang at beginning of query
rv = client.get(f'/{Endpoint.search}?q=!i%20whoogle')
assert rv._status_code == 302
assert rv.headers.get('Location').startswith('search?q=')
def test_config(client):
rv = client.post(f'/{Endpoint.config}', data=demo_config)
assert rv._status_code == 302

Loading…
Cancel
Save