diff --git a/README.md b/README.md
index 1139450..4905bc5 100644
--- a/README.md
+++ b/README.md
@@ -422,6 +422,8 @@ There are a few optional environment variables available for customizing a Whoog
| WHOOGLE_TOR_SERVICE | Enable/disable the Tor service on startup. Default on -- use '0' to disable. |
| WHOOGLE_TOR_USE_PASS | Use password authentication for tor control port. |
| WHOOGLE_TOR_CONF | The absolute path to the config file containing the password for the tor control port. Default: ./misc/tor/control.conf WHOOGLE_TOR_PASS must be 1 for this to work.|
+| WHOOGLE_SHOW_FAVICONS | Show/hide favicons next to search result URLs. Default on. |
+| WHOOGLE_UPDATE_CHECK | Enable/disable the automatic daily check for new versions of Whoogle. Default on. |
### Config Environment Variables
These environment variables allow setting default config values, but can be overwritten manually by using the home page config menu. These allow a shortcut for destroying/rebuilding an instance to the same config state every time.
@@ -663,12 +665,12 @@ A lot of the app currently piggybacks on Google's existing support for fetching
| [https://whoogle.lunar.icu](https://whoogle.lunar.icu) | 🇩🇪 DE | Multi-choice | ✅ |
| [https://wgl.frail.duckdns.org](https://wgl.frail.duckdns.org) | 🇧🇷 BR | Multi-choice | |
| [https://whoogle.no-logs.com](https://whoogle.no-logs.com/) | 🇸🇪 SE | Multi-choice | |
-| [https://search.rubberverse.xyz](https://search.rubberverse.xyz) | 🇵🇱 PL | English | |
| [https://whoogle.ftw.lol](https://whoogle.ftw.lol) | 🇩🇪 DE | Multi-choice | |
| [https://whoogle-search--replitcomreside.repl.co](https://whoogle-search--replitcomreside.repl.co) | 🇺🇸 US | English | |
| [https://search.notrustverify.ch](https://search.notrustverify.ch) | 🇨🇭 CH | Multi-choice | |
| [https://whoogle.datura.network](https://whoogle.datura.network) | 🇩🇪 DE | Multi-choice | |
| [https://whoogle.yepserver.xyz](https://whoogle.yepserver.xyz) | 🇺🇦 UA | Multi-choice | |
+| [https://search.nezumi.party](https://search.nezumi.party) | 🇮🇹 IT | Multi-choice | |
* A checkmark in the "Cloudflare" category here refers to the use of the reverse proxy, [Cloudflare](https://cloudflare.com). The checkmark will not be listed for a site which uses Cloudflare DNS but rather the proxying service which grants Cloudflare the ability to monitor traffic to the website.
diff --git a/app/filter.py b/app/filter.py
index 29aafd1..ff529fb 100644
--- a/app/filter.py
+++ b/app/filter.py
@@ -29,9 +29,12 @@ unsupported_g_pages = [
'google.com/preferences',
'google.com/intl',
'advanced_search',
- 'tbm=shop'
+ 'tbm=shop',
+ 'ageverification.google.co.kr'
]
+unsupported_g_divs = ['google.com/preferences?hl=', 'ageverification.google.co.kr']
+
def extract_q(q_str: str, href: str) -> str:
"""Extracts the 'q' element from a result link. This is typically
@@ -245,7 +248,9 @@ class Filter:
None (The soup object is modified directly)
"""
# Skip empty, parentless, or internal links
- if not link or not link.parent or not link['href'].startswith('http'):
+ show_favicons = read_config_bool('WHOOGLE_SHOW_FAVICONS', True)
+ is_valid_link = link and link.parent and link['href'].startswith('http')
+ if not show_favicons or not is_valid_link:
return
parent = link.parent
@@ -558,7 +563,7 @@ class Filter:
link['href'] = link_netloc
parent = link.parent
- if 'google.com/preferences?hl=' in link_netloc:
+ if any(divlink in link_netloc for divlink in unsupported_g_divs):
# Handle case where a search is performed in a different
# language than what is configured. This usually returns a
# div with the same classes as normal search results, but with
diff --git a/app/routes.py b/app/routes.py
index 3fcd0cf..4efc343 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -135,7 +135,8 @@ def before_request_func():
# Check for latest version if needed
now = datetime.now()
- if now - timedelta(hours=24) > app.config['LAST_UPDATE_CHECK']:
+ needs_update_check = now - timedelta(hours=24) > app.config['LAST_UPDATE_CHECK']
+ if read_config_bool('WHOOGLE_UPDATE_CHECK', True) and needs_update_check:
app.config['LAST_UPDATE_CHECK'] = now
app.config['HAS_UPDATE'] = check_for_update(
app.config['RELEASES_URL'],
@@ -608,6 +609,26 @@ def page_not_found(e):
return render_template('error.html', error_message=str(e)), 404
+@app.errorhandler(Exception)
+def internal_error(e):
+ query = ''
+ if request.method == 'POST':
+ query = request.form.get('q')
+ else:
+ query = request.args.get('q')
+
+ localization_lang = g.user_config.get_localization_lang()
+ translation = app.config['TRANSLATIONS'][localization_lang]
+ return render_template(
+ 'error.html',
+ error_message='Internal server error (500)',
+ translation=translation,
+ farside='https://farside.link',
+ config=g.user_config,
+ query=urlparse.unquote(query),
+ params=g.user_config.to_params(keys=['preferences'])), 500
+
+
def run_app() -> None:
parser = argparse.ArgumentParser(
description='Whoogle Search console runner')
@@ -626,6 +647,11 @@ def run_app() -> None:
default='',
metavar='',
help='Listen for app on unix socket instead of host:port')
+ parser.add_argument(
+ '--unix-socket-perms',
+ default='600',
+ metavar='',
+ help='Octal permissions to use for the Unix domain socket (default 600)')
parser.add_argument(
'--debug',
default=False,
@@ -677,7 +703,7 @@ def run_app() -> None:
if args.debug:
app.run(host=args.host, port=args.port, debug=args.debug)
elif args.unix_socket:
- waitress.serve(app, unix_socket=args.unix_socket)
+ waitress.serve(app, unix_socket=args.unix_socket, unix_socket_perms=args.unix_socket_perms)
else:
waitress.serve(
app,
diff --git a/app/static/css/search.css b/app/static/css/search.css
index 12c467d..30a1797 100644
--- a/app/static/css/search.css
+++ b/app/static/css/search.css
@@ -71,7 +71,7 @@ details summary span {
padding-right: 5px;
}
-.sCuL3 {
+.has-favicon .sCuL3 {
padding-left: 30px;
}
diff --git a/app/templates/error.html b/app/templates/error.html
index d302270..99e87b3 100644
--- a/app/templates/error.html
+++ b/app/templates/error.html
@@ -20,21 +20,86 @@
- {% if blocked is defined %}
- Whoogle:
-
-
- {{farside}}/whoogle/search?q={{query}}
-
-
- Searx:
-
-
- {{farside}}/searx/search?q={{query}}
-
+
+
+ Other options:
+
- {% endif %}
Return Home
diff --git a/app/utils/misc.py b/app/utils/misc.py
index d5fa5e6..20705bc 100644
--- a/app/utils/misc.py
+++ b/app/utils/misc.py
@@ -56,8 +56,8 @@ def gen_file_hash(path: str, static_file: str) -> str:
return filename_split[0] + '.' + file_hash + filename_split[-1]
-def read_config_bool(var: str) -> bool:
- val = os.getenv(var, '0')
+def read_config_bool(var: str, default: bool=False) -> bool:
+ val = os.getenv(var, '1' if default else '0')
# user can specify one of the following values as 'true' inputs (all
# variants with upper case letters will also work):
# ('true', 't', '1', 'yes', 'y')
diff --git a/app/utils/results.py b/app/utils/results.py
index 42654e9..c78f866 100644
--- a/app/utils/results.py
+++ b/app/utils/results.py
@@ -12,7 +12,7 @@ import re
import warnings
SKIP_ARGS = ['ref_src', 'utm']
-SKIP_PREFIX = ['//www.', '//mobile.', '//m.', 'www.', 'mobile.', 'm.']
+SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
GOOG_STATIC = 'www.gstatic.com'
G_M_LOGO_URL = 'https://www.gstatic.com/m/images/icons/googleg.gif'
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
@@ -152,11 +152,12 @@ def get_first_link(soup: BeautifulSoup) -> str:
return ''
-def get_site_alt(link: str) -> str:
+def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
"""Returns an alternative to a particular site, if one is configured
Args:
- link: A string result URL to check against the SITE_ALTS map
+ link: A string result URL to check against the site_alts map
+ site_alts: A map of site alternatives to replace with. defaults to SITE_ALTS
Returns:
str: An updated (or ignored) result link
@@ -178,9 +179,9 @@ def get_site_alt(link: str) -> str:
# "https://medium.com/..." should match, but "philomedium.com" should not)
hostcomp = f'{parsed_link.scheme}://{hostname}'
- for site_key in SITE_ALTS.keys():
+ for site_key in site_alts.keys():
site_alt = f'{parsed_link.scheme}://{site_key}'
- if not hostname or site_alt not in hostcomp or not SITE_ALTS[site_key]:
+ if not hostname or site_alt not in hostcomp or not site_alts[site_key]:
continue
# Wikipedia -> Wikiless replacements require the subdomain (if it's
@@ -193,9 +194,8 @@ def get_site_alt(link: str) -> str:
elif 'medium' in hostname and len(subdomain) > 0:
hostname = f'{subdomain}.{hostname}'
- parsed_alt = urlparse.urlparse(SITE_ALTS[site_key])
- link = link.replace(hostname, SITE_ALTS[site_key]) + params
-
+ parsed_alt = urlparse.urlparse(site_alts[site_key])
+ link = link.replace(hostname, site_alts[site_key]) + params
# If a scheme is specified in the alternative, this results in a
# replaced link that looks like "https://http://altservice.tld".
# In this case, we can remove the original scheme from the result
@@ -205,9 +205,12 @@ def get_site_alt(link: str) -> str:
for prefix in SKIP_PREFIX:
if parsed_alt.scheme:
- link = link.replace(prefix, '')
+ # If a scheme is specified, remove everything before the
+            # first occurrence of it
+ link = f'{parsed_alt.scheme}{link.split(parsed_alt.scheme, 1)[-1]}'
else:
- link = link.replace(prefix, '//')
+ # Otherwise, replace the first occurrence of the prefix
+ link = link.replace(prefix, '//', 1)
break
return link
diff --git a/app/version.py b/app/version.py
index c061f7e..d3675b7 100644
--- a/app/version.py
+++ b/app/version.py
@@ -4,4 +4,4 @@ optional_dev_tag = ''
if os.getenv('DEV_BUILD'):
optional_dev_tag = '.dev' + os.getenv('DEV_BUILD')
-__version__ = '0.8.3' + optional_dev_tag
+__version__ = '0.8.4' + optional_dev_tag
diff --git a/charts/whoogle/Chart.yaml b/charts/whoogle/Chart.yaml
index 07dfcd4..b10c349 100644
--- a/charts/whoogle/Chart.yaml
+++ b/charts/whoogle/Chart.yaml
@@ -3,7 +3,7 @@ name: whoogle
description: A self hosted search engine on Kubernetes
type: application
version: 0.1.0
-appVersion: 0.8.3
+appVersion: 0.8.4
icon: https://github.com/benbusby/whoogle-search/raw/main/app/static/img/favicon/favicon-96x96.png
diff --git a/misc/instances.txt b/misc/instances.txt
index 95927b2..3d64425 100644
--- a/misc/instances.txt
+++ b/misc/instances.txt
@@ -1,6 +1,7 @@
https://search.albony.xyz
https://search.garudalinux.org
https://search.dr460nf1r3.org
+https://search.nezumi.party
https://s.tokhmi.xyz
https://search.sethforprivacy.com
https://whoogle.dcs0.hu
@@ -15,7 +16,6 @@ https://whoogle2.ungovernable.men
https://whoogle3.ungovernable.men
https://wgl.frail.duckdns.org
https://whoogle.no-logs.com
-https://search.rubberverse.xyz
https://whoogle.ftw.lol
https://whoogle-search--replitcomreside.repl.co
https://search.notrustverify.ch
diff --git a/requirements.txt b/requirements.txt
index e0966ac..90642a8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,13 +7,13 @@ cffi==1.15.1
chardet==5.1.0
click==8.1.3
cryptography==3.3.2; platform_machine == 'armv7l'
-cryptography==41.0.4; platform_machine != 'armv7l'
+cryptography==41.0.6; platform_machine != 'armv7l'
cssutils==2.6.0
defusedxml==0.7.1
Flask==2.3.2
idna==3.4
itsdangerous==2.1.2
-Jinja2==3.1.2
+Jinja2==3.1.3
MarkupSafe==2.1.2
more-itertools==9.0.0
packaging==23.0
@@ -29,9 +29,9 @@ python-dateutil==2.8.2
requests==2.31.0
soupsieve==2.4
stem==1.8.1
-urllib3==1.26.17
+urllib3==1.26.18
validators==0.22.0
waitress==2.1.2
wcwidth==0.2.6
-Werkzeug==2.3.3
+Werkzeug==3.0.1
python-dotenv==0.21.1
diff --git a/setup.cfg b/setup.cfg
index 01bdec7..6e61f45 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -27,6 +27,7 @@ install_requires=
python-dotenv
requests
stem
+ validators
waitress
[options.extras_require]
diff --git a/test/test_results.py b/test/test_results.py
index 63ae159..ad0fd3e 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -2,6 +2,7 @@ from bs4 import BeautifulSoup
from app.filter import Filter
from app.models.config import Config
from app.models.endpoint import Endpoint
+from app.utils import results
from app.utils.session import generate_key
from datetime import datetime
from dateutil.parser import ParserError, parse
@@ -95,13 +96,13 @@ def test_view_my_ip(client):
def test_recent_results(client):
times = {
- 'past year': 365,
- 'past month': 31,
- 'past week': 7
+ 'tbs=qdr:y': 365,
+ 'tbs=qdr:m': 31,
+ 'tbs=qdr:w': 7
}
for time, num_days in times.items():
- rv = client.get(f'/{Endpoint.search}?q=test :' + time)
+ rv = client.get(f'/{Endpoint.search}?q=test&' + time)
result_divs = get_search_results(rv.data)
current_date = datetime.now()
@@ -136,3 +137,22 @@ def test_leading_slash_search(client):
continue
assert link['href'].startswith(f'{Endpoint.search}')
+
+
+def test_site_alt_prefix_skip():
+ # Ensure prefixes are skipped correctly for site alts
+
+    # default site_alts (farside.link)
+ assert results.get_site_alt(link = 'https://www.reddit.com') == 'https://farside.link/libreddit'
+ assert results.get_site_alt(link = 'https://www.twitter.com') == 'https://farside.link/nitter'
+ assert results.get_site_alt(link = 'https://www.youtube.com') == 'https://farside.link/invidious'
+
+ test_site_alts = {
+ 'reddit.com': 'reddit.endswithmobile.domain',
+ 'twitter.com': 'https://twitter.endswithm.domain',
+ 'youtube.com': 'http://yt.endswithwww.domain',
+ }
+ # Domains with part of SKIP_PREFIX in them
+ assert results.get_site_alt(link = 'https://www.reddit.com', site_alts = test_site_alts) == 'https://reddit.endswithmobile.domain'
+ assert results.get_site_alt(link = 'https://www.twitter.com', site_alts = test_site_alts) == 'https://twitter.endswithm.domain'
+ assert results.get_site_alt(link = 'https://www.youtube.com', site_alts = test_site_alts) == 'http://yt.endswithwww.domain'