Compare commits

..

No commits in common. 'main' and 'v0.7.1' have entirely different histories.
main ... v0.7.1

@ -6,16 +6,13 @@ on:
branches: [main]
types:
- completed
push:
tags:
- '*'
jobs:
on-success:
runs-on: ubuntu-latest
steps:
- name: Wait for tests to succeed
if: ${{ github.event.workflow_run.conclusion != 'success' && startsWith(github.ref, 'refs/tags') != true }}
if: ${{ github.event.workflow_run.conclusion != 'success' }}
run: exit 1
- name: checkout code
uses: actions/checkout@v2
@ -24,17 +21,10 @@ jobs:
uses: crazy-max/ghaction-docker-buildx@v1
with:
version: latest
- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: log in to docker hub
run: |
echo "${{ secrets.DOCKER_PASSWORD }}" | \
docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin
- name: build and push the image
if: startsWith(github.ref, 'refs/heads/main') && github.actor == 'benbusby'
run: |
@ -42,10 +32,7 @@ jobs:
docker buildx ls
docker buildx build --push \
--tag benbusby/whoogle-search:latest \
--platform linux/amd64,linux/arm64 .
docker buildx build --push \
--tag ghcr.io/benbusby/whoogle-search:latest \
--platform linux/amd64,linux/arm64 .
--platform linux/amd64,linux/arm/v7,linux/arm64 .
- name: build and push tag
if: startsWith(github.ref, 'refs/tags')
run: |
@ -54,6 +41,3 @@ jobs:
docker buildx build --push \
--tag benbusby/whoogle-search:${GITHUB_REF#refs/*/v}\
--platform linux/amd64,linux/arm/v7,linux/arm64 .
docker buildx build --push \
--tag ghcr.io/benbusby/whoogle-search:${GITHUB_REF#refs/*/v}\
--platform linux/amd64,linux/arm/v7,linux/arm64 .

@ -0,0 +1,21 @@
# Lint workflow: runs pycodestyle over the app/ and test/ trees.
name: pep8
# Runs on every push and on every pull request.
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
# Quoted so YAML keeps the version spec as a string.
python-version: '3.x'
# pycodestyle is the only tool installed; the project's own requirements are not.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pycodestyle
# Two separate invocations: one for app/*, one for test/*.
# NOTE(review): --show-source / --show-pep8 presumably print the offending line
# and the matching PEP 8 text for each finding; confirm against pycodestyle docs.
- name: Run pycodestyle
run: |
pycodestyle --show-source --show-pep8 app/*
pycodestyle --show-source --show-pep8 test/*

@ -20,7 +20,6 @@ jobs:
python -m
pip install
build
setuptools
--user
- name: Set dev timestamp
run: echo "DEV_BUILD=$(date +%s)" >> $GITHUB_ENV

9
.gitignore vendored

@ -1,18 +1,15 @@
venv/
.venv/
.idea/
__pycache__/
*.pyc
*.pem
*.conf
*.key
config.json
test/static
flask_session/
app/static/config
app/static/custom_config
app/static/bangs/*
!app/static/bangs/00-whoogle.json
app/static/bangs
# pip stuff
/build/
@ -21,7 +18,3 @@ dist/
# env
whoogle.env
# vim
*~
*.swp

@ -1 +1,3 @@
entrypoint = "misc/replit.py"
language = "bash"
run = "killall -q python3 > /dev/null 2>&1; pip install -r requirements.txt && ./run"
onBoot = "killall -q python3 > /dev/null 2>&1; pip install -r requirements.txt && ./run"

@ -1,4 +1,4 @@
FROM python:3.11.0a5-alpine as builder
FROM python:3.8-alpine as builder
RUN apk --update add \
build-base \
@ -12,28 +12,19 @@ COPY requirements.txt .
RUN pip install --upgrade pip
RUN pip install --prefix /install --no-warn-script-location --no-cache-dir -r requirements.txt
FROM python:3.11.0a5-alpine
FROM python:3.8-alpine
RUN apk add --update --no-cache tor curl openrc libstdc++
# git go //for obfs4proxy
RUN apk add --update --no-cache tor curl openrc
# libcurl4-openssl-dev
RUN apk -U upgrade
# uncomment to build obfs4proxy
# RUN git clone https://gitlab.com/yawning/obfs4.git
# WORKDIR /obfs4
# RUN go build -o obfs4proxy/obfs4proxy ./obfs4proxy
# RUN cp ./obfs4proxy/obfs4proxy /usr/bin/obfs4proxy
ARG DOCKER_USER=whoogle
ARG DOCKER_USERID=927
ARG config_dir=/config
RUN mkdir -p $config_dir
RUN chmod a+w $config_dir
RUN mkdir -p -m 777 $config_dir
VOLUME $config_dir
ARG url_prefix=''
ARG username=''
ARG password=''
ARG proxyuser=''
@ -45,16 +36,14 @@ ARG use_https=''
ARG whoogle_port=5000
ARG twitter_alt='farside.link/nitter'
ARG youtube_alt='farside.link/invidious'
ARG instagram_alt='farside.link/bibliogram'
ARG reddit_alt='farside.link/libreddit'
ARG medium_alt='farside.link/scribe'
ARG translate_alt='farside.link/lingva'
ARG imgur_alt='farside.link/rimgo'
ARG wikipedia_alt='farside.link/wikiless'
ARG imdb_alt='farside.link/libremdb'
ARG quora_alt='farside.link/quetre'
ARG translate_alt='lingva.ml'
ARG imgur_alt='imgin.voidnet.tech'
ARG wikipedia_alt='wikiless.org'
ENV CONFIG_VOLUME=$config_dir \
WHOOGLE_URL_PREFIX=$url_prefix \
WHOOGLE_USER=$username \
WHOOGLE_PASS=$password \
WHOOGLE_PROXY_USER=$proxyuser \
@ -66,13 +55,12 @@ ENV CONFIG_VOLUME=$config_dir \
EXPOSE_PORT=$whoogle_port \
WHOOGLE_ALT_TW=$twitter_alt \
WHOOGLE_ALT_YT=$youtube_alt \
WHOOGLE_ALT_IG=$instagram_alt \
WHOOGLE_ALT_RD=$reddit_alt \
WHOOGLE_ALT_MD=$medium_alt \
WHOOGLE_ALT_TL=$translate_alt \
WHOOGLE_ALT_IMG=$imgur_alt \
WHOOGLE_ALT_WIKI=$wikipedia_alt \
WHOOGLE_ALT_IMDB=$imdb_alt \
WHOOGLE_ALT_QUORA=$quora_alt
WHOOGLE_ALT_WIKI=$wikipedia_alt
WORKDIR /whoogle
@ -80,13 +68,14 @@ COPY --from=builder /install /usr/local
COPY misc/tor/torrc /etc/tor/torrc
COPY misc/tor/start-tor.sh misc/tor/start-tor.sh
COPY app/ app/
COPY run whoogle.env* ./
COPY run .
#COPY whoogle.env .
# Create user/group to run as
RUN adduser -D -g $DOCKER_USERID -u $DOCKER_USERID $DOCKER_USER
# Fix ownership / permissions
RUN chown -R ${DOCKER_USER}:${DOCKER_USER} /whoogle /var/lib/tor
RUN chown -R ${DOCKER_USER}:${DOCKER_USER} /whoogle /var/lib/tor
# Allow writing symlinks to build dir
RUN chown $DOCKER_USERID:$DOCKER_USERID app/static/build

@ -2,5 +2,4 @@ graft app/static
graft app/templates
graft app/misc
include requirements.txt
recursive-include test
global-exclude *.pyc

@ -4,47 +4,39 @@
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![tests](https://github.com/benbusby/whoogle-search/actions/workflows/tests.yml/badge.svg)](https://github.com/benbusby/whoogle-search/actions/workflows/tests.yml)
[![buildx](https://github.com/benbusby/whoogle-search/actions/workflows/buildx.yml/badge.svg)](https://github.com/benbusby/whoogle-search/actions/workflows/buildx.yml)
[![pep8](https://github.com/benbusby/whoogle-search/workflows/pep8/badge.svg)](https://github.com/benbusby/whoogle-search/actions?query=workflow%3Apep8)
[![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master)
[![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search)](https://hub.docker.com/r/benbusby/whoogle-search)
<table>
<tr>
<td><a href="https://sr.ht/~benbusby/whoogle-search">SourceHut</a></td>
<td><a href="https://github.com/benbusby/whoogle-search">GitHub</a></td>
</tr>
</table>
Get Google search results, but without any ads, JavaScript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile.
Get Google search results, but without any ads, JavaScript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile.
Contents
1. [Features](#features)
3. [Install/Deploy Options](#install)
1. [Heroku Quick Deploy](#heroku-quick-deploy)
1. [Render.com](#render)
1. [Repl.it](#replit)
1. [Fly.io](#flyio)
1. [Koyeb](#koyeb)
1. [pipx](#pipx)
1. [pip](#pip)
1. [Manual](#manual)
1. [Docker](#manual-docker)
1. [Arch/AUR](#arch-linux--arch-based-distributions)
1. [Helm/Kubernetes](#helm-chart-for-kubernetes)
2. [Dependencies](#dependencies)
3. [Install/Deploy](#install)
1. [Heroku Quick Deploy](#a-heroku-quick-deploy)
2. [Repl.it](#b-replit)
3. [Fly.io](#c-flyio)
4. [pipx](#d-pipx)
5. [pip](#e-pip)
6. [Manual](#f-manual)
7. [Docker](#g-manual-docker)
8. [Arch/AUR](#arch-linux--arch-based-distributions)
9. [Helm/Kubernetes](#helm-chart-for-kubernetes)
4. [Environment Variables and Configuration](#environment-variables)
5. [Usage](#usage)
6. [Extra Steps](#extra-steps)
1. [Set Primary Search Engine](#set-whoogle-as-your-primary-search-engine)
2. [Custom Redirecting](#custom-redirecting)
2. [Custom Bangs](#custom-bangs)
3. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only)
4. [Manual HTTPS Enforcement](#https-enforcement)
5. [Using with Firefox Containers](#using-with-firefox-containers)
6. [Reverse Proxying](#reverse-proxying)
1. [Nginx](#nginx)
2. [Prevent Downtime (Heroku Only)](#prevent-downtime-heroku-only)
3. [Manual HTTPS Enforcement](#https-enforcement)
4. [Using with Firefox Containers](#using-with-firefox-containers)
7. [Contributing](#contributing)
8. [FAQ](#faq)
9. [Public Instances](#public-instances)
10. [Screenshots](#screenshots)
11. Mirrors (read-only)
1. [GitLab](https://gitlab.com/benbusby/whoogle-search)
2. [Gogs](https://gogs.benbusby.com/benbusby/whoogle-search)
## Features
- No ads or sponsored content
@ -62,7 +54,6 @@ Contents
- Randomly generated User Agent
- Easy to install/deploy
- DDG-style bang (i.e. `!<tag> <query>`) searches
- User-defined [custom bangs](#custom-bangs)
- Optional location-based searching (i.e. results near \<city\>)
- Optional NoJS mode to view search results in a separate window with JavaScript blocked
@ -72,35 +63,33 @@ Contents
<sup>***If deployed to a remote server, or configured to send requests through a VPN, Tor, proxy, etc.</sup>
## Dependencies
If using Heroku Quick Deploy, **you can skip this section**.
- Docker ([Windows](https://docs.docker.com/docker-for-windows/install/), [macOS](https://docs.docker.com/docker-for-mac/install/), [Ubuntu](https://docs.docker.com/engine/install/ubuntu/), [other Linux distros](https://docs.docker.com/engine/install/binaries/))
- Only needed if you intend on deploying the app as a Docker image
- [Python3](https://www.python.org/downloads/)
- `libcurl4-openssl-dev` and `libssl-dev`
- macOS: `brew install openssl curl-openssl`
- Ubuntu: `sudo apt-get install -y libcurl4-openssl-dev libssl-dev`
- Arch: `pacman -S curl openssl`
## Install
There are a few different ways to begin using the app, depending on your preferences:
___
### [Heroku Quick Deploy](https://heroku.com/about)
### A) [Heroku Quick Deploy](https://heroku.com/about)
[![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/benbusby/whoogle-search/tree/main)
Provides:
- Easy Deployment of App
- An HTTPS URL (https://\<your app name\>.herokuapp.com)
- Free deployment of app
- Free HTTPS url (https://\<your app name\>.herokuapp.com)
- Downtime after periods of inactivity \([solution](https://github.com/benbusby/whoogle-search#prevent-downtime-heroku-only)\)
Notes:
- Requires a **PAID** Heroku Account.
- Requires a (free) Heroku account
- Sometimes has issues with auto-redirecting to `https`. Make sure to navigate to the `https` version of your app before adding as a default search engine.
___
### [Render](https://render.com)
Create an account on [render.com](https://render.com) and import the Whoogle repo with the following settings:
- Runtime: `Python 3`
- Build Command: `pip install -r requirements.txt`
- Run Command: `./run`
___
### [Repl.it](https://repl.it)
### B) [Repl.it](https://repl.it)
[![Run on Repl.it](https://repl.it/badge/github/benbusby/whoogle-search)](https://repl.it/github/benbusby/whoogle-search)
*Note: Requires a (free) Replit account*
@ -109,40 +98,35 @@ Provides:
- Free deployment of app
- Free HTTPS url (https://\<app name\>.\<username\>\.repl\.co)
- Supports custom domains
- Downtime after periods of inactivity ([solution](https://repl.it/talk/learn/How-to-use-and-setup-UptimeRobot/9003)\)
- Downtime after periods of inactivity \([solution 1](https://repl.it/talk/ask/use-this-pingmat1replco-just-enter/28821/101298), [solution 2](https://repl.it/talk/learn/How-to-use-and-setup-UptimeRobot/9003)\)
___
### C) [Fly.io](https://fly.io)
### [Fly.io](https://fly.io)
You will need a [Fly.io](https://fly.io) account to do this. Fly requires a credit card to deploy anything, but you can have up to 3 shared-CPU VMs running full-time each month for free.
You will need a [Fly.io](https://fly.io) account to deploy Whoogle. The [free allowances](https://fly.io/docs/about/pricing/#free-allowances) are enough for personal use.
#### Install the CLI:
#### Install the CLI: https://fly.io/docs/hands-on/installing/
```bash
curl -L https://fly.io/install.sh | sh
```
#### Deploy the app
#### Deploy your app
```bash
flyctl auth login
flyctl launch --image benbusby/whoogle-search:latest
fly apps create --org personal --port 5000
# Choose a name and the Image builder
# Enter `benbusby/whoogle-search:latest` as the image name
fly deploy
```
The first deploy won't succeed because the default `internal_port` is wrong.
To fix this, open the generated `fly.toml` file, set `services.internal_port` to `5000` and run `flyctl launch` again.
Your app is now available at `https://<app-name>.fly.dev`.
___
### [Koyeb](https://www.koyeb.com)
You can customize the `fly.toml`:
- Remove the non-https service
- Add environment variables under the `[env]` key
- Use `fly secrets set NAME=value` for more sensitive values like `WHOOGLE_PASS` and `WHOOGLE_PROXY_PASS`.
Use one of the following guides to install Whoogle on Koyeb:
1. Using GitHub: https://www.koyeb.com/docs/quickstart/deploy-with-git
2. Using Docker: https://www.koyeb.com/docs/quickstart/deploy-a-docker-application
___
### [pipx](https://github.com/pipxproject/pipx#install-pipx)
### D) [pipx](https://github.com/pipxproject/pipx#install-pipx)
Persistent install:
`pipx install git+https://github.com/benbusby/whoogle-search.git`
@ -151,9 +135,7 @@ Sandboxed temporary instance:
`pipx run --spec git+https://github.com/benbusby/whoogle-search.git whoogle-search`
___
### pip
### E) pip
`pip install whoogle-search`
```bash
@ -180,21 +162,10 @@ optional arguments:
```
See the [available environment variables](#environment-variables) for additional configuration.
___
### Manual
### F) Manual
*Note: `Content-Security-Policy` headers can be sent by Whoogle if you set `WHOOGLE_CSP`.*
#### Dependencies
- [Python3](https://www.python.org/downloads/)
- `libcurl4-openssl-dev` and `libssl-dev`
- macOS: `brew install openssl curl-openssl`
- Ubuntu: `sudo apt-get install -y libcurl4-openssl-dev libssl-dev`
- Arch: `pacman -S curl openssl`
#### Install
Clone the repo and run the following commands to start the app in a local-only environment:
```bash
@ -228,13 +199,12 @@ Description=Whoogle
# with default values.
#Environment=WHOOGLE_ALT_TW=farside.link/nitter
#Environment=WHOOGLE_ALT_YT=farside.link/invidious
#Environment=WHOOGLE_ALT_IG=farside.link/bibliogram/u
#Environment=WHOOGLE_ALT_RD=farside.link/libreddit
#Environment=WHOOGLE_ALT_MD=farside.link/scribe
#Environment=WHOOGLE_ALT_TL=farside.link/lingva
#Environment=WHOOGLE_ALT_IMG=farside.link/rimgo
#Environment=WHOOGLE_ALT_WIKI=farside.link/wikiless
#Environment=WHOOGLE_ALT_IMDB=farside.link/libremdb
#Environment=WHOOGLE_ALT_QUORA=farside.link/quetre
#Environment=WHOOGLE_ALT_TL=lingva.ml
#Environment=WHOOGLE_ALT_IMG=imgin.voidnet.tech
#Environment=WHOOGLE_ALT_WIKI=wikiless.org
# Load values from dotenv only
#Environment=WHOOGLE_DOTENV=1
Type=simple
@ -247,7 +217,6 @@ ExecStart=<python_install_dir>/python3 <whoogle_install_dir>/whoogle-search --ho
ExecStart=<whoogle_repo_dir>/run
# For example:
# ExecStart=/var/www/whoogle-search/run
WorkingDirectory=<whoogle_repo_dir>
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
RestartSec=3
@ -263,54 +232,7 @@ sudo systemctl enable whoogle
sudo systemctl start whoogle
```
#### Tor Configuration *optional*
If routing your request through Tor you will need to make the following adjustments.
Due to the nature of interacting with Google through Tor we will need to be able to send signals to Tor and therefore authenticate with it.
There are two authentication methods, password and cookie. You will need to make changes to your torrc:
* Cookie
1. Uncomment or add the following lines in your torrc:
- `ControlPort 9051`
- `CookieAuthentication 1`
- `DataDirectoryGroupReadable 1`
- `CookieAuthFileGroupReadable 1`
2. Make the tor auth cookie readable:
- This is assuming that you are using a dedicated user to run whoogle. If you are using a different user replace `whoogle` with that user.
1. `chown tor:whoogle /var/lib/tor`
2. `chown tor:whoogle /var/lib/tor/control_auth_cookie`
3. Restart the tor service:
- `systemctl restart tor`
4. Set the Tor environment variable, `WHOOGLE_CONFIG_TOR`, to 1. Refer to the [Environment Variables](#environment-variables) section for more details.
- This may be added in the systemd unit file or env file `WHOOGLE_CONFIG_TOR=1`
* Password
1. Run this command:
- `tor --hash-password {Your Password Here}`; put your password in place of `{Your Password Here}`.
- Keep the output of this command, you will be placing it in your torrc.
- Keep the password input of this command, you will be using it later.
2. Uncomment or add the following lines in your torrc:
- `ControlPort 9051`
- `HashedControlPassword {Place output here}`; put the output of the previous command in place of `{Place output here}`.
3. Now take the password from the first step and place it in the control.conf file within the whoogle working directory, ie. [misc/tor/control.conf](misc/tor/control.conf)
- If you want to place your password file in a different location set this location with the `WHOOGLE_TOR_CONF` environment variable. Refer to the [Environment Variables](#environment-variables) section for more details.
4. Heavily restrict access to control.conf to only be readable by the user running whoogle:
- `chmod 400 control.conf`
5. Finally, set both the Tor environment variable (`WHOOGLE_CONFIG_TOR`) and the use-password variable (`WHOOGLE_TOR_USE_PASS`) to 1. Refer to the [Environment Variables](#environment-variables) section for more details.
- These may be added to the systemd unit file or env file:
- `WHOOGLE_CONFIG_TOR=1`
- `WHOOGLE_TOR_USE_PASS=1`
___
### Manual (Docker)
### G) Manual (Docker)
1. Ensure the Docker daemon is running, and is accessible by your user account
- To add user permissions, you can execute `sudo usermod -aG docker yourusername`
- Running `docker ps` should return something besides an error. If you encounter an error saying the daemon isn't running, try `sudo systemctl start docker` (Linux) or ensure the docker tool is running (Windows/macOS).
@ -371,22 +293,16 @@ heroku open
This series of commands can take a while, but once you have run it, you shouldn't have to run it again. The final command, `heroku open`, will launch a tab in your web browser, where you can test out Whoogle and even [set it as your primary search engine](https://github.com/benbusby/whoogle-search#set-whoogle-as-your-primary-search-engine).
You may also edit environment variables from your app's Settings tab in the Heroku Dashboard.
___
### Arch Linux & Arch-based Distributions
#### Arch Linux & Arch-based Distributions
There is an [AUR package available](https://aur.archlinux.org/packages/whoogle-git/), as well as a pre-built and daily updated package available at [Chaotic-AUR](https://chaotic.cx).
___
### Helm chart for Kubernetes
#### Helm chart for Kubernetes
To use the Kubernetes Helm Chart:
1. Ensure you have [Helm](https://helm.sh/docs/intro/install/) `>=3.0.0` installed
2. Clone this repository
3. Update [charts/whoogle/values.yaml](./charts/whoogle/values.yaml) as desired
4. Run `helm install whoogle ./charts/whoogle`
___
#### Using your own server, or alternative container deployment
There are other methods for deploying docker containers that are well outlined in [this article](https://rollout.io/blog/the-shortlist-of-docker-hosting/), but there are too many to describe set up for each here. Generally it should be about the same amount of effort as the Heroku deployment.
@ -405,7 +321,6 @@ There are a few optional environment variables available for customizing a Whoog
| Variable | Description |
| -------------------- | ----------------------------------------------------------------------------------------- |
| WHOOGLE_URL_PREFIX | The URL prefix to use for the whoogle instance (i.e. "/whoogle") |
| WHOOGLE_DOTENV | Load environment variables in `whoogle.env` |
| WHOOGLE_USER | The username for basic auth. WHOOGLE_PASS must also be set if used. |
| WHOOGLE_PASS | The password for basic auth. WHOOGLE_USER must also be set if used. |
@ -413,56 +328,41 @@ There are a few optional environment variables available for customizing a Whoog
| WHOOGLE_PROXY_PASS | The password of the proxy server. |
| WHOOGLE_PROXY_TYPE | The type of the proxy server. Can be "socks5", "socks4", or "http". |
| WHOOGLE_PROXY_LOC | The location of the proxy server (host or ip). |
| WHOOGLE_USER_AGENT | The desktop user agent to use. Defaults to a randomly generated one. |
| WHOOGLE_USER_AGENT_MOBILE | The mobile user agent to use. Defaults to a randomly generated one. |
| WHOOGLE_USE_CLIENT_USER_AGENT | Enable to use your own user agent for all requests. Defaults to false. |
| WHOOGLE_REDIRECTS | Specify sites that should be redirected elsewhere. See [custom redirecting](#custom-redirecting). |
| EXPOSE_PORT | The port where Whoogle will be exposed. |
| HTTPS_ONLY | Enforce HTTPS. (See [here](https://github.com/benbusby/whoogle-search#https-enforcement)) |
| WHOOGLE_ALT_TW | The twitter.com alternative to use when site alternatives are enabled in the config. Set to "" to disable. |
| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. Set to "" to disable. |
| WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. Set to "" to disable. |
| WHOOGLE_ALT_TL | The Google Translate alternative to use. This is used for all "translate ____" searches. Set to "" to disable. |
| WHOOGLE_ALT_MD | The medium.com alternative to use when site alternatives are enabled in the config. Set to "" to disable. |
| WHOOGLE_ALT_IMG | The imgur.com alternative to use when site alternatives are enabled in the config. Set to "" to disable. |
| WHOOGLE_ALT_WIKI | The wikipedia.org alternative to use when site alternatives are enabled in the config. Set to "" to disable. |
| WHOOGLE_ALT_IMDB | The imdb.com alternative to use when site alternatives are enabled in the config. Set to "" to disable. |
| WHOOGLE_ALT_QUORA | The quora.com alternative to use when site alternatives are enabled in the config. Set to "" to disable. |
| WHOOGLE_AUTOCOMPLETE | Controls visibility of autocomplete/search suggestions. Default on -- use '0' to disable. |
| WHOOGLE_ALT_TW | The twitter.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_TL | The Google Translate alternative to use. This is used for all "translate ____" searches. |
| WHOOGLE_ALT_MD | The medium.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_IMG | The imgur.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_WIKI | The wikipedia.org alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_AUTOCOMPLETE | Controls visibility of autocomplete/search suggestions. Default on -- use '0' to disable |
| WHOOGLE_MINIMAL | Remove everything except basic result cards from all search queries. |
| WHOOGLE_CSP | Sets a default set of 'Content-Security-Policy' headers |
| WHOOGLE_RESULTS_PER_PAGE | Set the number of results per page |
| WHOOGLE_TOR_SERVICE | Enable/disable the Tor service on startup. Default on -- use '0' to disable. |
| WHOOGLE_TOR_USE_PASS | Use password authentication for tor control port. |
| WHOOGLE_TOR_CONF | The absolute path to the config file containing the password for the tor control port. Default: ./misc/tor/control.conf. WHOOGLE_TOR_USE_PASS must be 1 for this to work. |
| WHOOGLE_SHOW_FAVICONS | Show/hide favicons next to search result URLs. Default on. |
| WHOOGLE_UPDATE_CHECK | Enable/disable the automatic daily check for new versions of Whoogle. Default on. |
| WHOOGLE_RESULTS_PER_PAGE | Set the number of results per page |
### Config Environment Variables
These environment variables allow setting default config values, but can be overwritten manually by using the home page config menu. These allow a shortcut for destroying/rebuilding an instance to the same config state every time.
| Variable | Description |
| ------------------------------------ | --------------------------------------------------------------- |
| WHOOGLE_CONFIG_DISABLE | Hide config from UI and disallow changes to config by client |
| WHOOGLE_CONFIG_COUNTRY | Filter results by hosting country |
| WHOOGLE_CONFIG_LANGUAGE | Set interface language |
| WHOOGLE_CONFIG_SEARCH_LANGUAGE | Set search result language |
| WHOOGLE_CONFIG_BLOCK | Block websites from search results (use comma-separated list) |
| WHOOGLE_CONFIG_BLOCK_TITLE | Block search result with a REGEX filter on title |
| WHOOGLE_CONFIG_BLOCK_URL | Block search result with a REGEX filter on URL |
| WHOOGLE_CONFIG_THEME | Set theme mode (light, dark, or system) |
| WHOOGLE_CONFIG_SAFE | Enable safe searches |
| WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) |
| WHOOGLE_CONFIG_NEAR | Restrict results to only those near a particular city |
| WHOOGLE_CONFIG_TOR | Use Tor routing (if available) |
| WHOOGLE_CONFIG_NEW_TAB | Always open results in new tab |
| WHOOGLE_CONFIG_VIEW_IMAGE | Enable View Image option |
| WHOOGLE_CONFIG_GET_ONLY | Search using GET requests only |
| WHOOGLE_CONFIG_URL | The root url of the instance (`https://<your url>/`) |
| WHOOGLE_CONFIG_STYLE | The custom CSS to use for styling (should be single line) |
| WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED | Encrypt preferences token, requires preferences key |
| WHOOGLE_CONFIG_PREFERENCES_KEY | Key to encrypt preferences in URL (REQUIRED to show url) |
| WHOOGLE_CONFIG_ANON_VIEW | Include the "anonymous view" option for each search result |
| Variable | Description |
| ------------------------------ | --------------------------------------------------------------- |
| WHOOGLE_CONFIG_DISABLE | Hide config from UI and disallow changes to config by client |
| WHOOGLE_CONFIG_COUNTRY | Filter results by hosting country |
| WHOOGLE_CONFIG_LANGUAGE | Set interface language |
| WHOOGLE_CONFIG_SEARCH_LANGUAGE | Set search result language |
| WHOOGLE_CONFIG_BLOCK | Block websites from search results (use comma-separated list) |
| WHOOGLE_CONFIG_THEME | Set theme mode (light, dark, or system) |
| WHOOGLE_CONFIG_SAFE | Enable safe searches |
| WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) |
| WHOOGLE_CONFIG_NEAR | Restrict results to only those near a particular city |
| WHOOGLE_CONFIG_TOR | Use Tor routing (if available) |
| WHOOGLE_CONFIG_NEW_TAB | Always open results in new tab |
| WHOOGLE_CONFIG_VIEW_IMAGE | Enable View Image option |
| WHOOGLE_CONFIG_GET_ONLY | Search using GET requests only |
| WHOOGLE_CONFIG_URL | The root url of the instance (`https://<your url>/`) |
| WHOOGLE_CONFIG_STYLE | The custom CSS to use for styling (should be single line) |
## Usage
Same as most search engines, with the exception of filtering by time range.
@ -470,7 +370,6 @@ Same as most search engines, with the exception of filtering by time range.
To filter by a range of time, append ":past <time>" to the end of your search, where <time> can be `hour`, `day`, `month`, or `year`. Example: `coronavirus updates :past hour`
## Extra Steps
### Set Whoogle as your primary search engine
*Note: If you're using a reverse proxy to run Whoogle Search, make sure the "Root URL" config option on the home page is set to your URL before going through these steps.*
@ -511,44 +410,10 @@ Browser settings:
2. Go to `Default Results` and click the `Setup fallback results` button. Click `+` and add Whoogle, then drag it to the top.
- Chrome/Chromium-based Browsers
- Automatic
- Visit the home page of your Whoogle Search instance -- this will automatically add the search engine if the [requirements](https://www.chromium.org/tab-to-search/) are met (GET request, no OnSubmit script, no path). If not, you can add it manually.
- Visit the home page of your Whoogle Search instance -- this may automatically add the search engine to your list of search engines. If not, you can add it manually.
- Manual
- Under search engines > manage search engines > add, manually enter your Whoogle instance details with a `<whoogle url>/search?q=%s` formatted search URL.
### Custom Redirecting
You can set custom site redirects using the `WHOOGLE_REDIRECTS` environment
variable. A lot of sites, such as Twitter, Reddit, etc, have built-in redirects
to [Farside links](https://sr.ht/~benbusby/farside), but you may want to define
your own.
To do this, you can use the following syntax:
```
WHOOGLE_REDIRECTS="<parent_domain>:<new_domain>"
```
For example, if you want to redirect from "badsite.com" to "goodsite.com":
```
WHOOGLE_REDIRECTS="badsite.com:goodsite.com"
```
This can be used for multiple sites as well, with comma separation:
```
WHOOGLE_REDIRECTS="badA.com:goodA.com,badB.com:goodB.com"
```
NOTE: Do not include "http(s)://" when defining your redirect.
### Custom Bangs
You can create your own custom bangs. By default, bangs are stored in
`app/static/bangs`. See [`00-whoogle.json`](https://github.com/benbusby/whoogle-search/blob/main/app/static/bangs/00-whoogle.json)
for an example. These are parsed in alphabetical order with later files
overriding bangs set in earlier files, with the exception that DDG bangs
(downloaded to `app/static/bangs/bangs.json`) are always parsed first. Thus,
any custom bangs will always override the DDG ones.
### Prevent Downtime (Heroku only)
Part of the deal with Heroku's free tier is that you're allocated 550 hours/month (meaning it can't stay active 24/7), and the app is temporarily shut down after 30 minutes of inactivity. Once it becomes inactive, any Whoogle searches will still work, but it'll take an extra 10-15 seconds for the app to come back online before displaying the result, which can be frustrating if you're in a hurry.
@ -578,31 +443,6 @@ Unfortunately, Firefox Containers do not currently pass through `POST` requests
4. Restart Firefox
5. Navigate to Whoogle instance and [re-add the engine](#set-whoogle-as-your-primary-search-engine)
### Reverse Proxying
#### Nginx
Here is a sample Nginx config for Whoogle:
```
server {
server_name your_domain_name.com;
access_log /dev/null;
error_log /dev/null;
location / {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Host $host;
proxy_set_header X-NginX-Proxy true;
proxy_pass http://localhost:5000;
}
}
```
You can then add SSL support using LetsEncrypt by following a guide such as [this one](https://www.nginx.com/blog/using-free-ssltls-certificates-from-lets-encrypt-with-nginx/).
## Contributing
Under the hood, Whoogle is a basic Flask app with the following structure:
@ -623,9 +463,9 @@ Under the hood, Whoogle is a basic Flask app with the following structure:
- `search.html`: An iframe-able search page
- `logo.html`: A template consisting mostly of the Whoogle logo as an SVG (separated to help keep `index.html` a bit cleaner)
- `opensearch.xml`: A template used for supporting [OpenSearch](https://developer.mozilla.org/en-US/docs/Web/OpenSearch).
- `imageresults.html`: An "experimental" template used for supporting the "Full Size" image feature on desktop.
- `imageresults.html`: An "experimental" template used for supporting the "Full Size" image feature on desktop.
- `static/<css|js>`
- CSS/JavaScript files, should be self-explanatory
- CSS/JavaScript files, should be self-explanatory
- `static/settings`
- Key-value JSON files for establishing valid configuration values
@ -664,36 +504,24 @@ I'm a huge fan of Searx though and encourage anyone to use that instead if they
**Why does the image results page look different?**
A lot of the app currently piggybacks on Google's existing support for fetching results pages with JavaScript disabled. To their credit, they've done an excellent job with styling pages, but it seems that the image results page - particularly on mobile - is a little rough. Moving forward, with enough interest, I'd like to transition to fetching the results and parsing them into a unique Whoogle-fied interface that I can style myself.
A lot of the app currently piggybacks on Google's existing support for fetching results pages with JavaScript disabled. To their credit, they've done an excellent job with styling pages, but it seems that the image results page - particularly on mobile - is a little rough. Moving forward, with enough interest, I'd like to transition to fetching the results and parsing them into a unique Whoogle-fied interface that I can style myself.
## Public Instances
*Note: Use public instances at your own discretion. The maintainers of Whoogle do not personally validate the integrity of any other instances. Popular public instances are more likely to be rate-limited or blocked.*
*Note: Use public instances at your own discretion. The maintainers of Whoogle are only responsible for https://whoogle.fossho.st, and do not personally validate the integrity of any other instances. Popular public instances are more likely to be rate-limited or blocked.*
| Website | Country | Language | Cloudflare |
|-|-|-|-|
| [https://whoogle.fossho.st](https://whoogle.fossho.st) | 🇺🇸 US | Multi-choice | |
| [https://search.albony.xyz](https://search.albony.xyz/) | 🇮🇳 IN | Multi-choice | |
| [https://search.garudalinux.org](https://search.garudalinux.org) | 🇫🇮 FI | Multi-choice | ✅ |
| [https://search.dr460nf1r3.org](https://search.dr460nf1r3.org) | 🇩🇪 DE | Multi-choice | ✅ |
| [https://s.tokhmi.xyz](https://s.tokhmi.xyz) | 🇺🇸 US | Multi-choice | ✅ |
| [https://whoogle.sdf.org](https://whoogle.sdf.org) | 🇺🇸 US | Multi-choice | |
| [https://search.garudalinux.org](https://search.garudalinux.org) | 🇩🇪 DE | Multi-choice | |
| [https://whooglesearch.net](https://whooglesearch.net) | 🇩🇪 DE | Spanish | |
| [https://search.exonip.de](https://search.exonip.de) | 🇳🇱 NL | Multi-choice | |
| [https://s.alefvanoon.xyz](https://s.alefvanoon.xyz) | 🇺🇸 US | Multi-choice | ✅ |
| [https://www.whooglesearch.ml](https://www.whooglesearch.ml) | 🇺🇸 US | English | |
| [https://search.sethforprivacy.com](https://search.sethforprivacy.com) | 🇩🇪 DE | English | |
| [https://whoogle.dcs0.hu](https://whoogle.dcs0.hu) | 🇭🇺 HU | Multi-choice | |
| [https://gowogle.voring.me](https://gowogle.voring.me) | 🇺🇸 US | Multi-choice | |
| [https://whoogle.privacydev.net](https://whoogle.privacydev.net) | 🇫🇷 FR | English | |
| [https://wg.vern.cc](https://wg.vern.cc) | 🇺🇸 US | English | |
| [https://whoogle.hxvy0.gq](https://whoogle.hxvy0.gq) | 🇨🇦 CA | Turkish Only | ✅ |
| [https://whoogle.hostux.net](https://whoogle.hostux.net) | 🇫🇷 FR | Multi-choice | |
| [https://whoogle.lunar.icu](https://whoogle.lunar.icu) | 🇩🇪 DE | Multi-choice | ✅ |
| [https://wgl.frail.duckdns.org](https://wgl.frail.duckdns.org) | 🇧🇷 BR | Multi-choice | |
| [https://whoogle.no-logs.com](https://whoogle.no-logs.com/) | 🇸🇪 SE | Multi-choice | |
| [https://whoogle.ftw.lol](https://whoogle.ftw.lol) | 🇩🇪 DE | Multi-choice | |
| [https://whoogle-search--replitcomreside.repl.co](https://whoogle-search--replitcomreside.repl.co) | 🇺🇸 US | English | |
| [https://search.notrustverify.ch](https://search.notrustverify.ch) | 🇨🇭 CH | Multi-choice | |
| [https://whoogle.datura.network](https://whoogle.datura.network) | 🇩🇪 DE | Multi-choice | |
| [https://whoogle.yepserver.xyz](https://whoogle.yepserver.xyz) | 🇺🇦 UA | Multi-choice | |
| [https://search.nezumi.party](https://search.nezumi.party) | 🇮🇹 IT | Multi-choice | |
| [https://search.snine.nl](https://search.snine.nl) | 🇳🇱 NL | Multi-choice | ✅ |
| [https://whoogle.dcs0.hu](https://whoogle.dcs0.hu) | 🇭🇺 HU | Multi-choice | ✅ |
* A checkmark in the "Cloudflare" column means the instance uses [Cloudflare](https://cloudflare.com) as a reverse proxy. Sites that only use Cloudflare DNS are not given a checkmark; it is reserved for sites using the proxying service, which grants Cloudflare the ability to monitor traffic to the website.
@ -703,15 +531,6 @@ A lot of the app currently piggybacks on Google's existing support for fetching
|-|-|-|
| [http://whoglqjdkgt2an4tdepberwqz3hk7tjo4kqgdnuj77rt7nshw2xqhqad.onion](http://whoglqjdkgt2an4tdepberwqz3hk7tjo4kqgdnuj77rt7nshw2xqhqad.onion) | 🇺🇸 US | Multi-choice |
| [http://nuifgsnbb2mcyza74o7illtqmuaqbwu4flam3cdmsrnudwcmkqur37qd.onion](http://nuifgsnbb2mcyza74o7illtqmuaqbwu4flam3cdmsrnudwcmkqur37qd.onion) | 🇩🇪 DE | English |
| [http://whoogle.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion](http://whoogle.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion/) | 🇺🇸 US | English |
| [http://whoogle.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion](http://whoogle.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion/) | 🇫🇷 FR | English |
| [http://whoogle.daturab6drmkhyeia4ch5gvfc2f3wgo6bhjrv3pz6n7kxmvoznlkq4yd.onion](http://whoogle.daturab6drmkhyeia4ch5gvfc2f3wgo6bhjrv3pz6n7kxmvoznlkq4yd.onion/) | 🇩🇪 DE | Multi-choice |
#### I2P Instances
| Website | Country | Language |
|-|-|-|
| [http://verneks7rfjptpz5fpii7n7nrxilsidi2qxepeuuf66c3tsf4nhq.b32.i2p](http://verneks7rfjptpz5fpii7n7nrxilsidi2qxepeuuf66c3tsf4nhq.b32.i2p) | 🇺🇸 US | English |
## Screenshots
#### Desktop

@ -15,11 +15,6 @@
],
"stack": "container",
"env": {
"WHOOGLE_URL_PREFIX": {
"description": "The URL prefix to use for the whoogle instance (i.e. \"/whoogle\")",
"value": "",
"required": false
},
"WHOOGLE_USER": {
"description": "The username for basic auth. WHOOGLE_PASS must also be set if used. Leave empty to disable.",
"value": "",
@ -60,6 +55,11 @@
"value": "farside.link/invidious",
"required": false
},
"WHOOGLE_ALT_IG": {
"description": "The site to use as a replacement for instagram.com when site alternatives are enabled in the config.",
"value": "farside.link/bibliogram/u",
"required": false
},
"WHOOGLE_ALT_RD": {
"description": "The site to use as a replacement for reddit.com when site alternatives are enabled in the config.",
"value": "farside.link/libreddit",
@ -72,29 +72,19 @@
},
"WHOOGLE_ALT_TL": {
"description": "The Google Translate alternative to use for all searches following the 'translate ___' structure.",
"value": "farside.link/lingva",
"value": "lingva.ml",
"required": false
},
"WHOOGLE_ALT_IMG": {
"description": "The site to use as a replacement for imgur.com when site alternatives are enabled in the config.",
"value": "farside.link/rimgo",
"required": false
},
"description": "The site to use as a replacement for imgur.com when site alternatives are enabled in the config.",
"value": "imgin.voidnet.tech",
"required": false
},
"WHOOGLE_ALT_WIKI": {
"description": "The site to use as a replacement for wikipedia.com when site alternatives are enabled in the config.",
"value": "farside.link/wikiless",
"required": false
},
"WHOOGLE_ALT_IMDB": {
"description": "The site to use as a replacement for imdb.com when site alternatives are enabled in the config.",
"value": "farside.link/libremdb",
"required": false
},
"WHOOGLE_ALT_QUORA": {
"description": "The site to use as a replacement for quora.com when site alternatives are enabled in the config.",
"value": "farside.link/quetre",
"required": false
},
"description": "The site to use as a replacement for wikipedia.com when site alternatives are enabled in the config.",
"value": "wikiless.org",
"required": false
},
"WHOOGLE_MINIMAL": {
"description": "Remove everything except basic result cards from all search queries (set to 1 or leave blank)",
"value": "",
@ -105,11 +95,6 @@
"value": "",
"required": false
},
"WHOOGLE_CONFIG_TIME_PERIOD" : {
"description": "[CONFIG] The time period to use for restricting search results",
"value": "",
"required": false
},
"WHOOGLE_CONFIG_LANGUAGE": {
"description": "[CONFIG] The language to use for the interface (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/languages.json)",
"value": "",
@ -172,18 +157,8 @@
},
"WHOOGLE_CONFIG_STYLE": {
"description": "[CONFIG] Custom CSS styling (paste in CSS or leave blank)",
"value": ":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }",
"required": false
},
"WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED": {
"description": "[CONFIG] Encrypt preferences token, requires WHOOGLE_CONFIG_PREFERENCES_KEY to be set",
"value": "",
"required": false
},
"WHOOGLE_CONFIG_PREFERENCES_KEY": {
"description": "[CONFIG] Key to encrypt preferences",
"value": "NEEDS_TO_BE_MODIFIED",
"required": false
}
}
}

@ -1,45 +1,36 @@
from app.filter import clean_query
from app.request import send_tor_signal
from app.utils.session import generate_key
from app.utils.bangs import gen_bangs_json, load_all_bangs
from app.utils.misc import gen_file_hash, read_config_bool
from base64 import b64encode
from bs4 import MarkupResemblesLocatorWarning
from datetime import datetime, timedelta
from dotenv import load_dotenv
from app.utils.session import generate_user_key
from app.utils.bangs import gen_bangs_json
from app.utils.misc import gen_file_hash
from flask import Flask
from flask_session import Session
import json
import logging.config
import os
from stem import Signal
import threading
import warnings
from werkzeug.middleware.proxy_fix import ProxyFix
from app.utils.misc import read_config_bool
from app.version import __version__
from dotenv import load_dotenv
app = Flask(__name__, static_folder=os.path.dirname(
os.path.abspath(__file__)) + '/static')
app.wsgi_app = ProxyFix(app.wsgi_app)
dot_env_path = (
os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../whoogle.env'))
# Load .env file if enabled
if os.path.exists(dot_env_path):
load_dotenv(dot_env_path)
if os.getenv('WHOOGLE_DOTENV', ''):
dotenv_path = '../whoogle.env'
load_dotenv(os.path.join(os.path.dirname(os.path.abspath(__file__)),
dotenv_path))
app.enc_key = generate_key()
app.default_key = generate_user_key()
app.config['SECRET_KEY'] = os.urandom(32)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['SESSION_COOKIE_SAMESITE'] = 'strict'
if read_config_bool('HTTPS_ONLY'):
if os.getenv('HTTPS_ONLY'):
app.config['SESSION_COOKIE_NAME'] = '__Secure-session'
app.config['SESSION_COOKIE_SECURE'] = True
app.config['VERSION_NUMBER'] = __version__
app.config['VERSION_NUMBER'] = '0.7.1'
app.config['APP_ROOT'] = os.getenv(
'APP_ROOT',
os.path.dirname(os.path.abspath(__file__)))
@ -55,79 +46,35 @@ app.config['LANGUAGES'] = json.load(open(
app.config['COUNTRIES'] = json.load(open(
os.path.join(app.config['STATIC_FOLDER'], 'settings/countries.json'),
encoding='utf-8'))
app.config['TIME_PERIODS'] = json.load(open(
os.path.join(app.config['STATIC_FOLDER'], 'settings/time_periods.json'),
encoding='utf-8'))
app.config['TRANSLATIONS'] = json.load(open(
os.path.join(app.config['STATIC_FOLDER'], 'settings/translations.json'),
encoding='utf-8'))
app.config['THEMES'] = json.load(open(
os.path.join(app.config['STATIC_FOLDER'], 'settings/themes.json'),
encoding='utf-8'))
app.config['HEADER_TABS'] = json.load(open(
os.path.join(app.config['STATIC_FOLDER'], 'settings/header_tabs.json'),
encoding='utf-8'))
app.config['CONFIG_PATH'] = os.getenv(
'CONFIG_VOLUME',
os.path.join(app.config['STATIC_FOLDER'], 'config'))
app.config['DEFAULT_CONFIG'] = os.path.join(
app.config['CONFIG_PATH'],
'config.json')
app.config['CONFIG_DISABLE'] = read_config_bool('WHOOGLE_CONFIG_DISABLE')
app.config['CONFIG_DISABLE'] = os.getenv('WHOOGLE_CONFIG_DISABLE', '')
app.config['SESSION_FILE_DIR'] = os.path.join(
app.config['CONFIG_PATH'],
'session')
app.config['MAX_SESSION_SIZE'] = 4000 # Sessions won't exceed 4KB
app.config['BANG_PATH'] = os.getenv(
'CONFIG_VOLUME',
os.path.join(app.config['STATIC_FOLDER'], 'bangs'))
app.config['BANG_FILE'] = os.path.join(
app.config['BANG_PATH'],
'bangs.json')
# Ensure all necessary directories exist
if not os.path.exists(app.config['CONFIG_PATH']):
os.makedirs(app.config['CONFIG_PATH'])
if not os.path.exists(app.config['SESSION_FILE_DIR']):
os.makedirs(app.config['SESSION_FILE_DIR'])
if not os.path.exists(app.config['BANG_PATH']):
os.makedirs(app.config['BANG_PATH'])
if not os.path.exists(app.config['BUILD_FOLDER']):
os.makedirs(app.config['BUILD_FOLDER'])
# Session values
app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key')
if os.path.exists(app_key_path):
try:
app.config['SECRET_KEY'] = open(app_key_path, 'r').read()
except PermissionError:
app.config['SECRET_KEY'] = str(b64encode(os.urandom(32)))
else:
app.config['SECRET_KEY'] = str(b64encode(os.urandom(32)))
with open(app_key_path, 'w') as key_file:
key_file.write(app.config['SECRET_KEY'])
key_file.close()
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=365)
# NOTE: SESSION_COOKIE_SAMESITE must be set to 'lax' to allow the user's
# previous session to persist when accessing the instance from an external
# link. Setting this value to 'strict' causes Whoogle to revalidate a new
# session, and fail, resulting in cookies being disabled.
app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'
# Config fields that are used to check for updates
app.config['RELEASES_URL'] = 'https://github.com/' \
'benbusby/whoogle-search/releases'
app.config['LAST_UPDATE_CHECK'] = datetime.now() - timedelta(hours=24)
app.config['HAS_UPDATE'] = ''
# The alternative to Google Translate is treated a bit differently than other
# social media site alternatives, in that it is used for any translation
# related searches.
translate_url = os.getenv('WHOOGLE_ALT_TL', 'https://farside.link/lingva')
translate_url = os.getenv('WHOOGLE_ALT_TL', 'https://lingva.ml')
if not translate_url.startswith('http'):
translate_url = 'https://' + translate_url
app.config['TRANSLATE_URL'] = translate_url
@ -141,10 +88,16 @@ app.config['CSP'] = 'default-src \'none\';' \
'media-src \'self\';' \
'connect-src \'self\';'
# Generate DDG bang filter
generating_bangs = False
if not os.path.exists(app.config['CONFIG_PATH']):
os.makedirs(app.config['CONFIG_PATH'])
if not os.path.exists(app.config['SESSION_FILE_DIR']):
os.makedirs(app.config['SESSION_FILE_DIR'])
# Generate DDG bang filter, and create path if it doesn't exist yet
if not os.path.exists(app.config['BANG_PATH']):
os.makedirs(app.config['BANG_PATH'])
if not os.path.exists(app.config['BANG_FILE']):
generating_bangs = True
json.dump({}, open(app.config['BANG_FILE'], 'w'))
bangs_thread = threading.Thread(
target=gen_bangs_json,
@ -152,6 +105,9 @@ if not os.path.exists(app.config['BANG_FILE']):
bangs_thread.start()
# Build new mapping of static files for cache busting
if not os.path.exists(app.config['BUILD_FOLDER']):
os.makedirs(app.config['BUILD_FOLDER'])
cache_busting_dirs = ['css', 'js']
for cb_dir in cache_busting_dirs:
full_cb_dir = os.path.join(app.config['STATIC_FOLDER'], cb_dir)
@ -178,19 +134,13 @@ app.jinja_env.globals.update(clean_query=clean_query)
app.jinja_env.globals.update(
cb_url=lambda f: app.config['CACHE_BUSTING_MAP'][f])
Session(app)
# Attempt to acquire tor identity, to determine if Tor config is available
send_tor_signal(Signal.HEARTBEAT)
# Suppress spurious warnings from BeautifulSoup
warnings.simplefilter('ignore', MarkupResemblesLocatorWarning)
from app import routes # noqa
# The gen_bangs_json function takes care of loading bangs, so skip it here if
# it's already being loaded
if not generating_bangs:
load_all_bangs(app.config['BANG_FILE'])
# Disable logging from imported modules
logging.config.dictConfig({
'version': 1,

@ -1,39 +1,16 @@
import cssutils
from app.models.config import Config
from app.models.endpoint import Endpoint
from app.request import VALID_PARAMS, MAPS_URL
from app.utils.misc import read_config_bool
from app.utils.results import *
from bs4 import BeautifulSoup
from bs4.element import ResultSet, Tag
from cryptography.fernet import Fernet
from flask import render_template
import html
import re
import urllib.parse as urlparse
from urllib.parse import parse_qs
import re
from app.models.g_classes import GClasses
from app.request import VALID_PARAMS, MAPS_URL
from app.utils.misc import get_abs_url, read_config_bool
from app.utils.results import (
BLANK_B64, GOOG_IMG, GOOG_STATIC, G_M_LOGO_URL, LOGO_URL, SITE_ALTS,
has_ad_content, filter_link_args, append_anon_view, get_site_alt,
)
from app.models.endpoint import Endpoint
from app.models.config import Config
MAPS_ARGS = ['q', 'daddr']
minimal_mode_sections = ['Top stories', 'Images', 'People also ask']
unsupported_g_pages = [
'support.google.com',
'accounts.google.com',
'policies.google.com',
'google.com/preferences',
'google.com/intl',
'advanced_search',
'tbm=shop',
'ageverification.google.co.kr'
]
unsupported_g_divs = ['google.com/preferences?hl=', 'ageverification.google.co.kr']
import os
def extract_q(q_str: str, href: str) -> str:
@ -48,29 +25,7 @@ def extract_q(q_str: str, href: str) -> str:
Returns:
str: The 'q' element of the link, or an empty string
"""
return parse_qs(q_str, keep_blank_values=True)['q'][0] if ('&q=' in href or '?q=' in href) else ''
def build_map_url(href: str) -> str:
    """Tries to extract known args that explain the location in the url. If a
    location is found, returns the default maps url with it. Otherwise,
    returns the url unchanged.

    The arguments listed in MAPS_ARGS are checked in order and the first one
    present in the query string wins.

    Args:
        href: The full url to check.

    Returns:
        str: The rebuilt maps url, or the url unchanged.
    """
    # Only the query component is parsed. Handing the whole url to parse_qs
    # folds everything before the first '=' into the first key (for example
    # 'https://maps.google.com/maps?q'), so a location argument in the
    # leading position was never recognised. A string without a query
    # component is still parsed as a bare query string, as before.
    query = urlparse.urlparse(href).query or href
    parsed_args = parse_qs(query)

    # iterate through the known parameters and try to build the url
    for param in MAPS_ARGS:
        if param in parsed_args:
            return MAPS_URL + "?q=" + parsed_args[param][0]

    # query could not be extracted, returning unchanged url
    return href
return parse_qs(q_str)['q'][0] if ('&q=' in href or '?q=' in href) else ''
def clean_query(query: str) -> str:
@ -86,54 +41,17 @@ def clean_query(query: str) -> str:
return query[:query.find('-site:')] if '-site:' in query else query
def clean_css(css: str, page_url: str) -> str:
    """Rewrites the URLs referenced by a CSS string so that they point at
    Whoogle's element endpoint instead of the original location.

    Args:
        css: The CSS string
        page_url: The URL of the page the CSS belongs to; handed to
                  get_abs_url together with each referenced URL

    Returns:
        str: The filtered CSS, with URLs proxied through Whoogle
    """
    stylesheet = cssutils.parseString(css)

    for css_url in cssutils.getUrls(stylesheet):
        absolute = get_abs_url(css_url, page_url)

        # Inline "data:" URIs are left exactly as they are
        if not absolute.startswith('data:'):
            css = css.replace(
                css_url,
                f'{Endpoint.element}?type=image/png&url={absolute}')

    return css
class Filter:
# Limit used for determining if a result is a "regular" result or a list
# type result (such as "people also asked", "related searches", etc)
RESULT_CHILD_LIMIT = 7
def __init__(
self,
user_key: str,
config: Config,
root_url='',
page_url='',
query='',
mobile=False) -> None:
self.soup = None
def __init__(self, user_key: str, config: Config, mobile=False) -> None:
self.config = config
self.mobile = mobile
self.user_key = user_key
self.page_url = page_url
self.query = query
self.main_divs = ResultSet('')
self._elements = 0
self._av = set()
self.root_url = root_url[:-1] if root_url.endswith('/') else root_url
def __getitem__(self, name):
return getattr(self, name)
@ -154,152 +72,42 @@ class Filter:
return Fernet(self.user_key).encrypt(path.encode()).decode()
def clean(self, soup) -> BeautifulSoup:
self.soup = soup
self.main_divs = self.soup.find('div', {'id': 'main'})
self.main_divs = soup.find('div', {'id': 'main'})
self.remove_ads()
self.remove_block_titles()
self.remove_block_url()
self.collapse_sections()
self.update_css()
self.update_styling()
self.remove_block_tabs()
# self.main_divs is only populated for the main page of search results
# (i.e. not images/news/etc).
if self.main_divs:
for div in self.main_divs:
self.sanitize_div(div)
self.update_styling(soup)
for img in [_ for _ in self.soup.find_all('img') if 'src' in _.attrs]:
for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
self.update_element_src(img, 'image/png')
for audio in [_ for _ in self.soup.find_all('audio') if 'src' in _.attrs]:
for audio in [_ for _ in soup.find_all('audio') if 'src' in _.attrs]:
self.update_element_src(audio, 'audio/mpeg')
audio['controls'] = ''
for link in self.soup.find_all('a', href=True):
for link in soup.find_all('a', href=True):
self.update_link(link)
self.add_favicon(link)
if self.config.alts:
self.site_alt_swap()
input_form = self.soup.find('form')
input_form = soup.find('form')
if input_form is not None:
input_form['method'] = 'GET' if self.config.get_only else 'POST'
# Use a relative URI for submissions
input_form['action'] = 'search'
# Ensure no extra scripts passed through
for script in self.soup('script'):
for script in soup('script'):
script.decompose()
# Update default footer and header
footer = self.soup.find('footer')
footer = soup.find('footer')
if footer:
# Remove divs that have multiple links beyond just page navigation
[_.decompose() for _ in footer.find_all('div', recursive=False)
if len(_.find_all('a', href=True)) > 3]
for link in footer.find_all('a', href=True):
link['href'] = f'{link["href"]}&preferences={self.config.preferences}'
header = self.soup.find('header')
header = soup.find('header')
if header:
header.decompose()
self.remove_site_blocks(self.soup)
return self.soup
def sanitize_div(self, div) -> None:
    """Unescapes the text of every nested div and strips any script or
    iframe tags that the unescaped text turns out to contain.

    Args:
        div: The element whose nested divs should be sanitized; falsy
             values are ignored

    Returns:
        None (The soup object is modified directly)
    """
    if div:
        for inner in div.find_all('div', recursive=True):
            own_text = inner.find(text=True, recursive=False)

            # Only divs that actually carry text content are processed
            if own_text and inner.string:
                inner.string = html.unescape(own_text)
                fragment = BeautifulSoup(inner.string, 'html.parser')

                # Drop every script tag first, then every iframe tag
                for script_tag in fragment.find_all('script'):
                    script_tag.decompose()
                for iframe_tag in fragment.find_all('iframe'):
                    iframe_tag.decompose()

                inner.string = str(fragment)
def add_favicon(self, link) -> None:
    """Inserts the favicon of the linked site in front of a result link.

    Nothing happens when WHOOGLE_SHOW_FAVICONS is disabled, when the link
    is empty, parentless or not an http(s) link, or when no enclosing
    result div is found. The icon is requested through the element
    endpoint using an encrypted favicon.ico path.

    Args:
        link: The anchor tag to decorate

    Returns:
        None (The soup object is modified directly)
    """
    favicons_enabled = read_config_bool('WHOOGLE_SHOW_FAVICONS', True)
    usable_link = link and link.parent and link['href'].startswith('http')
    if not (favicons_enabled and usable_link):
        return

    # Walk up the tree: stop at the enclosing result div, and bail out if
    # an ancestor already carries a favicon or is a scroller element
    ancestor = link.parent
    found_result_div = False
    while ancestor and not found_result_div:
        classes = ancestor.attrs.get('class') or []
        if 'has-favicon' in classes or GClasses.scroller_class in classes:
            return
        if GClasses.result_class_a in classes:
            found_result_div = True
        else:
            ancestor = ancestor.parent

    if not found_result_div:
        return

    # Build the <img> element that loads the icon via the element endpoint
    target = urlparse.urlparse(link['href'])
    encrypted_icon = self.encrypt_path(
        f'{target.scheme}://{target.netloc}/favicon.ico',
        is_element=True)
    icon_src = (f'{self.root_url}/{Endpoint.element}?url={encrypted_icon}'
                '&type=image/x-icon')
    icon_markup = f'<img class="site-favicon" src="{icon_src}">'
    link.parent.insert(0, BeautifulSoup(icon_markup, 'html.parser'))

    # Flag every ancestor up to and including the result div, so that a
    # later link inside the same result does not get a second icon
    ancestor = link.parent
    while ancestor:
        classes = ancestor.get('class') or []
        classes.append('has-favicon')
        ancestor['class'] = classes
        ancestor = ancestor.parent
        if GClasses.result_class_a in classes:
            break
def remove_site_blocks(self, soup) -> None:
    """Removes the "-site:<domain>" filter string, built from the blocked
    sites in the config, from any text found in the page body.

    Args:
        soup: The parsed page; nothing is done if it has no body or if no
              sites are blocked

    Returns:
        None (The soup object is modified directly)
    """
    if not self.config.block or not soup.body:
        return

    search_string = ' '.join(
        '-site:' + site for site in self.config.block.split(','))

    # The filter string is literal text, so it must be escaped before being
    # used as a pattern. Otherwise regex metacharacters in a blocked entry
    # keep the text from matching or raise re.error.
    pattern = re.compile(re.escape(search_string))

    for result in soup.body.findAll(text=pattern):
        result.string.replace_with(
            result.string.replace(search_string, ''))
return soup
def remove_ads(self) -> None:
"""Removes ads found in the list of search result divs
@ -318,7 +126,7 @@ class Filter:
def remove_block_titles(self) -> None:
if not self.main_divs or not self.config.block_title:
return
block_title = re.compile(self.config.block_title)
block_title = re.compile(self.block_title)
for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]:
block_divs = [_ for _ in div.find_all('h3', recursive=True)
if block_title.search(_.text) is not None]
@ -327,27 +135,12 @@ class Filter:
def remove_block_url(self) -> None:
if not self.main_divs or not self.config.block_url:
return
block_url = re.compile(self.config.block_url)
block_url = re.compile(self.block_url)
for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]:
block_divs = [_ for _ in div.find_all('a', recursive=True)
if block_url.search(_.attrs['href']) is not None]
_ = div.decompose() if len(block_divs) else None
def remove_block_tabs(self) -> None:
    """Removes the tab divs from the page.

    When main_divs is populated, divs with the main tab class are removed
    from it; otherwise (the images tab) divs with the images tab class are
    removed from the whole soup.

    Returns:
        None (The soup object is modified directly)
    """
    if self.main_divs:
        container, tab_class = self.main_divs, GClasses.main_tbm_tab
    else:
        # when in images tab
        container, tab_class = self.soup, GClasses.images_tbm_tab

    for tab in container.find_all('div', attrs={'class': f'{tab_class}'}):
        tab.decompose()
def collapse_sections(self) -> None:
"""Collapses long result sections ("people also asked", "related
searches", etc) into "details" elements
@ -373,18 +166,10 @@ class Filter:
return
# Loop through results and check for the number of child divs in each
for result in self.main_divs.find_all():
for result in self.main_divs:
result_children = pull_child_divs(result)
if minimal_mode:
if any(f">{x}</span" in str(s) for s in result_children
for x in minimal_mode_sections):
result.decompose()
continue
for s in result_children:
if ('Twitter ' in str(s)):
result.decompose()
continue
if len(result_children) < self.RESULT_CHILD_LIMIT:
if len(result_children) in (1, 3):
continue
else:
if len(result_children) < self.RESULT_CHILD_LIMIT:
@ -432,7 +217,7 @@ class Filter:
# enabled
parent.decompose()
def update_element_src(self, element: Tag, mime: str, attr='src') -> None:
def update_element_src(self, element: Tag, mime: str) -> None:
"""Encrypts the original src of an element and rewrites the element src
to use the "/element?src=" pass-through.
@ -440,12 +225,10 @@ class Filter:
None (The soup element is modified directly)
"""
src = element[attr].split(' ')[0]
src = element['src']
if src.startswith('//'):
src = 'https:' + src
elif src.startswith('data:'):
return
if src.startswith(LOGO_URL):
# Re-brand with Whoogle logo
@ -453,53 +236,25 @@ class Filter:
render_template('logo.html'),
features='html.parser'))
return
elif src.startswith(G_M_LOGO_URL):
# Re-brand with single-letter Whoogle logo
element['src'] = 'static/img/favicon/apple-icon.png'
element.parent['href'] = 'home'
return
elif src.startswith(GOOG_IMG) or GOOG_STATIC in src:
element['src'] = BLANK_B64
return
element[attr] = f'{self.root_url}/{Endpoint.element}?url=' + (
self.encrypt_path(
src,
is_element=True
) + '&type=' + urlparse.quote(mime)
)
def update_css(self) -> None:
    """Updates URLs used in inline styles to be proxied by Whoogle
    using the /element endpoint.

    Returns:
        None (The soup element is modified directly)
    """
    # Filter all <style> tags
    for style in self.soup.find_all('style'):
        # A <style> tag without a single text child (e.g. an empty tag)
        # has no string; there is nothing to rewrite, and passing None on
        # to clean_css would fail.
        if not style.string:
            continue
        style.string = clean_css(style.string, self.page_url)

    # TODO: Convert remote stylesheets to style tags and proxy all
    # remote requests
    # for link in soup.find_all('link', attrs={'rel': 'stylesheet'}):
    #     print(link)
def update_styling(self) -> None:
# Update CSS classes for result divs
soup = GClasses.replace_css_classes(self.soup)
element['src'] = f'{Endpoint.element}?url=' + self.encrypt_path(
src,
is_element=True) + '&type=' + urlparse.quote(mime)
def update_styling(self, soup) -> None:
# Remove unnecessary button(s)
for button in self.soup.find_all('button'):
for button in soup.find_all('button'):
button.decompose()
# Remove svg logos
for svg in self.soup.find_all('svg'):
for svg in soup.find_all('svg'):
svg.decompose()
# Update logo
logo = self.soup.find('a', {'class': 'l'})
logo = soup.find('a', {'class': 'l'})
if logo and self.mobile:
logo['style'] = ('display:flex; justify-content:center; '
'align-items:center; color:#685e79; '
@ -507,32 +262,11 @@ class Filter:
# Fix search bar length on mobile
try:
search_bar = self.soup.find('header').find('form').find('div')
search_bar = soup.find('header').find('form').find('div')
search_bar['style'] = 'width: 100%;'
except AttributeError:
pass
# Fix body max width on images tab
style = self.soup.find('style')
div = self.soup.find('div', attrs={
'class': f'{GClasses.images_tbm_tab}'})
if style and div and not self.mobile:
css = style.string
css_html_tag = (
'html{'
'font-family: Roboto, Helvetica Neue, Arial, sans-serif;'
'font-size: 14px;'
'line-height: 20px;'
'text-size-adjust: 100%;'
'word-wrap: break-word;'
'}'
)
css = f"{css_html_tag}{css}"
css = re.sub('body{(.*?)}',
'body{padding:0 8px;margin:0 auto;max-width:736px;}',
css)
style.string = css
def update_link(self, link: Tag) -> None:
"""Update internal link paths with encrypted path, otherwise remove
unnecessary redirects and/or marketing params from the url
@ -544,58 +278,21 @@ class Filter:
None (the tag is updated directly)
"""
parsed_link = urlparse.urlparse(link['href'])
if '/url?q=' in link['href']:
link_netloc = extract_q(parsed_link.query, link['href'])
else:
link_netloc = parsed_link.netloc
# Remove any elements that direct to unsupported Google pages
if any(url in link_netloc for url in unsupported_g_pages):
# Replace href with only the intended destination (no "utm" type tags)
href = link['href'].replace('https://www.google.com', '')
if 'advanced_search' in href or 'tbm=shop' in href:
# FIXME: The "Shopping" tab requires further filtering (see #136)
# Temporarily removing all links to that tab for now.
link.decompose()
return
# Replaces the /url google unsupported link to the direct url
link['href'] = link_netloc
parent = link.parent
if any(divlink in link_netloc for divlink in unsupported_g_divs):
# Handle case where a search is performed in a different
# language than what is configured. This usually returns a
# div with the same classes as normal search results, but with
# a link to configure language preferences through Google.
# Since we want all language config done through Whoogle, we
# can safely decompose this element.
while parent:
p_cls = parent.attrs.get('class') or []
if f'{GClasses.result_class_a}' in p_cls:
parent.decompose()
break
parent = parent.parent
else:
# Remove cases where google links appear in the footer
while parent:
p_cls = parent.attrs.get('class') or []
if parent.name == 'footer' or f'{GClasses.footer}' in p_cls:
link.decompose()
parent = parent.parent
if link.decomposed:
return
# Replace href with only the intended destination (no "utm" type tags)
href = link['href'].replace('https://www.google.com', '')
result_link = urlparse.urlparse(href)
q = extract_q(result_link.query, href)
if q.startswith('/') and q not in self.query and 'spell=1' not in href:
if q.startswith('/'):
# Internal google links (i.e. mail, maps, etc) should still
# be forwarded to Google
link['href'] = 'https://google.com' + q
elif q.startswith('https://accounts.google.com'):
# Remove Sign-in link
link.decompose()
return
elif '/search?q=' in href:
# "li:1" implies the query should be interpreted verbatim,
# which is accomplished by wrapping the query in double quotes
@ -614,85 +311,38 @@ class Filter:
# Strip unneeded arguments
link['href'] = filter_link_args(q)
# Add alternate viewing options for results,
# if the result doesn't already have an AV link
netloc = urlparse.urlparse(link['href']).netloc
if self.config.anon_view and netloc not in self._av:
self._av.add(netloc)
append_anon_view(link, self.config)
# Add no-js option
if self.config.nojs:
append_nojs(link)
if self.config.new_tab:
link['target'] = '_blank'
else:
if href.startswith(MAPS_URL):
# Maps links don't work if a site filter is applied
link['href'] = build_map_url(link['href'])
elif (href.startswith('/?') or href.startswith('/search?') or
href.startswith('/imgres?')):
# make sure that tags can be clicked as relative URLs
link['href'] = href[1:]
elif href.startswith('/intl/'):
# do nothing, keep original URL for ToS
pass
elif href.startswith('/preferences'):
# there is no config specific URL, remove this
link.decompose()
return
link['href'] = MAPS_URL + "?q=" + clean_query(q)
else:
link['href'] = href
if self.config.new_tab and (
link["href"].startswith("http")
or link["href"].startswith("imgres?")
):
link["target"] = "_blank"
def site_alt_swap(self) -> None:
"""Replaces link locations and page elements if "alts" config
is enabled
"""
for site, alt in SITE_ALTS.items():
if site != "medium.com" and alt != "":
# Ignore medium.com replacements since these are handled
# specifically in the link description replacement, and medium
# results are never given their own "card" result where this
# replacement would make sense.
# Also ignore if the alt is empty, since this is used to indicate
# that the alt is not enabled.
for div in self.soup.find_all('div', text=re.compile(site)):
# Use the number of words in the div string to determine if the
# string is a result description (shouldn't replace domains used
# in desc text).
if len(div.string.split(' ')) == 1:
div.string = div.string.replace(site, alt)
for link in self.soup.find_all('a', href=True):
# Search and replace all link descriptions
# with alternative location
link['href'] = get_site_alt(link['href'])
link_desc = link.find_all(
text=re.compile('|'.join(SITE_ALTS.keys())))
if len(link_desc) == 0:
continue
# Replace link location if "alts" config is enabled
if self.config.alts:
# Search and replace all link descriptions
# with alternative location
link['href'] = get_site_alt(link['href'])
link_desc = link.find_all(
text=re.compile('|'.join(SITE_ALTS.keys())))
if len(link_desc) == 0:
return
# Replace link description
link_desc = link_desc[0]
if site not in link_desc or not alt:
# Replace link description
link_desc = link_desc[0]
for site, alt in SITE_ALTS.items():
if site not in link_desc:
continue
new_desc = BeautifulSoup(features='html.parser').new_tag('div')
link_str = str(link_desc)
# Medium links should be handled differently, since 'medium.com'
# is a common substring of domain names, but shouldn't be
# replaced (i.e. 'philomedium.com' should stay as it is).
if 'medium.com' in link_str:
if link_str.startswith('medium.com') or '.medium.com' in link_str:
link_str = SITE_ALTS['medium.com'] + link_str[
link_str.find('medium.com') + len('medium.com'):]
new_desc.string = link_str
else:
new_desc.string = link_str.replace(site, alt)
new_desc.string = str(link_desc).replace(site, alt)
link_desc.replace_with(new_desc)
break
def view_image(self, soup) -> BeautifulSoup:
"""Replaces the soup with a new one that handles mobile results and
@ -706,16 +356,17 @@ class Filter:
"""
# get some tags that are unchanged between mobile and pc versions
search_input = soup.find_all('td', attrs={'class': "O4cRJf"})[0]
search_options = soup.find_all('div', attrs={'class': "M7pB2"})[0]
cor_suggested = soup.find_all('table', attrs={'class': "By0U9"})
next_pages = soup.find('table', attrs={'class': "uZgmoc"})
next_pages = soup.find_all('table', attrs={'class': "uZgmoc"})[0]
information = soup.find_all('div', attrs={'class': "TuS8Ad"})[0]
results = []
# find results div
results_div = soup.find('div', attrs={'class': "nQvrDb"})
# find all the results (if any)
results_all = []
if results_div:
results_all = results_div.find_all('div', attrs={'class': "lIMUZd"})
results_div = soup.find_all('div', attrs={'class': "nQvrDb"})[0]
# find all the results
results_all = results_div.find_all('div', attrs={'class': "lIMUZd"})
for item in results_all:
urls = item.find('a')['href'].split('&imgrefurl=')
@ -747,7 +398,12 @@ class Filter:
results=results,
view_label="View Image"),
features='html.parser')
# replace search input object
soup.find_all('td',
attrs={'class': "O4cRJf"})[0].replaceWith(search_input)
# replace search options object (All, Images, Videos, etc.)
soup.find_all('div',
attrs={'class': "M7pB2"})[0].replaceWith(search_options)
# replace correction suggested by google object if exists
if len(cor_suggested):
soup.find_all(
@ -757,4 +413,7 @@ class Filter:
# replace next page object at the bottom of the page
soup.find_all('table',
attrs={'class': "uZgmoc"})[0].replaceWith(next_pages)
# replace information about user connection at the bottom of the page
soup.find_all('div',
attrs={'class': "TuS8Ad"})[0].replaceWith(information)
return soup

@ -1,39 +1,7 @@
from inspect import Attribute
from typing import Optional
from app.utils.misc import read_config_bool
from flask import current_app
import os
import re
from base64 import urlsafe_b64encode, urlsafe_b64decode
import pickle
from cryptography.fernet import Fernet
import hashlib
import brotli
import logging
import cssutils
from cssutils.css.cssstylesheet import CSSStyleSheet
from cssutils.css.cssstylerule import CSSStyleRule
# removes warnings from cssutils
cssutils.log.setLevel(logging.CRITICAL)
def get_rule_for_selector(stylesheet: CSSStyleSheet,
                          selector: str) -> Optional[CSSStyleRule]:
    """Find the first rule of a stylesheet whose selector text equals the
    requested selector.

    Args:
        stylesheet (CSSStyleSheet) -- the stylesheet whose rules are scanned
        selector (str) -- the exact selector text to look for

    Returns:
        Optional[CSSStyleRule] -- the first rule carrying that selector,
                                  or None when no rule matches
    """
    matching_rules = (
        candidate for candidate in stylesheet.cssRules
        # Rules that expose no selectorText can never match and are skipped
        if hasattr(candidate, "selectorText")
        and selector == candidate.selectorText
    )
    return next(matching_rules, None)
class Config:
@ -42,13 +10,14 @@ class Config:
self.url = os.getenv('WHOOGLE_CONFIG_URL', '')
self.lang_search = os.getenv('WHOOGLE_CONFIG_SEARCH_LANGUAGE', '')
self.lang_interface = os.getenv('WHOOGLE_CONFIG_LANGUAGE', '')
self.style_modified = os.getenv(
'WHOOGLE_CONFIG_STYLE', '')
self.style = os.getenv(
'WHOOGLE_CONFIG_STYLE',
open(os.path.join(app_config['STATIC_FOLDER'],
'css/variables.css')).read())
self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
self.block_title = os.getenv('WHOOGLE_CONFIG_BLOCK_TITLE', '')
self.block_url = os.getenv('WHOOGLE_CONFIG_BLOCK_URL', '')
self.country = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
self.tbs = os.getenv('WHOOGLE_CONFIG_TIME_PERIOD', '')
self.theme = os.getenv('WHOOGLE_CONFIG_THEME', 'system')
self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')
self.dark = read_config_bool('WHOOGLE_CONFIG_DARK') # deprecated
@ -59,10 +28,6 @@ class Config:
self.new_tab = read_config_bool('WHOOGLE_CONFIG_NEW_TAB')
self.view_image = read_config_bool('WHOOGLE_CONFIG_VIEW_IMAGE')
self.get_only = read_config_bool('WHOOGLE_CONFIG_GET_ONLY')
self.anon_view = read_config_bool('WHOOGLE_CONFIG_ANON_VIEW')
self.preferences_encrypted = read_config_bool('WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED')
self.preferences_key = os.getenv('WHOOGLE_CONFIG_PREFERENCES_KEY', '')
self.accept_language = False
self.safe_keys = [
@ -74,11 +39,7 @@ class Config:
'new_tab',
'view_image',
'block',
'safe',
'nojs',
'anon_view',
'preferences_encrypted',
'tbs'
'safe'
]
# Skip setting custom config if there isn't one
@ -107,51 +68,6 @@ class Config:
if not name.startswith("__")
and (type(attr) is bool or type(attr) is str)}
def get_attrs(self):
    """Collect the instance attributes that hold plain bool or str values.

    Returns:
        dict -- attribute name mapped to its value, leaving out names that
                start with a double underscore and values whose exact type
                is neither bool nor str
    """
    exported = {}
    for key, value in self.__dict__.items():
        if key.startswith("__"):
            continue
        # Exact type comparison on purpose: ints, lists, None and
        # subclasses of bool/str are all left out.
        if type(value) is bool or type(value) is str:
            exported[key] = value
    return exported
@property
def style(self) -> str:
    """Returns the default style updated with specified modifications.

    The default stylesheet is read from ``css/variables.css`` inside the
    app's static folder. Every rule parsed from ``self.style_modified``
    either replaces the style of the default rule that has the same
    selector, or is added to the default stylesheet as a new rule.

    Returns:
        str -- the new style
    """
    variables_path = os.path.join(current_app.config['STATIC_FOLDER'],
                                  'css/variables.css')
    # Context manager so the file handle is closed right after reading,
    # instead of being left open until garbage collection.
    with open(variables_path) as variables_file:
        style_sheet = cssutils.parseString(variables_file.read())

    modified_sheet = cssutils.parseString(self.style_modified)
    for rule in modified_sheet:
        # Not every parsed rule necessarily exposes selectorText (the
        # lookup helper guards for this as well), so read it defensively
        # instead of raising AttributeError on such rules.
        selector = getattr(rule, 'selectorText', None)
        rule_default = None
        if selector is not None:
            rule_default = get_rule_for_selector(style_sheet, selector)

        # if modified rule is in default stylesheet, update it
        if rule_default is not None:
            # TODO: update this in a smarter way to handle :root better
            # for now if we change a variable in :root all other default
            # variables need to be also present
            rule_default.style = rule.style
        # else add the new rule to the default stylesheet
        else:
            style_sheet.add(rule)
    return str(style_sheet.cssText, 'utf-8')
@property
def preferences(self) -> str:
    """Build the shareable preferences token for this config.

    The token is a one-character mode flag followed by the encoded
    preferences: 'e' marks an encrypted token, 'u' an unencrypted one that
    can be used by other whoogle instances.

    Note that reading this property may update
    ``self.preferences_encrypted``: when encryption is requested it is
    reset to whether an encryption key is actually set.

    Returns:
        str -- the mode flag followed by the encoded preferences
    """
    # Encryption can only stay enabled while a key is configured
    if self.preferences_encrypted:
        self.preferences_encrypted = bool(self.preferences_key)

    if self.preferences_encrypted:
        mode_flag = "e"
    else:
        mode_flag = 'u'

    digest = self._encode_preferences()
    return '{}{}'.format(mode_flag, digest)
def is_safe_key(self, key) -> bool:
"""Establishes a group of config options that are safe to set
in the url.
@ -190,13 +106,6 @@ class Config:
Returns:
Config -- a modified config object
"""
if 'preferences' in params:
params_new = self._decode_preferences(params['preferences'])
# if preferences leads to an empty dictionary it means preferences
# parameter was not decrypted successfully
if len(params_new):
params = params_new
for param_key in params.keys():
if not self.is_safe_key(param_key):
continue
@ -204,68 +113,22 @@ class Config:
if param_val == 'off':
param_val = False
elif isinstance(param_val, str):
if param_val.isdigit():
param_val = int(param_val)
elif param_val.isdigit():
param_val = int(param_val)
self[param_key] = param_val
return self
def to_params(self, keys: list = []) -> str:
def to_params(self) -> str:
"""Generates a set of safe params for using in Whoogle URLs
Args:
keys (list) -- optional list of keys of URL parameters
Returns:
str -- a set of URL parameters
"""
if not len(keys):
keys = self.safe_keys
param_str = ''
for safe_key in keys:
for safe_key in self.safe_keys:
if not self[safe_key]:
continue
param_str = param_str + f'&{safe_key}={self[safe_key]}'
return param_str
def _get_fernet_key(self, password: str) -> bytes:
hash_object = hashlib.md5(password.encode())
key = urlsafe_b64encode(hash_object.hexdigest().encode())
return key
def _encode_preferences(self) -> str:
    """Serialize the exportable config attributes into a URL-safe token.

    The attributes returned by ``get_attrs`` are pickled and
    brotli-compressed. When encryption is enabled and a key is set, the
    result is additionally Fernet-encrypted and compressed once more,
    mirroring the two decompress steps of the encrypted decode path.

    Returns:
        str -- the URL-safe base64 text of the encoded preferences
    """
    payload = brotli.compress(pickle.dumps(self.get_attrs()))

    # The key is only inspected when encryption was requested
    if self.preferences_encrypted and self.preferences_key != '':
        cipher = Fernet(self._get_fernet_key(self.preferences_key))
        payload = brotli.compress(cipher.encrypt(payload))

    return urlsafe_b64encode(payload).decode()
def _decode_preferences(self, preferences: str) -> dict:
mode = preferences[0]
preferences = preferences[1:]
if mode == 'e': # preferences are encrypted
try:
key = self._get_fernet_key(self.preferences_key)
config = Fernet(key).decrypt(
brotli.decompress(urlsafe_b64decode(
preferences.encode() + b'=='))
)
config = pickle.loads(brotli.decompress(config))
except Exception:
config = {}
elif mode == 'u': # preferences are not encrypted
config = pickle.loads(
brotli.decompress(urlsafe_b64decode(
preferences.encode() + b'=='))
)
else: # preferences are incorrectly formatted
config = {}
return config

@ -5,6 +5,7 @@ class Endpoint(Enum):
autocomplete = 'autocomplete'
home = 'home'
healthz = 'healthz'
session = 'session'
config = 'config'
opensearch = 'opensearch.xml'
search = 'search'

@ -1,47 +0,0 @@
from bs4 import BeautifulSoup
class GClasses:
    """A class for tracking obfuscated class names used in Google results that
    are directly referenced in Whoogle's filtering code.

    Note: Using these should be a last resort. It is always preferred to filter
    results using structural cues instead of referencing class names, as these
    are liable to change at any moment.
    """
    main_tbm_tab = 'KP7LCb'
    images_tbm_tab = 'n692Zd'
    footer = 'TuS8Ad'
    result_class_a = 'ZINbbc'
    result_class_b = 'luh4td'
    scroller_class = 'idg8be'

    # Maps each original class name to the list of updated class names
    # that should be rewritten back to it.
    result_classes = {
        result_class_a: ['Gx5Zad'],
        result_class_b: ['fP1Qef']
    }

    @classmethod
    def replace_css_classes(cls, soup: BeautifulSoup) -> BeautifulSoup:
        """Replace updated Google classes with the original class names that
        Whoogle relies on for styling.

        Args:
            soup: The result page as a BeautifulSoup object

        Returns:
            BeautifulSoup: The new BeautifulSoup
        """
        result_divs = soup.find_all('div', {
            'class': [_ for c in cls.result_classes.values() for _ in c]
        })

        for div in result_divs:
            new_class = ' '.join(div['class'])
            for key, val in cls.result_classes.items():
                # Apply the replacements one after another. Joining the
                # result of every replacement would repeat the whole class
                # string once per updated name as soon as a key maps to
                # more than one of them.
                for updated in val:
                    new_class = new_class.replace(updated, key)
            div['class'] = new_class.split(' ')
        return soup

    def __str__(self):
        # NOTE(review): no `value` attribute is defined in the visible
        # class body, so str() on an instance would raise AttributeError
        # unless it is set elsewhere -- confirm whether this is needed.
        return self.value

@ -1,16 +1,13 @@
from app.models.config import Config
from app.utils.misc import read_config_bool
from datetime import datetime
from defusedxml import ElementTree as ET
import xml.etree.ElementTree as ET
import random
import requests
from requests import Response, ConnectionError
import urllib.parse as urlparse
import os
from stem import Signal, SocketError
from stem.connection import AuthenticationFailure
from stem.control import Controller
from stem.connection import authenticate_cookie, authenticate_password
MAPS_URL = 'https://maps.google.com/maps'
AUTOCOMPLETE_URL = ('https://suggestqueries.google.com/'
@ -40,47 +37,19 @@ class TorError(Exception):
def send_tor_signal(signal: Signal) -> bool:
use_pass = read_config_bool('WHOOGLE_TOR_USE_PASS')
confloc = './misc/tor/control.conf'
# Check that the custom location of conf is real.
temp = os.getenv('WHOOGLE_TOR_CONF', '')
if os.path.isfile(temp):
confloc = temp
# Attempt to authenticate and send signal.
try:
with Controller.from_port(port=9051) as c:
if use_pass:
with open(confloc, "r") as conf:
# Scan for the last line of the file.
for line in conf:
pass
secret = line.strip('\n')
authenticate_password(c, password=secret)
else:
cookie_path = '/var/lib/tor/control_auth_cookie'
authenticate_cookie(c, cookie_path=cookie_path)
c.authenticate()
c.signal(signal)
os.environ['TOR_AVAILABLE'] = '1'
return True
except (SocketError, AuthenticationFailure,
ConnectionRefusedError, ConnectionError):
# TODO: Handle Tor authentication (password and cookie)
except (SocketError, ConnectionRefusedError, ConnectionError):
os.environ['TOR_AVAILABLE'] = '0'
return False
def gen_user_agent(is_mobile) -> str:
user_agent = os.environ.get('WHOOGLE_USER_AGENT', '')
user_agent_mobile = os.environ.get('WHOOGLE_USER_AGENT_MOBILE', '')
if user_agent and not is_mobile:
return user_agent
if user_agent_mobile and is_mobile:
return user_agent_mobile
firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox'
linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux'
@ -99,8 +68,8 @@ def gen_query(query, args, config) -> str:
if ':past' in query and 'tbs' not in args:
time_range = str.strip(query.split(':past', 1)[-1])
param_dict['tbs'] = '&tbs=' + ('qdr:' + str.lower(time_range[0]))
elif 'tbs' in args or 'tbs' in config:
result_tbs = args.get('tbs') if 'tbs' in args else config['tbs']
elif 'tbs' in args:
result_tbs = args.get('tbs')
param_dict['tbs'] = '&tbs=' + result_tbs
# Occasionally the 'tbs' param provided by google also contains a
@ -196,8 +165,6 @@ class Request:
config.lang_search if config.lang_search else ''
)
self.country = config.country if config.country else ''
# For setting Accept-language Header
self.lang_interface = ''
if config.accept_language:
@ -217,13 +184,19 @@ class Request:
proxy_pass = os.environ.get('WHOOGLE_PROXY_PASS', '')
auth_str = ''
if proxy_user:
auth_str = f'{proxy_user}:{proxy_pass}@'
proxy_str = f'{proxy_type}://{auth_str}{proxy_path}'
auth_str = proxy_user + ':' + proxy_pass
self.proxies = {
'https': proxy_str,
'http': proxy_str
'https': proxy_type + '://' +
((auth_str + '@') if auth_str else '') + proxy_path,
}
# Need to ensure both HTTP and HTTPS are in the proxy dict,
# regardless of underlying protocol
if proxy_type == 'https':
self.proxies['http'] = self.proxies['https'].replace(
'https', 'http')
else:
self.proxies['http'] = self.proxies['https']
else:
self.proxies = {
'http': 'socks5://127.0.0.1:9050',
@ -248,11 +221,7 @@ class Request:
"""
ac_query = dict(q=query)
if self.language:
ac_query['lr'] = self.language
if self.country:
ac_query['gl'] = self.country
if self.lang_interface:
ac_query['hl'] = self.lang_interface
ac_query['hl'] = self.language
response = self.send(base_url=AUTOCOMPLETE_URL,
query=urlparse.urlencode(ac_query)).text
@ -269,7 +238,7 @@ class Request:
return []
def send(self, base_url='', query='', attempt=0,
force_mobile=False, user_agent='') -> Response:
force_mobile=False) -> Response:
"""Sends an outbound request to a URL. Optionally sends the request
using Tor, if enabled by the user.
@ -285,14 +254,10 @@ class Request:
Response: The Response object returned by the requests call
"""
use_client_user_agent = int(os.environ.get('WHOOGLE_USE_CLIENT_USER_AGENT', '0'))
if user_agent and use_client_user_agent == 1:
modified_user_agent = user_agent
if force_mobile and not self.mobile:
modified_user_agent = self.modified_user_agent_mobile
else:
if force_mobile and not self.mobile:
modified_user_agent = self.modified_user_agent_mobile
else:
modified_user_agent = self.modified_user_agent
modified_user_agent = self.modified_user_agent
headers = {
'User-Agent': modified_user_agent
@ -307,8 +272,9 @@ class Request:
# view is suppressed correctly
now = datetime.now()
cookies = {
'CONSENT': 'PENDING+987',
'SOCS': 'CAESHAgBEhIaAB',
'CONSENT': 'YES+cb.{:d}{:02d}{:02d}-17-p0.de+F+678'.format(
now.year, now.month, now.day
)
}
# Validate Tor conn and request new identity if the last one failed

@ -4,13 +4,9 @@ import io
import json
import os
import pickle
import re
import urllib.parse as urlparse
import uuid
import validators
import sys
import traceback
from datetime import datetime, timedelta
from datetime import timedelta
from functools import wraps
import waitress
@ -18,47 +14,37 @@ from app import app
from app.models.config import Config
from app.models.endpoint import Endpoint
from app.request import Request, TorError
from app.utils.bangs import suggest_bang, resolve_bang
from app.utils.misc import empty_gif, placeholder_img, get_proxy_host_url, \
fetch_favicon
from app.filter import Filter
from app.utils.misc import read_config_bool, get_client_ip, get_request_url, \
check_for_update, encrypt_string
from app.utils.widgets import *
from app.utils.results import bold_search_terms,\
add_currency_card, check_currency, get_tabs_content
from app.utils.search import Search, needs_https, has_captcha
from app.utils.session import valid_user_session
from app.utils.bangs import resolve_bang
from app.utils.misc import read_config_bool, get_client_ip, get_request_url
from app.utils.results import add_ip_card, bold_search_terms,\
add_currency_card, check_currency
from app.utils.search import *
from app.utils.session import generate_user_key, valid_user_session
from bs4 import BeautifulSoup as bsoup
from flask import jsonify, make_response, request, redirect, render_template, \
send_file, session, url_for, g
from requests import exceptions
send_file, session, url_for
from requests import exceptions, get
from requests.models import PreparedRequest
from cryptography.fernet import Fernet, InvalidToken
from cryptography.exceptions import InvalidSignature
from werkzeug.datastructures import MultiDict
ac_var = 'WHOOGLE_AUTOCOMPLETE'
autocomplete_enabled = os.getenv(ac_var, '1')
# Load DDG bang json files only on init
bang_json = json.load(open(app.config['BANG_FILE'])) or {}
# Check the newest version of WHOOGLE
update = bsoup(get(app.config['RELEASES_URL']).text, 'html.parser')
newest_version = update.select_one('[class="Link--primary"]').string[1:]
current_version = int(''.join(filter(str.isdigit,
app.config['VERSION_NUMBER'])))
newest_version = int(''.join(filter(str.isdigit, newest_version)))
newest_version = '' if current_version >= newest_version \
else newest_version
def get_search_name(tbm):
    """Look up the name of the header tab registered for a tbm value.

    Args:
        tbm -- the tbm value to match against each configured header tab

    Returns:
        The 'name' of the first tab in app.config['HEADER_TABS'] whose
        'tbm' equals the argument, or None when no tab matches
    """
    tab_names = (
        entry['name']
        for entry in app.config['HEADER_TABS'].values()
        if entry['tbm'] == tbm
    )
    return next(tab_names, None)
ac_var = 'WHOOGLE_AUTOCOMPLETE'
autocomplete_enabled = os.getenv(ac_var, '1')
def auth_required(f):
@wraps(f)
def decorated(*args, **kwargs):
# do not ask password if cookies already present
if (
valid_user_session(session)
and 'cookies_disabled' not in request.args
and session['auth']
):
return f(*args, **kwargs)
auth = request.authorization
# Skip if username/password not set
@ -68,7 +54,6 @@ def auth_required(f):
auth
and whoogle_user == auth.username
and whoogle_pass == auth.password):
session['auth'] = True
return f(*args, **kwargs)
else:
return make_response('Not logged in', 401, {
@ -80,39 +65,27 @@ def auth_required(f):
def session_required(f):
@wraps(f)
def decorated(*args, **kwargs):
if not valid_user_session(session):
if (valid_user_session(session) and
'cookies_disabled' not in request.args):
g.session_key = session['key']
else:
session.pop('_permanent', None)
# Note: This sets all requests to use the encryption key determined per
# instance on app init. This can be updated in the future to use a key
# that is unique for their session (session['key']) but this should use
# a config setting to enable the session based key. Otherwise there can
# be problems with searches performed by users with cookies blocked if
# a session based key is always used.
g.session_key = app.enc_key
g.session_key = app.default_key
# Clear out old sessions
invalid_sessions = []
for user_session in os.listdir(app.config['SESSION_FILE_DIR']):
file_path = os.path.join(
session_path = os.path.join(
app.config['SESSION_FILE_DIR'],
user_session)
try:
# Ignore files that are larger than the max session file size
if os.path.getsize(file_path) > app.config['MAX_SESSION_SIZE']:
continue
with open(file_path, 'rb') as session_file:
with open(session_path, 'rb') as session_file:
_ = pickle.load(session_file)
data = pickle.load(session_file)
if isinstance(data, dict) and 'valid' in data:
continue
invalid_sessions.append(file_path)
except Exception:
# Broad exception handling here due to how instances installed
# with pip seem to have issues storing unrelated files in the
# same directory as sessions
invalid_sessions.append(session_path)
except (EOFError, FileNotFoundError):
pass
for invalid_session in invalid_sessions:
@ -129,36 +102,46 @@ def session_required(f):
@app.before_request
def before_request_func():
session.permanent = True
# Check for latest version if needed
now = datetime.now()
needs_update_check = now - timedelta(hours=24) > app.config['LAST_UPDATE_CHECK']
if read_config_bool('WHOOGLE_UPDATE_CHECK', True) and needs_update_check:
app.config['LAST_UPDATE_CHECK'] = now
app.config['HAS_UPDATE'] = check_for_update(
app.config['RELEASES_URL'],
app.config['VERSION_NUMBER'])
global bang_json
g.request_params = (
request.args if request.method == 'GET' else request.form
)
# Skip pre-request actions if verifying session
if '/session' in request.path and not valid_user_session(session):
return
default_config = json.load(open(app.config['DEFAULT_CONFIG'])) \
if os.path.exists(app.config['DEFAULT_CONFIG']) else {}
# Generate session values for user if unavailable
if not valid_user_session(session):
if (not valid_user_session(session) and
'cookies_disabled' not in request.args):
session['config'] = default_config
session['uuid'] = str(uuid.uuid4())
session['key'] = app.enc_key
session['auth'] = False
# Establish config values per user session
g.user_config = Config(**session['config'])
# Update user config if specified in search args
g.user_config = g.user_config.from_params(g.request_params)
session['key'] = generate_user_key()
# Skip checking for session on any searches that don't
# require a valid session
if (not Endpoint.autocomplete.in_path(request.path) and
not Endpoint.healthz.in_path(request.path) and
not Endpoint.opensearch.in_path(request.path)):
return redirect(url_for(
'session_check',
session_id=session['uuid'],
follow=get_request_url(request.url)), code=307)
else:
g.user_config = Config(**session['config'])
elif 'cookies_disabled' not in request.args:
# Set session as permanent
session.permanent = True
app.permanent_session_lifetime = timedelta(days=365)
g.user_config = Config(**session['config'])
else:
# User has cookies disabled, fall back to immutable default config
session.pop('_permanent', None)
g.user_config = Config(**default_config)
if not g.user_config.url:
g.user_config.url = get_request_url(request.url_root)
@ -170,12 +153,20 @@ def before_request_func():
g.app_location = g.user_config.url
# Attempt to reload bangs json if not generated yet
if not bang_json and os.path.getsize(app.config['BANG_FILE']) > 4:
try:
bang_json = json.load(open(app.config['BANG_FILE']))
except json.decoder.JSONDecodeError:
# Ignore decoding error, can occur if file is still
# being written
pass
@app.after_request
def after_request_func(resp):
resp.headers['X-Content-Type-Options'] = 'nosniff'
resp.headers['X-Frame-Options'] = 'DENY'
resp.headers['Cache-Control'] = 'max-age=86400'
if os.getenv('WHOOGLE_CSP', False):
resp.headers['Content-Security-Policy'] = app.config['CSP']
@ -197,6 +188,19 @@ def healthz():
return ''
@app.route(f'/{Endpoint.session}/<session_id>', methods=['GET', 'PUT', 'POST'])
def session_check(session_id):
    """Verify that the client kept its session cookie, then send it on to
    the URL it originally requested.

    When the session's uuid matches the id in the path, the session is
    marked valid and the client is redirected to the `follow` URL.
    Otherwise the client is redirected to the same URL with a
    `cookies_disabled` parameter appended.

    Args:
        session_id -- the uuid taken from the request path

    Returns:
        A 307 redirect response
    """
    # A request without `follow` used to fail inside redirect() or
    # prepare_url(); fall back to the home page instead. The fallback is
    # absolute because prepare_url() needs a URL with a scheme.
    #
    # SECURITY NOTE(review): `follow` comes straight from the query string
    # and is not validated, so this endpoint can redirect to arbitrary
    # sites. Consider restricting it to this instance's own host.
    follow_url = (request.args.get('follow')
                  or url_for('.index', _external=True))

    if 'uuid' in session and session['uuid'] == session_id:
        session['valid'] = True
        return redirect(follow_url, code=307)

    # The session uuid was not sent back: flag the follow-up request so it
    # is handled as coming from a client with cookies disabled
    req = PreparedRequest()
    req.prepare_url(follow_url, {'cookies_disabled': 1})
    session.pop('_permanent', None)
    return redirect(req.url, code=307)
@app.route('/', methods=['GET'])
@app.route(f'/{Endpoint.home}', methods=['GET'])
@auth_required
@ -208,10 +212,9 @@ def index():
return render_template('error.html', error_message=error_message)
return render_template('index.html',
has_update=app.config['HAS_UPDATE'],
newest_version=newest_version,
languages=app.config['LANGUAGES'],
countries=app.config['COUNTRIES'],
time_periods=app.config['TIME_PERIODS'],
themes=app.config['THEMES'],
autocomplete_enabled=autocomplete_enabled,
translation=app.config['TRANSLATIONS'][
@ -222,7 +225,8 @@ def index():
dark=g.user_config.dark),
config_disabled=(
app.config['CONFIG_DISABLE'] or
not valid_user_session(session)),
not valid_user_session(session) or
'cookies_disabled' in request.args),
config=g.user_config,
tor_available=int(os.environ.get('TOR_AVAILABLE')),
version_number=app.config['VERSION_NUMBER'])
@ -244,10 +248,8 @@ def opensearch():
return render_template(
'opensearch.xml',
main_url=opensearch_url,
request_type='' if get_only else 'method="post"',
search_type=request.args.get('tbm'),
search_name=get_search_name(request.args.get('tbm'))
), 200, {'Content-Type': 'application/xml'}
request_type='' if get_only else 'method="post"'
), 200, {'Content-Disposition': 'attachment; filename="opensearch.xml"'}
@app.route(f'/{Endpoint.search_html}', methods=['GET'])
@ -271,7 +273,8 @@ def autocomplete():
# Search bangs if the query begins with "!", but not "! " (feeling lucky)
if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
return jsonify([q, suggest_bang(q)])
return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if
_.startswith(q)]])
if not q and not request.data:
return jsonify({'?': []})
@ -288,22 +291,19 @@ def autocomplete():
g.user_request.autocomplete(q) if not g.user_config.tor else []
])
@app.route(f'/{Endpoint.search}', methods=['GET', 'POST'])
@session_required
@auth_required
def search():
if request.method == 'POST':
# Redirect as a GET request with an encrypted query
post_data = MultiDict(request.form)
post_data['q'] = encrypt_string(g.session_key, post_data['q'])
get_req_str = urlparse.urlencode(post_data)
return redirect(url_for('.search') + '?' + get_req_str)
# Update user config if specified in search args
g.user_config = g.user_config.from_params(g.request_params)
search_util = Search(request, g.user_config, g.session_key)
query = search_util.new_search_query()
bang = resolve_bang(query)
if bang:
bang = resolve_bang(query=query, bangs_dict=bang_json)
if bang != '':
return redirect(bang)
# Redirect to home if invalid/blank search
@ -329,16 +329,8 @@ def search():
translation = app.config['TRANSLATIONS'][localization_lang]
translate_to = localization_lang.replace('lang_', '')
# removing st-card to only use whoogle time selector
soup = bsoup(response, "html.parser");
for x in soup.find_all(attrs={"id": "st-card"}):
x.replace_with("")
response = str(soup)
# Return 503 if temporarily blocked by captcha
if has_captcha(str(response)):
app.logger.error('503 (CAPTCHA)')
return render_template(
'error.html',
blocked=True,
@ -347,43 +339,25 @@ def search():
farside='https://farside.link',
config=g.user_config,
query=urlparse.unquote(query),
params=g.user_config.to_params(keys=['preferences'])), 503
params=g.user_config.to_params()), 503
response = bold_search_terms(response, query)
# check for widgets and add if requested
if search_util.widget != '':
# Feature to display IP address
if search_util.check_kw_ip():
html_soup = bsoup(str(response), 'html.parser')
if search_util.widget == 'ip':
response = add_ip_card(html_soup, get_client_ip(request))
elif search_util.widget == 'calculator' and not 'nojs' in request.args:
response = add_calculator_card(html_soup)
# Update tabs content
tabs = get_tabs_content(app.config['HEADER_TABS'],
search_util.full_query,
search_util.search_type,
g.user_config.preferences,
translation)
response = add_ip_card(html_soup, get_client_ip(request))
# Feature to display currency_card
# Since this is determined by more than just the
# query is it not defined as a standard widget
conversion = check_currency(str(response))
if conversion:
html_soup = bsoup(str(response), 'html.parser')
response = add_currency_card(html_soup, conversion)
preferences = g.user_config.preferences
home_url = f"home?preferences={preferences}" if preferences else "home"
cleanresponse = str(response).replace("andlt;","&lt;").replace("andgt;","&gt;")
return render_template(
'display.html',
has_update=app.config['HAS_UPDATE'],
newest_version=newest_version,
query=urlparse.unquote(query),
search_type=search_util.search_type,
search_name=get_search_name(search_util.search_type),
config=g.user_config,
autocomplete_enabled=autocomplete_enabled,
lingva_url=app.config['TRANSLATE_URL'],
@ -397,21 +371,17 @@ def search():
is_translation=any(
_ in query.lower() for _ in [translation['translate'], 'translate']
) and not search_util.search_type, # Standard search queries only
response=cleanresponse,
response=response,
version_number=app.config['VERSION_NUMBER'],
search_header=render_template(
search_header=(render_template(
'header.html',
home_url=home_url,
config=g.user_config,
translation=translation,
languages=app.config['LANGUAGES'],
countries=app.config['COUNTRIES'],
time_periods=app.config['TIME_PERIODS'],
logo=render_template('logo.html', dark=g.user_config.dark),
query=urlparse.unquote(query),
search_type=search_util.search_type,
mobile=g.user_request.mobile,
tabs=tabs)).replace(" ", "")
mobile=g.user_request.mobile)
if 'isch' not in
search_util.search_type else '')), 200
@app.route(f'/{Endpoint.config}', methods=['GET', 'POST', 'PUT'])
@ -421,18 +391,13 @@ def config():
config_disabled = (
app.config['CONFIG_DISABLE'] or
not valid_user_session(session))
name = ''
if 'name' in request.args:
name = os.path.normpath(request.args.get('name'))
if not re.match(r'^[A-Za-z0-9_.+-]+$', name):
return make_response('Invalid config name', 400)
if request.method == 'GET':
return json.dumps(g.user_config.__dict__)
elif request.method == 'PUT' and not config_disabled:
if name:
config_pkl = os.path.join(app.config['CONFIG_PATH'], name)
if 'name' in request.args:
config_pkl = os.path.join(
app.config['CONFIG_PATH'],
request.args.get('name'))
session['config'] = (pickle.load(open(config_pkl, 'rb'))
if os.path.exists(config_pkl)
else session['config'])
@ -450,7 +415,7 @@ def config():
config_data,
open(os.path.join(
app.config['CONFIG_PATH'],
name), 'wb'))
request.args.get('name')), 'wb'))
session['config'] = config_data
return redirect(config_data['url'])
@ -458,6 +423,22 @@ def config():
return redirect(url_for('.index'), code=403)
@app.route(f'/{Endpoint.url}', methods=['GET'])
@session_required
@auth_required
def url():
    """Redirect to the address carried in the ``url`` or ``q`` query argument.

    If ``url`` is present the client is redirected to it unconditionally.
    Otherwise ``q`` is used as the redirect target when it contains the
    substring ``http``; any other value (including a missing ``q``) renders
    ``error.html`` with an explanatory message.

    Returns:
        A redirect response, or the rendered error page.
    """
    if 'url' in request.args:
        # NOTE(review): the redirect target is taken verbatim from the query
        # string -- confirm this open redirect is acceptable for deployments.
        return redirect(request.args.get('url'))

    # Default to an empty string: a request carrying neither ``url`` nor ``q``
    # previously hit ``len(None)`` and surfaced as a 500 instead of the error
    # page below.
    q = request.args.get('q', '')

    # An empty string can never contain 'http', so no separate length check
    # is needed.
    if 'http' in q:
        return redirect(q)

    return render_template(
        'error.html',
        error_message='Unable to resolve query: ' + q)
@app.route(f'/{Endpoint.imgres}')
@session_required
@auth_required
@ -469,35 +450,12 @@ def imgres():
@session_required
@auth_required
def element():
element_url = src_url = request.args.get('url')
if element_url.startswith('gAAAAA'):
try:
cipher_suite = Fernet(g.session_key)
src_url = cipher_suite.decrypt(element_url.encode()).decode()
except (InvalidSignature, InvalidToken) as e:
return render_template(
'error.html',
error_message=str(e)), 401
cipher_suite = Fernet(g.session_key)
src_url = cipher_suite.decrypt(request.args.get('url').encode()).decode()
src_type = request.args.get('type')
# Ensure requested element is from a valid domain
domain = urlparse.urlparse(src_url).netloc
if not validators.domain(domain):
return send_file(io.BytesIO(empty_gif), mimetype='image/gif')
try:
response = g.user_request.send(base_url=src_url)
# Display an empty gif if the requested element couldn't be retrieved
if response.status_code != 200 or len(response.content) == 0:
if 'favicon' in src_url:
favicon = fetch_favicon(src_url)
return send_file(io.BytesIO(favicon), mimetype='image/png')
else:
return send_file(io.BytesIO(empty_gif), mimetype='image/gif')
file_data = response.content
file_data = g.user_request.send(base_url=src_url).content
tmp_mem = io.BytesIO()
tmp_mem.write(file_data)
tmp_mem.seek(0)
@ -506,73 +464,24 @@ def element():
except exceptions.RequestException:
pass
empty_gif = base64.b64decode(
'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
return send_file(io.BytesIO(empty_gif), mimetype='image/gif')
@app.route(f'/{Endpoint.window}')
@session_required
@auth_required
def window():
target_url = request.args.get('location')
if target_url.startswith('gAAAAA'):
cipher_suite = Fernet(g.session_key)
target_url = cipher_suite.decrypt(target_url.encode()).decode()
content_filter = Filter(
g.session_key,
root_url=request.url_root,
config=g.user_config)
target = urlparse.urlparse(target_url)
# Ensure requested URL has a valid domain
if not validators.domain(target.netloc):
return render_template(
'error.html',
error_message='Invalid location'), 400
host_url = f'{target.scheme}://{target.netloc}'
get_body = g.user_request.send(base_url=target_url).text
get_body = g.user_request.send(base_url=request.args.get('location')).text
get_body = get_body.replace('src="/',
'src="' + request.args.get('location') + '"')
get_body = get_body.replace('href="/',
'href="' + request.args.get('location') + '"')
results = bsoup(get_body, 'html.parser')
src_attrs = ['src', 'href', 'srcset', 'data-srcset', 'data-src']
# Parse HTML response and replace relative links w/ absolute
for element in results.find_all():
for attr in src_attrs:
if not element.has_attr(attr) or not element[attr].startswith('/'):
continue
element[attr] = host_url + element[attr]
# Replace or remove javascript sources
for script in results.find_all('script', {'src': True}):
if 'nojs' in request.args:
script.decompose()
else:
content_filter.update_element_src(script, 'application/javascript')
# Replace all possible image attributes
img_sources = ['src', 'data-src', 'data-srcset', 'srcset']
for img in results.find_all('img'):
_ = [
content_filter.update_element_src(img, 'image/png', attr=_)
for _ in img_sources if img.has_attr(_)
]
# Replace all stylesheet sources
for link in results.find_all('link', {'href': True}):
content_filter.update_element_src(link, 'text/css', attr='href')
# Use anonymous view for all links on page
for a in results.find_all('a', {'href': True}):
a['href'] = f'{Endpoint.window}?location=' + a['href'] + (
'&nojs=1' if 'nojs' in request.args else '')
# Remove all iframes -- these are commonly used inside of <noscript> tags
# to enforce loading Google Analytics
for iframe in results.find_all('iframe'):
iframe.decompose()
for script in results('script'):
script.decompose()
return render_template(
'display.html',
@ -583,54 +492,6 @@ def window():
)
@app.route('/robots.txt')
def robots():
    """Return a plain-text robots.txt body that disallows ``/`` for all agents."""
    rules = 'User-Agent: *\nDisallow: /'
    robots_response = make_response(rules, 200)
    robots_response.mimetype = 'text/plain'
    return robots_response
@app.route('/favicon.ico')
def favicon():
    """Answer ``/favicon.ico`` with the static file ``img/favicon.ico``."""
    icon_path = 'img/favicon.ico'
    return app.send_static_file(icon_path)
@app.errorhandler(404)
def page_not_found(e):
    """Render ``error.html`` with the error's string form and a 404 status."""
    message = str(e)
    page = render_template('error.html', error_message=message)
    return page, 404
@app.errorhandler(Exception)
def internal_error(e):
    """Catch-all handler: log the traceback and render the 500 error page.

    The user's query is read from the form (POST) or the query string (any
    other method) and, when possible, replaced by the parsed form produced by
    ``Search.new_search_query()`` so the error page can display it.

    Args:
        e: The unhandled exception (not inspected directly; the active
            traceback is printed to stderr instead).

    Returns:
        A ``(rendered error.html, 500)`` tuple.
    """
    params_source = request.form if request.method == 'POST' else request.args
    query = params_source.get('q')

    # Attempt to parse the query. This is deliberately best-effort: the
    # handler must still produce an error page if parsing itself fails.
    try:
        search_util = Search(request, g.user_config, g.session_key)
        query = search_util.new_search_query()
    except Exception:
        pass

    print(traceback.format_exc(), file=sys.stderr)

    localization_lang = g.user_config.get_localization_lang()
    translation = app.config['TRANSLATIONS'][localization_lang]
    return render_template(
        'error.html',
        error_message='Internal server error (500)',
        translation=translation,
        farside='https://farside.link',
        config=g.user_config,
        # ``query`` is None when the request had no ``q`` and parsing failed;
        # unquote(None) raises TypeError, which would make this error handler
        # itself crash, so fall back to an empty string.
        query=urlparse.unquote(query or ''),
        params=g.user_config.to_params(keys=['preferences'])), 500
def run_app() -> None:
parser = argparse.ArgumentParser(
description='Whoogle Search console runner')
@ -644,16 +505,6 @@ def run_app() -> None:
default='127.0.0.1',
metavar='<ip address>',
help='Specifies the host address to use (default 127.0.0.1)')
parser.add_argument(
'--unix-socket',
default='',
metavar='</path/to/unix.sock>',
help='Listen for app on unix socket instead of host:port')
parser.add_argument(
'--unix-socket-perms',
default='600',
metavar='<octal permissions>',
help='Octal permissions to use for the Unix domain socket (default 600)')
parser.add_argument(
'--debug',
default=False,
@ -704,10 +555,5 @@ def run_app() -> None:
if args.debug:
app.run(host=args.host, port=args.port, debug=args.debug)
elif args.unix_socket:
waitress.serve(app, unix_socket=args.unix_socket, unix_socket_perms=args.unix_socket_perms)
else:
waitress.serve(
app,
listen="{}:{}".format(args.host, args.port),
url_prefix=os.environ.get('WHOOGLE_URL_PREFIX', ''))
waitress.serve(app, listen="{}:{}".format(args.host, args.port))

@ -1,14 +0,0 @@
{
"!i": {
"url": "search?q={}&tbm=isch",
"suggestion": "!i (Whoogle Images)"
},
"!v": {
"url": "search?q={}&tbm=vid",
"suggestion": "!v (Whoogle Videos)"
},
"!n": {
"url": "search?q={}&tbm=nws",
"suggestion": "!n (Whoogle News)"
}
}

@ -22,11 +22,6 @@ li {
color: var(--whoogle-dark-text) !important;
}
.anon-view {
color: var(--whoogle-dark-text) !important;
text-decoration: underline;
}
textarea {
background: var(--whoogle-dark-page-bg) !important;
color: var(--whoogle-dark-text) !important;
@ -64,28 +59,19 @@ select {
.ZINbbc {
overflow: hidden;
box-shadow: 0 0 0 0 !important;
background-color: var(--whoogle-dark-result-bg) !important;
margin-bottom: 10px !important;
border-radius: 8px !important;
}
.KP7LCb {
box-shadow: 0 0 0 0 !important;
background-color: var(--whoogle-dark-result-bg) !important;
}
.BVG0Nb {
box-shadow: 0 0 0 0 !important;
background-color: var(--whoogle-dark-page-bg) !important;
background-color: var(--whoogle-dark-result-bg) !important;
}
.ZINbbc.luh4tb {
background: var(--whoogle-dark-result-bg) !important;
margin-bottom: 24px !important;
.bRsWnc {
background-color: var(--whoogle-dark-result-bg) !important;
}
.bRsWnc {
background-color: var(--whoogle-dark-result-bg) !important;
.BVG0Nb {
background-color: var(--whoogle-dark-page-bg) !important;
}
.x54gtf {
@ -105,7 +91,7 @@ select {
}
.sa1toc {
background: var(--whoogle-dark-page-bg) !important;
background: var(--whoogle-dark-element-bg) !important;
}
#search-bar {
@ -143,7 +129,7 @@ select {
color: var(--whoogle-dark-contrast-text) !important;
}
.content, .result-config {
.content {
background-color: var(--whoogle-dark-element-bg) !important;
color: var(--whoogle-contrast-text) !important;
}
@ -212,7 +198,3 @@ path {
.cb:focus {
color: var(--whoogle-dark-contrast-text) !important;
}
.desktop-header, .mobile-header {
background-color: var(--whoogle-dark-result-bg) !important;
}

@ -13,12 +13,6 @@ header {
border-radius: 2px 0 0 0;
}
.result-config {
margin-bottom: 10px;
padding: 10px;
border-radius: 8px;
}
.mobile-logo {
font: 22px/36px Futura, Arial, sans-serif;
padding-left: 5px;
@ -104,11 +98,6 @@ header {
border: 0px !important;
}
.autocomplete-mobile{
display: -webkit-box;
width: 100%;
}
.desktop-header-logo {
height: 1.65em;
}
@ -117,134 +106,3 @@ header {
width: 100%;
flex: 1
}
/* Default link appearance. */
a {
  color: #1967D2;
  text-decoration: none;
  /* The unprefixed `tap-highlight-color` is not a recognised property and is
     dropped by browsers; only the -webkit- prefixed form takes effect. */
  -webkit-tap-highlight-color: rgba(0, 0, 0, .10);
}
.header-tab-div {
border-radius: 0 0 8px 8px;
box-shadow: 0 2px 3px rgba(32, 33, 36, 0.18);
overflow: hidden;
margin-bottom: 10px;
}
.header-tab-div-2 {
border-top: 1px solid #dadce0;
height: 39px;
overflow: hidden;
}
.header-tab-div-3 {
height: 51px;
overflow-x: auto;
overflow-y: hidden;
}
.desktop-header {
height: 39px;
display: box;
display: flex;
width: 100%;
}
.header-tab {
box-pack: justify;
font-size: 14px;
line-height: 37px;
justify-content: space-between;
}
.desktop-header a, .desktop-header span {
color: #70757a;
display: block;
flex: none;
padding: 0 16px;
text-align: center;
text-transform: uppercase;
}
span.header-tab-span {
border-bottom: 2px solid #4285f4;
color: #4285f4;
font-weight: bold;
}
.mobile-header {
height: 39px;
display: box;
display: flex;
overflow-x: scroll;
width: 100%;
padding-left: 12px;
}
.mobile-header a, .mobile-header span {
color: #70757a;
text-decoration: none;
display: inline-block;
/* padding: 8px 12px 8px 12px; */
}
span.mobile-tab-span {
border-bottom: 2px solid #202124;
color: #202124;
height: 26px;
/* margin: 0 12px; */
/* padding: 0; */
}
.desktop-header input {
margin: 2px 4px 2px 8px;
}
a.header-tab-a:visited {
color: #70757a;
}
.header-tab-div-end {
border-left: 1px solid rgba(0, 0, 0, 0.12);
}
.adv-search {
font-size: 30px;
}
.adv-search:hover {
cursor: pointer;
}
#adv-search-toggle {
display: none;
}
.result-collapsible {
max-height: 0px;
overflow: hidden;
transition: max-height .25s linear;
}
.search-bar-input {
display: block;
font-size: 16px;
padding: 0 0 0 8px;
flex: 1;
height: 35px;
outline: none;
border: none;
width: 100%;
-webkit-tap-highlight-color: rgba(0, 0, 0, 0);
overflow: hidden;
}
#result-country {
max-width: 200px;
}
@media (max-width: 801px) {
.header-tab-div {
margin-bottom: 10px !important
}
}

@ -22,11 +22,6 @@ li {
color: var(--whoogle-text) !important;
}
.anon-view {
color: var(--whoogle-text) !important;
text-decoration: underline;
}
textarea {
background: var(--whoogle-page-bg) !important;
color: var(--whoogle-text) !important;
@ -39,23 +34,15 @@ select {
.ZINbbc {
overflow: hidden;
background-color: var(--whoogle-result-bg) !important;
margin-bottom: 10px !important;
border-radius: 8px !important;
box-shadow: 0 1px 6px rgba(32,33,36,0.28) !important;
background-color: var(--whoogle-result-bg) !important;
}
.BVG0Nb {
background-color: var(--whoogle-result-bg) !important;
}
.ZINbbc.luh4tb {
background: var(--whoogle-result-bg) !important;
margin-bottom: 24px !important;
background-color: var(--whoogle-result-bg) !important;
}
.bRsWnc {
background-color: var(--whoogle-result-bg) !important;
background-color: var(--whoogle-result-bg) !important;
}
.x54gtf {
@ -129,7 +116,7 @@ input {
color: var(--whoogle-contrast-text) !important;
}
.content, .result-config {
.content {
background-color: var(--whoogle-element-bg) !important;
color: var(--whoogle-contrast-text) !important;
}
@ -199,7 +186,3 @@ path {
.cb:focus {
color: var(--whoogle-text) !important;
}
.desktop-header, .mobile-header {
background-color: var(--whoogle-result-bg) !important;
}

@ -183,8 +183,4 @@ details summary {
select {
width: 100%;
}
#search-bar {
font-size: 20px;
}
}

@ -1,12 +1,3 @@
body {
display: block !important;
margin: auto !important;
}
.vvjwJb {
font-size: 16px !important;
}
.autocomplete {
position: relative;
display: inline-block;
@ -31,9 +22,8 @@ body {
}
details summary {
margin-bottom: 20px;
padding: 10px;
font-weight: bold;
padding-left: 10px;
}
details summary span {
@ -53,39 +43,3 @@ details summary span {
.ip-text-div {
padding-top: 0 !important;
}
.footer {
text-align: center;
}
.site-favicon {
float: left;
width: 25px;
padding-right: 5px;
}
.has-favicon .sCuL3 {
padding-left: 30px;
}
#flex_text_audio_icon_chunk {
display: none;
}
audio {
display: block;
margin-right: auto;
padding-bottom: 5px;
}
@media (min-width: 801px) {
body {
min-width: 736px !important;
}
}
@media (max-width: 801px) {
details summary {
margin-bottom: 10px !important
}
}

@ -1,9 +1,4 @@
let searchInput;
let currentFocus;
let originalSearch;
let autocompleteResults;
const handleUserInput = () => {
const handleUserInput = searchBar => {
let xhrRequest = new XMLHttpRequest();
xhrRequest.open("POST", "autocomplete");
xhrRequest.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
@ -14,114 +9,118 @@ const handleUserInput = () => {
}
// Fill autocomplete with fetched results
autocompleteResults = JSON.parse(xhrRequest.responseText)[1];
updateAutocompleteList();
let autocompleteResults = JSON.parse(xhrRequest.responseText);
autocomplete(searchBar, autocompleteResults[1]);
};
xhrRequest.send('q=' + searchInput.value);
xhrRequest.send('q=' + searchBar.value);
};
const removeActive = suggestion => {
// Remove "autocomplete-active" class from previously active suggestion
for (let i = 0; i < suggestion.length; i++) {
suggestion[i].classList.remove("autocomplete-active");
}
};
const autocomplete = (searchInput, autocompleteResults) => {
let currentFocus;
let originalSearch;
const addActive = (suggestion) => {
// Handle navigation outside of suggestion list
if (!suggestion || !suggestion[currentFocus]) {
if (currentFocus >= suggestion.length) {
// Move selection back to the beginning
currentFocus = 0;
} else if (currentFocus < 0) {
// Retrieve original search and remove active suggestion selection
currentFocus = -1;
searchInput.value = originalSearch;
removeActive(suggestion);
return;
} else {
return;
}
}
searchInput.addEventListener("input", function () {
let autocompleteList, autocompleteItem, i, val = this.value;
closeAllLists();
removeActive(suggestion);
suggestion[currentFocus].classList.add("autocomplete-active");
if (!val || !autocompleteResults) {
return false;
}
// Autofill search bar with suggestion content (minus the "bang name" if using a bang operator)
let searchContent = suggestion[currentFocus].textContent;
if (searchContent.indexOf('(') > 0) {
searchInput.value = searchContent.substring(0, searchContent.indexOf('('));
} else {
searchInput.value = searchContent;
}
currentFocus = -1;
autocompleteList = document.createElement("div");
autocompleteList.setAttribute("id", this.id + "-autocomplete-list");
autocompleteList.setAttribute("class", "autocomplete-items");
this.parentNode.appendChild(autocompleteList);
for (i = 0; i < autocompleteResults.length; i++) {
if (autocompleteResults[i].substr(0, val.length).toUpperCase() === val.toUpperCase()) {
autocompleteItem = document.createElement("div");
autocompleteItem.innerHTML = "<strong>" + autocompleteResults[i].substr(0, val.length) + "</strong>";
autocompleteItem.innerHTML += autocompleteResults[i].substr(val.length);
autocompleteItem.innerHTML += "<input type=\"hidden\" value=\"" + autocompleteResults[i] + "\">";
autocompleteItem.addEventListener("click", function () {
searchInput.value = this.getElementsByTagName("input")[0].value;
closeAllLists();
document.getElementById("search-form").submit();
});
autocompleteList.appendChild(autocompleteItem);
}
}
});
searchInput.focus();
};
searchInput.addEventListener("keydown", function (e) {
let suggestion = document.getElementById(this.id + "-autocomplete-list");
if (suggestion) suggestion = suggestion.getElementsByTagName("div");
if (e.keyCode === 40) { // down
e.preventDefault();
currentFocus++;
addActive(suggestion);
} else if (e.keyCode === 38) { //up
e.preventDefault();
currentFocus--;
addActive(suggestion);
} else if (e.keyCode === 13) { // enter
e.preventDefault();
if (currentFocus > -1) {
if (suggestion) suggestion[currentFocus].click();
}
} else {
originalSearch = document.getElementById("search-bar").value;
}
});
const autocompleteInput = (e) => {
// Handle navigation between autocomplete suggestions
let suggestion = document.getElementById("autocomplete-list");
if (suggestion) suggestion = suggestion.getElementsByTagName("div");
if (e.keyCode === 40) { // down
e.preventDefault();
currentFocus++;
addActive(suggestion);
} else if (e.keyCode === 38) { //up
e.preventDefault();
currentFocus--;
addActive(suggestion);
} else if (e.keyCode === 13) { // enter
e.preventDefault();
if (currentFocus > -1) {
if (suggestion) suggestion[currentFocus].click();
const addActive = suggestion => {
let searchBar = document.getElementById("search-bar");
// Handle navigation outside of suggestion list
if (!suggestion || !suggestion[currentFocus]) {
if (currentFocus >= suggestion.length) {
// Move selection back to the beginning
currentFocus = 0;
} else if (currentFocus < 0) {
// Retrieve original search and remove active suggestion selection
currentFocus = -1;
searchBar.value = originalSearch;
removeActive(suggestion);
return;
} else {
return;
}
}
} else {
originalSearch = searchInput.value;
}
};
const updateAutocompleteList = () => {
let autocompleteItem, i;
let val = originalSearch;
let autocompleteList = document.getElementById("autocomplete-list");
autocompleteList.innerHTML = "";
if (!val || !autocompleteResults) {
return false;
}
currentFocus = -1;
for (i = 0; i < autocompleteResults.length; i++) {
if (autocompleteResults[i].substr(0, val.length).toUpperCase() === val.toUpperCase()) {
autocompleteItem = document.createElement("div");
autocompleteItem.setAttribute("class", "autocomplete-item");
autocompleteItem.innerHTML = "<strong>" + autocompleteResults[i].substr(0, val.length) + "</strong>";
autocompleteItem.innerHTML += autocompleteResults[i].substr(val.length);
autocompleteItem.innerHTML += "<input type=\"hidden\" value=\"" + autocompleteResults[i] + "\">";
autocompleteItem.addEventListener("click", function () {
searchInput.value = this.getElementsByTagName("input")[0].value;
autocompleteList.innerHTML = "";
document.getElementById("search-form").submit();
});
autocompleteList.appendChild(autocompleteItem);
removeActive(suggestion);
suggestion[currentFocus].classList.add("autocomplete-active");
// Autofill search bar with suggestion content (minus the "bang name" if using a bang operator)
let searchContent = suggestion[currentFocus].textContent;
if (searchContent.indexOf('(') > 0) {
searchBar.value = searchContent.substring(0, searchContent.indexOf('('));
} else {
searchBar.value = searchContent;
}
}
};
document.addEventListener("DOMContentLoaded", function() {
let autocompleteList = document.createElement("div");
autocompleteList.setAttribute("id", "autocomplete-list");
autocompleteList.setAttribute("class", "autocomplete-items");
searchBar.focus();
};
searchInput = document.getElementById("search-bar");
searchInput.parentNode.appendChild(autocompleteList);
const removeActive = suggestion => {
for (let i = 0; i < suggestion.length; i++) {
suggestion[i].classList.remove("autocomplete-active");
}
};
searchInput.addEventListener("keydown", (event) => autocompleteInput(event));
const closeAllLists = el => {
let suggestions = document.getElementsByClassName("autocomplete-items");
for (let i = 0; i < suggestions.length; i++) {
if (el !== suggestions[i] && el !== searchInput) {
suggestions[i].parentNode.removeChild(suggestions[i]);
}
}
};
// Close lists and search when user selects a suggestion
document.addEventListener("click", function (e) {
autocompleteList.innerHTML = "";
closeAllLists(e.target);
});
});
};

@ -2,8 +2,6 @@ const setupSearchLayout = () => {
// Setup search field
const searchBar = document.getElementById("search-bar");
const searchBtn = document.getElementById("search-submit");
const arrowKeys = [37, 38, 39, 40];
let searchValue = searchBar.value;
// Automatically focus on search field
searchBar.focus();
@ -13,9 +11,8 @@ const setupSearchLayout = () => {
if (event.keyCode === 13) {
event.preventDefault();
searchBtn.click();
} else if (searchBar.value !== searchValue && !arrowKeys.includes(event.keyCode)) {
searchValue = searchBar.value;
handleUserInput();
} else {
handleUserInput(searchBar);
}
});
};

@ -1,67 +1,11 @@
document.addEventListener("DOMContentLoaded", () => {
const advSearchToggle = document.getElementById("adv-search-toggle");
const advSearchDiv = document.getElementById("adv-search-div");
const searchBar = document.getElementById("search-bar");
const countrySelect = document.getElementById("result-country");
const timePeriodSelect = document.getElementById("result-time-period");
const arrowKeys = [37, 38, 39, 40];
let searchValue = searchBar.value;
countrySelect.onchange = () => {
let str = window.location.href;
n = str.lastIndexOf("/search");
if (n > 0) {
str = str.substring(0, n) + `/search?q=${searchBar.value}`;
str = tackOnParams(str);
window.location.href = str;
}
}
timePeriodSelect.onchange = () => {
let str = window.location.href;
n = str.lastIndexOf("/search");
if (n > 0) {
str = str.substring(0, n) + `/search?q=${searchBar.value}`;
str = tackOnParams(str);
window.location.href = str;
}
}
function tackOnParams(str) {
if (timePeriodSelect.value != "") {
str = str + `&tbs=${timePeriodSelect.value}`;
}
if (countrySelect.value != "") {
str = str + `&country=${countrySelect.value}`;
}
return str;
}
const toggleAdvancedSearch = on => {
if (on) {
advSearchDiv.style.maxHeight = "70px";
searchBar.addEventListener("keyup", function (event) {
if (event.keyCode !== 13) {
handleUserInput(searchBar);
} else {
advSearchDiv.style.maxHeight = "0px";
}
localStorage.advSearchToggled = on;
}
try {
toggleAdvancedSearch(JSON.parse(localStorage.advSearchToggled));
} catch (error) {
console.warn("Did not recover advanced search toggle state");
}
advSearchToggle.onclick = () => {
toggleAdvancedSearch(advSearchToggle.checked);
}
searchBar.addEventListener("keyup", function(event) {
if (event.keyCode === 13) {
document.getElementById("search-form").submit();
} else if (searchBar.value !== searchValue && !arrowKeys.includes(event.keyCode)) {
searchValue = searchBar.value;
handleUserInput();
}
});
});

@ -1,62 +1,44 @@
(function () {
let searchBar, results;
let shift = false;
const keymap = {
ArrowUp: goUp,
ArrowDown: goDown,
ShiftTab: goUp,
Tab: goDown,
k: goUp,
j: goDown,
'/': focusSearch,
};
let activeIdx = -1;
document.addEventListener('DOMContentLoaded', () => {
searchBar = document.querySelector('#search-bar');
results = document.querySelectorAll('#main>div>div>div>a');
});
document.addEventListener('keydown', (e) => {
if (e.key === 'Shift') {
shift = true;
}
if (e.target.tagName === 'INPUT') return true;
if (typeof keymap[e.key] === 'function') {
e.preventDefault();
keymap[`${shift && e.key == 'Tab' ? 'Shift' : ''}${e.key}`]();
}
});
document.addEventListener('keyup', (e) => {
if (e.key === 'Shift') {
shift = false;
}
});
function goUp () {
if (activeIdx > 0) focusResult(activeIdx - 1);
else focusSearch();
}
function goDown () {
if (activeIdx < results.length - 1) focusResult(activeIdx + 1);
}
function focusResult (idx) {
activeIdx = idx;
results[activeIdx].scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' });
results[activeIdx].focus();
}
function focusSearch () {
if (window.usingCalculator) {
// if this function exists, it means the calculator widget has been displayed
if (usingCalculator()) return;
}
activeIdx = -1;
searchBar.focus();
}
let searchBar, results;
const keymap = {
ArrowUp: goUp,
ArrowDown: goDown,
k: goUp,
j: goDown,
'/': focusSearch,
};
let activeIdx = -1;
document.addEventListener('DOMContentLoaded', () => {
searchBar = document.querySelector('#search-bar');
results = document.querySelectorAll('#main>div>div>div>a');
});
document.addEventListener('keydown', (e) => {
if (e.target.tagName === 'INPUT') return true;
if (typeof keymap[e.key] === 'function') {
e.preventDefault();
keymap[e.key]();
}
});
function goUp () {
if (activeIdx > 0) focusResult(activeIdx - 1);
else focusSearch();
}
function goDown () {
if (activeIdx < results.length - 1) focusResult(activeIdx + 1);
}
function focusResult (idx) {
activeIdx = idx;
results[activeIdx].scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' });
results[activeIdx].focus();
}
function focusSearch () {
activeIdx = -1;
searchBar.focus();
}
}());

@ -1,10 +1,6 @@
const checkForTracking = () => {
const mainDiv = document.getElementById("main");
const searchBar = document.getElementById("search-bar");
// some pages (e.g. images) do not have these
if (!mainDiv || !searchBar)
return;
const query = searchBar.value.replace(/\s+/g, '');
const query = document.getElementById("search-bar").value.replace(/\s+/g, '');
// Note: regex functions for checking for tracking queries were derived
// from here -- https://stackoverflow.com/questions/619977
@ -63,14 +59,11 @@ document.addEventListener("DOMContentLoaded", function() {
checkForTracking();
// Clear input if reset button tapped
const searchBar = document.getElementById("search-bar");
const search = document.getElementById("search-bar");
const resetBtn = document.getElementById("search-reset");
// some pages (e.g. images) do not have these
if (!searchBar || !resetBtn)
return;
resetBtn.addEventListener("click", event => {
event.preventDefault();
searchBar.value = "";
searchBar.focus();
search.value = "";
search.focus();
});
});

@ -1,32 +0,0 @@
{
"all": {
"tbm": null,
"href": "search?q={query}",
"name": "All",
"selected": true
},
"images": {
"tbm": "isch",
"href": "search?q={query}",
"name": "Images",
"selected": false
},
"maps": {
"tbm": null,
"href": "https://maps.google.com/maps?q={map_query}",
"name": "Maps",
"selected": false
},
"videos": {
"tbm": "vid",
"href": "search?q={query}",
"name": "Videos",
"selected": false
},
"news": {
"tbm": "nws",
"href": "search?q={query}",
"name": "News",
"selected": false
}
}

@ -4,7 +4,6 @@
{"name": "Afrikaans (Afrikaans)", "value": "lang_af"},
{"name": "Arabic (عربى)", "value": "lang_ar"},
{"name": "Armenian (հայերեն)", "value": "lang_hy"},
{"name": "Azerbaijani (Azərbaycanca)", "value": "lang_az"},
{"name": "Belarusian (Беларуская)", "value": "lang_be"},
{"name": "Bulgarian (български)", "value": "lang_bg"},
{"name": "Catalan (Català)", "value": "lang_ca"},
@ -29,7 +28,6 @@
{"name": "Italian (Italiano)", "value": "lang_it"},
{"name": "Japanese (日本語)", "value": "lang_ja"},
{"name": "Korean (한국어)", "value": "lang_ko"},
{"name": "Kurdish (Kurdî)", "value": "lang_ku"},
{"name": "Latvian (Latvietis)", "value": "lang_lv"},
{"name": "Lithuanian (Lietuvis)", "value": "lang_lt"},
{"name": "Norwegian (Norwegian)", "value": "lang_no"},
@ -47,9 +45,8 @@
{"name": "Swedish (Svenska)", "value": "lang_sv"},
{"name": "Thai (ไทย)", "value": "lang_th"},
{"name": "Turkish (Türk)", "value": "lang_tr"},
{"name": "Ukrainian (Український)", "value": "lang_uk"},
{"name": "Ukranian (Український)", "value": "lang_uk"},
{"name": "Vietnamese (Tiếng Việt)", "value": "lang_vi"},
{"name": "Welsh (Cymraeg)", "value": "lang_cy"},
{"name": "Xhosa (isiXhosa)", "value": "lang_xh"},
{"name": "Zulu (isiZulu)", "value": "lang_zu"}
]

@ -1,8 +0,0 @@
[
{"name": "Any time", "value": ""},
{"name": "Past hour", "value": "qdr:h"},
{"name": "Past 24 hours", "value": "qdr:d"},
{"name": "Past week", "value": "qdr:w"},
{"name": "Past month", "value": "qdr:m"},
{"name": "Past year", "value": "qdr:y"}
]

File diff suppressed because it is too large Load Diff

@ -1,260 +0,0 @@
<!--
Calculator widget.
This file should contain all required
CSS, HTML, and JS for it.
-->
<style>
#calc-text {
background: var(--whoogle-dark-page-bg);
padding: 8px;
border-radius: 8px;
text-align: right;
font-family: monospace;
font-size: 16px;
color: var(--whoogle-dark-text);
}
#prev-equation {
text-align: right;
}
.error-border {
border: 1px solid red;
}
#calc-btns {
display: grid;
grid-template-columns: repeat(6, 1fr);
grid-template-rows: repeat(5, 1fr);
gap: 5px;
}
#calc-btns button {
background: #313141;
color: var(--whoogle-dark-text);
border: none;
border-radius: 8px;
padding: 8px;
cursor: pointer;
}
#calc-btns button:hover {
background: #414151;
}
#calc-btns .common {
background: #51516a;
}
#calc-btns .common:hover {
background: #61617a;
}
#calc-btn-0 { grid-row: 5; grid-column: 3; }
#calc-btn-1 { grid-row: 4; grid-column: 3; }
#calc-btn-2 { grid-row: 4; grid-column: 4; }
#calc-btn-3 { grid-row: 4; grid-column: 5; }
#calc-btn-4 { grid-row: 3; grid-column: 3; }
#calc-btn-5 { grid-row: 3; grid-column: 4; }
#calc-btn-6 { grid-row: 3; grid-column: 5; }
#calc-btn-7 { grid-row: 2; grid-column: 3; }
#calc-btn-8 { grid-row: 2; grid-column: 4; }
#calc-btn-9 { grid-row: 2; grid-column: 5; }
#calc-btn-EQ { grid-row: 5; grid-column: 5; }
#calc-btn-PT { grid-row: 5; grid-column: 4; }
#calc-btn-BCK { grid-row: 5; grid-column: 6; }
#calc-btn-ADD { grid-row: 4; grid-column: 6; }
#calc-btn-SUB { grid-row: 3; grid-column: 6; }
#calc-btn-MLT { grid-row: 2; grid-column: 6; }
#calc-btn-DIV { grid-row: 1; grid-column: 6; }
#calc-btn-CLR { grid-row: 1; grid-column: 5; }
#calc-btn-PRC{ grid-row: 1; grid-column: 4; }
#calc-btn-RP { grid-row: 1; grid-column: 3; }
#calc-btn-LP { grid-row: 1; grid-column: 2; }
#calc-btn-ABS { grid-row: 1; grid-column: 1; }
#calc-btn-SIN { grid-row: 2; grid-column: 2; }
#calc-btn-COS { grid-row: 3; grid-column: 2; }
#calc-btn-TAN { grid-row: 4; grid-column: 2; }
#calc-btn-SQR { grid-row: 5; grid-column: 2; }
#calc-btn-EXP { grid-row: 2; grid-column: 1; }
#calc-btn-E { grid-row: 3; grid-column: 1; }
#calc-btn-PI { grid-row: 4; grid-column: 1; }
#calc-btn-LOG { grid-row: 5; grid-column: 1; }
</style>
<p id="prev-equation"></p>
<div id="calculator-widget">
<p id="calc-text">0</p>
<div id="calc-btns">
<button id="calc-btn-0" class="common">0</button>
<button id="calc-btn-1" class="common">1</button>
<button id="calc-btn-2" class="common">2</button>
<button id="calc-btn-3" class="common">3</button>
<button id="calc-btn-4" class="common">4</button>
<button id="calc-btn-5" class="common">5</button>
<button id="calc-btn-6" class="common">6</button>
<button id="calc-btn-7" class="common">7</button>
<button id="calc-btn-8" class="common">8</button>
<button id="calc-btn-9" class="common">9</button>
<button id="calc-btn-EQ" class="common">=</button>
<button id="calc-btn-PT" class="common">.</button>
<button id="calc-btn-BCK"></button>
<button id="calc-btn-ADD">+</button>
<button id="calc-btn-SUB">-</button>
<button id="calc-btn-MLT">x</button>
<button id="calc-btn-DIV">/</button>
<button id="calc-btn-CLR">C</button>
<button id="calc-btn-PRC">%</button>
<button id="calc-btn-RP">)</button>
<button id="calc-btn-LP">(</button>
<button id="calc-btn-ABS">|x|</button>
<button id="calc-btn-SIN">sin</button>
<button id="calc-btn-COS">cos</button>
<button id="calc-btn-TAN">tan</button>
<button id="calc-btn-SQR">√</button>
<button id="calc-btn-EXP">^</button>
<button id="calc-btn-E">ℇ</button>
<button id="calc-btn-PI">π</button>
<button id="calc-btn-LOG">log</button>
</div>
</div>
<script>
// JS does not have this by default.
// from https://www.freecodecamp.org/news/how-to-factorialize-a-number-in-javascript-9263c89a4b38/
/**
 * Recursively computes num!.
 *
 * @param {number} num value to take the factorial of
 * @returns {number} -1 when num is negative, 1 when num is 0, otherwise
 *   num multiplied by factorial(num - 1)
 */
function factorial(num) {
  // Negative input is reported with the sentinel -1.
  if (num < 0) return -1;
  return num === 0 ? 1 : num * factorial(num - 1);
}
// returns true if the user is currently focused on the calculator widget
/**
 * Walks from document.activeElement up through parentElement links.
 *
 * @returns {boolean} true as soon as a node with id "calculator-wrapper" is
 *   found, false once the chain runs out
 */
function usingCalculator() {
  for (let node = document.activeElement; node; node = node.parentElement) {
    if (node.id === "calculator-wrapper") return true;
  }
  return false;
}
// Shorthand for document.querySelectorAll
const $ = (selector) => document.querySelectorAll(selector);

// Maps a KeyboardEvent.key value to the suffix of the button id
// ("calc-btn-<suffix>") that the key should trigger.
const keybindings = {
    "Enter": "EQ",
    ".": "PT",
    "+": "ADD",
    "-": "SUB",
    "*": "MLT",
    "/": "DIV",
    "%": "PRC",
    "c": "CLR",
    "(": "LP",
    ")": "RP",
    "Backspace": "BCK",
};
// every digit key triggers the button carrying the same digit
for (let digit = 0; digit <= 9; digit++) {
    keybindings[String(digit)] = String(digit);
}
// Keyboard support: while focus is inside the calculator, a bound key
// press is forwarded as a click on the matching calculator button.
window.addEventListener("keydown", (event) => {
    if (!usingCalculator()) return;

    const pressed = event.key;
    // Enter would otherwise trigger its default action; only the search
    // bar is allowed to keep it.
    if (pressed === "Enter" && document.activeElement.id !== "search-bar") {
        event.preventDefault();
    }

    const buttonSuffix = keybindings[pressed];
    if (buttonSuffix) {
        document.getElementById("calc-btn-" + buttonSuffix).click();
    }
});
// Evaluates the expression currently shown in the calculator display.
//
// The display text (#calc-text) is translated from the button notation
// into a JavaScript expression, any unclosed "(" is closed, and the
// expression is evaluated. On success the result replaces the display
// text and the original expression is written to #prev-equation; on
// failure the display gets the "error-border" class and keeps its text.
//
// Every translation step uses a global regex: a string pattern (or a
// regex without the g flag) only rewrites the FIRST match, which made
// inputs such as "2 x 3 x 4" or "cos(0) + cos(0)" fail to evaluate.
const calc = () => {
    var mathtext = document.getElementById("calc-text");
    var statement = mathtext.innerHTML
        // remove empty ()
        .replace(/\(\)/g, "")
        // special constants
        .replace(/π/g, "(Math.PI)")
        .replace(/ℇ/g, "(Math.E)")
        // turns 3(1+2) into 3*(1+2) (for example)
        .replace(/(?<=[0-9\)])(?<=[^+\-x*\/%^])\(/g, "x(")
        // same except reversed
        .replace(/\)(?=[0-9\(])(?=[^+\-x*\/%^])/g, ")x")
        // replace human friendly x with JS *
        // (must run before the Math.* names are introduced below)
        .replace(/x/g, "*")
        // trig & misc functions
        .replace(/sin/g, "Math.sin")
        .replace(/cos/g, "Math.cos")
        .replace(/tan/g, "Math.tan")
        .replace(/√/g, "Math.sqrt")
        .replace(/\^/g, "**")
        .replace(/abs/g, "Math.abs")
        .replace(/log/g, "Math.log")
    ;

    // add any missing )s to the end
    var opened = (statement.match(/\(/g) || []).length;
    var closed = (statement.match(/\)/g) || []).length;
    if (opened > closed) statement += ")".repeat(opened - closed);

    // evaluate the expression.
    // NOTE(review): eval() executes whatever text is in the display. That
    // text is built from the calculator buttons, but anything else able to
    // write to #calc-text would be executed as script.
    console.log("calculating [" + statement + "]");
    try {
        var result = eval(statement);
        document.getElementById("prev-equation").innerHTML = mathtext.innerHTML + " = ";
        mathtext.innerHTML = result;
        mathtext.classList.remove("error-border");
    } catch (e) {
        mathtext.classList.add("error-border");
        console.error(e);
    }
}
// Click handler shared by all calculator buttons.
//
// `e` is the click event; the label (innerHTML) of the clicked button
// decides what happens to the display text in #calc-text.
const updateCalc = (e) => {
    // character(s) received from the button. Read from the handler's own
    // argument: the implicit global `event` is deprecated and is not
    // defined in every environment.
    var c = e.target.innerHTML;
    var mathtext = document.getElementById("calc-text");
    // a lone "0" is only a placeholder, drop it before appending
    if (mathtext.innerHTML === "0") mathtext.innerHTML = "";
    // special cases
    switch (c) {
        case "C":
            // Clear
            mathtext.innerHTML = "0";
            break;
        case "⬅":
            // Delete
            mathtext.innerHTML = mathtext.innerHTML.slice(0, -1);
            if (mathtext.innerHTML.length === 0) {
                mathtext.innerHTML = "0";
            }
            break;
        case "=":
            // the placeholder was stripped above; put it back so that an
            // empty expression is not evaluated (which displays "undefined")
            if (mathtext.innerHTML.length === 0) mathtext.innerHTML = "0";
            calc();
            break;
        case "sin":
        case "cos":
        case "tan":
        case "log":
        case "√":
            // functions are inserted together with their opening parenthesis
            mathtext.innerHTML += `${c}(`;
            break;
        case "|x|":
            mathtext.innerHTML += "abs(";
            break;
        case "+":
        case "-":
        case "x":
        case "/":
        case "%":
        case "^":
            // an operator on an empty display applies to 0
            if (mathtext.innerHTML.length === 0) mathtext.innerHTML = "0";
            // prevent typing 2 operators in a row: operators are stored as
            // " <op> ", so the previous one is the last three characters
            if (mathtext.innerHTML.match(/[+\-x\/%^] $/))
                mathtext.innerHTML = mathtext.innerHTML.slice(0, -3);
            mathtext.innerHTML += ` ${c} `;
            break;
        default:
            mathtext.innerHTML += c;
    }
}
// Route a click on any calculator button through updateCalc.
for (const button of $("#calc-btns button")) {
    button.addEventListener("click", updateCalc);
}
</script>

@ -2,11 +2,7 @@
<head>
<link rel="shortcut icon" href="static/img/favicon.ico" type="image/x-icon">
<link rel="icon" href="static/img/favicon.ico" type="image/x-icon">
{% if not search_type %}
<link rel="search" href="opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
{% else %}
<link rel="search" href="opensearch.xml?tbm={{ search_type }}" type="application/opensearchdescription+xml" title="Whoogle Search ({{ search_name }})">
{% endif %}
<link rel="search" href="opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="referrer" content="no-referrer">
<link rel="stylesheet" href="{{ cb_url('logo.css') }}">

@ -16,91 +16,25 @@
<div>
<h1>Error</h1>
<p>
{{ error_message }}
{{ error_message|safe }}
</p>
<hr>
{% if query and translation %}
<p>
<h4><a class="link" href="https://farside.link">{{ translation['continue-search'] }}</a></h4>
<ul>
<li>
<a href="https://github.com/benbusby/whoogle-search">Whoogle</a>
<ul>
<li>
<a class="link-color" href="{{farside}}/whoogle/search?q={{query}}{{params}}">
{{farside}}/whoogle/search?q={{query}}
</a>
</li>
</ul>
</li>
<li>
<a href="https://github.com/searxng/searxng">SearXNG</a>
<ul>
<li>
<a class="link-color" href="{{farside}}/searxng/search?q={{query}}">
{{farside}}/searxng/search?q={{query}}
</a>
</li>
</ul>
</li>
</ul>
<hr>
<h4>Other options:</h4>
<ul>
<li>
<a href="https://kagi.com">Kagi</a>
<ul>
<li>Requires account</li>
<li>
<a class="link-color" href="https://kagi.com/search?q={{query}}">
kagi.com/search?q={{query}}
</a>
</li>
</ul>
</li>
<li>
<a href="https://duckduckgo.com">DuckDuckGo</a>
<ul>
<li>
<a class="link-color" href="https://duckduckgo.com/search?q={{query}}">
duckduckgo.com/search?q={{query}}
</a>
</li>
</ul>
</li>
<li>
<a href="https://search.brave.com">Brave Search</a>
<ul>
<li>
<a class="link-color" href="https://search.brave.com/search?q={{query}}">
search.brave.com/search?q={{query}}
</a>
</li>
</ul>
</li>
<li>
<a href="https://ecosia.com">Ecosia</a>
<ul>
<li>
<a class="link-color" href="https://ecosia.com/search?q={{query}}">
ecosia.com/search?q={{query}}
</a>
</li>
</ul>
</li>
<li>
<a href="https://google.com">Google</a>
<ul>
<li>
<a class="link-color" href="https://google.com/search?q={{query}}">
google.com/search?q={{query}}
</a>
</li>
</ul>
</li>
</ul>
<hr>
</p>
<p>
{% if blocked is defined %}
<h4>{{ translation['continue-search'] }} <a class="link" href="https://github.com/benbusby/farside">Farside</a>!</h4>
Whoogle:
<br>
<a class="link-color" href="{{farside}}/whoogle/search?q={{query}}{{params}}">
{{farside}}/whoogle/search?q={{query}}{{params}}
</a>
<br><br>
Searx:
<br>
<a class="link-color" href="{{farside}}/searx/search?q={{query}}">
{{farside}}/searx/search?q={{query}}
</a>
<hr>
{% endif %}
</p>
<a class="link" href="home">Return Home</a>
</div>

@ -2,7 +2,7 @@
<p class="footer">
Whoogle Search v{{ version_number }} ||
<a class="link" href="https://github.com/benbusby/whoogle-search">{{ translation['github-link'] }}</a>
{% if has_update %}
{% if newest_version %}
|| <span class="update_available">Update Available 🟢</span>
{% endif %}
</p>

@ -1,19 +1,16 @@
{% if mobile %}
<header>
<div class="header-div">
<form class="search-form header"
<div class="bz1lBb header-div">
<form class="search-form Pg70bf"
id="search-form"
method="{{ 'GET' if config.get_only else 'POST' }}">
<a class="logo-link mobile-logo" href="{{ home_url }}">
<a class="logo-link mobile-logo" href="home">
<div id="mobile-header-logo">
{{ logo|safe }}
</div>
</a>
<div class="H0PQec mobile-input-div">
<div class="autocomplete-mobile esbc autocomplete">
{% if config.preferences %}
<input type="hidden" name="preferences" value="{{ config.preferences }}" />
{% endif %}
<div class="sbc esbc autocomplete">
<input
id="search-bar"
class="mobile-search-bar"
@ -21,46 +18,24 @@
autocomplete="off"
autocorrect="off"
spellcheck="false"
class="search-bar-input"
class="noHIxc"
name="q"
type="text"
value="{{ clean_query(query) }}"
dir="auto">
<input id="search-reset" type="reset" value="x">
<input name="tbm" value="{{ search_type }}" style="display: none">
<input name="country" value="{{ config.country }}" style="display: none;">
<input type="submit" style="display: none;">
<div class="sc"></div>
</div>
</div>
</form>
</div>
<div>
<div class="header-tab-div">
<div class="header-tab-div-2">
<div class="header-tab-div-3">
<div class="mobile-header header-tab">
{% for tab_id, tab_content in tabs.items() %}
{% if tab_content['selected'] %}
<span class="mobile-tab-span">{{ tab_content['name'] }}</span>
{% else %}
<a class="header-tab-a" href="{{ tab_content['href'] }}">{{ tab_content['name'] }}</a>
{% endif %}
{% endfor %}
<label for="adv-search-toggle" id="adv-search-label" class="adv-search"></label>
<input id="adv-search-toggle" type="checkbox">
<div class="header-tab-div-end"></div>
</div>
</div>
</div>
</div>
<div class="" id="s">
</div>
</header>
{% else %}
<header>
<div class="logo-div">
<a class="logo-link" href="{{ home_url }}">
<a class="logo-link" href="home">
<div class="desktop-header-logo">
{{ logo|safe }}
</div>
@ -73,23 +48,18 @@
method="{{ 'GET' if config.get_only else 'POST' }}">
<div class="autocomplete header-autocomplete">
<div style="width: 100%; display: flex">
{% if config.preferences %}
<input type="hidden" name="preferences" value="{{ config.preferences }}" />
{% endif %}
<input
id="search-bar"
autocapitalize="none"
autocomplete="off"
autocorrect="off"
class="search-bar-desktop search-bar-input"
class="search-bar-desktop noHIxc"
name="q"
spellcheck="false"
type="text"
value="{{ clean_query(query) }}"
dir="auto">
<input name="tbm" value="{{ search_type }}" style="display: none">
<input name="country" value="{{ config.country }}" style="display: none;">
<input name="tbs" value="{{ config.tbs }}" style="display: none;">
<input type="submit" style="display: none;">
<div class="sc"></div>
</div>
@ -97,62 +67,6 @@
</form>
</div>
</header>
<div>
<div class="header-tab-div">
<div class="header-tab-div-2">
<div class="header-tab-div-3">
<div class="desktop-header header-tab">
{% for tab_id, tab_content in tabs.items() %}
{% if tab_content['selected'] %}
<span class="header-tab-span">{{ tab_content['name'] }}</span>
{% else %}
<a class="header-tab-a" href="{{ tab_content['href'] }}">{{ tab_content['name'] }}</a>
{% endif %}
{% endfor %}
<label for="adv-search-toggle" id="adv-search-label" class="adv-search"></label>
<input id="adv-search-toggle" type="checkbox">
<div class="header-tab-div-end"></div>
</div>
</div>
</div>
</div>
<div class="" id="s">
</div>
{% endif %}
<div class="result-collapsible" id="adv-search-div">
<div class="result-config">
<label for="config-country">{{ translation['config-country'] }}: </label>
<select name="country" id="result-country">
{% for country in countries %}
<option value="{{ country.value }}"
{% if (
config.country != '' and config.country in country.value
) or (
config.country == '' and country.value == '')
%}
selected
{% endif %}>
{{ country.name }}
</option>
{% endfor %}
</select>
<br />
<label for="config-time-period">{{ translation['config-time-period'] }}: </label>
<select name="tbs" id="result-time-period">
{% for time_period in time_periods %}
<option value="{{ time_period.value }}"
{% if (
config.tbs != '' and config.tbs in time_period.value
) or (
config.tbs == '' and time_period.value == '')
%}
selected
{% endif %}>
{{ translation[time_period.value] }}
</option>
{% endfor %}
</select>
</div>
</div>
<script type="text/javascript" src="{{ cb_url('header.js') }}"></script>

@ -1,390 +1,116 @@
<div>
<!DOCTYPE html>
<html>
<head>
<meta content="application/xhtml+xml; charset=utf-8" http-equiv="Content-Type"/>
<meta content="no-cache" name="Cache-Control"/>
<title>
</title>
<style>
html {
font-family: Roboto, Helvetica Neue, Arial, sans-serif;
font-size: 14px;
line-height: 20px;
text-size-adjust: 100%;
color: #3c4043;
word-wrap: break-word;
background-color: #fff;
}
body {
padding: 0 8px;
margin: 0 auto;
max-width: 736px;
}
a {
text-decoration: none;
color: inherit;
}
a:hover {
text-decoration: underline;
}
a img {
border: 0;
}
.FbhRzb {
border-left: thin solid #dadce0;
border-right: thin solid #dadce0;
border-top: thin solid #dadce0;
height: 40px;
overflow: hidden;
}
.n692Zd {
margin-bottom: 10px;
}
.cvifge {
height: 40px;
border-spacing: 0;
width: 100%;
}
.QvGUP {
height: 40px;
padding: 0 8px 0 8px;
vertical-align: top;
}
.O4cRJf {
height: 40px;
width: 100%;
padding: 0;
padding-right: 16px;
}
.O1ePr {
height: 40px;
padding: 0;
vertical-align: top;
}
.kgJEQe {
height: 36px;
width: 98px;
vertical-align: top;
margin-top: 4px;
}
.lXLRf {
vertical-align: top;
}
.MhzMZd {
border: 0;
vertical-align: middle;
font-size: 14px;
height: 40px;
padding: 0;
width: 100%;
padding-left: 16px;
}
.xB0fq {
height: 40px;
border: none;
font-size: 14px;
background-color: #4285f4;
color: #fff;
padding: 0 16px;
margin: 0;
vertical-align: top;
cursor: pointer;
}
.xB0fq:focus {
border: 1px solid #000;
}
.M7pB2 {
border: thin solid #dadce0;
margin: 0 0 3px 0;
font-size: 13px;
font-weight: 500;
height: 40px;
}
.euZec {
width: 100%;
height: 40px;
text-align: center;
border-spacing: 0;
}
table.euZec td {
padding: 0;
width: 25%;
}
.QIqI7 {
display: inline-block;
padding-top: 4px;
font-weight: bold;
color: #4285f4;
}
.EY24We {
border-bottom: 2px solid #4285f4;
}
.CsQyDc {
display: inline-block;
color: #70757a;
}
.TuS8Ad {
font-size: 14px;
}
.HddGcc {
padding: 8px;
color: #70757a;
}
.dzp8ae {
font-weight: bold;
color: #3c4043;
}
.rEM8G {
color: #70757a;
}
.bookcf {
table-layout: fixed;
width: 100%;
border-spacing: 0;
}
.InWNIe {
text-align: center;
}
.uZgmoc {
border: thin solid #dadce0;
color: #70757a;
font-size: 14px;
text-align: center;
table-layout: fixed;
width: 100%;
}
.frGj1b {
display: block;
padding: 12px 0 12px 0;
width: 100%;
}
.BnJWBc {
text-align: center;
padding: 6px 0 13px 0;
height: 35px;
}
.e3goi {
vertical-align: top;
padding: 0;
height: 180px;
}
.GpQGbf {
margin: auto;
border-collapse: collapse;
border-spacing: 0;
width: 100%;
}
.X6ZCif {
color: #202124;
font-size: 11px;
line-height: 16px;
display: inline-block;
padding-top: 2px;
overflow: hidden;
padding-bottom: 4px;
width: 100%;
}
.TwVfHd {
border-radius: 16px;
border: thin solid #dadce0;
display: inline-block;
padding: 8px 8px;
margin-right: 8px;
margin-bottom: 4px;
}
.yekiAe {
background-color: #dadce0;
}
.svla5d {
width: 100%;
}
.ezO2md {
border: thin solid #dadce0;
padding: 12px 16px 12px 16px;
margin-bottom: 10px;
font-family: Roboto, Helvetica, Arial, sans-serif;
}
.TxbwNb {
border-spacing: 0;
}
.K35ahc {
width: 100%;
}
.owohpf {
text-align: center;
}
.RAyV4b {
width: 162px;
height: 140px;
line-height: 140px;
overflow: "hidden";
text-align: center;
}
.t0fcAb {
text-align: center;
margin: auto;
vertical-align: middle;
width: 100%;
height: 100%;
object-fit: contain;
}
.Tor4Ec {
padding-top: 2px;
padding-bottom: 8px;
}
.fYyStc {
word-break: break-word;
}
.ynsChf {
display: block;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.Fj3V3b {
color: #1967d2;
font-size: 14px;
line-height: 20px;
}
.FrIlee {
color: #202124;
font-size: 11px;
line-height: 16px;
}
.F9iS2e {
color: #70757a;
font-size: 11px;
line-height: 16px;
}
.WMQ2Le {
color: #70757a;
font-size: 12px;
line-height: 16px;
}
.x3G5ab {
color: #202124;
font-size: 12px;
line-height: 16px;
}
.fuLhoc {
color: #1967d2;
font-size: 18px;
line-height: 24px;
}
.epoveb {
font-size: 32px;
line-height: 40px;
font-weight: 400;
color: #202124;
}
.dXDvrc {
color: #0d652d;
font-size: 14px;
line-height: 20px;
word-wrap: break-word;
}
.dloBPe {
font-weight: bold;
}
.YVIcad {
color: #70757a;
}
.JkVVdd {
color: #ea4335;
}
.oXZRFd {
color: #ea4335;
}
.MQHtg {
color: #fbbc04;
}
.pyMRrb {
color: #1e8e3e;
}
.EtTZid {
color: #1e8e3e;
}
.M3vVJe {
color: #1967d2;
}
.qXLe6d {
display: block;
}
.NHQNef {
font-style: italic;
}
.Cb8Z7c {
white-space: pre;
}
a.ZWRArf {
text-decoration: none;
}
a .CVA68e:hover {
text-decoration: underline;
}
a{text-decoration:none;color:inherit}a:hover{text-decoration:underline}a img{border:0}body{font-family:Roboto,Helvetica,Arial,sans-serif;padding:8px;margin:0 auto;max-width:700px;min-width:240px;}.FbhRzb{border-left:thin solid #dadce0;border-right:thin solid #dadce0;border-top:thin solid #dadce0;height:40px;overflow:hidden}.n692Zd{margin-bottom:10px}.cvifge{height:40px;border-spacing:0;width:100%;}.QvGUP{height:40px;padding:0 8px 0 8px;vertical-align:top}.O4cRJf{height:40px;width:100%;padding:0;padding-right:16px}.O1ePr{height:40px;padding:0;vertical-align:top}.kgJEQe{height:36px;width:98px;vertical-align:top;margin-top:4px}.lXLRf{vertical-align:top}.MhzMZd{border:0;vertical-align:middle;font-size:14px;height:40px;padding:0;width:100%;padding-left:16px}.xB0fq{height:40px;border:none;font-size:14px;background-color:#4285f4;color:#fff;padding:0 16px;margin:0;vertical-align:top;cursor:pointer}.xB0fq:focus{border:1px solid #000}.M7pB2{border:thin solid #dadce0;margin:0 0 3px 0;font-size:13px;font-weight:500;height:40px}.euZec{width:100%;height:40px;text-align:center;border-spacing:0}table.euZec td{padding:0;width:25%}.QIqI7{display:inline-block;padding-top:4px;font-weight:bold;color:#4285f4}.EY24We{border-bottom:2px solid #4285f4}.CsQyDc{display:inline-block;color:#70757a}.TuS8Ad{font-size:14px}.HddGcc{padding:8px;color:#70757a}.dzp8ae{font-weight:bold;color:#3c4043}.rEM8G{color:#70757a}.bookcf{table-layout:fixed;width:100%;border-spacing:0}.InWNIe{text-align:center}.uZgmoc{border:thin solid #dadce0;color:#70757a;font-size:14px;text-align:center;table-layout:fixed;width:100%}.frGj1b{display:block;padding:12px 0 12px 0;width:100%}.BnJWBc{text-align:center;padding:6px 0 13px 0;height:35px}.e3goi{vertical-align:top;padding:0;height:180px}.GpQGbf{margin:auto;border-collapse:collapse;border-spacing:0;width:100%}
</style>
</head>
<body>
<style>
.X6ZCif{color:#202124;font-size:11px;line-height:16px;display:inline-block;padding-top:2px;overflow:hidden;padding-bottom:4px;width:100%}.TwVfHd{border-radius:16px;border:thin solid #dadce0;display:inline-block;padding:8px 8px;margin-right:8px;margin-bottom:4px}.yekiAe{background-color:#dadce0}.svla5d{width:100%}.ezO2md{border:thin solid #dadce0;padding:12px 16px 12px 16px;margin-bottom:10px;font-family:Roboto,Helvetica,Arial,sans-serif}.lIMUZd{font-family:Roboto,Helvetica,Arial,sans-serif}.TxbwNb{border-spacing:0}.K35ahc{width:100%}.owohpf{text-align:center}.RAyV4b{width:162px;height:140px;line-height:140px;overflow:'hidden';text-align:center;}.t0fcAb{text-align:center;margin:auto;vertical-align:middle;width:100%;height:100%;object-fit: contain}.Tor4Ec{padding-top:2px;padding-bottom:8px;}.fYyStc{word-break:break-word}.ynsChf{display:block;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}.Fj3V3b{color:#1967D2;font-size:14px;line-height:20px}.FrIlee{color:#202124;font-size:11px;line-height:16px}.F9iS2e{color:#70757a;font-size:11px;line-height:16px}.WMQ2Le{color:#70757a;font-size:12px;line-height:16px}.x3G5ab{color:#202124;font-size:12px;line-height:16px}.fuLhoc{color:#1967D2;font-size:18px;line-height:24px}.epoveb{font-size:32px;line-height:40px;font-weight:400;color:#202124}.dXDvrc{color:#0d652d;font-size:14px;line-height:20px;word-wrap:break-word}.dloBPe{font-weight:bold}.YVIcad{color:#70757a}.JkVVdd{color:#ea4335}.oXZRFd{color:#ea4335}.MQHtg{color:#fbbc04}.pyMRrb{color:#1e8e3e}.EtTZid{color:#1e8e3e}.M3vVJe{color:#1967D2}.qXLe6d{display:block}.NHQNef{font-style:italic}.Cb8Z7c{white-space:pre}a.ZWRArf{text-decoration:none}a .CVA68e:hover{text-decoration:underline}
</style>
<div class="n692Zd">
<div class="BnJWBc">
<a class="lXLRf" href="/?safe=off&amp;gbv=1&amp;output=images&amp;ie=UTF-8&amp;tbm=isch&amp;sa=X&amp;ved=0ahUKEwjhh7TZyd_vAhWShf0HHeYzCmsQPAgC">
<img alt="Google" class="kgJEQe" src="/images/branding/searchlogo/1x/googlelogo_desk_heirloom_color_150x55dp.gif"/>
</a>
</div>
<div class="FbhRzb">
<form action="/search">
<input name="safe" type="hidden" value="off"/>
<input name="gbv" type="hidden" value="1"/>
<input name="ie" type="hidden" value="ISO-8859-1"/>
<input name="tbm" type="hidden" value="isch"/>
<input name="oq" type="hidden"/>
<input name="aqs" type="hidden"/>
<table class="cvifge">
<tr>
<td class="O4cRJf">
<!-- search input -->
</td>
</tr>
</table>
</form>
</div>
<div class="M7pB2">
<!-- search options -->
</div>
</div>
<!-- <div class="X6ZCif"> Not present in mobile
</div> -->
<div>
<div>
<div>
<div class="lIMUZd">
<table class="By0U9">
<!-- correction suggested -->
</table>
</div>
<div class="lIMUZd">
<table class="By0U9">
<!-- correction suggested -->
</table>
</div>
</div>
</div>
<table class="GpQGbf">
{% for i in range((length // 4) + 1) %}
<tr>
{% for j in range([length - (i*4), 4]|min) %}
<td align="center" class="e3goi">
<td align="center" class="e3goi">
<div class="svla5d">
<div>
<div class="lIMUZd">
<div>
<table class="TxbwNb">
<tr>
<td>
<a href="{{ results[(i*4)+j].web_page }}">
<div class="RAyV4b">
<img
alt=""
class="t0fcAb"
src="{{ results[(i*4)+j].img_tbn }}"
/>
</div>
</a>
</td>
</tr>
<tr>
<td>
<a href="{{ results[(i*4)+j].web_page }}">
<div class="Tor4Ec">
<span class="qXLe6d x3G5ab">
<span class="fYyStc">
{{ results[(i*4)+j].domain }}
</span>
</span>
</div>
</a>
<a href="{{ results[(i*4)+j].img_url }}">
<div class="Tor4Ec">
<span class="qXLe6d F9iS2e">
<span class="fYyStc"> {{ view_label }} </span>
</span>
</div>
</a>
</td>
</tr>
</table>
</div>
<div class="lIMUZd">
<div>
<table class="TxbwNb">
<tr>
<td>
<a href="{{ results[(i*4)+j].web_page }}">
<div class="RAyV4b">
<img alt="" class="t0fcAb" src="{{ results[(i*4)+j].img_tbn }}"/>
</div>
</a>
</td>
</tr>
<tr>
<td>
<a href="{{ results[(i*4)+j].web_page }}">
<div class="Tor4Ec">
<span class="qXLe6d x3G5ab">
<span class="fYyStc">
{{ results[(i*4)+j].domain }}
</span>
</span>
</div>
</a>
<a href="{{ results[(i*4)+j].img_url }}">
<div class="Tor4Ec">
<span class="qXLe6d F9iS2e">
<span class="fYyStc">
{{ view_label }}
</span>
</span>
</div>
</a>
</td>
</tr>
</table>
</div>
</div>
</div>
</div>
</td>
{% endfor %}
</tr>
</td>
{% endfor %}
</tr>
{% endfor %}
</table>
</div>
<table class="uZgmoc">
<!-- next page object -->
</table>
<br />
</div>
<br/>
<div class="TuS8Ad">
<!-- information about user connection -->
<div>
</div>
</body>
</html>

@ -66,9 +66,6 @@
<form id="search-form" action="search" method="{{ 'get' if config.get_only else 'post' }}">
<div class="search-fields">
<div class="autocomplete">
{% if config.preferences %}
<input type="hidden" name="preferences" value="{{ config.preferences }}" />
{% endif %}
<input
type="text"
name="q"
@ -96,11 +93,7 @@
<select name="country" id="config-country">
{% for country in countries %}
<option value="{{ country.value }}"
{% if (
config.country != '' and config.country in country.value
) or (
config.country == '' and country.value == '')
%}
{% if country.value in config.country %}
selected
{% endif %}>
{{ country.name }}
@ -108,23 +101,6 @@
{% endfor %}
</select>
</div>
<div class="config-div">
<label for="config-time-period">{{ translation['config-time-period'] }}</label>
<select name="tbs" id="config-time-period">
{% for time_period in time_periods %}
<option value="{{ time_period.value }}"
{% if (
config.tbs != '' and config.tbs in time_period.value
) or (
config.tbs == '' and time_period.value == '')
%}
selected
{% endif %}>
{{ translation[time_period.value] }}
</option>
{% endfor %}
</select>
</div>
<div class="config-div config-div-lang">
<label for="config-lang-interface">{{ translation['config-lang'] }}: </label>
<select name="lang_interface" id="config-lang-interface">
@ -172,10 +148,6 @@
<input type="text" name="block_url" id="config-block"
placeholder="{{ translation['config-block-url-help'] }}" value="{{ config.block_url }}">
</div>
<div class="config-div config-div-anon-view">
<label for="config-anon-view">{{ translation['config-anon-view'] }}: </label>
<input type="checkbox" name="anon_view" id="config-anon-view" {{ 'checked' if config.anon_view else '' }}>
</div>
<div class="config-div config-div-nojs">
<label for="config-nojs">{{ translation['config-nojs'] }}: </label>
<input type="checkbox" name="nojs" id="config-nojs" {{ 'checked' if config.nojs else '' }}>
@ -228,7 +200,7 @@
<input type="checkbox" name="get_only"
id="config-get-only" {{ 'checked' if config.get_only else '' }}>
</div>
<div class="config-div config-div-accept-language">
<div class="config-div config-div-get-only">
<label for="config-accept-language">Set Accept-Language: </label>
<input type="checkbox" name="accept_language"
id="config-accept-language" {{ 'checked' if config.accept_language else '' }}>
@ -243,21 +215,15 @@
{{ translation['config-css'] }}:
</a>
<textarea
name="style_modified"
name="style"
id="config-style"
autocapitalize="off"
autocomplete="off"
spellcheck="false"
autocorrect="off"
value="">{{ config.style_modified.replace('\t', '') }}</textarea>
</div>
<div class="config-div config-div-pref-url">
<label for="config-pref-encryption">{{ translation['config-pref-encryption'] }}: </label>
<input type="checkbox" name="preferences_encrypted"
id="config-pref-encryption" {{ 'checked' if config.preferences_encrypted and config.preferences_key else '' }}>
<div><span class="info-text"> — {{ translation['config-pref-help'] }}</span></div>
<label for="config-pref-url">{{ translation['config-pref-url'] }}: </label>
<input type="text" name="pref-url" id="config-pref-url" value="{{ config.url }}?preferences={{ config.preferences }}">
value="">
{{ config.style.replace('\t', '') }}
</textarea>
</div>
</div>
<div class="config-div config-buttons">

@ -1,11 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
xmlns:moz="http://www.mozilla.org/2006/browser/search/">
{% if not search_type %}
<ShortName>Whoogle</ShortName>
{% else %}
<ShortName>Whoogle {{ search_name }}</ShortName>
{% endif %}
<ShortName>Whoogle</ShortName>
<Description>Whoogle: A lightweight, deployable Google search proxy for desktop/mobile that removes Javascript, AMP links, and ads
</Description>
<InputEncoding>UTF-8</InputEncoding>
@ -14,9 +10,6 @@
</Image>
<Url type="text/html" {{ request_type|safe }} template="{{ main_url }}/search">
<Param name="q" value="{searchTerms}"/>
{% if search_type %}
<Param name="tbm" value="{{ search_type }}"/>
{% endif %}
</Url>
<Url type="application/x-suggestions+json" {{ request_type|safe }} template="{{ main_url }}/autocomplete">
<Param name="q" value="{searchTerms}"/>

@ -1,4 +1,4 @@
<form id="search-form" action="search" method="post">
<form id="search-form" action="{{ url }}/search" method="post">
<input
type="text"
name="q"

@ -1,56 +1,7 @@
import json
import requests
import urllib.parse as urlparse
import os
import glob
bangs_dict = {}
DDG_BANGS = 'https://duckduckgo.com/bang.js'
def load_all_bangs(ddg_bangs_file: str, ddg_bangs: dict = {}):
    """Loads all the bang files in alphabetical order

    The DDG bangs are applied first (from ``ddg_bangs`` when given, otherwise
    from ``ddg_bangs_file``), followed by every other ``*.json`` file found
    in the same directory, in sorted order, so later files override earlier
    ones. The merged result is stored, sorted by key, in the module-level
    ``bangs_dict``.

    Nothing is done when bangs are already loaded and no new DDG bangs were
    passed in, or when the DDG bangs file is 4 bytes or smaller.

    Args:
        ddg_bangs_file: The str path to the new DDG bangs json file
        ddg_bangs: The dict of ddg bangs. If this is empty, it will load the
                   bangs from the file

    Returns:
        None

    Raises:
        json.decoder.JSONDecodeError: If any file other than the DDG bangs
            file contains invalid JSON.
    """
    global bangs_dict
    ddg_bangs_file = os.path.normpath(ddg_bangs_file)

    if (bangs_dict and not ddg_bangs) or os.path.getsize(ddg_bangs_file) <= 4:
        return

    bangs = {}
    bangs_dir = os.path.dirname(ddg_bangs_file)

    # Normalize the paths so the DDG file can be recognised by comparison
    bang_files = sorted(
        path
        for path in map(os.path.normpath,
                        glob.glob(os.path.join(bangs_dir, '*.json')))
        if path != ddg_bangs_file)

    if ddg_bangs:
        bangs |= ddg_bangs
    else:
        # The DDG bangs file always goes first
        bang_files.insert(0, ddg_bangs_file)

    for bang_file in bang_files:
        try:
            # Context manager so the handle is closed even on a parse error
            with open(bang_file) as bang_fp:
                bangs |= json.load(bang_fp)
        except json.decoder.JSONDecodeError:
            # Ignore decoding error only for the ddg bangs file, since this
            # can occur if file is still being written. Compared by path
            # (not list position): when ddg_bangs is passed in memory the
            # first list entry is a custom file, whose errors must surface.
            if bang_file != ddg_bangs_file:
                raise

    bangs_dict = dict(sorted(bangs.items()))
DDG_BANGS = 'https://duckduckgo.com/bang.v255.js'
def gen_bangs_json(bangs_file: str) -> None:
@ -85,67 +36,31 @@ def gen_bangs_json(bangs_file: str) -> None:
json.dump(bangs_data, open(bangs_file, 'w'))
print('* Finished creating ddg bangs json')
load_all_bangs(bangs_file, bangs_data)
def suggest_bang(query: str) -> list[str]:
    """Suggests bangs for a user's query

    Every key of the module-level ``bangs_dict`` that begins with ``query``
    contributes its ``'suggestion'`` entry, in the dict's iteration order.

    Args:
        query: The search query

    Returns:
        list[str]: A list of bang suggestions
    """
    suggestions = []
    for operator, bang in bangs_dict.items():
        if operator.startswith(query):
            suggestions.append(bang['suggestion'])
    return suggestions
def resolve_bang(query: str) -> str:
def resolve_bang(query: str, bangs_dict: dict) -> str:
"""Transform's a user's query to a bang search, if an operator is found
Args:
query: The search query
bangs_dict: The dict of available bang operators, with corresponding
format string search URLs
(i.e. "!w": "https://en.wikipedia.org...?search={}")
Returns:
str: A formatted redirect for a bang search, or an empty str if there
wasn't a match or didn't contain a bang operator
"""
global bangs_dict
#if ! not in query simply return (speed up processing)
if '!' not in query:
return ''
split_query = query.strip().split(' ')
# look for operator in query if one is found, list operator should be of
# length 1, operator should not be case-sensitive here to remove it later
operator = [
word
for word in split_query
if word.lower() in bangs_dict
]
if len(operator) == 1:
# get operator
operator = operator[0]
# removes operator from query
split_query.remove(operator)
# rebuild the query string
bang_query = ' '.join(split_query).strip()
# Check if operator is a key in bangs and get bang if exists
bang = bangs_dict.get(operator.lower(), None)
if bang:
bang_url = bang['url']
if bang_query:
return bang_url.replace('{}', bang_query, 1)
else:
parsed_url = urlparse.urlparse(bang_url)
return f'{parsed_url.scheme}://{parsed_url.netloc}'
# Ensure bang search is case insensitive
query = query.lower()
split_query = query.split(' ')
for operator in bangs_dict.keys():
if operator not in split_query \
and operator[1:] + operator[0] not in split_query:
continue
return bangs_dict[operator]['url'].replace(
'{}',
query.replace(operator if operator in split_query
else operator[1:] + operator[0], '').strip(), 1)
return ''

@ -1,51 +1,6 @@
import base64
from bs4 import BeautifulSoup as bsoup
from cryptography.fernet import Fernet
from flask import Request
import hashlib
import io
import os
import re
from requests import exceptions, get
from urllib.parse import urlparse
ddg_favicon_site = 'http://icons.duckduckgo.com/ip2'
empty_gif = base64.b64decode(
'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
placeholder_img = base64.b64decode(
'iVBORw0KGgoAAAANSUhEUgAAABkAAAAZCAYAAADE6YVjAAABF0lEQVRIS8XWPw9EMBQA8Eok' \
'JBKrMFqMBt//GzAYLTZ/VomExPDu6uLiaPteqVynBn0/75W2Vp7nEIYhe6p1XcespmmAd7Is' \
'M+4URcGiKPogvMMvmIS2eN9MOMKbKWgf54SYgI4vKkTuQKJKSJErkKzUSkQHUs0lilAg7GMh' \
'ISoIA/hYMiKCKIA2soeowCWEMkfHtUmrXLcyGYYBfN9HF8djiaglWzNZlgVs21YisoAUaEXG' \
'cQTP86QIFgi7vyLzPIPjOEIEC7ANQv/4aZrAdd0TUtc1i+MYnSsMWjPp+x6CIPgJVlUVS5KE' \
'DKig/+wnVzM4pnzaGeHd+ENlWbI0TbVLJBtw2uMfP63wc9d2kDCWxi5Q27bsBerSJ9afJbeL' \
'AAAAAElFTkSuQmCC'
)
def fetch_favicon(url: str, timeout: float = 5.0) -> bytes:
    """Fetches a favicon using DuckDuckGo's favicon retriever

    Args:
        url: The url to fetch the favicon from
        timeout: Seconds to wait for the favicon service before giving up.
            Without a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        bytes - the favicon bytes, or a placeholder image if one
                was not returned (non-200 status, empty body, or a failed
                / timed out request)
    """
    domain = urlparse(url).netloc

    try:
        response = get(f'{ddg_favicon_site}/{domain}.ico', timeout=timeout)
    except exceptions.RequestException:
        # A favicon is purely decorative, so a network failure degrades to
        # the placeholder instead of propagating to the caller.
        return placeholder_img

    if response.status_code == 200 and len(response.content) > 0:
        return response.content

    return placeholder_img
def gen_file_hash(path: str, static_file: str) -> str:
@ -56,13 +11,11 @@ def gen_file_hash(path: str, static_file: str) -> str:
return filename_split[0] + '.' + file_hash + filename_split[-1]
def read_config_bool(var: str, default: bool=False) -> bool:
    """Reads an environment variable as a boolean flag

    Args:
        var: Name of the environment variable
        default: Value used when the variable is not set

    Returns:
        bool: True when the (case-insensitive) value is one of
              'true', 't', '1', 'yes' or 'y'; False for anything else
    """
    truthy_values = ('true', 't', '1', 'yes', 'y')
    fallback = '1' if default else '0'
    return os.getenv(var, fallback).lower() in truthy_values
def read_config_bool(var: str) -> bool:
    """Reads an environment variable as a boolean flag

    Args:
        var: Name of the environment variable

    Returns:
        bool: The truthiness of the value when it consists only of digits
              ('0' is False, any other number is True); False when the
              variable is unset or not numeric
    """
    raw = os.getenv(var, '0')
    return bool(int(raw)) if raw.isdigit() else False
def get_client_ip(r: Request) -> str:
@ -77,63 +30,3 @@ def get_request_url(url: str) -> str:
return url.replace('http://', 'https://', 1)
return url
def get_proxy_host_url(r: Request, default: str, root=False) -> str:
    """Builds the externally visible URL of a request behind a proxy

    Args:
        r: The incoming request; its 'X-Forwarded-Host' and
           'X-Forwarded-Proto' headers and its full_path are read
        default: The URL to return when no 'X-Forwarded-Host' header is set
        root: When True the request path is left out, producing the
              root URL only

    Returns:
        str: '<scheme>://<forwarded host><prefix><path>', where scheme
             falls back to 'https' and prefix comes from the
             WHOOGLE_URL_PREFIX environment variable (reduced to its
             alphanumeric characters), or ``default`` when the request
             carries no 'X-Forwarded-Host' header
    """
    scheme = r.headers.get('X-Forwarded-Proto', 'https')
    http_host = r.headers.get('X-Forwarded-Host')

    full_path = r.full_path if not root else ''
    # Guarantee exactly one leading slash on a non-empty path. The previous
    # check was inverted and prepended a second slash to paths that already
    # had one, yielding 'https://host//search?...'. The empty (root) path
    # is deliberately left untouched.
    if full_path and not full_path.startswith('/'):
        full_path = f'/{full_path}'

    if http_host:
        prefix = os.environ.get('WHOOGLE_URL_PREFIX', '')
        if prefix:
            prefix = f'/{re.sub("[^0-9a-zA-Z]+", "", prefix)}'
        return f'{scheme}://{http_host}{prefix}{full_path}'

    return default
def check_for_update(version_url: str, current: str) -> int:
    """Checks whether a newer version of Whoogle has been released

    Args:
        version_url: The page to fetch the latest release tag from
        current: The version string of the running instance

    Returns:
        The digits of the latest version as an int when it is newer than
        the current version, otherwise an empty string (also returned
        when the check itself fails)
    """
    def _version_parts(version: str) -> tuple:
        # '0.10.1' -> (0, 10, 1), so versions compare component by
        # component instead of as one concatenated number
        return tuple(int(part) for part in re.findall(r'\d+', version))

    has_update = ''
    try:
        update = bsoup(get(version_url).text, 'html.parser')
        latest = update.select_one('[class="Link--primary"]').string[1:]
        if _version_parts(current) < _version_parts(latest):
            has_update = int(''.join(filter(str.isdigit, latest)))
    except (exceptions.RequestException,
            AttributeError,
            TypeError,
            ValueError):
        # Ignore failures (request errors, missing or empty release
        # element), assume current version is up to date
        has_update = ''

    return has_update
def get_abs_url(url, page_url):
    """Creates a valid absolute URL using a partial or relative URL

    Args:
        url: The partial ('//host/path'), root-relative ('/path'),
             relative ('./path') or already absolute URL
        page_url: The URL of the page that the url was found on

    Returns:
        str: The absolute form of the url, or the url unchanged if it
             matches none of the handled forms
    """
    if url.startswith('//'):
        # Protocol-relative URL
        return f'https:{url}'
    elif url.startswith('/'):
        page = urlparse(page_url)
        if not page.netloc:
            # No host can be derived from the page URL
            return url
        # Include the scheme, otherwise the result ('host/path') is not
        # an absolute URL
        return f'{page.scheme or "https"}://{page.netloc}{url}'
    elif url.startswith('./'):
        return f'{page_url}{url[2:]}'
    return url
def list_to_dict(lst: list) -> dict:
    """Converts a flat [key, value, key, value, ...] list into a dict

    All spaces are removed from both keys and values. A trailing key
    without a matching value is ignored.

    Args:
        lst: The flat list of alternating keys and values

    Returns:
        dict: The key/value pairs, or an empty dict if the list holds
              fewer than two items
    """
    if len(lst) < 2:
        return {}

    # Stop before a dangling last element so that an odd-length list
    # does not raise an IndexError on lst[i+1]
    return {lst[i].replace(' ', ''): lst[i+1].replace(' ', '')
            for i in range(0, len(lst) - 1, 2)}
def encrypt_string(key: bytes, string: str) -> str:
    """Encrypts a string with the provided Fernet key

    Args:
        key: The Fernet key to encrypt with
        string: The text to encrypt

    Returns:
        str: The encrypted token, decoded to a string
    """
    token = Fernet(key).encrypt(string.encode())
    return token.decode()
def decrypt_string(key: bytes, string: str) -> str:
    """Decrypts a string with the provided Fernet key

    Args:
        key: The Fernet key to decrypt with
        string: The encrypted token to decrypt

    Returns:
        str: The decrypted text
    """
    # Use the key that was passed in (mirroring encrypt_string) rather
    # than always falling back to the session key
    cipher_suite = Fernet(key)
    return cipher_suite.decrypt(string.encode()).decode()

@ -1,20 +1,14 @@
from app.models.config import Config
from app.models.endpoint import Endpoint
from app.utils.misc import list_to_dict
from bs4 import BeautifulSoup, NavigableString
import copy
from flask import current_app
import html
import os
import urllib.parse as urlparse
from urllib.parse import parse_qs
import re
import warnings
SKIP_ARGS = ['ref_src', 'utm']
SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
GOOG_STATIC = 'www.gstatic.com'
G_M_LOGO_URL = 'https://www.gstatic.com/m/images/icons/googleg.gif'
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
LOGO_URL = GOOG_IMG + '_desk'
BLANK_B64 = ('data:image/png;base64,'
@ -23,51 +17,25 @@ BLANK_B64 = ('data:image/png;base64,'
# Ad keywords
BLACKLIST = [
'ad', 'ads', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告',
'Reklama', 'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan',
'広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन',
'Reklam', 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés',
'Anúncio', 'Quảng cáo','โฆษณา', 'sponsored', 'patrocinado', 'gesponsert'
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan', '広告', 'Augl.',
'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', 'آگهی',
'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio'
]
SITE_ALTS = {
'twitter.com': os.getenv('WHOOGLE_ALT_TW', 'farside.link/nitter'),
'youtube.com': os.getenv('WHOOGLE_ALT_YT', 'farside.link/invidious'),
'instagram.com': os.getenv('WHOOGLE_ALT_IG', 'farside.link/bibliogram/u'),
'reddit.com': os.getenv('WHOOGLE_ALT_RD', 'farside.link/libreddit'),
**dict.fromkeys([
'medium.com',
'levelup.gitconnected.com'
], os.getenv('WHOOGLE_ALT_MD', 'farside.link/scribe')),
'imgur.com': os.getenv('WHOOGLE_ALT_IMG', 'farside.link/rimgo'),
'wikipedia.org': os.getenv('WHOOGLE_ALT_WIKI', 'farside.link/wikiless'),
'imdb.com': os.getenv('WHOOGLE_ALT_IMDB', 'farside.link/libremdb'),
'quora.com': os.getenv('WHOOGLE_ALT_QUORA', 'farside.link/quetre')
'imgur.com': os.getenv('WHOOGLE_ALT_IMG', 'imgin.voidnet.tech'),
'wikipedia.com': os.getenv('WHOOGLE_ALT_WIKI', 'wikiless.org')
}
# Include custom site redirects from WHOOGLE_REDIRECTS
SITE_ALTS.update(list_to_dict(re.split(',|:', os.getenv('WHOOGLE_REDIRECTS', ''))))
def contains_cjko(s: str) -> bool:
    """Reports whether a string contains any Chinese, Japanese or Korean
    character, by searching it for a character class built from a set of
    Unicode ranges.

    Args:
        s (str): string to be checked

    Returns:
        bool: True if the input s contains the characters and False otherwise
    """
    cjk_blocks = [
        '\u4e00-\u9fff',  # Chinese characters
        '\u3040-\u309f',  # Japanese hiragana
        '\u30a0-\u30ff',  # Japanese katakana
        '\u4e00-\u9faf',  # Japanese kanji
        '\uac00-\ud7af',  # Korean hangul syllables
        '\u1100-\u11ff',  # Korean hangul jamo
    ]
    char_class = '[' + ''.join(cjk_blocks) + ']'

    return re.search(char_class, s) is not None
def bold_search_terms(response: str, query: str) -> BeautifulSoup:
"""Wraps all search terms in bold tags (<b>). If any terms are wrapped
@ -88,29 +56,20 @@ def bold_search_terms(response: str, query: str) -> BeautifulSoup:
if len(element) == len(target_word):
return
# Ensure target word is escaped for regex
target_word = re.escape(target_word)
# Check if the word contains Chinese, Japanese, or Korean characters
if contains_cjko(target_word):
reg_pattern = fr'((?![{{}}<>-]){target_word}(?![{{}}<>-]))'
else:
reg_pattern = fr'\b((?![{{}}<>-]){target_word}(?![{{}}<>-]))\b'
if re.match('.*[@_!#$%^&*()<>?/\|}{~:].*', target_word) or (
if not re.match('.*[a-zA-Z0-9].*', target_word) or (
element.parent and element.parent.name == 'style'):
return
element.replace_with(BeautifulSoup(
re.sub(reg_pattern,
re.sub(fr'\b((?![{{}}<>-]){target_word}(?![{{}}<>-]))\b',
r'<b>\1</b>',
element,
html.escape(element),
flags=re.I), 'html.parser')
)
# Split all words out of query, grouping the ones wrapped in quotes
for word in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
word = re.sub(r'[@_!#$%^&*()<>?/\|}{~:]+', '', word)
word = re.sub(r'[^A-Za-z0-9 ]+', '', word)
target = response.find_all(
text=re.compile(r'' + re.escape(word), re.I))
for nav_str in target:
@ -129,8 +88,7 @@ def has_ad_content(element: str) -> bool:
bool: True/False for the element containing an ad
"""
element_str = ''.join(filter(str.isalpha, element))
return (element_str.upper() in (value.upper() for value in BLACKLIST)
return (element.upper() in (value.upper() for value in BLACKLIST)
or '' in element)
@ -144,34 +102,19 @@ def get_first_link(soup: BeautifulSoup) -> str:
str: A str link to the first result
"""
first_link = ''
orig_details = []
# Temporarily remove details so we don't grab those links
for details in soup.find_all('details'):
temp_details = soup.new_tag('removed_details')
orig_details.append(details.replace_with(temp_details))
# Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup.find_all('a', href=True):
# Return the first search result URL
if a['href'].startswith('http://') or a['href'].startswith('https://'):
first_link = a['href']
break
# Add the details back
for orig_detail, details in zip(orig_details, soup.find_all('removed_details')):
details.replace_with(orig_detail)
if 'url?q=' in a['href']:
return filter_link_args(a['href'])
return ''
return first_link
def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
def get_site_alt(link: str) -> str:
"""Returns an alternative to a particular site, if one is configured
Args:
link: A string result URL to check against the site_alts map
site_alts: A map of site alternatives to replace with. defaults to SITE_ALTS
link: A string result URL to check against the SITE_ALTS map
Returns:
str: An updated (or ignored) result link
@ -179,52 +122,15 @@ def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
"""
# Need to replace full hostname with alternative to encapsulate
# subdomains as well
parsed_link = urlparse.urlparse(link)
hostname = urlparse.urlparse(link).hostname
# Extract subdomain separately from the domain+tld. The subdomain
# is used for wikiless translations.
split_host = parsed_link.netloc.split('.')
subdomain = split_host[0] if len(split_host) > 2 else ''
hostname = '.'.join(split_host[-2:])
# The full scheme + hostname is used when comparing against the list of
# available alternative services, due to how Medium links are constructed.
# (i.e. for medium.com: "https://something.medium.com" should match,
# "https://medium.com/..." should match, but "philomedium.com" should not)
hostcomp = f'{parsed_link.scheme}://{hostname}'
for site_key in site_alts.keys():
site_alt = f'{parsed_link.scheme}://{site_key}'
if not hostname or site_alt not in hostcomp or not site_alts[site_key]:
for site_key in SITE_ALTS.keys():
if not hostname or site_key not in hostname:
continue
# Wikipedia -> Wikiless replacements require the subdomain (if it's
# a 2-char language code) to be passed as a URL param to Wikiless
# in order to preserve the language setting.
params = ''
if 'wikipedia' in hostname and len(subdomain) == 2:
hostname = f'{subdomain}.{hostname}'
params = f'?lang={subdomain}'
elif 'medium' in hostname and len(subdomain) > 0:
hostname = f'{subdomain}.{hostname}'
parsed_alt = urlparse.urlparse(site_alts[site_key])
link = link.replace(hostname, site_alts[site_key]) + params
# If a scheme is specified in the alternative, this results in a
# replaced link that looks like "https://http://altservice.tld".
# In this case, we can remove the original scheme from the result
# and use the one specified for the alt.
if parsed_alt.scheme:
link = '//'.join(link.split('//')[1:])
link = link.replace(hostname, SITE_ALTS[site_key])
for prefix in SKIP_PREFIX:
if parsed_alt.scheme:
# If a scheme is specified, remove everything before the
# first occurence of it
link = f'{parsed_alt.scheme}{link.split(parsed_alt.scheme, 1)[-1]}'
else:
# Otherwise, replace the first occurrence of the prefix
link = link.replace(prefix, '//', 1)
link = link.replace(prefix, '//')
break
return link
@ -274,33 +180,52 @@ def append_nojs(result: BeautifulSoup) -> None:
"""
nojs_link = BeautifulSoup(features='html.parser').new_tag('a')
nojs_link['href'] = f'{Endpoint.window}?nojs=1&location=' + result['href']
nojs_link['href'] = f'/{Endpoint.window}?location=' + result['href']
nojs_link.string = ' NoJS Link'
result.append(nojs_link)
def append_anon_view(result: BeautifulSoup, config: Config) -> None:
"""Appends an 'anonymous view' for a search result, where all site
contents are viewed through Whoogle as a proxy.
def add_ip_card(html_soup: BeautifulSoup, ip: str) -> BeautifulSoup:
"""Adds the client's IP address to the search results
if query contains keywords
Args:
result: The search result to append an anon view link to
nojs: Remove Javascript from Anonymous View
html_soup: The parsed search result containing the keywords
ip: ip address of the client
Returns:
None
BeautifulSoup
"""
av_link = BeautifulSoup(features='html.parser').new_tag('a')
nojs = 'nojs=1' if config.nojs else 'nojs=0'
location = f'location={result["href"]}'
av_link['href'] = f'{Endpoint.window}?{nojs}&{location}'
translation = current_app.config['TRANSLATIONS'][
config.get_localization_lang()
]
av_link.string = f'{translation["anon-view"]}'
av_link['class'] = 'anon-view'
result.append(av_link)
if (not html_soup.select_one(".EY24We")
and html_soup.select_one(".OXXup").get_text().lower() == "all"):
# HTML IP card tag
ip_tag = html_soup.new_tag("div")
ip_tag["class"] = "ZINbbc xpd O9g5cc uUPGi"
# For IP Address html tag
ip_address = html_soup.new_tag("div")
ip_address["class"] = "kCrYT ip-address-div"
ip_address.string = ip
# Text below the IP address
ip_text = html_soup.new_tag("div")
ip_text.string = "Your public IP address"
ip_text["class"] = "kCrYT ip-text-div"
# Adding all the above html tags to the IP card
ip_tag.append(ip_address)
ip_tag.append(ip_text)
# Finding the element before which the IP card would be placed
f_link = html_soup.select_one(".BNeawe.vvjwJb.AP7Wnd")
ref_element = f_link.find_parent(class_="ZINbbc xpd O9g5cc" +
" uUPGi")
# Inserting the element
ref_element.insert_before(ip_tag)
return html_soup
def check_currency(response: str) -> dict:
"""Check whether the results have currency conversion
@ -317,18 +242,12 @@ def check_currency(response: str) -> dict:
if currency_link:
while 'class' not in currency_link.attrs or \
'ZINbbc' not in currency_link.attrs['class']:
if currency_link.parent:
currency_link = currency_link.parent
else:
return {}
currency_link = currency_link.parent
currency_link = currency_link.find_all(class_='BNeawe')
currency1 = currency_link[0].text
currency2 = currency_link[1].text
currency1 = currency1.rstrip('=').split(' ', 1)
currency2 = currency2.split(' ', 1)
# Handle differences in currency formatting
# i.e. "5.000" vs "5,000"
if currency2[0][-3] == ',':
currency1[0] = currency1[0].replace('.', '')
currency1[0] = currency1[0].replace(',', '.')
@ -337,17 +256,10 @@ def check_currency(response: str) -> dict:
else:
currency1[0] = currency1[0].replace(',', '')
currency2[0] = currency2[0].replace(',', '')
currency1_value = float(re.sub(r'[^\d\.]', '', currency1[0]))
currency1_label = currency1[1]
currency2_value = float(re.sub(r'[^\d\.]', '', currency2[0]))
currency2_label = currency2[1]
return {'currencyValue1': currency1_value,
'currencyLabel1': currency1_label,
'currencyValue2': currency2_value,
'currencyLabel2': currency2_label
return {'currencyValue1': float(currency1[0]),
'currencyLabel1': currency1[1],
'currencyValue2': float(currency2[0]),
'currencyLabel2': currency2[1]
}
return {}
@ -417,49 +329,3 @@ def add_currency_card(soup: BeautifulSoup,
element1.insert_before(conversion_box)
return soup
def get_tabs_content(tabs: dict,
                     full_query: str,
                     search_type: str,
                     preferences: str,
                     translation: dict) -> dict:
    """Builds a query-specific copy of the default tabs content.

    Args:
        tabs: The default content for the tabs (left unmodified)
        full_query: The original search query
        search_type: The current search_type
        preferences: Value appended to each tab query as "preferences"
                     when it is not empty
        translation: The translation to get the names of the tabs

    Returns:
        dict: contains the name, the href and if the tab is selected or not
    """
    # The map query is everything before the first "-site:" filter
    map_query = full_query.split('-site:', 1)[0]

    # Every tab starts from the query without the current "tbm" argument
    base_query = full_query.replace(f'&tbm={search_type}', '')

    updated_tabs = copy.deepcopy(tabs)
    for tab_id, tab in updated_tabs.items():
        # Localize the tab name when a translation is available
        if tab_id in translation:
            tab['name'] = translation[tab_id]

        # Assemble the extra arguments for this tab's link
        suffix = ''
        if tab['tbm'] is not None:
            suffix += f"&tbm={tab['tbm']}"
        if preferences:
            suffix += f"&preferences={preferences}"

        tab['href'] = tab['href'].format(
            query=base_query + suffix,
            map_query=map_query)

        # The "all" tab stays selected unless another tab matches
        if tab['tbm'] == search_type:
            updated_tabs['all']['selected'] = False
            tab['selected'] = True

    return updated_tabs

@ -1,10 +1,9 @@
import os
import re
from typing import Any
from app.filter import Filter
from app.filter import Filter, get_first_link
from app.request import gen_query
from app.utils.misc import get_proxy_host_url
from app.utils.results import get_first_link
from bs4 import BeautifulSoup as bsoup
from cryptography.fernet import Fernet, InvalidToken
from flask import g
@ -57,14 +56,12 @@ class Search:
"""
def __init__(self, request, config, session_key, cookies_disabled=False):
method = request.method
self.request = request
self.request_params = request.args if method == 'GET' else request.form
self.user_agent = request.headers.get('User-Agent')
self.feeling_lucky = False
self.config = config
self.session_key = session_key
self.query = ''
self.widget = ''
self.cookies_disabled = cookies_disabled
self.search_type = self.request_params.get(
'tbm') if 'tbm' in self.request_params else ''
@ -102,22 +99,9 @@ class Search:
except InvalidToken:
pass
# Strip '!' for "feeling lucky" queries
if match := re.search("(^|\s)!($|\s)", q):
self.feeling_lucky = True
start, end = match.span()
self.query = " ".join([seg for seg in [q[:start], q[end:]] if seg])
else:
self.feeling_lucky = False
self.query = q
# Check for possible widgets
self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" +
"($|( *[^a-z0-9] *(((addres|address|adres|" +
"adress)|a)? *$)))", self.query.lower()) else self.widget
self.widget = 'calculator' if re.search(
r"\bcalculator\b|\bcalc\b|\bcalclator\b|\bmath\b",
self.query.lower()) else self.widget
# Strip leading '! ' for "feeling lucky" queries
self.feeling_lucky = q.startswith('! ')
self.query = q[2:] if self.feeling_lucky else q
return self.query
def generate_response(self) -> str:
@ -129,21 +113,13 @@ class Search:
"""
mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent
# reconstruct url if X-Forwarded-Host header present
root_url = get_proxy_host_url(
self.request,
self.request.url_root,
root=True)
content_filter = Filter(self.session_key,
root_url=root_url,
mobile=mobile,
config=self.config,
query=self.query)
config=self.config)
full_query = gen_query(self.query,
self.request_params,
self.config)
self.full_query = full_query
# force mobile search when view image is true and
# the request is not already made by a mobile
@ -152,12 +128,10 @@ class Search:
and not g.user_request.mobile)
get_body = g.user_request.send(query=full_query,
force_mobile=view_image,
user_agent=self.user_agent)
force_mobile=view_image)
# Produce cleanable html soup from response
get_body_safed = get_body.text.replace("&lt;","andlt;").replace("&gt;","andgt;")
html_soup = bsoup(get_body_safed, 'html.parser')
html_soup = bsoup(get_body.text, 'html.parser')
# Replace current soup if view_image is active
if view_image:
@ -167,25 +141,31 @@ class Search:
if g.user_request.tor_valid:
html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser'))
formatted_results = content_filter.clean(html_soup)
if self.feeling_lucky:
if lucky_link := get_first_link(formatted_results):
return lucky_link
# Fall through to regular search if unable to find link
self.feeling_lucky = False
# Append user config to all search links, if available
param_str = ''.join('&{}={}'.format(k, v)
for k, v in
self.request_params.to_dict(flat=True).items()
if self.config.is_safe_key(k))
for link in formatted_results.find_all('a', href=True):
link['rel'] = "nofollow noopener noreferrer"
if 'search?' not in link['href'] or link['href'].index(
'search?') > 1:
continue
link['href'] += param_str
return str(formatted_results)
return get_first_link(html_soup)
else:
formatted_results = content_filter.clean(html_soup)
# Append user config to all search links, if available
param_str = ''.join('&{}={}'.format(k, v)
for k, v in
self.request_params.to_dict(flat=True).items()
if self.config.is_safe_key(k))
for link in formatted_results.find_all('a', href=True):
if 'search?' not in link['href'] or link['href'].index(
'search?') > 1:
continue
link['href'] += param_str
return str(formatted_results)
def check_kw_ip(self) -> re.Match:
    """Searches the lowercased query for a 'my ip' style phrase

    Returns:
        The result of the regex search: a match object when the query
        contains such a phrase, otherwise None
    """
    my_ip_pattern = (
        "([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)"
        "($|( *[^a-z0-9] *(((addres|address|adres|"
        "adress)|a)? *$)))"
    )
    lowered_query = self.query.lower()
    return re.search(my_ip_pattern, lowered_query)

@ -1,10 +1,10 @@
from cryptography.fernet import Fernet
from flask import current_app as app
REQUIRED_SESSION_VALUES = ['uuid', 'config', 'key', 'auth']
REQUIRED_SESSION_VALUES = ['uuid', 'config', 'key']
def generate_key() -> bytes:
def generate_user_key() -> bytes:
"""Generates a key for encrypting searches and element URLs
Args:

@ -1,71 +0,0 @@
from pathlib import Path
from bs4 import BeautifulSoup
# root
BASE_DIR = Path(__file__).parent.parent.parent
def add_ip_card(html_soup: BeautifulSoup, ip: str) -> BeautifulSoup:
    """Inserts a card showing the client's IP address ahead of the
    '#main' element of the search results

    Args:
        html_soup: The parsed search result containing the keywords
        ip: ip address of the client

    Returns:
        BeautifulSoup
    """
    main_div = html_soup.select_one('#main')
    if not main_div:
        # Nothing to anchor the card to
        return html_soup

    # The IP address itself
    address_div = html_soup.new_tag('div')
    address_div['class'] = 'kCrYT ip-address-div'
    address_div.string = ip

    # The caption shown below the IP address
    caption_div = html_soup.new_tag('div')
    caption_div['class'] = 'kCrYT ip-text-div'
    caption_div.string = 'Your public IP address'

    # The card wrapping both of the above
    card = html_soup.new_tag('div')
    card['class'] = 'ZINbbc xpd O9g5cc uUPGi'
    card.append(address_div)
    card.append(caption_div)

    # Insert the card at the top of the result list
    main_div.insert_before(card)
    return html_soup
def add_calculator_card(html_soup: BeautifulSoup) -> BeautifulSoup:
    """Adds a calculator widget to the search results
    if query contains keywords

    Args:
        html_soup: The parsed search result containing the keywords

    Returns:
        BeautifulSoup
    """
    main_div = html_soup.select_one('#main')
    if main_div:
        # Read the widget markup (absolute path). The context manager
        # closes the file even if parsing raises.
        widget_path = BASE_DIR / 'app/static/widgets/calculator.html'
        with open(widget_path, encoding="utf8") as widget_file:
            widget_markup = BeautifulSoup(widget_file, 'html.parser')

        widget_tag = html_soup.new_tag('div')
        widget_tag['class'] = 'ZINbbc xpd O9g5cc uUPGi'
        widget_tag['id'] = 'calculator-wrapper'

        # Card title
        calculator_text = html_soup.new_tag('div')
        calculator_text['class'] = 'kCrYT ip-address-div'
        calculator_text.string = 'Calculator'

        # Card body holding the widget itself
        calculator_widget = html_soup.new_tag('div')
        calculator_widget.append(widget_markup)
        calculator_widget['class'] = 'kCrYT ip-text-div'

        widget_tag.append(calculator_text)
        widget_tag.append(calculator_widget)

        # Insert the widget at the top of the result list
        main_div.insert_before(widget_tag)
    return html_soup

@ -1,7 +0,0 @@
import os
# Dev builds get a '.dev<DEV_BUILD>' suffix on the version; regular
# builds (DEV_BUILD unset or empty) get no suffix
optional_dev_tag = (
    '.dev' + os.getenv('DEV_BUILD') if os.getenv('DEV_BUILD') else ''
)

__version__ = '0.8.4' + optional_dev_tag

@ -3,7 +3,7 @@ name: whoogle
description: A self hosted search engine on Kubernetes
type: application
version: 0.1.0
appVersion: 0.8.4
appVersion: 0.7.1
icon: https://github.com/benbusby/whoogle-search/raw/main/app/static/img/favicon/favicon-96x96.png

@ -52,20 +52,10 @@ spec:
httpGet:
path: /
port: http
{{- if and .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS }}
httpHeaders:
- name: Authorization
value: Basic {{ b64enc (printf "%s:%s" .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS) }}
{{- end }}
readinessProbe:
httpGet:
path: /
port: http
{{- if and .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS }}
httpHeaders:
- name: Authorization
value: Basic {{ b64enc (printf "%s:%s" .Values.conf.WHOOGLE_USER .Values.conf.WHOOGLE_PASS) }}
{{- end }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- with .Values.nodeSelector }}

@ -24,7 +24,6 @@ serviceAccount:
name: ""
conf: {}
# WHOOGLE_URL_PREFIX: "" # The URL prefix to use for the whoogle instance (i.e. "/whoogle")
# WHOOGLE_DOTENV: "" # Load environment variables in whoogle.env
# WHOOGLE_USER: "" # The username for basic auth. WHOOGLE_PASS must also be set if used.
# WHOOGLE_PASS: "" # The password for basic auth. WHOOGLE_USER must also be set if used.
@ -36,33 +35,30 @@ conf: {}
# HTTPS_ONLY: "" # Enforce HTTPS. (See https://github.com/benbusby/whoogle-search#https-enforcement)
# WHOOGLE_ALT_TW: "" # The twitter.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_YT: "" # The youtube.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_IG: "" # The instagram.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_RD: "" # The reddit.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_TL: "" # The Google Translate alternative to use. This is used for all "translate ____" searches.
# WHOOGLE_ALT_MD: "" # The medium.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_IMG: "" # The imgur.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_WIKI: "" # The wikipedia.org alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_IMDB: "" # The imdb.com alternative to use. Set to "" to continue using imdb.com when site alternatives are enabled.
# WHOOGLE_ALT_QUORA: "" # The quora.com alternative to use. Set to "" to continue using quora.com when site alternatives are enabled.
# WHOOGLE_ALT_IMG: "" # The imgur.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_ALT_WIKI: "" # The wikipedia.com alternative to use when site alternatives are enabled in the config.
# WHOOGLE_AUTOCOMPLETE: "" # Controls visibility of autocomplete/search suggestions. Default on -- use '0' to disable
# WHOOGLE_MINIMAL: "" # Remove everything except basic result cards from all search queries.
# WHOOGLE_CONFIG_DISABLE: "" # Hide config from UI and disallow changes to config by client
# WHOOGLE_CONFIG_COUNTRY: "" # Filter results by hosting country
# WHOOGLE_CONFIG_LANGUAGE: "" # Set interface language
# WHOOGLE_CONFIG_SEARCH_LANGUAGE: "" # Set search result language
# WHOOGLE_CONFIG_BLOCK: "" # Block websites from search results (use comma-separated list)
# WHOOGLE_CONFIG_THEME: "" # Set theme mode (light, dark, or system)
# WHOOGLE_CONFIG_SAFE: "" # Enable safe searches
# WHOOGLE_CONFIG_ALTS: "" # Use social media site alternatives (nitter, invidious, etc)
# WHOOGLE_CONFIG_NEAR: "" # Restrict results to only those near a particular city
# WHOOGLE_CONFIG_TOR: "" # Use Tor routing (if available)
# WHOOGLE_CONFIG_NEW_TAB: "" # Always open results in new tab
# WHOOGLE_CONFIG_VIEW_IMAGE: "" # Enable View Image option
# WHOOGLE_CONFIG_GET_ONLY: "" # Search using GET requests only
# WHOOGLE_CONFIG_URL: "" # The root url of the instance (https://<your url>/)
# WHOOGLE_CONFIG_STYLE: "" # The custom CSS to use for styling (should be single line)
# WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED: "" # Encrypt preferences token, requires key
# WHOOGLE_CONFIG_PREFERENCES_KEY: "" # Key to encrypt preferences in URL (REQUIRED to show url)
# WHOOGLE_CONFIG_DISABLE: "" # Hide config from UI and disallow changes to config by client
# WHOOGLE_CONFIG_COUNTRY: "" # Filter results by hosting country
# WHOOGLE_CONFIG_LANGUAGE: "" # Set interface language
# WHOOGLE_CONFIG_SEARCH_LANGUAGE: "" # Set search result language
# WHOOGLE_CONFIG_BLOCK: "" # Block websites from search results (use comma-separated list)
# WHOOGLE_CONFIG_THEME: "" # Set theme mode (light, dark, or system)
# WHOOGLE_CONFIG_SAFE: "" # Enable safe searches
# WHOOGLE_CONFIG_ALTS: "" # Use social media site alternatives (nitter, invidious, etc)
# WHOOGLE_CONFIG_NEAR: "" # Restrict results to only those near a particular city
# WHOOGLE_CONFIG_TOR: "" # Use Tor routing (if available)
# WHOOGLE_CONFIG_NEW_TAB: "" # Always open results in new tab
# WHOOGLE_CONFIG_VIEW_IMAGE: "" # Enable View Image option
# WHOOGLE_CONFIG_GET_ONLY: "" # Search using GET requests only
# WHOOGLE_CONFIG_URL: "" # The root url of the instance (https://<your url>/)
# WHOOGLE_CONFIG_STYLE: "" # The custom CSS to use for styling (should be single line)
podAnnotations: {}
podSecurityContext: {}

@ -1,80 +0,0 @@
# can't use mem_limit in a 3.x docker-compose file in non swarm mode
# see https://github.com/docker/compose/issues/4513
version: "2.4"
services:
traefik:
image: "traefik:v2.7"
container_name: "traefik"
command:
#- "--log.level=DEBUG"
- "--api.insecure=true"
- "--providers.docker=true"
- "--providers.docker.exposedbydefault=false"
- "--entrypoints.websecure.address=:443"
- "--certificatesresolvers.myresolver.acme.tlschallenge=true"
#- "--certificatesresolvers.myresolver.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory"
- "--certificatesresolvers.myresolver.acme.email=change@domain.name"
- "--certificatesresolvers.myresolver.acme.storage=/letsencrypt/acme.json"
ports:
- "443:443"
- "8080:8080"
volumes:
- "./letsencrypt:/letsencrypt"
- "/var/run/docker.sock:/var/run/docker.sock:ro"
whoogle-search:
labels:
- "traefik.enable=true"
- "traefik.http.routers.whoami.rule=Host(`change.host.name`)"
- "traefik.http.routers.whoami.entrypoints=websecure"
- "traefik.http.routers.whoami.tls.certresolver=myresolver"
- "traefik.http.services.whoogle-search.loadbalancer.server.port=5000"
image: ${WHOOGLE_IMAGE:-benbusby/whoogle-search}
container_name: whoogle-search
restart: unless-stopped
pids_limit: 50
mem_limit: 256mb
memswap_limit: 256mb
# user debian-tor from tor package
user: whoogle
security_opt:
- no-new-privileges
cap_drop:
- ALL
tmpfs:
- /config/:size=10M,uid=927,gid=927,mode=1700
- /var/lib/tor/:size=15M,uid=927,gid=927,mode=1700
- /run/tor/:size=1M,uid=927,gid=927,mode=1700
environment: # Uncomment to configure environment variables
# Basic auth configuration, uncomment to enable
#- WHOOGLE_USER=<auth username>
#- WHOOGLE_PASS=<auth password>
# Proxy configuration, uncomment to enable
#- WHOOGLE_PROXY_USER=<proxy username>
#- WHOOGLE_PROXY_PASS=<proxy password>
#- WHOOGLE_PROXY_TYPE=<proxy type (http|https|socks4|socks5)>
#- WHOOGLE_PROXY_LOC=<proxy host/ip>
# Site alternative configurations, uncomment to enable
# Note: If not set, the feature will still be available
# with default values.
#- WHOOGLE_ALT_TW=farside.link/nitter
#- WHOOGLE_ALT_YT=farside.link/invidious
#- WHOOGLE_ALT_IG=farside.link/bibliogram/u
#- WHOOGLE_ALT_RD=farside.link/libreddit
#- WHOOGLE_ALT_MD=farside.link/scribe
#- WHOOGLE_ALT_TL=farside.link/lingva
#- WHOOGLE_ALT_IMG=farside.link/rimgo
#- WHOOGLE_ALT_WIKI=farside.link/wikiless
#- WHOOGLE_ALT_IMDB=farside.link/libremdb
#- WHOOGLE_ALT_QUORA=farside.link/quetre
# - WHOOGLE_CONFIG_DISABLE=1
# - WHOOGLE_CONFIG_SEARCH_LANGUAGE=lang_en
# - WHOOGLE_CONFIG_GET_ONLY=1
# - WHOOGLE_CONFIG_COUNTRY=FR
# - WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED=1
# - WHOOGLE_CONFIG_PREFERENCES_KEY="NEEDS_TO_BE_MODIFIED"
#env_file: # Alternatively, load variables from whoogle.env
#- whoogle.env
ports:
- 8000:5000

@ -1,4 +1,4 @@
# can't use mem_limit in a 3.x docker-compose file in non swarm mode
# cant use mem_limit in a 3.x docker-compose file in non swarm mode
# see https://github.com/docker/compose/issues/4513
version: "2.4"
@ -18,7 +18,7 @@ services:
- ALL
tmpfs:
- /config/:size=10M,uid=927,gid=927,mode=1700
- /var/lib/tor/:size=15M,uid=927,gid=927,mode=1700
- /var/lib/tor/:size=10M,uid=927,gid=927,mode=1700
- /run/tor/:size=1M,uid=927,gid=927,mode=1700
#environment: # Uncomment to configure environment variables
# Basic auth configuration, uncomment to enable
@ -37,11 +37,9 @@ services:
#- WHOOGLE_ALT_IG=farside.link/bibliogram/u
#- WHOOGLE_ALT_RD=farside.link/libreddit
#- WHOOGLE_ALT_MD=farside.link/scribe
#- WHOOGLE_ALT_TL=farside.link/lingva
#- WHOOGLE_ALT_IMG=farside.link/rimgo
#- WHOOGLE_ALT_WIKI=farside.link/wikiless
#- WHOOGLE_ALT_IMDB=farside.link/libremdb
#- WHOOGLE_ALT_QUORA=farside.link/quetre
#- WHOOGLE_ALT_TL=lingva.ml
#- WHOOGLE_ALT_IMG=imgin.voidnet.tech
#- WHOOGLE_ALT_WIKI=wikiless.org
#env_file: # Alternatively, load variables from whoogle.env
#- whoogle.env
ports:

@ -1,24 +1,9 @@
https://s.alefvanoon.xyz
https://search.albony.xyz
https://search.exonip.de
https://search.garudalinux.org
https://search.dr460nf1r3.org
https://search.nezumi.party
https://s.tokhmi.xyz
https://search.sethforprivacy.com
https://whoogle.fossho.st
https://whooglesearch.net
https://www.whooglesearch.ml
https://whoogle.dcs0.hu
https://whoogle.lunar.icu
https://gowogle.voring.me
https://whoogle.privacydev.net
https://whoogle.hostux.net
https://wg.vern.cc
https://whoogle.hxvy0.gq
https://whoogle.ungovernable.men
https://whoogle2.ungovernable.men
https://whoogle3.ungovernable.men
https://wgl.frail.duckdns.org
https://whoogle.no-logs.com
https://whoogle.ftw.lol
https://whoogle-search--replitcomreside.repl.co
https://search.notrustverify.ch
https://whoogle.datura.network
https://whoogle.yepserver.xyz
https://search.snine.nl

@ -1,5 +0,0 @@
import subprocess
# A plague upon Replit and all who have built it
# Shell command run when this module is executed: quietly kill any running
# python3 processes, then install the packages from requirements.txt and,
# only if that succeeds, start ./run
replit_cmd = "killall -q python3 > /dev/null 2>&1; pip install -r requirements.txt && ./run"
# shell=True because the command relies on shell operators (;, &&, redirects)
subprocess.run(replit_cmd, shell=True)

@ -1 +0,0 @@
# Place password here. Keep this safe.

@ -1,27 +1,5 @@
#!/bin/sh
# torrc option appended when WHOOGLE_TOR_FF is enabled
FF_STRING="FascistFirewall 1"

# Note: "=" is the POSIX string comparison for [ ]; "==" is an extension
# that not every /bin/sh implementation accepts.

# Allow the Tor service to be disabled entirely
if [ "$WHOOGLE_TOR_SERVICE" = "0" ]; then
    echo "Skipping Tor startup..."
    exit 0
fi

# Optionally enable the FascistFirewall option, adding it to the torrc
# only if it is not already present
if [ "$WHOOGLE_TOR_FF" = "1" ]; then
    if grep -q "$FF_STRING" /etc/tor/torrc; then
        echo "FascistFirewall feature already enabled."
    elif echo "$FF_STRING" >> /etc/tor/torrc; then
        echo "FascistFirewall added to /etc/tor/torrc"
    else
        echo "ERROR: Unable to modify /etc/tor/torrc with $FF_STRING."
        exit 1
    fi
fi
if [ "$(whoami)" != "root" ]; then
tor -f /etc/tor/torrc
else

@ -6,7 +6,3 @@ CookieAuthFileGroupReadable 1
ExtORPortCookieAuthFileGroupReadable 1
CacheDirectoryGroupReadable 1
CookieAuthFile /var/lib/tor/control_auth_cookie
Log debug-notice file /dev/null
# UseBridges 1
# ClientTransportPlugin obfs4 exec /usr/bin/obfs4proxy
# Bridge obfs4 ip and so on

@ -1,67 +0,0 @@
import json
import pathlib
import requests
lingva = 'https://lingva.ml/api/v1/en'
def format_lang(lang: str) -> str:
    """Convert a language key into the language code lingva expects.

    Args:
        lang: A language key, with or without the 'lang_' prefix
            (i.e. 'lang_fr', 'lang_zh-TW', 'zh-CN').

    Returns:
        'zh_HANT' for 'zh-TW', 'zh' for any other 'zh-' variant, and
        otherwise the key with the 'lang_' prefix removed
        (i.e. 'en', 'fr', etc).
    """
    # Strip lang prefix to leave only the actual language code. This has
    # to happen *before* the comparisons below: callers pass prefixed keys
    # (i.e. 'lang_zh-TW'), which would never equal a bare 'zh-TW'.
    code = lang.replace('lang_', '')

    # Chinese (traditional and simplified) require
    # a different format for lingva translations
    if 'zh-' in code:
        return 'zh_HANT' if code == 'zh-TW' else 'zh'

    return code
def translate(v: str, lang: str) -> str:
    """Translate a string into `lang` using the lingva API.

    Args:
        v: The text to translate (sent to the English-source endpoint).
        lang: The target language key (i.e. 'lang_es', 'lang_fr', etc).

    Returns:
        The translated text, or an empty string if the request failed or
        the response did not contain a 'translation' field.
    """
    from urllib.parse import quote

    # Strip lang prefix to leave only the actual
    # language code (i.e. "es", "fr", etc)
    lang = format_lang(lang)

    # The text travels as a URL path segment, so it must be fully
    # percent-encoded. Otherwise a '/', '?' or '#' inside the text would
    # alter the request path/query instead of being translated.
    encoded_text = quote(v, safe='')
    lingva_req = f'{lingva}/{lang}/{encoded_text}'

    try:
        # Bounded wait so a single stalled request can't hang the script
        response = requests.get(lingva_req, timeout=30).json()
    except (requests.RequestException, ValueError) as e:
        # Network failure or non-JSON body: report it and fall back to the
        # "no translation" result that callers already handle
        print(f'! Request to {lingva_req} failed: {e}')
        return ''

    if 'translation' in response:
        return response['translation']

    return ''
if __name__ == '__main__':
    # The translations file is located relative to this script's directory
    file_path = pathlib.Path(__file__).parent.resolve()
    tl_path = 'app/static/settings/translations.json'

    with open(f'{file_path}/../{tl_path}', 'r+', encoding='utf-8') as tl_file:
        tl_data = json.load(tl_file)

        # Walk every english entry and fill it in for each other
        # language that doesn't have that key yet
        for k, v in tl_data['lang_en'].items():
            for lang, lang_tl in tl_data.items():
                already_present = k in lang_tl
                if lang == 'lang_en' or already_present:
                    continue

                # An empty key is the special-case placeholder text used
                # for translations without any key present: copy it over
                # unchanged. Everything else is translated using lingva.
                translation = translate(v, lang) if k else v

                if not translation:
                    print(f'! Unable to translate {lang}[{k}]')
                    continue

                print(f'{lang}[{k}] = {translation}')
                lang_tl[k] = translation

        # Emit the updated translations json on stdout
        # (the file itself is not rewritten here)
        print(json.dumps(tl_data, indent=4, ensure_ascii=False))

@ -1,3 +0,0 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

@ -1,37 +1,33 @@
attrs==22.2.0
beautifulsoup4==4.11.2
brotli==1.0.9
cachelib==0.10.2
certifi==2023.7.22
cffi==1.15.1
chardet==5.1.0
click==8.1.3
cryptography==3.3.2; platform_machine == 'armv7l'
cryptography==42.0.4; platform_machine != 'armv7l'
cssutils==2.6.0
defusedxml==0.7.1
Flask==2.3.2
idna==3.4
itsdangerous==2.1.2
Jinja2==3.1.3
MarkupSafe==2.1.2
more-itertools==9.0.0
packaging==23.0
pluggy==1.0.0
pycodestyle==2.10.0
attrs==19.3.0
beautifulsoup4==4.10.0
cachelib==0.4.1
certifi==2020.4.5.1
cffi==1.15.0
chardet==3.0.4
click==8.0.3
cryptography==3.3.2
Flask==1.1.1
Flask-Session==0.4.0
idna==2.9
itsdangerous==1.1.0
Jinja2==2.11.3
MarkupSafe==1.1.1
more-itertools==8.3.0
packaging==20.4
pluggy==0.13.1
py==1.10.0
pycodestyle==2.6.0
pycparser==2.21
pyOpenSSL==19.1.0; platform_machine == 'armv7l'
pyOpenSSL==24.0.0; platform_machine != 'armv7l'
pyparsing==3.0.9
pyOpenSSL==19.1.0
pyparsing==2.4.7
PySocks==1.7.1
pytest==7.2.1
python-dateutil==2.8.2
requests==2.31.0
soupsieve==2.4
stem==1.8.1
urllib3==1.26.18
validators==0.22.0
waitress==2.1.2
wcwidth==0.2.6
Werkzeug==3.0.1
python-dotenv==0.21.1
pytest==6.2.5
python-dateutil==2.8.1
requests==2.25.1
soupsieve==1.9.5
stem==1.8.0
urllib3==1.26.5
waitress==1.4.3
wcwidth==0.1.9
Werkzeug==0.16.0
python-dotenv==0.16.0

15
run

@ -3,7 +3,7 @@
# ./run # Runs the full web app
# ./run test # Runs the testing suite
set -e
set -eu
SCRIPT_DIR="$(CDPATH= command cd -- "$(dirname -- "$0")" && pwd -P)"
@ -24,14 +24,7 @@ if [ "$SUBDIR" = "test" ]; then
pytest -sv
else
mkdir -p "$STATIC_FOLDER"
if [ ! -z "$UNIX_SOCKET" ]; then
python3 -um app \
--unix-socket "$UNIX_SOCKET"
else
echo "Running on http://${ADDRESS:-0.0.0.0}:${PORT:-"${EXPOSE_PORT:-5000}"}"
python3 -um app \
--host "${ADDRESS:-0.0.0.0}" \
--port "${PORT:-"${EXPOSE_PORT:-5000}"}"
fi
python3 -um app \
--host "${ADDRESS:-0.0.0.0}" \
--port "${PORT:-"${EXPOSE_PORT:-5000}"}"
fi

@ -1,45 +0,0 @@
[metadata]
name = whoogle-search
version = attr: app.version.__version__
url = https://github.com/benbusby/whoogle-search
description = Self-hosted, ad-free, privacy-respecting metasearch engine
long_description = file: README.md
long_description_content_type = text/markdown
keywords = search, metasearch, flask, adblock, degoogle, privacy
author = Ben Busby
author_email = contact@benbusby.com
license = MIT
classifiers =
Programming Language :: Python :: 3
License :: OSI Approved :: MIT License
Operating System :: OS Independent
[options]
packages = find:
include_package_data = True
install_requires=
beautifulsoup4
brotli
cssutils
cryptography
defusedxml
Flask
python-dotenv
requests
stem
validators
waitress
[options.extras_require]
test =
pytest
python-dateutil
dev = pycodestyle
[options.packages.find]
exclude =
test*
[options.entry_points]
console_scripts =
whoogle-search = app.routes:run_app

@ -0,0 +1,34 @@
import os
import setuptools
# Read the packaging inputs. Context managers close the file handles
# promptly, and the explicit encoding keeps the build from depending on
# the machine's locale (assumes both files are UTF-8 -- TODO confirm).
with open('README.md', 'r', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

with open('requirements.txt', 'r', encoding='utf-8') as requirements_file:
    # One requirement per line: drop the trailing newlines and skip
    # blank lines and comments
    requirements = [
        line.strip() for line in requirements_file
        if line.strip() and not line.strip().startswith('#')
    ]

# Optional suffix for development builds: when the DEV_BUILD environment
# variable is set, its value is appended to the version as '.dev<value>'
optional_dev_tag = ''
if os.getenv('DEV_BUILD'):
    optional_dev_tag = '.dev' + os.getenv('DEV_BUILD')

setuptools.setup(
    author='Ben Busby',
    author_email='contact@benbusby.com',
    name='whoogle-search',
    version='0.7.1' + optional_dev_tag,
    include_package_data=True,
    install_requires=requirements,
    description='Self-hosted, ad-free, privacy-respecting metasearch engine',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/benbusby/whoogle-search',
    # Installs a `whoogle-search` command that calls app.routes:run_app
    entry_points={
        'console_scripts': [
            'whoogle-search=app.routes:run_app',
        ]
    },
    packages=setuptools.find_packages(),
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
)

@ -1,5 +1,5 @@
from app import app
from app.utils.session import generate_key
from app.utils.session import generate_user_key
import pytest
import random
@ -18,7 +18,6 @@ def client():
with app.test_client() as client:
with client.session_transaction() as session:
session['uuid'] = 'test'
session['key'] = app.enc_key
session['key'] = generate_user_key()
session['config'] = {}
session['auth'] = False
yield client

@ -2,27 +2,13 @@ from cryptography.fernet import Fernet
from app import app
from app.models.endpoint import Endpoint
from app.utils.session import generate_key, valid_user_session
JAPAN_PREFS = 'uG-gGIJwHdqxl6DrS3mnu_511HlQcRpxYlG03Xs-' \
+ '_znXNiJWI9nLOkRLkiiFwIpeUYMTGfUF5-t9fP5DGmzDLEt04DCx703j3nPf' \
+ '29v_RWkU7gXw_44m2oAFIaKGmYlu4Z0bKyu9k5WXfL9Dy6YKKnpcR5CiaFsG' \
+ 'rccNRkAPYm-eYGAFUV8M59f8StsGd_M-gHKGS9fLok7EhwBWjHxBJ2Kv8hsT' \
+ '87zftP2gMJOevTdNnezw2Y5WOx-ZotgeheCW1BYCFcRqatlov21PHp22NGVG' \
+ '8ZuBNAFW0bE99WSdyT7dUIvzeWCLJpbdSsq-3FUUZkxbRdFYlGd8vY1UgVAp' \
+ 'OSie2uAmpgLFXygO-VfNBBZ68Q7gAap2QtzHCiKD5cFYwH3LPgVJ-DoZvJ6k' \
+ 'alt34TaYiJphgiqFKV4SCeVmLWTkr0SF3xakSR78yYJU_d41D2ng-TojA9XZ' \
+ 'uR2ZqjSvPKOWvjimu89YhFOgJxG1Po8Henj5h9OL9VXXvdvlJwBSAKw1E3FV' \
+ '7UHWiglMxPblfxqou1cYckMYkFeIMCD2SBtju68mBiQh2k328XRPTsQ_ocby' \
+ 'cgVKnleGperqbD6crRk3Z9xE5sVCjujn9JNVI-7mqOITMZ0kntq9uJ3R5n25' \
+ 'Vec0TJ0P19nEtvjY0nJIrIjtnBg=='
from app.utils.session import generate_user_key, valid_user_session
def test_generate_user_keys():
key = generate_key()
key = generate_user_key()
assert Fernet(key)
assert generate_key() != key
assert generate_user_key() != key
def test_valid_session(client):
@ -63,16 +49,3 @@ def test_query_decryption(client):
with client.session_transaction() as session:
assert valid_user_session(session)
def test_prefs_url(client):
    """Check that a `preferences` URL parameter changes the search results.

    A plain 'wikipedia' search must link to wikipedia.org but not to
    ja.wikipedia.org; the same search with JAPAN_PREFS passed as the
    `preferences` parameter must link to ja.wikipedia.org.
    """
    base_url = f'/{Endpoint.search}?q=wikipedia'

    # Baseline request without any preferences in the URL
    default_rv = client.get(base_url)
    assert default_rv._status_code == 200
    default_page = default_rv.data
    assert b'wikipedia.org' in default_page
    assert b'ja.wikipedia.org' not in default_page

    # Same query, this time with the preferences string appended
    prefs_rv = client.get(f'{base_url}&preferences={JAPAN_PREFS}')
    assert prefs_rv._status_code == 200
    assert b'ja.wikipedia.org' in prefs_rv.data

@ -2,17 +2,16 @@ from bs4 import BeautifulSoup
from app.filter import Filter
from app.models.config import Config
from app.models.endpoint import Endpoint
from app.utils import results
from app.utils.session import generate_key
from app.utils.session import generate_user_key
from datetime import datetime
from dateutil.parser import ParserError, parse
from dateutil.parser import *
from urllib.parse import urlparse
from test.conftest import demo_config
def get_search_results(data):
secret_key = generate_key()
secret_key = generate_user_key()
soup = Filter(user_key=secret_key, config=Config(**demo_config)).clean(
BeautifulSoup(data, 'html.parser'))
@ -45,21 +44,27 @@ def test_get_results(client):
def test_post_results(client):
rv = client.post(f'/{Endpoint.search}', data=dict(q='test'))
assert rv._status_code == 302
assert rv._status_code == 200
# Depending on the search, there can be more
# than 10 result divs
results = get_search_results(rv.data)
assert len(results) >= 10
assert len(results) <= 15
def test_translate_search(client):
rv = client.get(f'/{Endpoint.search}?q=translate hola')
rv = client.post(f'/{Endpoint.search}', data=dict(q='translate hola'))
assert rv._status_code == 200
# Pretty weak test, but better than nothing
str_data = str(rv.data)
assert 'iframe' in str_data
assert '/auto/en/ hola' in str_data
assert 'lingva.ml/auto/en/ hola' in str_data
def test_block_results(client):
rv = client.get(f'/{Endpoint.search}?q=pinterest')
rv = client.post(f'/{Endpoint.search}', data=dict(q='pinterest'))
assert rv._status_code == 200
has_pinterest = False
@ -74,7 +79,7 @@ def test_block_results(client):
rv = client.post(f'/{Endpoint.config}', data=demo_config)
assert rv._status_code == 302
rv = client.get(f'/{Endpoint.search}?q=pinterest')
rv = client.post(f'/{Endpoint.search}', data=dict(q='pinterest'))
assert rv._status_code == 200
for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):
@ -84,25 +89,15 @@ def test_block_results(client):
assert result_site not in 'pinterest.com'
def test_view_my_ip(client):
    """Check that a 'my ip address' search reports the client's IP.

    Only looks for the expected label and the test client's address in the
    raw response, so this is a fairly weak (but cheap) check.
    """
    response = client.get(f'/{Endpoint.search}?q=my ip address')
    assert response._status_code == 200

    page_text = str(response.data)
    for expected in ('Your public IP address', '127.0.0.1'):
        assert expected in page_text
def test_recent_results(client):
times = {
'tbs=qdr:y': 365,
'tbs=qdr:m': 31,
'tbs=qdr:w': 7
'past year': 365,
'past month': 31,
'past week': 7
}
for time, num_days in times.items():
rv = client.get(f'/{Endpoint.search}?q=test&' + time)
rv = client.post(f'/{Endpoint.search}', data=dict(q='test :' + time))
result_divs = get_search_results(rv.data)
current_date = datetime.now()
@ -117,42 +112,3 @@ def test_recent_results(client):
assert (current_date - date).days <= (num_days + 5)
except ParserError:
pass
def test_leading_slash_search(client):
    """Check that a query starting with '/' is handled as a query.

    Searches for '/test', runs the response through Filter.clean, and
    verifies that every link containing 'start=' begins with the search
    endpoint (i.e. the leading slash was not interpreted as an endpoint).
    """
    q = '/test'
    rv = client.get(f'/{Endpoint.search}?q={q}')
    assert rv._status_code == 200

    result_filter = Filter(
        user_key=generate_key(),
        config=Config(**demo_config),
        query=q
    )
    soup = result_filter.clean(BeautifulSoup(rv.data, 'html.parser'))

    # Only links carrying a 'start=' parameter are of interest here
    start_links = [
        link['href'] for link in soup.find_all('a', href=True)
        if 'start=' in link['href']
    ]
    for href in start_links:
        assert href.startswith(f'{Endpoint.search}')
def test_site_alt_prefix_skip():
    """Ensure prefixes are skipped correctly for site alts.

    Verifies results.get_site_alt for 'www.'-prefixed links, first with
    the default site alts and then with custom alternatives.
    """
    # Default site_alts (farside.link)
    default_expected = {
        'https://www.reddit.com': 'https://farside.link/libreddit',
        'https://www.twitter.com': 'https://farside.link/nitter',
        'https://www.youtube.com': 'https://farside.link/invidious',
    }
    for link, expected in default_expected.items():
        assert results.get_site_alt(link=link) == expected

    # Domains with part of SKIP_PREFIX in them
    test_site_alts = {
        'reddit.com': 'reddit.endswithmobile.domain',
        'twitter.com': 'https://twitter.endswithm.domain',
        'youtube.com': 'http://yt.endswithwww.domain',
    }
    custom_expected = {
        'https://www.reddit.com': 'https://reddit.endswithmobile.domain',
        'https://www.twitter.com': 'https://twitter.endswithm.domain',
        'https://www.youtube.com': 'http://yt.endswithwww.domain',
    }
    for link, expected in custom_expected.items():
        alt = results.get_site_alt(link=link, site_alts=test_site_alts)
        assert alt == expected

@ -17,15 +17,8 @@ def test_search(client):
def test_feeling_lucky(client):
# Bang at beginning of query
rv = client.get(f'/{Endpoint.search}?q=!%20wikipedia')
assert rv._status_code == 303
assert rv.headers.get('Location').startswith('https://www.wikipedia.org')
# Move bang to end of query
rv = client.get(f'/{Endpoint.search}?q=github%20!')
rv = client.get(f'/{Endpoint.search}?q=!%20test')
assert rv._status_code == 303
assert rv.headers.get('Location').startswith('https://github.com')
def test_ddg_bang(client):
@ -44,24 +37,17 @@ def test_ddg_bang(client):
assert rv._status_code == 302
assert rv.headers.get('Location').startswith('https://www.reddit.com')
# Ensure bang is case insensitive
rv = client.get(f'/{Endpoint.search}?q=!GH%20whoogle')
# Move '!' to end of the bang
rv = client.get(f'/{Endpoint.search}?q=gitlab%20w!')
assert rv._status_code == 302
assert rv.headers.get('Location').startswith('https://github.com')
assert rv.headers.get('Location').startswith('https://en.wikipedia.org')
# Ensure bang without a query still redirects to the result
rv = client.get(f'/{Endpoint.search}?q=!gh')
# Ensure bang is case insensitive
rv = client.get(f'/{Endpoint.search}?q=!GH%20whoogle')
assert rv._status_code == 302
assert rv.headers.get('Location').startswith('https://github.com')
def test_custom_bang(client):
    """Check that the '!i' bang at the start of a query redirects (302)
    to a location beginning with 'search?q='."""
    response = client.get(f'/{Endpoint.search}?q=!i%20whoogle')
    assert response._status_code == 302

    location = response.headers.get('Location')
    assert location.startswith('search?q=')
def test_config(client):
rv = client.post(f'/{Endpoint.config}', data=demo_config)
assert rv._status_code == 302

@ -12,11 +12,9 @@
#WHOOGLE_ALT_IG=farside.link/bibliogram/u
#WHOOGLE_ALT_RD=farside.link/libreddit
#WHOOGLE_ALT_MD=farside.link/scribe
#WHOOGLE_ALT_TL=farside.link/lingva
#WHOOGLE_ALT_IMG=farside.link/rimgo
#WHOOGLE_ALT_WIKI=farside.link/wikiless
#WHOOGLE_ALT_IMDB=farside.link/libremdb
#WHOOGLE_ALT_QUORA=farside.link/quetre
#WHOOGLE_ALT_TL=lingva.ml
#WHOOGLE_ALT_IMG=imgin.voidnet.tech
#WHOOGLE_ALT_WIKI=wikiless.org
#WHOOGLE_USER=""
#WHOOGLE_PASS=""
#WHOOGLE_PROXY_USER=""
@ -26,9 +24,6 @@
#WHOOGLE_CSP=1
#HTTPS_ONLY=1
# The URL prefix to use for the whoogle instance (i.e. "/whoogle")
#WHOOGLE_URL_PREFIX=""
# Restrict results to only those near a particular city
#WHOOGLE_CONFIG_NEAR=denver
@ -85,9 +80,3 @@
# Set custom CSS styling/theming
#WHOOGLE_CONFIG_STYLE=":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }"
# Enable preferences encryption (requires key)
#WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED=1
# Set Key to encode config in url
#WHOOGLE_CONFIG_PREFERENCES_KEY="NEEDS_TO_BE_MODIFIED"
Loading…
Cancel
Save