diff --git a/.gitignore b/.gitignore
index 82de859..469cf3d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 venv/
+.idea/
 __pycache__/
 *.pyc
 *.pem
 *.xml
-run.sh
+config.json
diff --git a/app/routes.py b/app/routes.py
index 14b2d30..81c98ca 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -1,40 +1,44 @@
 from app import app
 from bs4 import BeautifulSoup
 from flask import request, redirect, Response, render_template
+import json
 import os
 import pycurl
+import rhyme
 import re
-from .url import url_parse
 import urllib.parse as urlparse
 from urllib.parse import parse_qs
 from io import BytesIO
 
 
-MOBILE_UA = os.environ.get('MOZ') + '/5.0 (Android 4.20; Mobile; rv:54.0) Gecko/54.0 ' + os.environ.get('FF') + '/54.0'
-DESKTOP_UA = os.environ.get('MOZ') + '/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Mobile ' + os.environ.get('FF') + '/59.0'
+# Get Mozilla Firefox rhyme (important) and form a new user agent
+mozilla = rhyme.get_rhyme('Mo') + 'zilla'
+firefox = rhyme.get_rhyme('Fire') + 'fox'
+MOBILE_UA = mozilla + '/5.0 (Android 4.20; Mobile; rv:54.0) Gecko/54.0 ' + firefox + '/59.0'
+DESKTOP_UA = mozilla + '/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Mobile ' + firefox + '/59.0'
+
+# Base search url
 SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
 
+# Optional nojs tag - opens links in a contained window with all js removed
+# (can be useful for achieving nojs on mobile)
 nojs = int(os.environ.get('NOJS'))
 
+with open('config.json') as config_file:
+    config = json.load(config_file)
+
 
 def get_ua(user_agent):
     return MOBILE_UA if ('Android' in user_agent or 'iPhone' in user_agent) else DESKTOP_UA
 
 
-def send_request(url, ua):
+def send_request(curl_url, ua):
     request_header = []
 
-    # Update as an optional param
-    # Todo: this doesn't seem to work
-    ip = '64.22.92.48'
-    request_header.append('CLIENT-IP: ' + ip)
-    request_header.append('X-FORWARDED-FOR: ' + ip)
-
     b_obj = BytesIO()
     crl = pycurl.Curl()
-    crl.setopt(crl.URL, url)
+    crl.setopt(crl.URL, curl_url)
     crl.setopt(crl.USERAGENT, ua)
-    crl.setopt(crl.HTTPHEADER, request_header)
     crl.setopt(crl.WRITEDATA, b_obj)
     crl.perform()
     crl.close()
@@ -52,34 +56,44 @@ def search():
     q = request.args.get('q')
     if q is None or len(q) <= 0:
         return render_template('error.html')
+    q = urlparse.quote(q)
 
+    # Pass along type of results (news, images, books, etc)
     tbm = ''
     if 'tbm' in request.args:
         tbm = '&tbm=' + request.args.get('tbm')
 
+    # Get results page start value (10 per page, ie page 2 start val = 20)
     start = ''
     if 'start' in request.args:
         start = '&start=' + request.args.get('start')
 
-    # Change to a config setting
-    near = '&near=boulder'
-    if 'near' in request.args:
-        near = '&near=' + request.args.get('near')
+    # Grab city from config, if available (URL-encoded so multi-word cities work)
+    near = ''
+    if 'near' in config:
+        near = '&near=' + urlparse.quote(config['near'])
 
     user_agent = request.headers.get('User-Agent')
-    full_query = url_parse(q) + tbm + start + near
+    full_query = q + tbm + start + near
 
+    # Aesthetic only re-skinning
     get_body = send_request(SEARCH_URL + full_query, get_ua(user_agent))
     get_body = get_body.replace('>G<', '>Sh<')
     pattern = re.compile('4285f4|ea4335|fbcc05|34a853|fbbc05', re.IGNORECASE)
-    get_body = pattern.sub('0000ff', get_body)
+    get_body = pattern.sub('685e79', get_body)
 
     soup = BeautifulSoup(get_body, 'html.parser')
 
-    ad_divs = soup.find('div', {'id':'main'}).findAll('div', {'class':'ZINbbc'}, recursive=False)
+    # Remove all ads (TODO: Ad specific div class may change over time, look into a more generic method)
+    ad_divs = soup.find('div', {'id': 'main'}).findAll('div', {'class': 'ZINbbc'}, recursive=False)
     for div in ad_divs:
         div.decompose()
 
+    # Remove unnecessary button(s)
+    for button in soup.find_all('button'):
+        button.decompose()
+
+    # Replace hrefs with only the intended destination (no "utm" type tags)
     for a in soup.find_all('a', href=True):
         href = a['href']
         if 'url?q=' in href:
@@ -87,8 +101,10 @@ def search():
             href = parse_qs(href.query)['q'][0]
             if nojs:
                 a['href'] = '/window?location=' + href
-            #else:
+            # else:
                 # Automatically go to reader mode in ff? Not sure if possible # a['href'] = 'about:reader?url=' + href
+
+    # Ensure no extra scripts passed through
     try:
         for script in soup("script"):
             script.decompose()
diff --git a/app/static/css/main.css b/app/static/css/main.css
index 120b572..f3472bd 100644
--- a/app/static/css/main.css
+++ b/app/static/css/main.css
@@ -29,26 +29,26 @@ body {
 
 #search-bar {
     width: 100%;
-    border: 3px solid #0000FF;
+    border: 3px solid #685e79;
     padding: 5px;
     height: 40px;
     border-radius: 10px;
     outline: none;
     font-size: 24px;
-    color: #0000FF;
+    color: #685e79;
     border-radius: 10px 10px 0 0;
     max-width: 600px;
 }
 
 #search-bar:focus{
-    color: #0000FF;
+    color: #685e79;
 }
 
 #search-submit {
     width: 100%;
     height: 40px;
-    border: 1px solid #0000FF;
-    background: #0000FF;
+    border: 1px solid #685e79;
+    background: #685e79;
     text-align: center;
     color: #fff;
     border-radius: 10px;
diff --git a/app/static/img/logo.png b/app/static/img/logo.png
index 9c1701b..1e9dcd0 100644
Binary files a/app/static/img/logo.png and b/app/static/img/logo.png differ
diff --git a/app/url.py b/app/url.py
deleted file mode 100644
index e896dcf..0000000
--- a/app/url.py
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-def url_parse(str):
-    return str.replace(' ', '+')
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..2878c2a
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+# Set NOJS mode to false if unavailable
+if [[ -z "$NOJS" ]]; then
+    export NOJS=0
+fi
+
+# Create config json if it doesn't exist (never overwrite an existing one)
+if [[ ! -f "$SCRIPT_DIR/config.json" ]]; then
+    echo "{}" > "$SCRIPT_DIR/config.json"
+fi
+
+pkill flask
+
+# TODO: Set up the following for running over https
+#--cert=./app/cert.pem --key=./app/key.pem
+"$SCRIPT_DIR/venv/bin/flask" run