mirror of
https://github.com/benbusby/whoogle-search
synced 2024-11-01 03:20:30 +00:00
0c0ebb8917
The implementation of POST search support comes with a few benefits. The most apparent is the avoidance of search queries appearing in web server logs -- instead of the prior GET approach (i.e. /search?q=my+search+query), using POST requests with the query stored in the request body creates logs that simply appear as "/search". Since a lot of relative links are generated in the results page, I came up with a way to generate a unique key at run time that is used to encrypt any query strings before sending to the user. This benefits both regular text queries as well as fetching of image links and means that web logs will only show an encrypted string where a link or query string might slip through. Unfortunately, GET search requests still need to be supported, as it doesn't seem that Firefox (on iOS) supports loading search engines by their opensearch.xml file, but instead relies on manual entry of a search query string. Once this is updated, I'll probably remove GET request search support.
82 lines
2.3 KiB
Python
82 lines
2.3 KiB
Python
from app import rhyme
|
|
from io import BytesIO
|
|
import pycurl
|
|
import urllib.parse as urlparse
|
|
|
|
# Base search url; Request.send() appends the string built by gen_query()
# directly after the trailing "q=".
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='

# User agent templates. gen_user_agent() fills the placeholders with
# rhyming stand-ins, in order: (mozilla, firefox) for the mobile template
# and (mozilla, linux, firefox) for the desktop template.
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'

# Valid query params
# NOTE(review): not referenced in this file -- presumably consumed by other
# modules when filtering request arguments; confirm against callers.
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
|
|
|
|
|
|
def gen_user_agent(normal_ua):
    """Return a substitute user agent string for outgoing requests.

    The browser and platform names in the template are replaced with
    values produced by ``rhyme.get_rhyme``. The mobile template is used
    when the caller's real user agent mentions Android or iPhone,
    otherwise the desktop template is used.

    Args:
        normal_ua: The user agent string sent by the client.

    Returns:
        The formatted replacement user agent string.
    """
    wants_mobile = any(tag in normal_ua for tag in ('Android', 'iPhone'))

    # All three name fragments are generated up front, in a fixed order,
    # regardless of which template ends up being used.
    mozilla_name = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla')
    firefox_name = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox')
    linux_name = rhyme.get_rhyme('Lin') + 'ux'

    if not wants_mobile:
        return DESKTOP_UA.format(mozilla_name, linux_name, firefox_name)

    return MOBILE_UA.format(mozilla_name, firefox_name)
|
|
|
|
|
|
def gen_query(q, args, near_city=None):
    """Build the query-string tail that is appended to the base search URL.

    Args:
        q: Raw search text. A ``:past <unit>`` marker (for example
            ``"new restaurants :past month"``) adds a ``tbs=qdr:<x>``
            time filter, where ``<x>`` is the lowercased first character
            of the text following ``:past``. The marker itself is left in
            the query text.
        args: Mapping of request arguments. Only ``tbm`` (type of results)
            and ``start`` (results offset) are read.
        near_city: Optional city name used for the ``near`` parameter.

    Returns:
        The URL-quoted query followed by whichever of ``&tbs=``,
        ``&tbm=``, ``&start=`` and ``&near=`` apply, in that order.
    """
    # Use :past(hour/day/week/month/year) if available
    # example search "new restaurants :past month"
    tbs = ''
    if ':past' in q:
        time_range = q.split(':past', 1)[-1].strip()
        # A bare ":past" with nothing after it carries no unit; skip the
        # filter instead of indexing into an empty string.
        if time_range:
            tbs = '&tbs=qdr:' + urlparse.quote(time_range[0].lower())

    # Ensure search query is parsable
    q = urlparse.quote(q)

    # Pass along type of results (news, images, books, etc). The value
    # comes from the request, so it is escaped like the query itself to
    # keep it from injecting extra parameters.
    tbm = ''
    if 'tbm' in args:
        tbm = '&tbm=' + urlparse.quote(args.get('tbm'))

    # Get results page start value (10 per page, ie page 2 start val = 20)
    start = ''
    if 'start' in args:
        start = '&start=' + urlparse.quote(args.get('start'))

    # Search for results near a particular city, if available
    near = ''
    if near_city is not None:
        near = '&near=' + urlparse.quote(near_city)

    return q + tbs + tbm + start + near
|
|
|
|
|
|
class Request:
    """Sends outgoing HTTP requests using a substitute user agent.

    Attributes are also readable with subscript syntax, e.g.
    ``request['modified_user_agent']``.
    """

    def __init__(self, normal_ua):
        """Derive the outgoing user agent from the client's real one.

        Args:
            normal_ua: The user agent string sent by the client.
        """
        self.modified_user_agent = gen_user_agent(normal_ua)

    def __getitem__(self, name):
        """Return the attribute called ``name`` (raises AttributeError if absent)."""
        return getattr(self, name)

    def send(self, base_url=SEARCH_URL, query='', return_bytes=False):
        """Fetch ``base_url + query`` and return the response body.

        Args:
            base_url: URL prefix; defaults to the search URL.
            query: String appended verbatim to ``base_url``.
            return_bytes: When true, return the raw body as ``bytes``;
                otherwise decode it as UTF-8, dropping undecodable bytes.

        Returns:
            The response body as ``bytes`` or ``str``.

        Raises:
            pycurl.error: If the transfer fails.
        """
        # Header lines are collected here only so they stay out of the
        # body buffer; they are not inspected afterwards.
        response_header = []

        b_obj = BytesIO()
        crl = pycurl.Curl()
        try:
            crl.setopt(crl.URL, base_url + query)
            crl.setopt(crl.USERAGENT, self.modified_user_agent)
            crl.setopt(crl.WRITEDATA, b_obj)
            crl.setopt(crl.HEADERFUNCTION, response_header.append)
            crl.setopt(pycurl.FOLLOWLOCATION, 1)
            crl.perform()
        finally:
            # Always release the curl handle, even when setup or the
            # transfer raises.
            crl.close()

        if return_bytes:
            return b_obj.getvalue()

        return b_obj.getvalue().decode('utf-8', 'ignore')
|