2020-04-15 23:41:53 +00:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from app.filter import Filter
|
2020-07-26 17:53:59 +00:00
|
|
|
from app.utils.session_utils import generate_user_keys
|
2020-04-15 23:41:53 +00:00
|
|
|
from datetime import datetime
|
|
|
|
from dateutil.parser import *
|
|
|
|
|
|
|
|
|
|
|
|
def get_search_results(data):
    """Extract the candidate result divs from a raw search response body.

    The markup in ``data`` is parsed with BeautifulSoup's ``html.parser``,
    passed through ``Filter.clean`` using freshly generated user keys, and
    the children of the ``<div id="main">`` element are then screened.

    A child is kept only when it has exactly one child node and its first
    descendant tag has a name containing ``'div'``.

    Returns a list of the matching elements, in document order.
    """
    user_keys = generate_user_keys()
    result_filter = Filter(user_keys=user_keys)
    soup = result_filter.clean(BeautifulSoup(data, 'html.parser'))

    main_divs = soup.find('div', {'id': 'main'})
    assert len(main_divs) > 1

    def _looks_like_result(candidate):
        # Result divs should only have 1 inner div
        if len(list(candidate.children)) != 1:
            return False
        inner = candidate.findChild()
        if not inner:
            return False
        return 'div' in inner.name

    return [div for div in main_divs if _looks_like_result(div)]
|
|
|
|
|
|
|
|
|
2020-04-29 00:59:33 +00:00
|
|
|
def test_get_results(client):
    """A GET search should succeed and return a plausible number of results.

    ``client`` is the test-client fixture supplied by the test setup (it is
    defined outside this file; presumably a Flask/Werkzeug test client).
    """
    rv = client.get('/search?q=test')
    # Use the public status accessor rather than the private ``_status_code``.
    assert rv.status_code == 200

    # Depending on the search, there can be more than 10 result divs.
    # Parse the response once so both bounds are checked against the same
    # list instead of filtering the page twice.
    results = get_search_results(rv.data)
    assert 10 <= len(results) <= 15
|
2020-04-15 23:41:53 +00:00
|
|
|
|
|
|
|
|
2020-04-29 00:59:33 +00:00
|
|
|
def test_post_results(client):
    """A POST search should succeed and return a plausible number of results.

    ``client`` is the test-client fixture supplied by the test setup (it is
    defined outside this file; presumably a Flask/Werkzeug test client).
    """
    rv = client.post('/search', data=dict(q='test'))
    # Use the public status accessor rather than the private ``_status_code``.
    assert rv.status_code == 200

    # Depending on the search, there can be more than 10 result divs.
    # Parse the response once so both bounds are checked against the same
    # list instead of filtering the page twice.
    results = get_search_results(rv.data)
    assert 10 <= len(results) <= 15
|
|
|
|
|
|
|
|
|
2020-12-12 00:21:32 +00:00
|
|
|
# TODO: Unit test the site alt method instead -- the results returned
|
|
|
|
# are too unreliable for this test in particular.
|
|
|
|
# def test_site_alts(client):
|
|
|
|
# rv = client.post('/search', data=dict(q='twitter official account'))
|
|
|
|
# assert b'twitter.com/Twitter' in rv.data
|
|
|
|
|
|
|
|
# client.post('/config', data=dict(alts=True))
|
|
|
|
# assert json.loads(client.get('/config').data)['alts']
|
|
|
|
|
|
|
|
# rv = client.post('/search', data=dict(q='twitter official account'))
|
|
|
|
# assert b'twitter.com/Twitter' not in rv.data
|
|
|
|
# assert b'nitter.net/Twitter' in rv.data
|
2020-12-05 22:01:21 +00:00
|
|
|
|
|
|
|
|
2020-04-15 23:41:53 +00:00
|
|
|
def test_recent_results(client):
    """Time-restricted searches should only surface suitably recent results.

    For each ``:past ...`` suffix, a search is posted and every result div
    containing a ``<span>`` is inspected. If the span's contents are 7 to 15
    characters long and parse as a date, that date must fall within the
    allowed number of days (plus a 5-day tolerance) of the current time.
    Spans that do not parse as dates are ignored.
    """
    max_age_in_days = {
        'past year': 365,
        'past month': 31,
        'past week': 7
    }

    for period, day_limit in max_age_in_days.items():
        rv = client.post('/search', data={'q': 'test :' + period})
        result_divs = get_search_results(rv.data)

        now = datetime.now()
        divs_with_span = [div for div in result_divs if div.find('span')]
        for div in divs_with_span:
            span_text = div.find('span').decode_contents()
            # Only short strings are plausible dates; skip everything else.
            if not span_text or not 7 <= len(span_text) <= 15:
                continue

            try:
                result_date = parse(span_text)
            except ParserError:
                # Span text was not a date after all.
                continue

            # Date can have a little bit of wiggle room
            assert (now - result_date).days <= (day_limit + 5)
|