From ef98d85dc54aa416a84c40c5cc292fb432851188 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 3 Jun 2022 14:03:57 -0600 Subject: [PATCH] Ensure searches with a leading slash are treated as queries A user reported a bug where searches with a leading slash (in this case: "/e/OS apps") were interpreted as a Google-specific link when clicking the next page of results. This was due to the behavior that Google's search results exhibit, where internal links for pages like support.google.com are delivered with params like "?q=/support" rather than a direct link. This fixes that scenario by checking the "q" param value against the user's original query to ensure they don't match before assuming that the result is intended as a redirect. Fixes #776 --- app/filter.py | 4 +++- app/utils/search.py | 3 ++- test/test_results.py | 20 ++++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/app/filter.py b/app/filter.py index 457fdc1..98360e5 100644 --- a/app/filter.py +++ b/app/filter.py @@ -89,11 +89,13 @@ class Filter: config: Config, root_url='', page_url='', + query='', mobile=False) -> None: self.config = config self.mobile = mobile self.user_key = user_key self.page_url = page_url + self.query = query self.main_divs = ResultSet('') self._elements = 0 self._av = set() @@ -429,7 +431,7 @@ class Filter: result_link = urlparse.urlparse(href) q = extract_q(result_link.query, href) - if q.startswith('/'): + if q.startswith('/') and q not in self.query: # Internal google links (i.e. 
mail, maps, etc) should still # be forwarded to Google link['href'] = 'https://google.com' + q diff --git a/app/utils/search.py b/app/utils/search.py index e7571db..7815d24 100644 --- a/app/utils/search.py +++ b/app/utils/search.py @@ -118,7 +118,8 @@ class Search: content_filter = Filter(self.session_key, root_url=self.request.url_root, mobile=mobile, - config=self.config) + config=self.config, + query=self.query) full_query = gen_query(self.query, self.request_params, self.config) diff --git a/test/test_results.py b/test/test_results.py index 8b16b0f..a8e5cd0 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -122,3 +122,23 @@ def test_recent_results(client): assert (current_date - date).days <= (num_days + 5) except ParserError: pass + + +def test_leading_slash_search(client): + # Ensure searches with a leading slash are interpreted + # correctly as queries and not endpoints + q = '/test' + rv = client.get(f'/{Endpoint.search}?q={q}') + assert rv._status_code == 200 + + soup = Filter( + user_key=generate_user_key(), + config=Config(**demo_config), + query=q + ).clean(BeautifulSoup(rv.data, 'html.parser')) + + for link in soup.find_all('a', href=True): + if 'start=' not in link['href']: + continue + + assert link['href'].startswith(f'{Endpoint.search}')