From f18bf07ac397dd2b1585d32ace3f67409347065b Mon Sep 17 00:00:00 2001
From: David Shen <2984600+pantherman594@users.noreply.github.com>
Date: Fri, 19 Apr 2024 14:40:06 -0400
Subject: [PATCH] Fix feeling lucky (#1130)

* Fix feeling lucky; fall through to displaying results if it doesn't work

* Allow lucky bang anywhere

* Update feeling lucky test
---
 app/utils/results.py | 20 +++++++++++++++---
 app/utils/search.py  | 49 ++++++++++++++++++++++++++------------------
 test/test_routes.py  |  9 +++++++-
 3 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/app/utils/results.py b/app/utils/results.py
index c78f866..dbd60cc 100644
--- a/app/utils/results.py
+++ b/app/utils/results.py
@@ -144,12 +144,26 @@ def get_first_link(soup: BeautifulSoup) -> str:
         str: A str link to the first result
 
     """
+    first_link = ''
+    orig_details = []
+
+    # Temporarily remove details so we don't grab those links
+    for details in soup.find_all('details'):
+        temp_details = soup.new_tag('removed_details')
+        orig_details.append(details.replace_with(temp_details))
+
     # Replace hrefs with only the intended destination (no "utm" type tags)
     for a in soup.find_all('a', href=True):
         # Return the first search result URL
-        if 'url?q=' in a['href']:
-            return filter_link_args(a['href'])
-    return ''
+        if a['href'].startswith('http://') or a['href'].startswith('https://'):
+            first_link = a['href']
+            break
+
+    # Add the details back
+    for orig_detail, details in zip(orig_details, soup.find_all('removed_details')):
+        details.replace_with(orig_detail)
+
+    return first_link
 
 
 def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str:
diff --git a/app/utils/search.py b/app/utils/search.py
index 6e2d62d..ed3b0f6 100644
--- a/app/utils/search.py
+++ b/app/utils/search.py
@@ -102,9 +102,15 @@ class Search:
             except InvalidToken:
                 pass
 
-        # Strip leading '! ' for "feeling lucky" queries
-        self.feeling_lucky = q.startswith('! ')
-        self.query = q[2:] if self.feeling_lucky else q
+        # Strip a standalone '!' token for "feeling lucky" queries
+        if match := re.search(r"(^|\s)!($|\s)", q):
+            self.feeling_lucky = True
+            start, end = match.span()
+            self.query = " ".join([seg for seg in [q[:start], q[end:]] if seg])
+        else:
+            self.feeling_lucky = False
+            self.query = q
+
         # Check for possible widgets
         self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" +
                         "($|( *[^a-z0-9] *(((addres|address|adres|" +
@@ -161,22 +167,25 @@ class Search:
         if g.user_request.tor_valid:
             html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser'))
 
+        formatted_results = content_filter.clean(html_soup)
         if self.feeling_lucky:
-            return get_first_link(html_soup)
-        else:
-            formatted_results = content_filter.clean(html_soup)
-
-            # Append user config to all search links, if available
-            param_str = ''.join('&{}={}'.format(k, v)
-                                for k, v in
-                                self.request_params.to_dict(flat=True).items()
-                                if self.config.is_safe_key(k))
-            for link in formatted_results.find_all('a', href=True):
-                link['rel'] = "nofollow noopener noreferrer"
-                if 'search?' not in link['href'] or link['href'].index(
-                        'search?') > 1:
-                    continue
-                link['href'] += param_str
-
-            return str(formatted_results)
+            if lucky_link := get_first_link(formatted_results):
+                return lucky_link
+
+            # Fall through to regular search if unable to find link
+            self.feeling_lucky = False
+
+        # Append user config to all search links, if available
+        param_str = ''.join('&{}={}'.format(k, v)
+                            for k, v in
+                            self.request_params.to_dict(flat=True).items()
+                            if self.config.is_safe_key(k))
+        for link in formatted_results.find_all('a', href=True):
+            link['rel'] = "nofollow noopener noreferrer"
+            if 'search?' not in link['href'] or link['href'].index(
+                    'search?') > 1:
+                continue
+            link['href'] += param_str
+
+        return str(formatted_results)
 
diff --git a/test/test_routes.py b/test/test_routes.py
index 6409f2d..1f64827 100644
--- a/test/test_routes.py
+++ b/test/test_routes.py
@@ -17,8 +17,15 @@ def test_search(client):
 
 
 def test_feeling_lucky(client):
-    rv = client.get(f'/{Endpoint.search}?q=!%20test')
+    # Bang at beginning of query
+    rv = client.get(f'/{Endpoint.search}?q=!%20wikipedia')
     assert rv._status_code == 303
+    assert rv.headers.get('Location').startswith('https://www.wikipedia.org')
+
+    # Move bang to end of query
+    rv = client.get(f'/{Endpoint.search}?q=github%20!')
+    assert rv._status_code == 303
+    assert rv.headers.get('Location').startswith('https://github.com')
 
 
 def test_ddg_bang(client):