diff --git a/app/utils/results.py b/app/utils/results.py index 2f75646..dbd60cc 100644 --- a/app/utils/results.py +++ b/app/utils/results.py @@ -144,12 +144,26 @@ def get_first_link(soup: BeautifulSoup) -> str: str: A str link to the first result """ + first_link = '' + orig_details = [] + + # Temporarily remove details so we don't grab those links + for details in soup.find_all('details'): + temp_details = soup.new_tag('removed_details') + orig_details.append(details.replace_with(temp_details)) + # Replace hrefs with only the intended destination (no "utm" type tags) for a in soup.find_all('a', href=True): # Return the first search result URL if a['href'].startswith('http://') or a['href'].startswith('https://'): - return a['href'] - return '' + first_link = a['href'] + break + + # Add the details back + for orig_detail, details in zip(orig_details, soup.find_all('removed_details')): + details.replace_with(orig_detail) + + return first_link def get_site_alt(link: str, site_alts: dict = SITE_ALTS) -> str: diff --git a/app/utils/search.py b/app/utils/search.py index 59f630d..ed3b0f6 100644 --- a/app/utils/search.py +++ b/app/utils/search.py @@ -102,9 +102,15 @@ class Search: except InvalidToken: pass - # Strip leading '! ' for "feeling lucky" queries - self.feeling_lucky = q.startswith('! ') - self.query = q[2:] if self.feeling_lucky else q + # Strip '!' for "feeling lucky" queries + if match := re.search("(^|\s)!($|\s)", q): + self.feeling_lucky = True + start, end = match.span() + self.query = " ".join([seg for seg in [q[:start], q[end:]] if seg]) + else: + self.feeling_lucky = False + self.query = q + # Check for possible widgets self.widget = "ip" if re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" + "($|( *[^a-z0-9] *(((addres|address|adres|" +