Bold search query in results (#487)

This modifies the search result page by bolding every appearance of any word from the original query. If portions of the query are in quotes (e.g. "ice cream"), only exact matches of that sequence of words will be made bold.

Co-authored-by: Ben Busby <noreply+git@benbusby.com>
3 years ago · 2c9cf3ecc6
parent 90441b2668
commit 2c9cf3ecc6
2 changed files with 43 additions and 1 deletion
--- a/app/routes.py
+++ b/app/routes.py
@@ -14,6 +14,7 @@ from app.request import Request, TorError
 from app.utils.bangs import resolve_bang
 from app.utils.misc import read_config_bool
 from app.utils.results import add_ip_card
+from app.utils.results import bold_search_terms
 from app.utils.search import *
 from app.utils.session import generate_user_key, valid_user_session
 from bs4 import BeautifulSoup as bsoup
@@ -250,7 +251,7 @@ def search():

    # Return 503 if temporarily blocked by captcha
    resp_code = 503 if has_captcha(str(response)) else 200
-
+    response = bold_search_terms(response, query)
    # Feature to display IP address
    if search_util.check_kw_ip():
        html_soup = bsoup(response, "html.parser")
--- a/app/utils/results.py
+++ b/app/utils/results.py
@@ -2,6 +2,8 @@ from bs4 import BeautifulSoup
 import os
 import urllib.parse as urlparse
 from urllib.parse import parse_qs
+import re
+from bs4 import NavigableString


 SKIP_ARGS = ['ref_src', 'utm']
@@ -34,6 +36,45 @@ SITE_ALTS = {
 }


+def bold_search_terms(response: str, query: str) -> BeautifulSoup:
+    """Wraps all search terms in bold tags (<b>). If any terms are wrapped
+    in quotes, only that exact phrase will be made bold.
+
+    Matching ignores case, each match keeps the case found in the page, and
+    all terms are applied in one pass so no match is ever wrapped twice.
+
+    Args:
+        response: The initial response body for the query
+        query: The original search query
+
+    Returns:
+        BeautifulSoup: modified soup object with bold items
+    """
+    soup = BeautifulSoup(response, 'html.parser')
+
+    # Split all words out of query, grouping the ones wrapped in quotes
+    terms = []
+    for word in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
+        word = re.sub(r'[^A-Za-z0-9 ]+', '', word).strip().lower()
+        # An empty term would match at every position of every text node
+        if word and word not in terms:
+            terms.append(word)
+    if not terms:
+        return soup
+
+    # Longest first, so a quoted phrase wins over a single word inside it
+    terms.sort(key=len, reverse=True)
+    pattern = re.compile('|'.join(map(re.escape, terms)), re.I)
+    for nav_str in soup.find_all(text=pattern):
+        # Bold markup inside <script>/<style> would corrupt the JS/CSS
+        if getattr(nav_str.parent, 'name', None) in ('script', 'style'):
+            continue
+        # NOTE(review): markup is inserted as text, not Tags; confirm caller
+        nav_str.replace_with(pattern.sub(r'<b>\g<0></b>', nav_str))
+
+    return soup
+
+
 def has_ad_content(element: str) -> bool:
    """Inspects an HTML element for ad related content