whoogle-search/app/utils/bangs.py
Ben Busby 2395bb7a6a
Remove version from DDG bangs url
Including the version portion of the URL now redirects to search results
for the name of the bang file, rather than returning the bang file
itself. Removing the version from the URL returns the correct bang file.
2024-03-06 09:35:48 -07:00

91 lines
2.5 KiB
Python

import json
import requests
import urllib.parse as urlparse
DDG_BANGS = 'https://duckduckgo.com/bang.js'
def gen_bangs_json(bangs_file: str) -> None:
"""Generates a json file from the DDG bangs list
Args:
bangs_file: The str path to the new DDG bangs json file
Returns:
None
"""
try:
# Request full list from DDG
r = requests.get(DDG_BANGS)
r.raise_for_status()
except requests.exceptions.HTTPError as err:
raise SystemExit(err)
# Convert to json
data = json.loads(r.text)
# Set up a json object (with better formatting) for all available bangs
bangs_data = {}
for row in data:
bang_command = '!' + row['t']
bangs_data[bang_command] = {
'url': row['u'].replace('{{{s}}}', '{}'),
'suggestion': bang_command + ' (' + row['s'] + ')'
}
json.dump(bangs_data, open(bangs_file, 'w'))
print('* Finished creating ddg bangs json')
def resolve_bang(query: str, bangs_dict: dict) -> str:
"""Transform's a user's query to a bang search, if an operator is found
Args:
query: The search query
bangs_dict: The dict of available bang operators, with corresponding
format string search URLs
(i.e. "!w": "https://en.wikipedia.org...?search={}")
Returns:
str: A formatted redirect for a bang search, or an empty str if there
wasn't a match or didn't contain a bang operator
"""
#if ! not in query simply return (speed up processing)
if '!' not in query:
return ''
split_query = query.strip().split(' ')
# look for operator in query if one is found, list operator should be of
# length 1, operator should not be case-sensitive here to remove it later
operator = [
word
for word in split_query
if word.lower() in bangs_dict
]
if len(operator) == 1:
# get operator
operator = operator[0]
# removes operator from query
split_query.remove(operator)
# rebuild the query string
bang_query = ' '.join(split_query).strip()
# Check if operator is a key in bangs and get bang if exists
bang = bangs_dict.get(operator.lower(), None)
if bang:
bang_url = bang['url']
if bang_query:
return bang_url.replace('{}', bang_query, 1)
else:
parsed_url = urlparse.urlparse(bang_url)
return f'{parsed_url.scheme}://{parsed_url.netloc}'
return ''