2024-03-11 13:06:26 +00:00
|
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
# pylint: disable=missing-module-docstring
|
2015-06-09 14:16:07 +00:00
|
|
|
|
|
|
|
import re
|
2020-08-06 15:42:46 +00:00
|
|
|
from urllib.parse import urlunparse, parse_qsl, urlencode
|
2015-06-09 14:16:07 +00:00
|
|
|
|
2024-03-11 13:06:26 +00:00
|
|
|
from flask_babel import gettext
|
|
|
|
|
2021-12-27 08:26:22 +00:00
|
|
|
# Compiled patterns that identify tracking parameters.  They are applied with
# ``match()`` to the *name* of each query parameter, so every pattern is
# anchored at the start of the name and acts as a prefix test.
regexes = set(
    map(
        re.compile,
        (
            # any parameter whose name starts with "utm_"
            r'utm_[^&]+',
            # names starting with "wkey" or "wemail"
            r'(wkey|wemail)[^&]*',
            # names starting with one of the "_hs*" / "__hs*" / "hsCtaTracking" tokens
            r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*',
            # a name that consists of a lone "&"
            r'&$',
        ),
    )
)
|
2015-06-09 14:16:07 +00:00
|
|
|
|
|
|
|
# Plugin metadata exposed as module-level attributes.
# NOTE(review): presumably read by the plugin loader / preferences UI -- confirm
# against the code that imports this module.

# Preferences group this plugin is listed under.
preference_section = "privacy"

# The plugin is enabled unless switched off.
default_on = True

# Human-readable title and summary, passed through gettext for translation.
name = gettext("Tracker URL remover")
description = gettext("Remove trackers arguments from the returned URL")
|
2015-06-09 14:16:07 +00:00
|
|
|
|
|
|
|
|
2024-03-11 13:06:26 +00:00
|
|
|
def on_result(_request, _search, result):
    """Strip tracking parameters from the URL of a single result.

    The query string of ``result['parsed_url']`` is split into
    ``(name, value)`` pairs; every pair whose *name* matches one of the
    module-level ``regexes`` is dropped.  When at least one pair was dropped,
    ``result['parsed_url']`` and ``result['url']`` are rewritten from the
    remaining pairs.  Results without a ``'parsed_url'`` entry, with an empty
    query string, or without any tracker parameter are left untouched.

    :param _request: unused.
    :param _search: unused.
    :param result: mapping describing the result; read and modified in place.
    :returns: always ``True``.
    """
    if 'parsed_url' not in result:
        return True

    parsed_url = result['parsed_url']
    query = parsed_url.query

    if query == "":
        return True

    # keep_blank_values=True so that (a) unrelated parameters with an empty
    # value survive the rebuild below instead of being silently discarded and
    # (b) tracker parameters with an empty value are seen and removed too.
    parsed_query = parse_qsl(query, keep_blank_values=True)

    remaining = [
        (param_name, value)
        for param_name, value in parsed_query
        if not any(reg.match(param_name) for reg in regexes)
    ]

    # Rebuild the URL only when something was removed, so URLs without
    # trackers keep their original query string byte-for-byte.
    if len(remaining) != len(parsed_query):
        result['parsed_url'] = parsed_url._replace(query=urlencode(remaining))
        result['url'] = urlunparse(result['parsed_url'])

    return True
|