From 991fe6d910f0d191f7e0aca3af8724b55f074fb9 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 4 Feb 2023 16:36:16 -0700 Subject: [PATCH] Exclude subdomain in Medium->Scribe redirects Medium redirects needed further cleanup to account for instances where a link contains a subdomain that would not make sense in a Farside redirect link. Fixes #947 --- app/filter.py | 9 ++++----- app/utils/results.py | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/app/filter.py b/app/filter.py index 1fee1dc..87b8404 100644 --- a/app/filter.py +++ b/app/filter.py @@ -459,7 +459,7 @@ class Filter: if any(url in link_netloc for url in unsupported_g_pages): # FIXME: The "Shopping" tab requires further filtering (see #136) # Temporarily removing all links to that tab for now. - + # Replaces the /url google unsupported link to the direct url link['href'] = link_netloc parent = link.parent @@ -588,10 +588,9 @@ class Filter: # replaced (i.e. 'philomedium.com' should stay as it is). if 'medium.com' in link_str: if link_str.startswith('medium.com') or '.medium.com' in link_str: - new_desc.string = link_str.replace( - 'medium.com', 'farside.link/scribe') - else: - new_desc.string = link_str + link_str = 'farside.link/scribe' + link_str[ + link_str.find('medium.com') + len('medium.com'):] + new_desc.string = link_str else: new_desc.string = link_str.replace(site, alt) diff --git a/app/utils/results.py b/app/utils/results.py index 640a93b..4ae8c9f 100644 --- a/app/utils/results.py +++ b/app/utils/results.py @@ -186,6 +186,8 @@ def get_site_alt(link: str) -> str: if 'wikipedia' in hostname and len(subdomain) == 2: hostname = f'{subdomain}.{hostname}' params = f'?lang={subdomain}' + elif 'medium' in hostname and len(subdomain) > 0: + hostname = f'{subdomain}.{hostname}' parsed_alt = urlparse.urlparse(SITE_ALTS[site_key]) link = link.replace(hostname, SITE_ALTS[site_key]) + params