From 3dc6d143777e903fa7cab649de661be1394ec8dc Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Thu, 8 Dec 2022 10:54:21 -0700 Subject: [PATCH] Only extract domain+ext when using site alts Parent sites using a 'www' subdomain or something similar were not redirecting properly. This updates the hostname check to only validate against the primary domain, except for Wikipedia since the subdomain is used for interface translation in that case. Fixes #901 --- app/utils/results.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/app/utils/results.py b/app/utils/results.py index 4a330fd..f8944bc 100644 --- a/app/utils/results.py +++ b/app/utils/results.py @@ -134,7 +134,12 @@ def get_site_alt(link: str) -> str: # Need to replace full hostname with alternative to encapsulate # subdomains as well parsed_link = urlparse.urlparse(link) - hostname = parsed_link.hostname + + # Extract subdomain separately from the domain+tld. The subdomain + # is used for wikiless translations. + split_host = parsed_link.netloc.split('.') + subdomain = split_host[0] if len(split_host) > 2 else '' + hostname = '.'.join(split_host[-2:]) # The full scheme + hostname is used when comparing against the list of # available alternative services, due to how Medium links are constructed. @@ -151,10 +156,9 @@ def get_site_alt(link: str) -> str: # a 2-char language code) to be passed as a URL param to Wikiless # in order to preserve the language setting. params = '' - if 'wikipedia' in hostname: - subdomain = hostname.split('.')[0] - if len(subdomain) == 2: - params = f'?lang={subdomain}' + if 'wikipedia' in hostname and len(subdomain) == 2: + hostname = f'{subdomain}.{hostname}' + params = f'?lang={subdomain}' parsed_alt = urlparse.urlparse(SITE_ALTS[site_key]) link = link.replace(hostname, SITE_ALTS[site_key]) + params