From fab65d720d2f192a03be41c69695fc1657fa8229 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 4 Feb 2023 16:17:12 -0700 Subject: [PATCH 1/2] Update public instance info [skip ci] Closes #948 --- README.md | 2 +- misc/instances.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 87deba2..f7bb14e 100644 --- a/README.md +++ b/README.md @@ -625,7 +625,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching | [https://gowogle.voring.me](https://gowogle.voring.me) | 🇺🇸 US | Multi-choice | | | [https://whoogle.privacydev.net](https://whoogle.privacydev.net) | 🇺🇸 US | English | | | [https://wg.vern.cc](https://wg.vern.cc) | 🇺🇸 US | English | | -| [https://www.indexia.gq](https://www.indexia.gq) | 🇨🇦 CA | Multi-choice | ✅ | +| [https://whoogle.hxvy0.gq](https://whoogle.hxvy0.gq) | 🇨🇦 CA | Turkish Only | ✅ | | [https://whoogle.hostux.net](https://whoogle.hostux.net) | 🇫🇷 FR | Multi-choice | | | [https://whoogle.lunar.icu](https://whoogle.lunar.icu) | 🇩🇪 DE | Multi-choice | ✅ | | [https://whoogle.rhyshl.live](https://whoogle.rhyshl.live) | 🇬🇧 GB | Multi-choice | ✅ | diff --git a/misc/instances.txt b/misc/instances.txt index 9f471ae..dc045a8 100644 --- a/misc/instances.txt +++ b/misc/instances.txt @@ -11,7 +11,7 @@ https://gowogle.voring.me https://whoogle.privacydev.net https://whoogle.hostux.net https://wg.vern.cc -https://www.indexia.gq +https://whoogle.hxvy0.gq https://whoogle.ungovernable.men https://whoogle2.ungovernable.men https://whoogle3.ungovernable.men From 991fe6d910f0d191f7e0aca3af8724b55f074fb9 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 4 Feb 2023 16:36:16 -0700 Subject: [PATCH 2/2] Exclude subdomain in Medium->Scribe redirects Medium redirects needed further cleanup to account for instances where a link contains a subdomain that would not make sense in a Farside redirect link. 
Fixes #947 --- app/filter.py | 9 ++++----- app/utils/results.py | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/app/filter.py b/app/filter.py index 1fee1dc..87b8404 100644 --- a/app/filter.py +++ b/app/filter.py @@ -459,7 +459,7 @@ class Filter: if any(url in link_netloc for url in unsupported_g_pages): # FIXME: The "Shopping" tab requires further filtering (see #136) # Temporarily removing all links to that tab for now. - + # Replaces the /url google unsupported link to the direct url link['href'] = link_netloc parent = link.parent @@ -588,10 +588,9 @@ class Filter: # replaced (i.e. 'philomedium.com' should stay as it is). if 'medium.com' in link_str: if link_str.startswith('medium.com') or '.medium.com' in link_str: - new_desc.string = link_str.replace( - 'medium.com', 'farside.link/scribe') - else: - new_desc.string = link_str + link_str = 'farside.link/scribe' + link_str[ + link_str.find('medium.com') + len('medium.com'):] + new_desc.string = link_str else: new_desc.string = link_str.replace(site, alt) diff --git a/app/utils/results.py b/app/utils/results.py index 640a93b..4ae8c9f 100644 --- a/app/utils/results.py +++ b/app/utils/results.py @@ -186,6 +186,8 @@ def get_site_alt(link: str) -> str: if 'wikipedia' in hostname and len(subdomain) == 2: hostname = f'{subdomain}.{hostname}' params = f'?lang={subdomain}' + elif 'medium' in hostname and len(subdomain) > 0: + hostname = f'{subdomain}.{hostname}' parsed_alt = urlparse.urlparse(SITE_ALTS[site_key]) link = link.replace(hostname, SITE_ALTS[site_key]) + params