From ee9b6268d450bc62c275e095cce2d0d275a92cbd Mon Sep 17 00:00:00 2001 From: Alexander Khapaev Date: Fri, 7 Apr 2023 18:28:44 +0300 Subject: [PATCH] Update get_domain_hyperlinks to skip tel: links, as it already does for mailto: links, so that they are excluded from the clean links list. --- apps/web-crawl-q-and-a/web-qa.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/web-crawl-q-and-a/web-qa.py b/apps/web-crawl-q-and-a/web-qa.py index 0feb382..2be2a60 100644 --- a/apps/web-crawl-q-and-a/web-qa.py +++ b/apps/web-crawl-q-and-a/web-qa.py @@ -87,7 +87,11 @@ def get_domain_hyperlinks(local_domain, url): else: if link.startswith("/"): link = link[1:] - elif link.startswith("#") or link.startswith("mailto:"): + elif ( + link.startswith("#") + or link.startswith("mailto:") + or link.startswith("tel:") + ): continue clean_link = "https://" + local_domain + "/" + link