From 5a80ef2571305af61b5d789011be7041c83983f9 Mon Sep 17 00:00:00 2001
From: fabiofranco85
Date: Mon, 27 Mar 2023 07:38:35 -0300
Subject: [PATCH] Improve regex

---
 apps/web-crawl-q-and-a/web-qa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/web-crawl-q-and-a/web-qa.py b/apps/web-crawl-q-and-a/web-qa.py
index ccc34fdb..0feb3825 100644
--- a/apps/web-crawl-q-and-a/web-qa.py
+++ b/apps/web-crawl-q-and-a/web-qa.py
@@ -17,7 +17,7 @@ import numpy as np
 from openai.embeddings_utils import distances_from_embeddings, cosine_similarity
 
 # Regex pattern to match a URL
-HTTP_URL_PATTERN = r'^http[s]*://.+'
+HTTP_URL_PATTERN = r'^http[s]{0,1}://.+$'
 
 # Define root domain to crawl
 domain = "openai.com"