From 8faf5947737b19a4ce812b9419f4a38514c91fac Mon Sep 17 00:00:00 2001
From: fabiofranco85
Date: Mon, 27 Mar 2023 07:38:35 -0300
Subject: [PATCH] Improve regex

---
 apps/web-crawl-q-and-a/web-qa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/web-crawl-q-and-a/web-qa.py b/apps/web-crawl-q-and-a/web-qa.py
index ccc34fdb..0feb3825 100644
--- a/apps/web-crawl-q-and-a/web-qa.py
+++ b/apps/web-crawl-q-and-a/web-qa.py
@@ -17,7 +17,7 @@ import numpy as np
 from openai.embeddings_utils import distances_from_embeddings, cosine_similarity
 
 # Regex pattern to match a URL
-HTTP_URL_PATTERN = r'^http[s]*://.+'
+HTTP_URL_PATTERN = r'^http[s]{0,1}://.+$'
 
 # Define root domain to crawl
 domain = "openai.com"