Merge pull request #289 from fabiofranco85/fabiofranco85-patch-1

Improve regex in web crawler
This commit is contained in:
Ted Sanders 2023-03-28 09:50:37 -07:00 committed by GitHub
commit 14f35297da

View File

@@ -17,7 +17,7 @@ import numpy as np
 from openai.embeddings_utils import distances_from_embeddings, cosine_similarity
 # Regex pattern to match a URL
-HTTP_URL_PATTERN = r'^http[s]*://.+'
+HTTP_URL_PATTERN = r'^http[s]{0,1}://.+$'
 # Define root domain to crawl
 domain = "openai.com"