From 027230baaf4167c5d470859201641a9ff4eb894f Mon Sep 17 00:00:00 2001 From: Logan Kilpatrick <23kilpatrick23@gmail.com> Date: Mon, 6 Feb 2023 11:28:31 -0600 Subject: [PATCH] Update web-qa.py --- solutions/web_crawl_Q&A/web-qa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solutions/web_crawl_Q&A/web-qa.py b/solutions/web_crawl_Q&A/web-qa.py index 45c823c1..b4c05c01 100644 --- a/solutions/web_crawl_Q&A/web-qa.py +++ b/solutions/web_crawl_Q&A/web-qa.py @@ -135,7 +135,7 @@ def crawl(url): print(url) # for debugging and to see the progress # Save text from the url to a .txt file - with open('text/'+local_domain+'/'+url[8:].replace("/", "_") + ".txt", "w", encoding="utf-8") as f: + with open('text/'+local_domain+'/'+url[8:].replace("/", "_") + ".txt", "w", encoding="UTF-8") as f: # Get the text from the URL using BeautifulSoup soup = BeautifulSoup(requests.get(url).text, "html.parser") @@ -181,7 +181,7 @@ texts=[] for file in os.listdir("text/" + domain + "/"): # Open the file and read the text - with open("text/" + domain + "/" + file, "r", encoding="utf-8") as f: + with open("text/" + domain + "/" + file, "r", encoding="UTF-8") as f: text = f.read() # Omit the first 11 lines and the last 4 lines, then replace -, _, and #update with spaces.