From 58b63dfb0ebe7613cfd714f35344c3a1c29fb5eb Mon Sep 17 00:00:00 2001 From: Sung Kim Date: Sun, 19 Feb 2023 11:00:27 +0900 Subject: [PATCH] Add handling for last chunk in split_into_many function I have added handling for the last chunk in the split_into_many function. Previously, the function did not account for the last chunk, which could lead to incomplete sentences in the output. To solve this, I added a conditional statement to check if the last chunk is non-empty. If it is, I append it to the list of chunks with a period to ensure the last sentence is complete. This change improves the accuracy of the split_into_many function and ensures that all sentences in the input text are properly segmented. Please review and let me know if you have any feedback or concerns. --- apps/web-crawl-q-and-a/web-qa.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/apps/web-crawl-q-and-a/web-qa.py b/apps/web-crawl-q-and-a/web-qa.py index 4a2224a3..a5a86dd5 100644 --- a/apps/web-crawl-q-and-a/web-qa.py +++ b/apps/web-crawl-q-and-a/web-qa.py @@ -248,6 +248,10 @@ def split_into_many(text, max_tokens = max_tokens): # Otherwise, add the sentence to the chunk and add the number of tokens to the total chunk.append(sentence) tokens_so_far += token + 1 + + # Add the last chunk to the list of chunks + if chunk: + chunks.append(". ".join(chunk) + ".") return chunks