Fixed regex bug introduced in RetrievalQAWithSources by previous update (#9898)

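- Description: In my previous PR, I modified the code to match all
variants of the sources label [SOURCES, sources, Source, Sources].
However, that change matched the label when it was followed by either a
colon or whitespace, when it should have matched only when followed by a
colon. As a result, an answer that merely contained the word "sources"
in its text (e.g. "According to the sources ...") was split at the wrong
place. This change restricts the match to the label followed by a colon.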
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
This commit is contained in:
Bagatur 2023-08-29 17:32:24 -07:00 committed by GitHub
commit ec362ecbe2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 2 deletions

View File

@ -120,9 +120,9 @@ class BaseQAWithSourcesChain(Chain, ABC):
def _split_sources(self, answer: str) -> Tuple[str, str]:
"""Split sources from answer."""
if re.search(r"SOURCES?[:\s]", answer, re.IGNORECASE):
if re.search(r"SOURCES?:", answer, re.IGNORECASE):
answer, sources = re.split(
r"SOURCES?[:\s]|QUESTION:\s", answer, flags=re.IGNORECASE
r"SOURCES?:|QUESTION:\s", answer, flags=re.IGNORECASE
)[:2]
sources = re.split(r"\n", sources)[0].strip()
else:

View File

@ -27,6 +27,12 @@ from tests.unit_tests.llms.fake_llm import FakeLLM
"This Agreement is governed by English law.\n",
"28-pl",
),
(
"According to the sources the agreement is governed by English law.\n"
"Source: 28-pl",
"According to the sources the agreement is governed by English law.\n",
"28-pl",
),
(
"This Agreement is governed by English law.\n"
"SOURCES: 28-pl\n\n"