From 859502b16c132e6d2f02d5233233f20f78847bdb Mon Sep 17 00:00:00 2001 From: Walter Beller-Morales Date: Tue, 28 Mar 2023 18:28:20 -0400 Subject: [PATCH] Fix issue#1712: Update `BaseQAWithSourcesChain` to handle space & newline after `SOURCES:` (#2118) Fix the issue outlined in #1712 to ensure the `BaseQAWithSourcesChain` can properly separate the sources from an agent response even when they are delineated by a newline. This will ensure the `BaseQAWithSourcesChain` can reliably handle both of these agent outputs: * `"This Agreement is governed by English law.\nSOURCES: 28-pl"` -> `"This Agreement is governed by English law.\n`, `"28-pl"` * `"This Agreement is governed by English law.\nSOURCES:\n28-pl"` -> `"This Agreement is governed by English law.\n`, `"28-pl"` I couldn't find any unit tests for this but please let me know if you'd like me to add any test coverage. --- langchain/chains/qa_with_sources/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/langchain/chains/qa_with_sources/base.py b/langchain/chains/qa_with_sources/base.py index 628a7b3603..499e4e2531 100644 --- a/langchain/chains/qa_with_sources/base.py +++ b/langchain/chains/qa_with_sources/base.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional @@ -116,8 +117,8 @@ class BaseQAWithSourcesChain(Chain, BaseModel, ABC): def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]: docs = self._get_docs(inputs) answer, _ = self.combine_documents_chain.combine_docs(docs, **inputs) - if "SOURCES: " in answer: - answer, sources = answer.split("SOURCES: ") + if re.search(r"SOURCES:\s", answer): + answer, sources = re.split(r"SOURCES:\s", answer) else: sources = "" result: Dict[str, Any] = {