Fix issue #1712: Update `BaseQAWithSourcesChain` to handle a space or newline after `SOURCES:` (#2118)

Fix the issue outlined in #1712 so that `BaseQAWithSourcesChain` can properly separate the sources from the answer in an agent response even when `SOURCES:` is followed by a newline instead of a space. With this change, `BaseQAWithSourcesChain` reliably handles both of these agent outputs:
* `"This Agreement is governed by English law.\nSOURCES: 28-pl"` -> `"This Agreement is governed by English law.\n"`, `"28-pl"`
* `"This Agreement is governed by English law.\nSOURCES:\n28-pl"` -> `"This Agreement is governed by English law.\n"`, `"28-pl"`
I couldn't find any unit tests for this, but please let me know if you'd like me to add any test coverage.
1 year ago · 859502b16c
parent c33e055f17
commit 859502b16c
1 changed file with 3 additions and 2 deletions
--- a/langchain/chains/qa_with_sources/base.py
+++ b/langchain/chains/qa_with_sources/base.py
@@ -2,6 +2,7 @@

 from __future__ import annotations

+import re
 from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional

@@ -116,8 +117,8 @@ class BaseQAWithSourcesChain(Chain, BaseModel, ABC):
    def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        docs = self._get_docs(inputs)
        answer, _ = self.combine_documents_chain.combine_docs(docs, **inputs)
-        if "SOURCES: " in answer:
-            answer, sources = answer.split("SOURCES: ")
+        if re.search(r"SOURCES:\s", answer):
+            answer, sources = re.split(r"SOURCES:\s", answer)
        else:
            sources = ""
        result: Dict[str, Any] = {