langchain[patch]: make BooleanOutputParser check words not substrings (#20064)

- **Description**: fixes `BooleanOutputParser` matching substrings instead of whole words. For example, "NOW this is likely (YES)" now parses as `True` instead of raising the "Ambiguous response" `ValueError`, because the "NO" inside "NOW" is no longer counted as a match.
- **Issue(s)**: fixes #11408 (follow-up to #17810)
- **Dependencies**: None
- **GitHub handle**: @casperdcl
- [x] **Add tests and docs**: If you're adding a new integration, please include
  1. a test for the integration, preferably unit tests that do not rely on network access,
  2. an example notebook showing its use. It lives in `docs/docs/integrations` directory.
- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
6 months ago · b972f394c8
parent add31f46d0
commit b972f394c8
2 changed files with 38 additions and 29 deletions
--- a/libs/langchain/langchain/output_parsers/boolean.py
+++ b/libs/langchain/langchain/output_parsers/boolean.py
@@ -1,3 +1,5 @@
 import re
 from langchain_core.output_parsers import BaseOutputParser
@@ -17,26 +19,31 @@ class BooleanOutputParser(BaseOutputParser[bool]):
        Returns:
            boolean
        """
-        cleaned_upper_text = text.strip().upper()
+        regexp = rf"\b({self.true_val}|{self.false_val})\b"
-        if (
+
-            self.true_val.upper() in cleaned_upper_text
+        truthy = {
-            and self.false_val.upper() in cleaned_upper_text
+            val.upper()
-        ):
+            for val in re.findall(regexp, text, flags=re.IGNORECASE | re.MULTILINE)
-            raise ValueError(
+        }
-                f"Ambiguous response. Both {self.true_val} and {self.false_val} in "
+        if self.true_val.upper() in truthy:
-                f"received: {text}."
+            if self.false_val.upper() in truthy:
-            )
+                raise ValueError(
-        elif self.true_val.upper() in cleaned_upper_text:
+                    f"Ambiguous response. Both {self.true_val} and {self.false_val} "
                    f"in received: {text}."
                )
            return True
-        elif self.false_val.upper() in cleaned_upper_text:
+        elif self.false_val.upper() in truthy:
            if self.true_val.upper() in truthy:
                raise ValueError(
                    f"Ambiguous response. Both {self.true_val} and {self.false_val} "
                    f"in received: {text}."
                )
            return False
-        else:
+        raise ValueError(
-            raise ValueError(
+            f"BooleanOutputParser expected output value to include either "
-                f"BooleanOutputParser expected output value to include either "
+            f"{self.true_val} or {self.false_val}. Received {text}."
-                f"{self.true_val} or {self.false_val}. Received {text}."
+        )
            )
    @property
    def _type(self) -> str:
--- a/libs/langchain/tests/unit_tests/output_parsers/test_boolean_parser.py
+++ b/libs/langchain/tests/unit_tests/output_parsers/test_boolean_parser.py
@@ -1,3 +1,5 @@
 import pytest
 from langchain.output_parsers.boolean import BooleanOutputParser
@@ -24,16 +26,16 @@ def test_boolean_output_parser_parse() -> None:
    result = parser.parse("Not relevant (NO)")
    assert result is False
    # Test valid input
    result = parser.parse("NOW this is relevant (YES)")
    assert result is True
    # Test ambiguous input
-    try:
+    with pytest.raises(ValueError):
-        parser.parse("yes and no")
+        parser.parse("YES NO")
-        assert False, "Should have raised ValueError"
+
-    except ValueError:
+    with pytest.raises(ValueError):
-        pass
+        parser.parse("NO YES")
-
+    # Bad input
-    # Test invalid input
+    with pytest.raises(ValueError):
-    try:
+        parser.parse("BOOM")
        parser.parse("INVALID")
        assert False, "Should have raised ValueError"
    except ValueError:
        pass