From b972f394c8bd3a56dea336b74fae998ea1e5c375 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 9 Apr 2024 21:43:31 +0100 Subject: [PATCH] langchain[patch]: make BooleanOutputParser check words not substrings (#20064) - **Description**: fixes BooleanOutputParser detecting sub-words ("NOW this is likely (YES)" -> `True`, not `AmbiguousError`) - **Issue(s)**: fixes #11408 (follow-up to #17810) - **Dependencies**: None - **GitHub handle**: @casperdcl - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ --------- Co-authored-by: Eugene Yurtsev --- .../langchain/output_parsers/boolean.py | 41 +++++++++++-------- .../output_parsers/test_boolean_parser.py | 26 ++++++------ 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/libs/langchain/langchain/output_parsers/boolean.py b/libs/langchain/langchain/output_parsers/boolean.py index c9e6dc615b..5114b975a7 100644 --- a/libs/langchain/langchain/output_parsers/boolean.py +++ b/libs/langchain/langchain/output_parsers/boolean.py @@ -1,3 +1,5 @@ +import re + from langchain_core.output_parsers import BaseOutputParser @@ -17,26 +19,31 @@ class BooleanOutputParser(BaseOutputParser[bool]): Returns: boolean - """ - cleaned_upper_text = text.strip().upper() - if ( - self.true_val.upper() in cleaned_upper_text - and self.false_val.upper() in cleaned_upper_text - ): - raise ValueError( - f"Ambiguous response. Both {self.true_val} and {self.false_val} in " - f"received: {text}." - ) - elif self.true_val.upper() in cleaned_upper_text: + regexp = rf"\b({self.true_val}|{self.false_val})\b" + + truthy = { + val.upper() + for val in re.findall(regexp, text, flags=re.IGNORECASE | re.MULTILINE) + } + if self.true_val.upper() in truthy: + if self.false_val.upper() in truthy: + raise ValueError( + f"Ambiguous response. Both {self.true_val} and {self.false_val} " + f"in received: {text}." + ) return True - elif self.false_val.upper() in cleaned_upper_text: + elif self.false_val.upper() in truthy: + if self.true_val.upper() in truthy: + raise ValueError( + f"Ambiguous response. Both {self.true_val} and {self.false_val} " + f"in received: {text}." + ) return False - else: - raise ValueError( - f"BooleanOutputParser expected output value to include either " - f"{self.true_val} or {self.false_val}. Received {text}." - ) + raise ValueError( + f"BooleanOutputParser expected output value to include either " + f"{self.true_val} or {self.false_val}. Received {text}." + ) @property def _type(self) -> str: diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_boolean_parser.py b/libs/langchain/tests/unit_tests/output_parsers/test_boolean_parser.py index 60cad855be..bae5992875 100644 --- a/libs/langchain/tests/unit_tests/output_parsers/test_boolean_parser.py +++ b/libs/langchain/tests/unit_tests/output_parsers/test_boolean_parser.py @@ -1,3 +1,5 @@ +import pytest + from langchain.output_parsers.boolean import BooleanOutputParser @@ -24,16 +26,16 @@ def test_boolean_output_parser_parse() -> None: result = parser.parse("Not relevant (NO)") assert result is False + # Test valid input + result = parser.parse("NOW this is relevant (YES)") + assert result is True + # Test ambiguous input - try: - parser.parse("yes and no") - assert False, "Should have raised ValueError" - except ValueError: - pass - - # Test invalid input - try: - parser.parse("INVALID") - assert False, "Should have raised ValueError" - except ValueError: - pass + with pytest.raises(ValueError): + parser.parse("YES NO") + + with pytest.raises(ValueError): + parser.parse("NO YES") + # Bad input + with pytest.raises(ValueError): + parser.parse("BOOM")