From d9f1bcf366b5a66021d246d8e9c56e76fe60ead1 Mon Sep 17 00:00:00 2001 From: Donato Azevedo Date: Wed, 25 Oct 2023 17:34:58 -0300 Subject: [PATCH] Strips leading/trailing whitespace before parsing xml (#12297) **Description:** When LLMs output XML with leading or trailing whitespace (when using XMLOutputParser), the parser would raise a `ValueError: Could not parse output: ...`. However, leading and trailing whitespace is "ignorable" in the sense of the XML standard. **Issue:** I did not find a related issue. **Dependencies:** None **Tag maintainer:** **Twitter handle:** donatoaz Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. Done, updated unit test and ran `make docker_test`. --- libs/langchain/langchain/output_parsers/xml.py | 2 ++ .../tests/unit_tests/output_parsers/test_xml_parser.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/output_parsers/xml.py b/libs/langchain/langchain/output_parsers/xml.py index 16dde71acc..8ffe4167b6 100644 --- a/libs/langchain/langchain/output_parsers/xml.py +++ b/libs/langchain/langchain/output_parsers/xml.py @@ -22,6 +22,8 @@ class XMLOutputParser(BaseOutputParser): encoding_match = self.encoding_matcher.search(text) if encoding_match: text = encoding_match.group(2) + + text = text.strip() if (text.startswith("<") or text.startswith("\n<")) and ( text.endswith(">") or text.endswith(">\n") ): diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_xml_parser.py b/libs/langchain/tests/unit_tests/output_parsers/test_xml_parser.py index c26fa57912..3830d25f8d 100644 --- a/libs/langchain/tests/unit_tests/output_parsers/test_xml_parser.py +++ b/libs/langchain/tests/unit_tests/output_parsers/test_xml_parser.py @@ -4,7 +4,7 @@ import pytest from langchain.output_parsers.xml import XMLOutputParser DEF_RESULT_ENCODING = """ - + slim.shady