fix regex pattern of structured output parser (#12929)

- **Description:** Fix the regex pattern of [StructuredChatOutputParser](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/structured_chat/output_parser.py#L18) and add unit tests for the code change.
- **Issue:** #12158, #12922
- **Dependencies:** None
- **Tag maintainer:**
- **Twitter handle:** @hmdev3
- **NOTE:** This PR conflicts with #7495. After #7495 is merged, I will update this PR.
11 months ago · 622bf12c2e
parent 8c02f4fbd8
commit 622bf12c2e
2 changed files with 61 additions and 3 deletions
--- a/libs/langchain/langchain/agents/structured_chat/output_parser.py
+++ b/libs/langchain/langchain/agents/structured_chat/output_parser.py
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 class StructuredChatOutputParser(AgentOutputParser):
    """Output parser for the structured chat agent."""

-    pattern = re.compile(r"```(?:json)?\n(.*?)```", re.DOTALL)
+    pattern = re.compile(r"```(?:json\s+)?(\W.*?)```", re.DOTALL)

    def get_format_instructions(self) -> str:
        return FORMAT_INSTRUCTIONS
--- a/libs/langchain/tests/unit_tests/agents/test_structured_chat.py
+++ b/libs/langchain/tests/unit_tests/agents/test_structured_chat.py
@@ -2,7 +2,7 @@
 from typing import Tuple

 from langchain.agents.structured_chat.output_parser import StructuredChatOutputParser
-from langchain.schema import AgentAction
+from langchain.schema import AgentAction, AgentFinish

 output_parser = StructuredChatOutputParser()

@@ -11,8 +11,10 @@ def get_action_and_input(text: str) -> Tuple[str, str]:
    output = output_parser.parse(text)
    if isinstance(output, AgentAction):
        return output.tool, str(output.tool_input)
+    elif isinstance(output, AgentFinish):
+        return output.return_values["output"], output.log
    else:
-        return "Final Answer", output.return_values["output"]
+        raise ValueError("Unexpected output type")


 def test_parse_with_language() -> None:
@@ -45,3 +47,59 @@ def test_parse_without_language() -> None:
    action, action_input = get_action_and_input(llm_output)
    assert action == "foo"
    assert action_input == "bar"
+
+
+def test_parse_with_language_and_spaces() -> None:
+    llm_output = """I can use the `foo` tool to achieve the goal.
+
+    Action:
+    ```json     
+
+    {
+      "action": "foo",
+      "action_input": "bar"
+    }
+    ```
+    """
+    action, action_input = get_action_and_input(llm_output)
+    assert action == "foo"
+    assert action_input == "bar"
+
+
+def test_parse_without_language_without_a_new_line() -> None:
+    llm_output = """I can use the `foo` tool to achieve the goal.
+
+    Action:
+    ```{"action": "foo", "action_input": "bar"}```
+    """
+    action, action_input = get_action_and_input(llm_output)
+    assert action == "foo"
+    assert action_input == "bar"
+
+
+def test_parse_with_language_without_a_new_line() -> None:
+    llm_output = """I can use the `foo` tool to achieve the goal.
+
+    Action:
+    ```json{"action": "foo", "action_input": "bar"}```
+    """
+    # TODO: How should this be handled?
+    output, log = get_action_and_input(llm_output)
+    assert output == llm_output
+    assert log == llm_output
+
+
+def test_parse_case_matched_and_final_answer() -> None:
+    llm_output = """I can use the `foo` tool to achieve the goal.
+
+    Action:
+    ```json
+    {
+      "action": "Final Answer",
+      "action_input": "This is the final answer"
+    }
+    ```
+    """
+    output, log = get_action_and_input(llm_output)
+    assert output == "This is the final answer"
+    assert log == llm_output