From 3f8f3de28e9e33cbf0889111a0362116de7a8928 Mon Sep 17 00:00:00 2001 From: Davide Menini <48685774+dmenini@users.noreply.github.com> Date: Fri, 1 Sep 2023 02:11:52 +0200 Subject: [PATCH] fix (parsers/json): do not escape double quotes if already escaped (#9916) This PR fixes an issue I found when upgrading to a more recent version of Langchain. I was using 0.0.142 before, and this issue popped up already when the `_custom_parser` was added to `output_parsers/json`. Anyway, the issue is that the parser tries to escape quotes when they are double-escaped (e.g. `\\"`), leading to OutputParserException. This is particularly undesired in my app, because I have an Agent that uses a single input Tool, which expects as input a JSON string with the structure: ```python { "foo": string, "bar": string } ``` The LLM (GPT3.5) response is (almost) always something like `"action_input": "{\\"foo\\": \\"bar\\", \\"bar\\": \\"foo\\"}"` and since the upgrade this is not correctly parsed. --------- Co-authored-by: taamedag --- .../langchain/output_parsers/json.py | 2 +- .../unit_tests/output_parsers/test_json.py | 52 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/output_parsers/json.py b/libs/langchain/langchain/output_parsers/json.py index f0f653eca2..7465aba2fe 100644 --- a/libs/langchain/langchain/output_parsers/json.py +++ b/libs/langchain/langchain/output_parsers/json.py @@ -13,7 +13,7 @@ def _replace_new_line(match: re.Match[str]) -> str: value = re.sub(r"\n", r"\\n", value) value = re.sub(r"\r", r"\\r", value) value = re.sub(r"\t", r"\\t", value) - value = re.sub('"', r"\"", value) + value = re.sub(r'(?<!\\)"', r"\"", value) [...] -> None: "action": "Final Answer", "action_input": '```bar\n
\n\ttext\n
```', } + + +TEST_CASES_ESCAPED_QUOTES = [ + JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON, + JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON, + JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON, +] + + +@pytest.mark.parametrize("json_string", TEST_CASES_ESCAPED_QUOTES) +def test_parse_nested_json_with_escaped_quotes(json_string: str) -> None: + parsed = parse_json_markdown(json_string) + assert parsed == { + "action": "Final Answer", + "action_input": '{"foo": "bar", "bar": "foo"}', + } + + +def test_parse_json_with_python_dict() -> None: + parsed = parse_json_markdown(JSON_WITH_PYTHON_DICT) + assert parsed == { + "action": "Final Answer", + "action_input": {"foo": "bar", "bar": "foo"}, + }