fix (parsers/json): do not escape double quotes if already escaped (#9916)

This PR fixes an issue I found when upgrading to a more recent version
of Langchain. I was using 0.0.142 before, and this issue popped up
already when the `_custom_parser` was added to `output_parsers/json`.

Anyway, the issue is that the parser escapes quotes even when they are
already escaped (e.g. `\\"`), leading to an `OutputParserException`.
This is particularly undesired in my app, because I have an Agent that
uses a single input Tool, which expects as input a JSON string with the
structure:
```python
{
    "foo": string,
    "bar": string
}
```
The LLM (GPT3.5) response is (almost) always something like
`"action_input": "{\\"foo\\": \\"bar\\", \\"bar\\": \\"foo\\"}"` and
since the upgrade this is not correctly parsed.

---------

Co-authored-by: taamedag <Davide.Menini@swisscom.com>
This commit is contained in:
Davide Menini 2023-09-01 02:11:52 +02:00 committed by GitHub
parent ad9e242a7a
commit 3f8f3de28e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 1 deletions

View File

@ -13,7 +13,7 @@ def _replace_new_line(match: re.Match[str]) -> str:
value = re.sub(r"\n", r"\\n", value)
value = re.sub(r"\r", r"\\r", value)
value = re.sub(r"\t", r"\\t", value)
value = re.sub('"', r"\"", value)
value = re.sub(r'(?<!\\)"', r"\"", value)
return match.group(1) + value + match.group(3)

View File

@ -67,6 +67,34 @@ JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
}
```"""
# NOTE: the fixtures below are ordinary (non-raw) triple-quoted literals, so
# Python resolves one level of backslash escapes before the text ever reaches
# the parser under test.

# Nested JSON passed as a string value whose inner quotes are not escaped.
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
{
"action": "Final Answer",
"action_input": "{"foo": "bar", "bar": "foo"}"
}
```"""
# Inner quotes are written as \" in the source. In a non-raw literal that
# evaluates to a bare ", so as written here the runtime text is the same as
# the unescaped fixture above.
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
{
"action": "Final Answer",
"action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
}
```"""
# "action_input" is a nested JSON object rather than a string.
JSON_WITH_PYTHON_DICT = """```json
{
"action": "Final Answer",
"action_input": {"foo": "bar", "bar": "foo"}
}
```"""
# Inner quotes are written as \\" in the source, which evaluates to \" at
# runtime: each inner quote reaches the parser preceded by one backslash.
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON = """```json
{
"action": "Final Answer",
"action_input": "{\\"foo\\": \\"bar\\", \\"bar\\": \\"foo\\"}"
}
```"""
NO_TICKS = """{
"foo": "bar"
}"""
@ -131,3 +159,27 @@ def test_parse_json_with_code_blocks() -> None:
"action": "Final Answer",
"action_input": '```bar\n<div id="1" class="value">\n\ttext\n</div>```',
}
# Fixtures fed to test_parse_nested_json_with_escaped_quotes; every entry is
# asserted to parse to the same dict with "action_input" as a plain string.
TEST_CASES_ESCAPED_QUOTES = [
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
]
@pytest.mark.parametrize("json_string", TEST_CASES_ESCAPED_QUOTES)
def test_parse_nested_json_with_escaped_quotes(json_string: str) -> None:
    """Check that a nested JSON string value is returned as a plain string.

    Every parametrized input must yield ``action_input`` as the inner JSON
    text with ordinary, unescaped double quotes.
    """
    expected = {
        "action": "Final Answer",
        "action_input": '{"foo": "bar", "bar": "foo"}',
    }
    result = parse_json_markdown(json_string)
    assert result == expected
def test_parse_json_with_python_dict() -> None:
    """Check that a nested JSON object value is parsed into a dict."""
    expected = {
        "action": "Final Answer",
        "action_input": {"foo": "bar", "bar": "foo"},
    }
    assert parse_json_markdown(JSON_WITH_PYTHON_DICT) == expected