mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
fix (parsers/json): do not escape double quotes if already escaped (#9916)
This PR fixes an issue I found when upgrading to a more recent version of LangChain. I was previously on 0.0.142, and the issue first appeared when `_custom_parser` was added to `output_parsers/json`. The problem is that the parser tries to escape double quotes even when they are already escaped (i.e. double-escaped, e.g. `\\"`), which leads to an OutputParserException. This is particularly undesirable in my app, because I have an Agent that uses a single-input Tool, which expects as input a JSON string with the structure: ```python { "foo": string, "bar": string } ``` The LLM (GPT-3.5) response is (almost) always something like `"action_input": "{\\"foo\\": \\"bar\\", \\"bar\\": \\"foo\\"}"`, and since the upgrade this is no longer parsed correctly. --------- Co-authored-by: taamedag <Davide.Menini@swisscom.com>
This commit is contained in:
parent
ad9e242a7a
commit
3f8f3de28e
@ -13,7 +13,7 @@ def _replace_new_line(match: re.Match[str]) -> str:
|
||||
value = re.sub(r"\n", r"\\n", value)
|
||||
value = re.sub(r"\r", r"\\r", value)
|
||||
value = re.sub(r"\t", r"\\t", value)
|
||||
value = re.sub('"', r"\"", value)
|
||||
value = re.sub(r'(?<!\\)"', r"\"", value)
|
||||
|
||||
return match.group(1) + value + match.group(3)
|
||||
|
||||
|
@ -67,6 +67,34 @@ JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{"foo": "bar", "bar": "foo"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_PYTHON_DICT = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": {"foo": "bar", "bar": "foo"}
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{\\"foo\\": \\"bar\\", \\"bar\\": \\"foo\\"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
NO_TICKS = """{
|
||||
"foo": "bar"
|
||||
}"""
|
||||
@ -131,3 +159,27 @@ def test_parse_json_with_code_blocks() -> None:
|
||||
"action": "Final Answer",
|
||||
"action_input": '```bar\n<div id="1" class="value">\n\ttext\n</div>```',
|
||||
}
|
||||
|
||||
|
||||
TEST_CASES_ESCAPED_QUOTES = [
|
||||
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
|
||||
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
|
||||
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("json_string", TEST_CASES_ESCAPED_QUOTES)
|
||||
def test_parse_nested_json_with_escaped_quotes(json_string: str) -> None:
|
||||
parsed = parse_json_markdown(json_string)
|
||||
assert parsed == {
|
||||
"action": "Final Answer",
|
||||
"action_input": '{"foo": "bar", "bar": "foo"}',
|
||||
}
|
||||
|
||||
|
||||
def test_parse_json_with_python_dict() -> None:
|
||||
parsed = parse_json_markdown(JSON_WITH_PYTHON_DICT)
|
||||
assert parsed == {
|
||||
"action": "Final Answer",
|
||||
"action_input": {"foo": "bar", "bar": "foo"},
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user