core: update json output parser (#15079)

- **Description:** Changed json.py so that `parse_partial_json` handles
additional cases of partial JSON strings. It now drops the last character
of the string, one at a time, until a valid JSON string is found or the
string is empty. Also added test cases covering these inputs.
  
- **Issue:** The function `parse_partial_json` could not parse input in
which a key is present but its value is missing (e.g. `{"foo": "bar", "bar":`).

---------

Co-authored-by: Nuno Campos <nuno@langchain.dev>
This commit is contained in:
Antonio Pisani 2024-01-02 19:34:43 -05:00 committed by GitHub
parent eecfa81918
commit d4a98e4e04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 5 deletions

View File

@ -102,12 +102,27 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
if is_inside_string:
new_s += '"'
# Close any remaining open structures in the reverse order that they were opened.
for closing_char in reversed(stack):
new_s += closing_char
# Try to parse mods of string until we succeed or run out of characters.
while new_s:
final_s = new_s
# Attempt to parse the modified string as JSON.
return json.loads(new_s, strict=strict)
# Close any remaining open structures in the reverse
# order that they were opened.
for closing_char in reversed(stack):
final_s += closing_char
# Attempt to parse the modified string as JSON.
try:
return json.loads(final_s, strict=strict)
except json.JSONDecodeError:
# If we still can't parse the string as JSON,
# try removing the last character
new_s = new_s[:-1]
# If we got here, we ran out of characters to remove
# and still couldn't parse the string as JSON, so return the parse error
# for the original string.
return json.loads(s, strict=strict)
def parse_json_markdown(

View File

@ -199,6 +199,9 @@ TEST_CASES_PARTIAL = [
('{"foo": "bar", "bar": "foo}', '{"foo": "bar", "bar": "foo}"}'),
('{"foo": "bar", "bar": "foo[', '{"foo": "bar", "bar": "foo["}'),
('{"foo": "bar", "bar": "foo\\"', '{"foo": "bar", "bar": "foo\\""}'),
('{"foo": "bar", "bar":', '{"foo": "bar"}'),
('{"foo": "bar", "bar"', '{"foo": "bar"}'),
('{"foo": "bar", ', '{"foo": "bar"}'),
]