mirror of https://github.com/hwchase17/langchain
parent
2b2176a3c1
commit
94cf391ef1
@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
from langchain.schema import OutputParserException
|
||||
|
||||
|
||||
def parse_json_markdown(json_string: str) -> dict:
    """Parse JSON that may be wrapped in a Markdown code fence.

    Surrounding whitespace is ignored. When the text starts with a triple
    backtick fence (optionally tagged ``json``) and/or ends with one, only
    those outer fences are removed before decoding. Backticks that appear
    inside the JSON payload itself (for example in a string value) are left
    untouched.

    Args:
        json_string: Raw text holding a JSON document, with or without a
            surrounding code fence.

    Returns:
        The value decoded by ``json.loads`` (a dict for a JSON object).

    Raises:
        json.JSONDecodeError: If the unwrapped text is not valid JSON.
    """
    fence = "```"
    language_tag = "json"

    text = json_string.strip()

    # Drop a single opening fence and its optional language tag. Only the
    # very start is inspected so fences embedded in the payload survive.
    if text.startswith(fence):
        text = text[len(fence):]
        if text.startswith(language_tag):
            text = text[len(language_tag):]

    # Drop a single closing fence, again only at the very end.
    if text.endswith(fence):
        text = text[: -len(fence)]

    # Removing the fences can expose the newlines that sat next to them.
    return json.loads(text.strip())
|
||||
|
||||
|
||||
def parse_and_check_json_markdown(text: str, expected_keys: List[str]) -> dict:
    """Parse fenced-or-plain JSON and verify that required keys are present.

    Args:
        text: Raw text passed to ``parse_json_markdown``.
        expected_keys: Keys that must all be present in the parsed object.

    Returns:
        The parsed JSON object.

    Raises:
        OutputParserException: If the text is not valid JSON, if the parsed
            value is not a JSON object, or if any expected key is missing.
    """
    try:
        json_obj = parse_json_markdown(text)
    except json.JSONDecodeError as e:
        # Chain the decoder error so its line/column details stay in the
        # traceback instead of being flattened into the message only.
        raise OutputParserException(f"Got invalid JSON object. Error: {e}") from e

    # Valid JSON may still be a list, string or number. Membership tests on
    # those would either raise TypeError or match substrings/elements, so
    # reject them explicitly before checking keys.
    if not isinstance(json_obj, dict):
        raise OutputParserException(
            f"Got invalid return object. Expected a JSON object, but got {json_obj}"
        )

    for key in expected_keys:
        if key not in json_obj:
            raise OutputParserException(
                f"Got invalid return object. Expected key `{key}` "
                f"to be present, but got {json_obj}"
            )
    return json_obj
|
@ -0,0 +1,81 @@
|
||||
import pytest
|
||||
|
||||
from langchain.output_parsers.json import parse_json_markdown
|
||||
|
||||
# Fixture inputs for parse_json_markdown. Every variant below is expected to
# decode to the same mapping, {"foo": "bar"}; they differ only in fencing
# and in where blank lines / surrounding whitespace appear.

# Fence tagged ``json`` with no whitespace outside the fence.
GOOD_JSON = """```json
{
"foo": "bar"
}
```"""

# Tagged fence preceded and followed by blank lines.
JSON_WITH_NEW_LINES = """

```json
{
"foo": "bar"
}
```

"""

# Tagged fence with blank lines inside the JSON object.
JSON_WITH_NEW_LINES_INSIDE = """```json
{

"foo": "bar"

}
```"""

# Tagged fence with blank lines outside the fence, between the fence and the
# object, and inside the object.
JSON_WITH_NEW_LINES_EVERYWHERE = """

```json

{

"foo": "bar"

}

```

"""

# Same layout as above, but the opening fence has no language tag.
TICKS_WITH_NEW_LINES_EVERYWHERE = """

```

{

"foo": "bar"

}

```

"""

# Plain JSON with no fence at all.
NO_TICKS = """{
"foo": "bar"
}"""

# Plain JSON with leading and trailing newlines.
NO_TICKS_WHITE_SPACE = """
{
"foo": "bar"
}
"""

# All fixtures, fed to the parametrized test one at a time.
TEST_CASES = [
    GOOD_JSON,
    JSON_WITH_NEW_LINES,
    JSON_WITH_NEW_LINES_INSIDE,
    JSON_WITH_NEW_LINES_EVERYWHERE,
    TICKS_WITH_NEW_LINES_EVERYWHERE,
    NO_TICKS,
    NO_TICKS_WHITE_SPACE,
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("json_string", TEST_CASES)
def test_parse_json(json_string: str) -> None:
    """Check that each fixture variant decodes to the same one-key mapping."""
    expected = {"foo": "bar"}
    assert parse_json_markdown(json_string) == expected
|
Loading…
Reference in New Issue