You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
langchain/langchain/output_parsers/json.py

42 lines
1.2 KiB
Python

from __future__ import annotations
import json
import re
from typing import List
from langchain.schema import OutputParserException
def parse_json_markdown(json_string: str) -> dict:
    """Parse JSON from text that may wrap it in a Markdown code fence.

    The first span delimited by triple backticks is used as the JSON
    payload; an optional ``json`` language tag directly after the opening
    fence is skipped (matched case-insensitively, so ``JSON`` and ``Json``
    work too). When no fenced span is found, the whole input is treated as
    the payload.

    Args:
        json_string: Raw text, with or without a fenced block.

    Returns:
        The value decoded by ``json.loads``. The annotation says ``dict``,
        but ``json.loads`` returns whatever top-level value the payload
        encodes.

    Raises:
        json.JSONDecodeError: If the payload is not valid JSON.
    """
    # DOTALL lets the payload span multiple lines. IGNORECASE keeps an
    # upper- or mixed-case language tag out of the captured payload, where
    # it would otherwise make json.loads fail.
    match = re.search(
        r"```(json)?(.*?)```", json_string, re.DOTALL | re.IGNORECASE
    )
    # No fenced block: assume the entire string is the JSON document.
    json_str = json_string if match is None else match.group(2)
    # Surrounding whitespace and newlines are stripped before decoding.
    return json.loads(json_str.strip())
def parse_and_check_json_markdown(text: str, expected_keys: List[str]) -> dict:
    """Parse JSON from ``text`` and verify that required keys are present.

    Args:
        text: Raw text handed to ``parse_json_markdown``.
        expected_keys: Keys that must all be present in the parsed object.

    Returns:
        The parsed JSON object.

    Raises:
        OutputParserException: If the text is not valid JSON, if the
            top-level JSON value is not an object, or if any expected key
            is missing.
    """
    try:
        json_obj = parse_json_markdown(text)
    except json.JSONDecodeError as e:
        # Chain explicitly so the decode error is kept as the cause.
        raise OutputParserException(
            f"Got invalid JSON object. Error: {e}"
        ) from e
    # A scalar would make the membership test below raise TypeError, and a
    # string or list would pass it by accident; neither is a valid result.
    if not isinstance(json_obj, dict):
        raise OutputParserException(
            f"Got invalid return object. Expected a JSON object, "
            f"but got {json_obj}"
        )
    for key in expected_keys:
        if key not in json_obj:
            raise OutputParserException(
                f"Got invalid return object. Expected key `{key}` "
                f"to be present, but got {json_obj}"
            )
    return json_obj