From 6df90ad9fd1ee6d64e112d8d58f9524ca11b0757 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 29 May 2023 06:18:19 -0700 Subject: [PATCH] handle json parsing errors (#5371) adds test cases, consolidates a lot of PRs --- langchain/output_parsers/json.py | 16 +++++++++--- tests/unit_tests/output_parsers/test_json.py | 27 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/langchain/output_parsers/json.py b/langchain/output_parsers/json.py index e0c9ac55..f7f34b08 100644 --- a/langchain/output_parsers/json.py +++ b/langchain/output_parsers/json.py @@ -1,20 +1,28 @@ from __future__ import annotations import json +import re from typing import List from langchain.schema import OutputParserException def parse_json_markdown(json_string: str) -> dict: - # Remove the triple backticks if present - json_string = json_string.replace("```json", "").replace("```", "") + # Try to find JSON string within triple backticks + match = re.search(r"```(json)?(.*?)```", json_string, re.DOTALL) + + # If no match found, assume the entire string is a JSON string + if match is None: + json_str = json_string + else: + # If match found, use the content within the backticks + json_str = match.group(2) # Strip whitespace and newlines from the start and end - json_string = json_string.strip() + json_str = json_str.strip() # Parse the JSON string into a Python dictionary - parsed = json.loads(json_string) + parsed = json.loads(json_str) return parsed diff --git a/tests/unit_tests/output_parsers/test_json.py b/tests/unit_tests/output_parsers/test_json.py index 4055dd2d..4f83bc47 100644 --- a/tests/unit_tests/output_parsers/test_json.py +++ b/tests/unit_tests/output_parsers/test_json.py @@ -64,6 +64,31 @@ NO_TICKS_WHITE_SPACE = """ } """ +TEXT_BEFORE = """Thought: I need to use the search tool + +Action: +``` +{ + "foo": "bar" +} +```""" + +TEXT_AFTER = """``` +{ + "foo": "bar" +} +``` +This should do the trick""" + +TEXT_BEFORE_AND_AFTER = """Action: Testing 
+ +``` +{ + "foo": "bar" +} +``` +This should do the trick""" + TEST_CASES = [ GOOD_JSON, JSON_WITH_NEW_LINES, @@ -72,6 +97,8 @@ TEST_CASES = [ TICKS_WITH_NEW_LINES_EVERYWHERE, NO_TICKS, NO_TICKS_WHITE_SPACE, + TEXT_BEFORE, + TEXT_AFTER, ]