handle json parsing errors (#5371)

Adds test cases and consolidates several related PRs.
searx_updates
Harrison Chase 12 months ago committed by GitHub
parent 99a1e3f3a3
commit 6df90ad9fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,20 +1,28 @@
from __future__ import annotations
import json
import re
from typing import List
from langchain.schema import OutputParserException
def parse_json_markdown(json_string: str) -> dict:
    """Parse JSON from text that may wrap it in a Markdown code fence.

    If the input contains a triple-backtick fenced block (optionally tagged
    ``json``), only the content of the first such block is parsed, so any
    text before or after the fence is ignored. If no fence is present, the
    whole input is treated as the JSON document.

    Args:
        json_string: Raw text containing a JSON document, optionally inside
            a fenced block.

    Returns:
        The value produced by ``json.loads`` for the extracted text.

    Raises:
        json.JSONDecodeError: If the extracted text is not valid JSON.
    """
    # Search the untouched input: removing the backticks beforehand would
    # make this pattern unmatchable and leave surrounding prose in the
    # payload handed to the JSON parser.
    match = re.search(r"```(json)?(.*?)```", json_string, re.DOTALL)

    # No fence found: assume the entire string is the JSON document.
    # Otherwise use only the content between the backticks.
    json_str = json_string if match is None else match.group(2)

    # Strip surrounding whitespace/newlines, then parse exactly once.
    return json.loads(json_str.strip())

@ -64,6 +64,31 @@ NO_TICKS_WHITE_SPACE = """
}
"""
# Fixture: free-form text precedes the fenced JSON block; nothing follows it.
TEXT_BEFORE = """Thought: I need to use the search tool
Action:
```
{
"foo": "bar"
}
```"""
# Fixture: the fenced JSON block comes first and is followed by trailing text.
TEXT_AFTER = """```
{
"foo": "bar"
}
```
This should do the trick"""
# Fixture: the fenced JSON block has text both before and after it.
# NOTE(review): this constant is not among the TEST_CASES entries visible
# in this view — confirm it is actually exercised by the test suite.
TEXT_BEFORE_AND_AFTER = """Action: Testing
```
{
"foo": "bar"
}
```
This should do the trick"""
TEST_CASES = [
GOOD_JSON,
JSON_WITH_NEW_LINES,
@ -72,6 +97,8 @@ TEST_CASES = [
TICKS_WITH_NEW_LINES_EVERYWHERE,
NO_TICKS,
NO_TICKS_WHITE_SPACE,
TEXT_BEFORE,
TEXT_AFTER,
]

Loading…
Cancel
Save