handle json parsing errors (#5371)

adds test cases, consolidates a lot of PRs
searx_updates
Harrison Chase 1 year ago committed by GitHub
parent 99a1e3f3a3
commit 6df90ad9fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,20 +1,28 @@
from __future__ import annotations from __future__ import annotations
import json import json
import re
from typing import List from typing import List
from langchain.schema import OutputParserException from langchain.schema import OutputParserException
def parse_json_markdown(json_string: str) -> dict:
    """Parse JSON from a string that may wrap it in a Markdown code fence.

    The first triple-backtick fence (optionally tagged ``json``) is located
    anywhere in the input, so text before or after the fenced block is
    ignored. If the input contains no fence at all, the whole string is
    treated as the JSON document. If a fence is opened but never closed,
    everything after the opening fence is used.

    Args:
        json_string: Raw text containing a JSON document, optionally fenced.

    Returns:
        The value produced by ``json.loads`` for the extracted text.

    Raises:
        json.JSONDecodeError: If the extracted text is not valid JSON.
    """
    # The lazy body stops at the first closing fence, so trailing text is
    # dropped; the \Z alternative lets an unterminated fence run to the end
    # of the input instead of failing to match.
    match = re.search(r"```(?:json)?(.*?)(?:```|\Z)", json_string, re.DOTALL)

    # If no fence is found, assume the entire string is a JSON string.
    json_str = json_string if match is None else match.group(1)

    # Strip whitespace and newlines from both ends before parsing.
    return json.loads(json_str.strip())

@ -64,6 +64,31 @@ NO_TICKS_WHITE_SPACE = """
} }
""" """
# Fixture: free-form text precedes an untagged fenced JSON block.
TEXT_BEFORE = """Thought: I need to use the search tool
Action:
```
{
"foo": "bar"
}
```"""
# Fixture: an untagged fenced JSON block is followed by trailing text.
TEXT_AFTER = """```
{
"foo": "bar"
}
```
This should do the trick"""
# Fixture: text both before and after an untagged fenced JSON block.
# NOTE(review): the visible TEST_CASES hunk adds only TEXT_BEFORE and
# TEXT_AFTER - confirm whether this fixture should be listed there too.
TEXT_BEFORE_AND_AFTER = """Action: Testing
```
{
"foo": "bar"
}
```
This should do the trick"""
TEST_CASES = [ TEST_CASES = [
GOOD_JSON, GOOD_JSON,
JSON_WITH_NEW_LINES, JSON_WITH_NEW_LINES,
@ -72,6 +97,8 @@ TEST_CASES = [
TICKS_WITH_NEW_LINES_EVERYWHERE, TICKS_WITH_NEW_LINES_EVERYWHERE,
NO_TICKS, NO_TICKS,
NO_TICKS_WHITE_SPACE, NO_TICKS_WHITE_SPACE,
TEXT_BEFORE,
TEXT_AFTER,
] ]

Loading…
Cancel
Save