fix issue #8357 by making json backtick regex greedy (#8528)

- Description: Markdown code blocks in json response should not break
the parser
  - Issue: #8357

@baskaryan @hinthornw
This commit is contained in:
Bruno Bornsztein 2023-07-31 18:36:57 -05:00 committed by GitHub
parent 64d0a0fcc0
commit 5a490a79f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 1 deletions

View File

@ -19,7 +19,7 @@ def parse_json_markdown(json_string: str) -> dict:
The parsed JSON object as a Python dictionary. The parsed JSON object as a Python dictionary.
""" """
# Try to find JSON string within triple backticks # Try to find JSON string within triple backticks
match = re.search(r"```(json)?(.*?)```", json_string, re.DOTALL) match = re.search(r"```(json)?(.*)```", json_string, re.DOTALL)
# If no match found, assume the entire string is a JSON string # If no match found, assume the entire string is a JSON string
if match is None: if match is None:

View File

@ -54,6 +54,12 @@ TICKS_WITH_NEW_LINES_EVERYWHERE = """
""" """
# Fixture: a ```json fenced block whose "foo" value itself contains triple
# backticks, so the payload holds fence markers before the closing fence.
JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
{
"foo": "```bar```"
}
```"""
NO_TICKS = """{ NO_TICKS = """{
"foo": "bar" "foo": "bar"
}""" }"""
@ -106,3 +112,8 @@ TEST_CASES = [
def test_parse_json(json_string: str) -> None: def test_parse_json(json_string: str) -> None:
parsed = parse_json_markdown(json_string) parsed = parse_json_markdown(json_string)
assert parsed == {"foo": "bar"} assert parsed == {"foo": "bar"}
def test_parse_json_with_code_block() -> None:
    """Check that triple backticks inside a JSON string value are preserved."""
    expected = {"foo": "```bar```"}
    # The fixture wraps the JSON in a fenced block and also embeds backticks
    # in the value; the parsed dict must keep the embedded backticks intact.
    assert parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK) == expected