Fix issue #8357 by making the JSON backtick regex greedy (#8528)

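- Description: Markdown code blocks inside a JSON response should not break
the parser. The triple-backtick regex now matches greedily, up to the last
closing fence, so backticks nested inside a JSON string value no longer end
the match early.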
  - Issue: #8357

@baskaryan @hinthornw
pull/8553/head
Bruno Bornsztein 1 year ago committed by GitHub
parent 64d0a0fcc0
commit 5a490a79f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -19,7 +19,7 @@ def parse_json_markdown(json_string: str) -> dict:
The parsed JSON object as a Python dictionary.
"""
# Try to find JSON string within triple backticks
match = re.search(r"```(json)?(.*?)```", json_string, re.DOTALL)
match = re.search(r"```(json)?(.*)```", json_string, re.DOTALL)
# If no match found, assume the entire string is a JSON string
if match is None:

@ -54,6 +54,12 @@ TICKS_WITH_NEW_LINES_EVERYWHERE = """
"""
# Fenced ```json block whose "foo" value itself contains triple backticks.
# Used by test_parse_json_with_code_block to check that the inner backticks
# are kept as part of the parsed value.
JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
{
"foo": "```bar```"
}
```"""
# Bare JSON document with no Markdown fence around it.
# NOTE(review): presumably listed in TEST_CASES — confirm; the list is not
# visible in this hunk.
NO_TICKS = """{
"foo": "bar"
}"""
@ -106,3 +112,8 @@ TEST_CASES = [
def test_parse_json(json_string: str) -> None:
    """Check that ``json_string`` parses to ``{"foo": "bar"}``.

    Args:
        json_string: Input text handed to ``parse_json_markdown``.
    """
    assert parse_json_markdown(json_string) == {"foo": "bar"}
def test_parse_json_with_code_block() -> None:
    """Check that triple backticks inside a JSON string value are preserved."""
    expected = {"foo": "```bar```"}
    assert parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK) == expected

Loading…
Cancel
Save