Implement diff

pull/11222/head
Nuno Campos 1 year ago
parent 6c0a6b70e0
commit 5cbe2b7b6a

@ -3,7 +3,9 @@ from __future__ import annotations
import json
import re
from json import JSONDecodeError
from typing import Any, List
from typing import Any, List, Optional
import jsonpatch
from langchain.schema import BaseOutputParser, OutputParserException
from langchain.schema.output import ChatGeneration, Generation
@ -42,7 +44,7 @@ def _custom_parser(multiline_string: str) -> str:
# Adapted from https://github.com/KillianLucas/open-interpreter/blob/main/interpreter/utils/parse_partial_json.py
# MIT License
def parse_partial_json(s):
def parse_partial_json(s: str) -> Any:
# Attempt to parse the string as-is.
try:
return json.loads(s)
@ -84,7 +86,8 @@ def parse_partial_json(s):
# Append the processed character to the new string.
new_s += char
# If we're still inside a string at the end of processing, we need to close the string.
# If we're still inside a string at the end of processing,
# we need to close the string.
if is_inside_string:
new_s += '"'
@ -197,6 +200,9 @@ class PartialFunctionsJsonOutputParser(BaseCumulativeTransformOutputParser[Any])
except KeyError:
return None
def _diff(self, prev: Optional[Any], next: Any) -> Any:
return jsonpatch.make_patch(prev, next).patch
def parse(self, text: str) -> Any:
pass
@ -206,5 +212,8 @@ class PartialJsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
def _type(self) -> str:
return "partial_functions_json"
def _diff(self, prev: Optional[Any], next: Any) -> Any:
return jsonpatch.make_patch(prev, next).patch
def parse(self, text: str) -> Any:
return parse_json_markdown(text)

@ -1,8 +1,15 @@
import json
from typing import Iterator, Tuple
from typing import Any, Iterator, Tuple
import pytest
from langchain.output_parsers.json import parse_json_markdown, parse_partial_json
from langchain.output_parsers.json import (
PartialFunctionsJsonOutputParser,
PartialJsonOutputParser,
parse_json_markdown,
parse_partial_json,
)
from langchain.schema.messages import AIMessageChunk
GOOD_JSON = """```json
{
@ -206,7 +213,6 @@ def test_parse_partial_json(json_strings: Tuple[str, str]) -> None:
STREAMED_TOKENS = """
{
"
setup
":
@ -215,36 +221,50 @@ Why
did
the
bears
go
on
start
a
picnic
?",
band
called
Bears
Bears
Bears
?
"
,
"
p
unch
line
punchline
":
"
Because
they
wanted
to
have
a
play
bear
-y
-y
good
time
!"
music
!
"
,
"
audience
":
[
"
Haha
"
,
"
So
funny
"
]
}
""".splitlines()
EXPECTED_STREAMED_JSON = [
{},
{},
{"setup": ""},
{"setup": "Why"},
@ -258,62 +278,217 @@ EXPECTED_STREAMED_JSON = [
{"setup": "Why did the bears start a band called Bears"},
{"setup": "Why did the bears start a band called Bears Bears"},
{"setup": "Why did the bears start a band called Bears Bears Bears"},
{"setup": "Why did the bears start a band called Bears Bears Bears ?"},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they wanted",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they wanted to",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they wanted to play",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they wanted to play bear",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"punchline": "Because they wanted to play bear-y",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they wanted to play bear -y",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"punchline": "Because they wanted to play bear-y good",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they wanted to play bear -y good",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"punchline": "Because they wanted to play bear-y good music",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they wanted to play bear -y good music",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"punchline": "Because they wanted to play bear-y good music!",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"punchline": "Because they wanted to play bear -y good music !",
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"punchline": "Because they wanted to play bear-y good music!",
"punchline": "Because they wanted to play bear -y good music !",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"audience": [],
},
{
"setup": "Why did the bears start a band called Bears Bears Bears?",
"punchline": "Because they wanted to play bear-y good music!",
"punchline": "Because they wanted to play bear -y good music !",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"audience": [""],
},
{
"punchline": "Because they wanted to play bear -y good music !",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"audience": ["Haha"],
},
{
"punchline": "Because they wanted to play bear -y good music !",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"audience": ["Haha", ""],
},
{
"punchline": "Because they wanted to play bear -y good music !",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"audience": ["Haha", "So"],
},
{
"punchline": "Because they wanted to play bear -y good music !",
"setup": "Why did the bears start a band called Bears Bears Bears ?",
"audience": ["Haha", "So funny"],
},
]
EXPECTED_STREAMED_JSON_DIFF = [
[{"op": "replace", "path": "", "value": {}}],
[{"op": "add", "path": "/setup", "value": ""}],
[{"op": "replace", "path": "/setup", "value": "Why"}],
[{"op": "replace", "path": "/setup", "value": "Why did"}],
[{"op": "replace", "path": "/setup", "value": "Why did the"}],
[{"op": "replace", "path": "/setup", "value": "Why did the bears"}],
[{"op": "replace", "path": "/setup", "value": "Why did the bears start"}],
[{"op": "replace", "path": "/setup", "value": "Why did the bears start a"}],
[{"op": "replace", "path": "/setup", "value": "Why did the bears start a band"}],
[
{
"op": "replace",
"path": "/setup",
"value": "Why did the bears start a band called",
}
],
[
{
"op": "replace",
"path": "/setup",
"value": "Why did the bears start a band called Bears",
}
],
[
{
"op": "replace",
"path": "/setup",
"value": "Why did the bears start a band called Bears Bears",
}
],
[
{
"op": "replace",
"path": "/setup",
"value": "Why did the bears start a band called Bears Bears Bears",
}
],
[
{
"op": "replace",
"path": "/setup",
"value": "Why did the bears start a band called Bears Bears Bears ?",
}
],
[{"op": "add", "path": "/punchline", "value": ""}],
[{"op": "replace", "path": "/punchline", "value": "Because"}],
[{"op": "replace", "path": "/punchline", "value": "Because they"}],
[{"op": "replace", "path": "/punchline", "value": "Because they wanted"}],
[{"op": "replace", "path": "/punchline", "value": "Because they wanted to"}],
[{"op": "replace", "path": "/punchline", "value": "Because they wanted to play"}],
[
{
"op": "replace",
"path": "/punchline",
"value": "Because they wanted to play bear",
}
],
[
{
"op": "replace",
"path": "/punchline",
"value": "Because they wanted to play bear -y",
}
],
[
{
"op": "replace",
"path": "/punchline",
"value": "Because they wanted to play bear -y good",
}
],
[
{
"op": "replace",
"path": "/punchline",
"value": "Because they wanted to play bear -y good music",
}
],
[
{
"op": "replace",
"path": "/punchline",
"value": "Because they wanted to play bear -y good music !",
}
],
[{"op": "add", "path": "/audience", "value": []}],
[{"op": "add", "path": "/audience/0", "value": ""}],
[{"op": "replace", "path": "/audience/0", "value": "Haha"}],
[{"op": "add", "path": "/audience/1", "value": ""}],
[{"op": "replace", "path": "/audience/1", "value": "So"}],
[{"op": "replace", "path": "/audience/1", "value": "So funny"}],
]
def test_partial_text_json_output_parser() -> None:
def input_iter() -> Iterator[str]:
def input_iter(_: Any) -> Iterator[str]:
for token in STREAMED_TOKENS:
yield token
chain = input_iter | PartialJsonOutputParser()
assert list(chain.stream(None)) == EXPECTED_STREAMED_JSON
def test_partial_functions_json_output_parser() -> None:
def input_iter(_: Any) -> Iterator[AIMessageChunk]:
for token in STREAMED_TOKENS:
yield AIMessageChunk(
content="", additional_kwargs={"function_call": {"arguments": token}}
)
chain = input_iter | PartialFunctionsJsonOutputParser()
assert list(chain.stream(None)) == EXPECTED_STREAMED_JSON
def test_partial_text_json_output_parser_diff() -> None:
def input_iter(_: Any) -> Iterator[str]:
for token in STREAMED_TOKENS:
yield token
chain = input_iter | PartialJsonOutputParser(diff=True)
assert list(chain.stream(None)) == EXPECTED_STREAMED_JSON_DIFF
def test_partial_functions_json_output_parser_diff() -> None:
def input_iter(_: Any) -> Iterator[AIMessageChunk]:
for token in STREAMED_TOKENS:
yield AIMessageChunk(
content="", additional_kwargs={"function_call": {"arguments": token}}
)
chain = input_iter | PartialFunctionsJsonOutputParser(diff=True)
assert list(chain.stream(None)) == EXPECTED_STREAMED_JSON_DIFF

Loading…
Cancel
Save