From 5cbe2b7b6aef6299d0ccaadc76dcaab02bf9102f Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Fri, 29 Sep 2023 14:06:07 +0100 Subject: [PATCH] Implement diff --- .../langchain/output_parsers/json.py | 15 +- .../unit_tests/output_parsers/test_json.py | 251 +++++++++++++++--- 2 files changed, 225 insertions(+), 41 deletions(-) diff --git a/libs/langchain/langchain/output_parsers/json.py b/libs/langchain/langchain/output_parsers/json.py index f512ed6e47..7a4660ef6b 100644 --- a/libs/langchain/langchain/output_parsers/json.py +++ b/libs/langchain/langchain/output_parsers/json.py @@ -3,7 +3,9 @@ from __future__ import annotations import json import re from json import JSONDecodeError -from typing import Any, List +from typing import Any, List, Optional + +import jsonpatch from langchain.schema import BaseOutputParser, OutputParserException from langchain.schema.output import ChatGeneration, Generation @@ -42,7 +44,7 @@ def _custom_parser(multiline_string: str) -> str: # Adapted from https://github.com/KillianLucas/open-interpreter/blob/main/interpreter/utils/parse_partial_json.py # MIT License -def parse_partial_json(s): +def parse_partial_json(s: str) -> Any: # Attempt to parse the string as-is. try: return json.loads(s) @@ -84,7 +86,8 @@ def parse_partial_json(s): # Append the processed character to the new string. new_s += char - # If we're still inside a string at the end of processing, we need to close the string. + # If we're still inside a string at the end of processing, + # we need to close the string. if is_inside_string: new_s += '"' @@ -197,6 +200,9 @@ class PartialFunctionsJsonOutputParser(BaseCumulativeTransformOutputParser[Any]) except KeyError: return None + def _diff(self, prev: Optional[Any], next: Any) -> Any: + return jsonpatch.make_patch(prev, next).patch + def parse(self, text: str) -> Any: pass @@ -206,5 +212,8 @@ class PartialJsonOutputParser(BaseCumulativeTransformOutputParser[Any]): def _type(self) -> str: return "partial_functions_json" + def _diff(self, prev: Optional[Any], next: Any) -> Any: + return jsonpatch.make_patch(prev, next).patch + def parse(self, text: str) -> Any: return parse_json_markdown(text) diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_json.py b/libs/langchain/tests/unit_tests/output_parsers/test_json.py index fea7b290d0..00dbd4d3b3 100644 --- a/libs/langchain/tests/unit_tests/output_parsers/test_json.py +++ b/libs/langchain/tests/unit_tests/output_parsers/test_json.py @@ -1,8 +1,15 @@ import json -from typing import Iterator, Tuple +from typing import Any, Iterator, Tuple + import pytest -from langchain.output_parsers.json import parse_json_markdown, parse_partial_json +from langchain.output_parsers.json import ( + PartialFunctionsJsonOutputParser, + PartialJsonOutputParser, + parse_json_markdown, + parse_partial_json, +) +from langchain.schema.messages import AIMessageChunk GOOD_JSON = """```json { @@ -206,7 +213,6 @@ def test_parse_partial_json(json_strings: Tuple[str, str]) -> None: STREAMED_TOKENS = """ { - " setup ": @@ -215,36 +221,50 @@ Why did the bears - go - on + start a - picnic -?", - - + band + called + Bears + Bears + Bears + ? +" +, " -p -unch -line +punchline ": " Because they wanted to - have - a + play bear --y + -y good - time -!" + music + ! +" +, + " +audience +": + [ +" +Haha +" +, + " +So + funny +" +] } """.splitlines() EXPECTED_STREAMED_JSON = [ - {}, {}, {"setup": ""}, {"setup": "Why"}, @@ -258,62 +278,217 @@ EXPECTED_STREAMED_JSON = [ {"setup": "Why did the bears start a band called Bears"}, {"setup": "Why did the bears start a band called Bears Bears"}, {"setup": "Why did the bears start a band called Bears Bears Bears"}, + {"setup": "Why did the bears start a band called Bears Bears Bears ?"}, { - "setup": "Why did the bears start a band called Bears Bears Bears?", + "setup": "Why did the bears start a band called Bears Bears Bears ?", "punchline": "", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", + "setup": "Why did the bears start a band called Bears Bears Bears ?", "punchline": "Because", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", + "setup": "Why did the bears start a band called Bears Bears Bears ?", "punchline": "Because they", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", + "setup": "Why did the bears start a band called Bears Bears Bears ?", "punchline": "Because they wanted", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", + "setup": "Why did the bears start a band called Bears Bears Bears ?", "punchline": "Because they wanted to", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", + "setup": "Why did the bears start a band called Bears Bears Bears ?", "punchline": "Because they wanted to play", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", + "setup": "Why did the bears start a band called Bears Bears Bears ?", "punchline": "Because they wanted to play bear", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", - "punchline": "Because they wanted to play bear-y", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "punchline": "Because they wanted to play bear -y", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", - "punchline": "Because they wanted to play bear-y good", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "punchline": "Because they wanted to play bear -y good", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", - "punchline": "Because they wanted to play bear-y good music", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "punchline": "Because they wanted to play bear -y good music", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", - "punchline": "Because they wanted to play bear-y good music!", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "punchline": "Because they wanted to play bear -y good music !", }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", - "punchline": "Because they wanted to play bear-y good music!", + "punchline": "Because they wanted to play bear -y good music !", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "audience": [], }, { - "setup": "Why did the bears start a band called Bears Bears Bears?", - "punchline": "Because they wanted to play bear-y good music!", + "punchline": "Because they wanted to play bear -y good music !", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "audience": [""], }, + { + "punchline": "Because they wanted to play bear -y good music !", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "audience": ["Haha"], + }, + { + "punchline": "Because they wanted to play bear -y good music !", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "audience": ["Haha", ""], + }, + { + "punchline": "Because they wanted to play bear -y good music !", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "audience": ["Haha", "So"], + }, + { + "punchline": "Because they wanted to play bear -y good music !", + "setup": "Why did the bears start a band called Bears Bears Bears ?", + "audience": ["Haha", "So funny"], + }, +] + +EXPECTED_STREAMED_JSON_DIFF = [ + [{"op": "replace", "path": "", "value": {}}], + [{"op": "add", "path": "/setup", "value": ""}], + [{"op": "replace", "path": "/setup", "value": "Why"}], + [{"op": "replace", "path": "/setup", "value": "Why did"}], + [{"op": "replace", "path": "/setup", "value": "Why did the"}], + [{"op": "replace", "path": "/setup", "value": "Why did the bears"}], + [{"op": "replace", "path": "/setup", "value": "Why did the bears start"}], + [{"op": "replace", "path": "/setup", "value": "Why did the bears start a"}], + [{"op": "replace", "path": "/setup", "value": "Why did the bears start a band"}], + [ + { + "op": "replace", + "path": "/setup", + "value": "Why did the bears start a band called", + } + ], + [ + { + "op": "replace", + "path": "/setup", + "value": "Why did the bears start a band called Bears", + } + ], + [ + { + "op": "replace", + "path": "/setup", + "value": "Why did the bears start a band called Bears Bears", + } + ], + [ + { + "op": "replace", + "path": "/setup", + "value": "Why did the bears start a band called Bears Bears Bears", + } + ], + [ + { + "op": "replace", + "path": "/setup", + "value": "Why did the bears start a band called Bears Bears Bears ?", + } + ], + [{"op": "add", "path": "/punchline", "value": ""}], + [{"op": "replace", "path": "/punchline", "value": "Because"}], + [{"op": "replace", "path": "/punchline", "value": "Because they"}], + [{"op": "replace", "path": "/punchline", "value": "Because they wanted"}], + [{"op": "replace", "path": "/punchline", "value": "Because they wanted to"}], + [{"op": "replace", "path": "/punchline", "value": "Because they wanted to play"}], + [ + { + "op": "replace", + "path": "/punchline", + "value": "Because they wanted to play bear", + } + ], + [ + { + "op": "replace", + "path": "/punchline", + "value": "Because they wanted to play bear -y", + } + ], + [ + { + "op": "replace", + "path": "/punchline", + "value": "Because they wanted to play bear -y good", + } + ], + [ + { + "op": "replace", + "path": "/punchline", + "value": "Because they wanted to play bear -y good music", + } + ], + [ + { + "op": "replace", + "path": "/punchline", + "value": "Because they wanted to play bear -y good music !", + } + ], + [{"op": "add", "path": "/audience", "value": []}], + [{"op": "add", "path": "/audience/0", "value": ""}], + [{"op": "replace", "path": "/audience/0", "value": "Haha"}], + [{"op": "add", "path": "/audience/1", "value": ""}], + [{"op": "replace", "path": "/audience/1", "value": "So"}], + [{"op": "replace", "path": "/audience/1", "value": "So funny"}], ] def test_partial_text_json_output_parser() -> None: - def input_iter() -> Iterator[str]: + def input_iter(_: Any) -> Iterator[str]: for token in STREAMED_TOKENS: yield token + + chain = input_iter | PartialJsonOutputParser() + + assert list(chain.stream(None)) == EXPECTED_STREAMED_JSON + + +def test_partial_functions_json_output_parser() -> None: + def input_iter(_: Any) -> Iterator[AIMessageChunk]: + for token in STREAMED_TOKENS: + yield AIMessageChunk( + content="", additional_kwargs={"function_call": {"arguments": token}} + ) + + chain = input_iter | PartialFunctionsJsonOutputParser() + + assert list(chain.stream(None)) == EXPECTED_STREAMED_JSON + + +def test_partial_text_json_output_parser_diff() -> None: + def input_iter(_: Any) -> Iterator[str]: + for token in STREAMED_TOKENS: + yield token + + chain = input_iter | PartialJsonOutputParser(diff=True) + + assert list(chain.stream(None)) == EXPECTED_STREAMED_JSON_DIFF + + +def test_partial_functions_json_output_parser_diff() -> None: + def input_iter(_: Any) -> Iterator[AIMessageChunk]: + for token in STREAMED_TOKENS: + yield AIMessageChunk( + content="", additional_kwargs={"function_call": {"arguments": token}} + ) + + chain = input_iter | PartialFunctionsJsonOutputParser(diff=True) + + assert list(chain.stream(None)) == EXPECTED_STREAMED_JSON_DIFF