langchain[patch], core[patch], openai[patch], fireworks[minor]: ChatFireworks.with_structured_output (#18078)

<img width="1192" alt="Screenshot 2024-02-24 at 3 39 39 PM"
src="https://github.com/langchain-ai/langchain/assets/22008038/1cf74774-a23f-4b06-9b9b-85dfa2f75b63">
pull/18151/head
Bagatur 4 months ago committed by GitHub
parent 3589a135ef
commit 1e8ab83d7b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,123 @@
import copy
import json
from json import JSONDecodeError
from typing import Any, List, Type
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import BaseGenerationOutputParser
from langchain_core.output_parsers.json import parse_partial_json
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.pydantic_v1 import BaseModel
class JsonOutputToolsParser(BaseGenerationOutputParser[Any]):
"""Parse tools from OpenAI response."""
strict: bool = False
"""Whether to allow non-JSON-compliant strings.
See: https://docs.python.org/3/library/json.html#encoders-and-decoders
Useful when the parsed output may include unicode characters or new lines.
"""
return_id: bool = False
"""Whether to return the tool call id."""
first_tool_only: bool = False
"""Whether to return only the first tool call.
If False, the result will be a list of tool calls, or an empty list
if no tool calls are found.
If true, and multiple tool calls are found, only the first one will be returned,
and the other tool calls will be ignored.
If no tool calls are found, None will be returned.
"""
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
generation = result[0]
if not isinstance(generation, ChatGeneration):
raise OutputParserException(
"This output parser can only be used with a chat generation."
)
message = generation.message
try:
tool_calls = copy.deepcopy(message.additional_kwargs["tool_calls"])
except KeyError:
return []
final_tools = []
exceptions = []
for tool_call in tool_calls:
if "function" not in tool_call:
continue
try:
if partial:
function_args = parse_partial_json(
tool_call["function"]["arguments"], strict=self.strict
)
else:
function_args = json.loads(
tool_call["function"]["arguments"], strict=self.strict
)
except JSONDecodeError as e:
exceptions.append(
f"Function {tool_call['function']['name']} arguments:\n\n"
f"{tool_call['function']['arguments']}\n\nare not valid JSON. "
f"Received JSONDecodeError {e}"
)
continue
parsed = {
"type": tool_call["function"]["name"],
"args": function_args,
}
if self.return_id:
parsed["id"] = tool_call["id"]
final_tools.append(parsed)
if exceptions:
raise OutputParserException("\n\n".join(exceptions))
if self.first_tool_only:
return final_tools[0] if final_tools else None
return final_tools
class JsonOutputKeyToolsParser(JsonOutputToolsParser):
"""Parse tools from OpenAI response."""
key_name: str
"""The type of tools to return."""
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
parsed_result = super().parse_result(result, partial=partial)
if self.first_tool_only:
single_result = (
parsed_result
if parsed_result and parsed_result["type"] == self.key_name
else None
)
if self.return_id:
return single_result
elif single_result:
return single_result["args"]
else:
return None
parsed_result = [res for res in parsed_result if res["type"] == self.key_name]
if not self.return_id:
parsed_result = [res["args"] for res in parsed_result]
return parsed_result
class PydanticToolsParser(JsonOutputToolsParser):
"""Parse tools from OpenAI response."""
tools: List[Type[BaseModel]]
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
parsed_result = super().parse_result(result, partial=partial)
name_dict = {tool.__name__: tool for tool in self.tools}
if self.first_tool_only:
return (
name_dict[parsed_result["type"]](**parsed_result["args"])
if parsed_result
else None
)
return [name_dict[res["type"]](**res["args"]) for res in parsed_result]

@ -1,135 +1,7 @@
import copy
import json
from json import JSONDecodeError
from typing import Any, List, Type
from langchain_core.output_parsers.openai_tools import (
JsonOutputKeyToolsParser,
JsonOutputToolsParser,
PydanticToolsParser,
)
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import BaseGenerationOutputParser
from langchain_core.output_parsers.json import parse_partial_json
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.pydantic_v1 import BaseModel
class JsonOutputToolsParser(BaseGenerationOutputParser[Any]):
"""Parse tools from OpenAI response."""
strict: bool = False
"""Whether to allow non-JSON-compliant strings.
See: https://docs.python.org/3/library/json.html#encoders-and-decoders
Useful when the parsed output may include unicode characters or new lines.
"""
return_id: bool = False
"""Whether to return the tool call id."""
first_tool_only: bool = False
"""Whether to return only the first tool call.
If False, the result will be a list of tool calls, or an empty list
if no tool calls are found.
If true, and multiple tool calls are found, only the first one will be returned,
and the other tool calls will be ignored.
If no tool calls are found, None will be returned.
"""
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
generation = result[0]
if not isinstance(generation, ChatGeneration):
raise OutputParserException(
"This output parser can only be used with a chat generation."
)
message = generation.message
try:
tool_calls = copy.deepcopy(message.additional_kwargs["tool_calls"])
except KeyError:
return []
final_tools = []
exceptions = []
for tool_call in tool_calls:
if "function" not in tool_call:
continue
try:
if partial:
function_args = parse_partial_json(
tool_call["function"]["arguments"], strict=self.strict
)
else:
function_args = json.loads(
tool_call["function"]["arguments"], strict=self.strict
)
except JSONDecodeError as e:
exceptions.append(
f"Function {tool_call['function']['name']} arguments:\n\n"
f"{tool_call['function']['arguments']}\n\nare not valid JSON. "
f"Received JSONDecodeError {e}"
)
continue
parsed = {
"type": tool_call["function"]["name"],
"args": function_args,
}
if self.return_id:
parsed["id"] = tool_call["id"]
final_tools.append(parsed)
if exceptions:
raise OutputParserException("\n\n".join(exceptions))
if self.first_tool_only:
return final_tools[0] if final_tools else None
return final_tools
class JsonOutputKeyToolsParser(JsonOutputToolsParser):
"""Parse tools from OpenAI response."""
key_name: str
"""The type of tools to return."""
def __init__(self, key_name: str, **kwargs: Any) -> None:
"""Allow init with positional args."""
# Backwards compatibility for old argument name.
if "return_single" in kwargs:
if not kwargs.get("first_tool_only"):
kwargs["first_tool_only"] = kwargs.pop("return_single")
else:
raise ValueError(
"Cannot use both 'return_single' and 'first_tool_only' arguments."
)
super().__init__(key_name=key_name, **kwargs)
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
parsed_result = super().parse_result(result, partial=partial)
if self.first_tool_only:
single_result = (
parsed_result
if parsed_result and parsed_result["type"] == self.key_name
else None
)
if self.return_id:
return single_result
elif single_result:
return single_result["args"]
else:
return None
parsed_result = [res for res in parsed_result if res["type"] == self.key_name]
if not self.return_id:
parsed_result = [res["args"] for res in parsed_result]
return parsed_result
class PydanticToolsParser(JsonOutputToolsParser):
"""Parse tools from OpenAI response."""
tools: List[Type[BaseModel]]
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
parsed_result = super().parse_result(result, partial=partial)
name_dict = {tool.__name__: tool for tool in self.tools}
if self.first_tool_only:
return (
name_dict[parsed_result["type"]](**parsed_result["args"])
if parsed_result
else None
)
return [name_dict[res["type"]](**res["args"]) for res in parsed_result]
__all__ = ["PydanticToolsParser", "JsonOutputToolsParser", "JsonOutputKeyToolsParser"]

@ -4,6 +4,7 @@ from __future__ import annotations
import logging
import os
from operator import itemgetter
from typing import (
Any,
AsyncIterator,
@ -23,6 +24,7 @@ from typing import (
)
from fireworks.client import AsyncFireworks, Fireworks # type: ignore
from langchain_core._api import beta
from langchain_core.callbacks import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
@ -49,9 +51,15 @@ from langchain_core.messages import (
ToolMessage,
ToolMessageChunk,
)
from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.output_parsers.openai_tools import (
JsonOutputKeyToolsParser,
PydanticToolsParser,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
from langchain_core.runnables import Runnable
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils import (
convert_to_secret_str,
@ -189,7 +197,7 @@ class _FunctionCall(TypedDict):
name: str
# This is basically a copy and replace for ChatOpenAI, except
# This is basically a copy and replace for ChatFireworks, except
# - I needed to gut out tiktoken and some of the token estimation logic
# (not sure how important it is)
# - Environment variable is different
@ -573,7 +581,7 @@ class ChatFireworks(BaseChatModel):
self,
tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
*,
tool_choice: Optional[Union[dict, str, Literal["auto", "none"]]] = None,
tool_choice: Optional[Union[dict, str, Literal["auto", "none"], bool]] = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
"""Bind tool-like objects to this chat model.
@ -595,7 +603,7 @@ class ChatFireworks(BaseChatModel):
"""
formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
if tool_choice is not None:
if tool_choice is not None and tool_choice:
if isinstance(tool_choice, str) and (tool_choice not in ("auto", "none")):
tool_choice = {"type": "function", "function": {"name": tool_choice}}
if isinstance(tool_choice, dict) and (len(formatted_tools) != 1):
@ -611,5 +619,223 @@ class ChatFireworks(BaseChatModel):
f"Tool choice {tool_choice} was specified, but the only "
f"provided tool was {formatted_tools[0]['function']['name']}."
)
if isinstance(tool_choice, bool):
if len(tools) > 1:
raise ValueError(
"tool_choice can only be True when there is one tool. Received "
f"{len(tools)} tools."
)
tool_choice = formatted_tools[0]
kwargs["tool_choice"] = tool_choice
return super().bind(tools=formatted_tools, **kwargs)
@beta()
def with_structured_output(
self,
schema: Optional[Union[Dict, Type[BaseModel]]] = None,
*,
method: Literal["function_calling", "json_mode"] = "function_calling",
include_raw: bool = False,
**kwargs: Any,
) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
"""Model wrapper that returns outputs formatted to match the given schema.
Args:
schema: The output schema as a dict or a Pydantic class. If a Pydantic class
then the model output will be an object of that class. If a dict then
the model output will be a dict. With a Pydantic class the returned
attributes will be validated, whereas with a dict they will not be. If
`method` is "function_calling" and `schema` is a dict, then the dict
must match the Fireworks function-calling spec.
method: The method for steering model generation, either "function_calling"
or "json_mode". If "function_calling" then the schema will be converted
to a Fireworks function and the returned model will make use of the
function-calling API. If "json_mode" then Fireworks's JSON mode will be
used. Note that if using "json_mode" then you must include instructions
for formatting the output into the desired schema into the model call.
include_raw: If False then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If True
then both the raw model response (a BaseMessage) and the parsed model
response will be returned. If an error occurs during output parsing it
will be caught and returned as well. The final output is always a dict
with keys "raw", "parsed", and "parsing_error".
Returns:
A Runnable that takes any ChatModel input and returns as output:
If include_raw is True then a dict with keys:
raw: BaseMessage
parsed: Optional[_DictOrPydantic]
parsing_error: Optional[BaseException]
If include_raw is False then just _DictOrPydantic is returned,
where _DictOrPydantic depends on the schema:
If schema is a Pydantic class then _DictOrPydantic is the Pydantic
class.
If schema is a dict then _DictOrPydantic is a dict.
Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False):
.. code-block:: python
from langchain_fireworks import ChatFireworks
from langchain_core.pydantic_v1 import BaseModel
class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str
llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
structured_llm = llm.with_structured_output(AnswerWithJustification)
structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> AnswerWithJustification(
# answer='They weigh the same',
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
# )
Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True):
.. code-block:: python
from langchain_fireworks import ChatFireworks
from langchain_core.pydantic_v1 import BaseModel
class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str
llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True)
structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> {
# 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
# 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
# 'parsing_error': None
# }
Example: Function-calling, dict schema (method="function_calling", include_raw=False):
.. code-block:: python
from langchain_fireworks import ChatFireworks
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.utils.function_calling import convert_to_openai_tool
class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str
dict_schema = convert_to_openai_tool(AnswerWithJustification)
llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
structured_llm = llm.with_structured_output(dict_schema)
structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> {
# 'answer': 'They weigh the same',
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
# }
Example: JSON mode, Pydantic schema (method="json_mode", include_raw=True):
.. code-block::
from langchain_fireworks import ChatFireworks
from langchain_core.pydantic_v1 import BaseModel
class AnswerWithJustification(BaseModel):
answer: str
justification: str
llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
structured_llm = llm.with_structured_output(
AnswerWithJustification,
method="json_mode",
include_raw=True
)
structured_llm.invoke(
"Answer the following question. "
"Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n"
"What's heavier a pound of bricks or a pound of feathers?"
)
# -> {
# 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'),
# 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
# 'parsing_error': None
# }
Example: JSON mode, no schema (schema=None, method="json_mode", include_raw=True):
.. code-block::
from langchain_fireworks import ChatFireworks
llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
structured_llm = llm.with_structured_output(method="json_mode", include_raw=True)
structured_llm.invoke(
"Answer the following question. "
"Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n"
"What's heavier a pound of bricks or a pound of feathers?"
)
# -> {
# 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'),
# 'parsed': {
# 'answer': 'They are both the same weight.',
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'
# },
# 'parsing_error': None
# }
""" # noqa: E501
if kwargs:
raise ValueError(f"Received unsupported arguments {kwargs}")
is_pydantic_schema = _is_pydantic_class(schema)
if method == "function_calling":
if schema is None:
raise ValueError(
"schema must be specified when method is 'function_calling'. "
"Received None."
)
llm = self.bind_tools([schema], tool_choice=True)
if is_pydantic_schema:
output_parser: OutputParserLike = PydanticToolsParser(
tools=[schema], first_tool_only=True
)
else:
key_name = convert_to_openai_tool(schema)["function"]["name"]
output_parser = JsonOutputKeyToolsParser(
key_name=key_name, first_tool_only=True
)
elif method == "json_mode":
llm = self.bind(response_format={"type": "json_object"})
output_parser = (
PydanticOutputParser(pydantic_object=schema)
if is_pydantic_schema
else JsonOutputParser()
)
else:
raise ValueError(
f"Unrecognized method argument. Expected one of 'function_calling' or "
f"'json_format'. Received: '{method}'"
)
if include_raw:
parser_assign = RunnablePassthrough.assign(
parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
)
parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
parser_with_fallback = parser_assign.with_fallbacks(
[parser_none], exception_key="parsing_error"
)
return RunnableMap(raw=llm) | parser_with_fallback
else:
return llm | output_parser
def _is_pydantic_class(obj: Any) -> bool:
return isinstance(obj, type) and issubclass(obj, BaseModel)

@ -60,6 +60,10 @@ from langchain_core.output_parsers import (
PydanticOutputParser,
)
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.output_parsers.openai_tools import (
JsonOutputKeyToolsParser,
PydanticToolsParser,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
@ -75,11 +79,6 @@ from langchain_core.utils.function_calling import (
)
from langchain_core.utils.utils import build_extra_kwargs
from langchain_openai.output_parsers import (
JsonOutputKeyToolsParser,
PydanticToolsParser,
)
logger = logging.getLogger(__name__)

@ -1,11 +1,7 @@
from langchain_openai.output_parsers.tools import (
from langchain_core.output_parsers.openai_tools import (
JsonOutputKeyToolsParser,
JsonOutputToolsParser,
PydanticToolsParser,
)
__all__ = [
"JsonOutputToolsParser",
"JsonOutputKeyToolsParser",
"PydanticToolsParser",
]
__all__ = ["JsonOutputKeyToolsParser", "JsonOutputToolsParser", "PydanticToolsParser"]

@ -1,123 +1,7 @@
import copy
import json
from json import JSONDecodeError
from typing import Any, List, Type
from langchain_core.output_parsers.openai_tools import (
JsonOutputKeyToolsParser,
JsonOutputToolsParser,
PydanticToolsParser,
)
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import BaseGenerationOutputParser
from langchain_core.output_parsers.json import parse_partial_json
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.pydantic_v1 import BaseModel
class JsonOutputToolsParser(BaseGenerationOutputParser[Any]):
"""Parse tools from OpenAI response."""
strict: bool = False
"""Whether to allow non-JSON-compliant strings.
See: https://docs.python.org/3/library/json.html#encoders-and-decoders
Useful when the parsed output may include unicode characters or new lines.
"""
return_id: bool = False
"""Whether to return the tool call id."""
first_tool_only: bool = False
"""Whether to return only the first tool call.
If False, the result will be a list of tool calls, or an empty list
if no tool calls are found.
If true, and multiple tool calls are found, only the first one will be returned,
and the other tool calls will be ignored.
If no tool calls are found, None will be returned.
"""
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
generation = result[0]
if not isinstance(generation, ChatGeneration):
raise OutputParserException(
"This output parser can only be used with a chat generation."
)
message = generation.message
try:
tool_calls = copy.deepcopy(message.additional_kwargs["tool_calls"])
except KeyError:
return []
final_tools = []
exceptions = []
for tool_call in tool_calls:
if "function" not in tool_call:
continue
try:
if partial:
function_args = parse_partial_json(
tool_call["function"]["arguments"], strict=self.strict
)
else:
function_args = json.loads(
tool_call["function"]["arguments"], strict=self.strict
)
except JSONDecodeError as e:
exceptions.append(
f"Function {tool_call['function']['name']} arguments:\n\n"
f"{tool_call['function']['arguments']}\n\nare not valid JSON. "
f"Received JSONDecodeError {e}"
)
continue
parsed = {
"type": tool_call["function"]["name"],
"args": function_args,
}
if self.return_id:
parsed["id"] = tool_call["id"]
final_tools.append(parsed)
if exceptions:
raise OutputParserException("\n\n".join(exceptions))
if self.first_tool_only:
return final_tools[0] if final_tools else None
return final_tools
class JsonOutputKeyToolsParser(JsonOutputToolsParser):
"""Parse tools from OpenAI response."""
key_name: str
"""The type of tools to return."""
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
parsed_result = super().parse_result(result, partial=partial)
if self.first_tool_only:
single_result = (
parsed_result
if parsed_result and parsed_result["type"] == self.key_name
else None
)
if self.return_id:
return single_result
elif single_result:
return single_result["args"]
else:
return None
parsed_result = [res for res in parsed_result if res["type"] == self.key_name]
if not self.return_id:
parsed_result = [res["args"] for res in parsed_result]
return parsed_result
class PydanticToolsParser(JsonOutputToolsParser):
"""Parse tools from OpenAI response."""
tools: List[Type[BaseModel]]
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
parsed_result = super().parse_result(result, partial=partial)
name_dict = {tool.__name__: tool for tool in self.tools}
if self.first_tool_only:
return (
name_dict[parsed_result["type"]](**parsed_result["args"])
if parsed_result
else None
)
return [name_dict[res["type"]](**res["args"]) for res in parsed_result]
__all__ = ["PydanticToolsParser", "JsonOutputToolsParser", "JsonOutputKeyToolsParser"]

Loading…
Cancel
Save