langchain[minor]: openai tools structured_output_chain (#17296)

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
pull/17885/head^2
Bagatur 4 months ago committed by GitHub
parent b5f8cf9509
commit b0cfb86c48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -310,31 +310,31 @@ def create_structured_output_runnable(
Example:
.. code-block:: python
from typing import Optional
from langchain.chains.ernie_functions import create_structured_output_chain
from langchain_community.chat_models import ErnieBotChat
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
class Dog(BaseModel):
"""Identifying information about a dog."""
name: str = Field(..., description="The dog's name")
color: str = Field(..., description="The dog's color")
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
llm = ErnieBotChat(model_name="ERNIE-Bot-4")
prompt = ChatPromptTemplate.from_messages(
[
("user", "Use the given format to extract information from the following input: {input}"),
("assistant", "OK!"),
("user", "Tip: Make sure to answer in the correct format"),
]
)
chain = create_structured_output_chain(Dog, llm, prompt)
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
# -> Dog(name="Harry", color="brown", fav_food="chicken")
from typing import Optional
from langchain.chains.ernie_functions import create_structured_output_chain
from langchain_community.chat_models import ErnieBotChat
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
class Dog(BaseModel):
"""Identifying information about a dog."""
name: str = Field(..., description="The dog's name")
color: str = Field(..., description="The dog's color")
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
llm = ErnieBotChat(model_name="ERNIE-Bot-4")
prompt = ChatPromptTemplate.from_messages(
[
("user", "Use the given format to extract information from the following input: {input}"),
("assistant", "OK!"),
("user", "Tip: Make sure to answer in the correct format"),
]
)
chain = create_structured_output_chain(Dog, llm, prompt)
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
# -> Dog(name="Harry", color="brown", fav_food="chicken")
""" # noqa: E501
if isinstance(output_schema, dict):
function: Any = {

@ -9,9 +9,16 @@ from langchain_core.output_parsers import (
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import Runnable
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_core.utils.function_calling import (
convert_to_openai_function,
convert_to_openai_tool,
)
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import (
JsonOutputKeyToolsParser,
PydanticOutputParser,
PydanticToolsParser,
)
from langchain.output_parsers.openai_functions import (
JsonOutputFunctionsParser,
PydanticAttrOutputFunctionsParser,
@ -26,7 +33,7 @@ def create_openai_fn_runnable(
*,
enforce_single_function_usage: bool = True,
output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
**kwargs: Any,
**llm_kwargs: Any,
) -> Runnable:
"""Create a runnable sequence that uses OpenAI functions.
@ -53,6 +60,7 @@ def create_openai_fn_runnable(
passed in and they are not pydantic.BaseModels, the chain output will
include both the name of the function that was returned and the arguments
to pass to the function.
**llm_kwargs: Additional named arguments to pass to the language model.
Returns:
A runnable sequence that will pass in the given functions to the model when run.
@ -91,25 +99,27 @@ def create_openai_fn_runnable(
if not functions:
raise ValueError("Need to pass in at least one function. Received zero.")
openai_functions = [convert_to_openai_function(f) for f in functions]
llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs}
llm_kwargs_: Dict[str, Any] = {"functions": openai_functions, **llm_kwargs}
if len(openai_functions) == 1 and enforce_single_function_usage:
llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
llm_kwargs_["function_call"] = {"name": openai_functions[0]["name"]}
output_parser = output_parser or get_openai_output_parser(functions)
if prompt:
return prompt | llm.bind(**llm_kwargs) | output_parser
return prompt | llm.bind(**llm_kwargs_) | output_parser
else:
return llm.bind(**llm_kwargs) | output_parser
return llm.bind(**llm_kwargs_) | output_parser
# TODO: implement mode='openai-tools'.
def create_structured_output_runnable(
output_schema: Union[Dict[str, Any], Type[BaseModel]],
llm: Runnable,
prompt: Optional[BasePromptTemplate] = None,
*,
output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
mode: Literal["openai-functions", "openai-json"] = "openai-functions",
enforce_single_function_usage: bool = True,
enforce_function_usage: bool = True,
return_single: bool = True,
mode: Literal[
"openai-functions", "openai-tools", "openai-json"
] = "openai-functions",
**kwargs: Any,
) -> Runnable:
"""Create a runnable for extracting structured outputs.
@ -130,19 +140,107 @@ def create_structured_output_runnable(
in, then the OutputParser will try to parse outputs using the pydantic
class. Otherwise model outputs will be parsed as JSON.
mode: How structured outputs are extracted from the model. If 'openai-functions'
then OpenAI function calling is used. If 'openai-json' then OpenAI model
then OpenAI function calling is used with the deprecated 'functions',
'function_call' schema. If 'openai-tools' then OpenAI function
calling with the latest 'tools', 'tool_choice' schema is used. This is
recommended over 'openai-functions'. If 'openai-json' then OpenAI model
with response_format set to JSON is used.
enforce_single_function_usage: Only used if mode is 'openai-functions'. Only
used if a single function is passed in. If
True, then the model will be forced to use the given function. If False,
then the model will be given the option to use the given function or not.
enforce_function_usage: Only applies when mode is 'openai-tools' or
'openai-functions'. If True, then the model will be forced to use the given
output schema. If False, then the model can elect whether to use the output
schema.
return_single: Only applies when mode is 'openai-tools'. Whether to return a list of
structured outputs or a single one. If True and model does not return any
structured outputs then chain output is None. If False and model does not
return any structured outputs then chain output is an empty list.
**kwargs: Additional named arguments.
Returns:
A runnable sequence that will return a structured output matching the given
A runnable sequence that will return a structured output(s) matching the given
output_schema.
OpenAI tools example with Pydantic schema (mode='openai-tools'):
.. code-block:: python
from typing import Optional
from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
class RecordDog(BaseModel):
'''Record some identifying information about a dog.'''
name: str = Field(..., description="The dog's name")
color: str = Field(..., description="The dog's color")
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
OpenAI functions example:
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
prompt = ChatPromptTemplate.from_messages(
[
("system", "You are an extraction algorithm. Please extract every possible instance"),
('human', '{input}')
]
)
structured_llm = create_structured_output_runnable(
RecordDog,
llm,
mode="openai-tools",
enforce_function_usage=True,
return_single=True
)
structured_llm.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")
OpenAI tools example with dict schema (mode="openai-tools"):
.. code-block:: python
from typing import Optional
from langchain.chains import create_structured_output_runnable
from langchain_openai import ChatOpenAI
dog_schema = {
"type": "function",
"function": {
"name": "record_dog",
"description": "Record some identifying information about a dog.",
"parameters": {
"type": "object",
"properties": {
"name": {
"description": "The dog's name",
"type": "string"
},
"color": {
"description": "The dog's color",
"type": "string"
},
"fav_food": {
"description": "The dog's favorite food",
"type": "string"
}
},
"required": ["name", "color"]
}
}
}
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = create_structured_output_runnable(
dog_schema,
llm,
mode="openai-tools",
enforce_function_usage=True,
return_single=True
)
structured_llm.invoke("Harry was a chubby brown beagle who loved chicken")
# -> {'name': 'Harry', 'color': 'brown', 'fav_food': 'chicken'}
OpenAI functions example (mode="openai-functions"):
.. code-block:: python
from typing import Optional
@ -189,7 +287,7 @@ def create_structured_output_runnable(
chain = prompt | structured_llm
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
# -> Dog(name="Harry", color="brown", fav_food="chicken")
OpenAI json response format example:
OpenAI json response format example (mode="openai-json"):
.. code-block:: python
from typing import Optional
@ -219,26 +317,96 @@ def create_structured_output_runnable(
chain = prompt | structured_llm
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
""" # noqa: E501
if mode == "openai-functions":
# for backwards compatibility
force_function_usage = kwargs.get(
"enforce_single_function_usage", enforce_function_usage
)
if mode == "openai-tools":
# Protect against typos in kwargs
keys_in_kwargs = set(kwargs.keys())
# Backwards compatibility keys
unrecognized_keys = keys_in_kwargs - {"enforce_single_function_usage"}
if unrecognized_keys:
raise TypeError(
f"Got an unexpected keyword argument(s): {unrecognized_keys}."
)
return _create_openai_tools_runnable(
output_schema,
llm,
prompt=prompt,
output_parser=output_parser,
enforce_tool_usage=force_function_usage,
first_tool_only=return_single,
)
elif mode == "openai-functions":
return _create_openai_functions_structured_output_runnable(
output_schema,
llm,
prompt=prompt,
output_parser=output_parser,
enforce_single_function_usage=enforce_single_function_usage,
**kwargs,
enforce_single_function_usage=force_function_usage,
**kwargs, # llm-specific kwargs
)
elif mode == "openai-json":
if force_function_usage:
raise ValueError(
"enforce_single_function_usage is not supported for mode='openai-json'."
)
return _create_openai_json_runnable(
output_schema, llm, prompt=prompt, output_parser=output_parser, **kwargs
)
else:
raise ValueError(
f"Invalid mode {mode}. Expected one of 'openai-functions', "
f"Invalid mode {mode}. Expected one of 'openai-tools', 'openai-functions', "
f"'openai-json'."
)
def _create_openai_tools_runnable(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    llm: Runnable,
    *,
    prompt: Optional[BasePromptTemplate],
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]],
    enforce_tool_usage: bool,
    first_tool_only: bool,
) -> Runnable:
    """Bind a single OpenAI tool to the model and attach an output parser.

    Args:
        tool: Schema to extract — a dict, Pydantic class, or callable.
        llm: Chat model supporting the OpenAI 'tools' bind kwargs.
        prompt: Optional prompt to prepend to the sequence.
        output_parser: Optional explicit parser; a default is derived from
            ``tool`` when None.
        enforce_tool_usage: If True, set ``tool_choice`` so the model must
            call this tool.
        first_tool_only: Whether the default parser returns one result
            instead of a list.

    Returns:
        A runnable sequence: (prompt |) bound llm | parser.
    """
    oai_tool = convert_to_openai_tool(tool)
    bind_kwargs: Dict[str, Any] = {"tools": [oai_tool]}
    if enforce_tool_usage:
        # Force the model to call exactly this tool on every invocation.
        bind_kwargs["tool_choice"] = {
            "type": "function",
            "function": {"name": oai_tool["function"]["name"]},
        }
    parser = output_parser or _get_openai_tool_output_parser(
        tool, first_tool_only=first_tool_only
    )
    bound_llm = llm.bind(**bind_kwargs)
    if prompt:
        return prompt | bound_llm | parser
    return bound_llm | parser
def _get_openai_tool_output_parser(
    tool: Union[Dict[str, Any], Type[BaseModel], Callable],
    *,
    first_tool_only: bool = False,
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    """Choose a default output parser for a single OpenAI tool.

    Pydantic schemas get a ``PydanticToolsParser`` that reconstructs model
    instances; dict/callable schemas get a ``JsonOutputKeyToolsParser``
    keyed by the converted tool's function name.
    """
    if isinstance(tool, type) and issubclass(tool, BaseModel):
        return PydanticToolsParser(tools=[tool], first_tool_only=first_tool_only)
    tool_name = convert_to_openai_tool(tool)["function"]["name"]
    return JsonOutputKeyToolsParser(
        key_name=tool_name, first_tool_only=first_tool_only
    )
def get_openai_output_parser(
functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
@ -255,11 +423,10 @@ def get_openai_output_parser(
not a Pydantic class, then the output parser will automatically extract
only the function arguments and not the function name.
"""
function_names = [convert_to_openai_function(f)["name"] for f in functions]
if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
if len(functions) > 1:
pydantic_schema: Union[Dict, Type[BaseModel]] = {
name: fn for name, fn in zip(function_names, functions)
convert_to_openai_function(fn)["name"]: fn for fn in functions
}
else:
pydantic_schema = functions[0]
@ -304,7 +471,7 @@ def _create_openai_functions_structured_output_runnable(
prompt: Optional[BasePromptTemplate] = None,
*,
output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
**kwargs: Any,
**llm_kwargs: Any,
) -> Runnable:
if isinstance(output_schema, dict):
function: Any = {
@ -331,5 +498,5 @@ def _create_openai_functions_structured_output_runnable(
llm,
prompt=prompt,
output_parser=output_parser,
**kwargs,
**llm_kwargs,
)

@ -22,6 +22,16 @@ class JsonOutputToolsParser(BaseGenerationOutputParser[Any]):
"""
return_id: bool = False
"""Whether to return the tool call id."""
first_tool_only: bool = False
"""Whether to return only the first tool call.
If False, the result will be a list of tool calls, or an empty list
if no tool calls are found.
If true, and multiple tool calls are found, only the first one will be returned,
and the other tool calls will be ignored.
If no tool calls are found, None will be returned.
"""
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
generation = result[0]
@ -65,6 +75,8 @@ class JsonOutputToolsParser(BaseGenerationOutputParser[Any]):
final_tools.append(parsed)
if exceptions:
raise OutputParserException("\n\n".join(exceptions))
if self.first_tool_only:
return final_tools[0] if final_tools else None
return final_tools
@ -73,21 +85,37 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
key_name: str
"""The type of tools to return."""
return_single: bool = False
"""Whether to return only the first tool call."""
def __init__(self, key_name: str, **kwargs: Any) -> None:
"""Allow init with positional args."""
# Backwards compatibility for old argument name.
if "return_single" in kwargs:
if not kwargs.get("first_tool_only"):
kwargs["first_tool_only"] = kwargs.pop("return_single")
else:
raise ValueError(
"Cannot use both 'return_single' and 'first_tool_only' arguments."
)
super().__init__(key_name=key_name, **kwargs)
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
results = super().parse_result(result, partial=partial)
results = [res for res in results if res["type"] == self.key_name]
parsed_result = super().parse_result(result, partial=partial)
if self.first_tool_only:
single_result = (
parsed_result
if parsed_result and parsed_result["type"] == self.key_name
else None
)
if self.return_id:
return single_result
elif single_result:
return single_result["args"]
else:
return None
parsed_result = [res for res in parsed_result if res["type"] == self.key_name]
if not self.return_id:
results = [res["args"] for res in results]
if self.return_single:
return results[0] if results else None
return results
parsed_result = [res["args"] for res in parsed_result]
return parsed_result
class PydanticToolsParser(JsonOutputToolsParser):
@ -96,6 +124,12 @@ class PydanticToolsParser(JsonOutputToolsParser):
tools: List[Type[BaseModel]]
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
results = super().parse_result(result, partial=partial)
parsed_result = super().parse_result(result, partial=partial)
name_dict = {tool.__name__: tool for tool in self.tools}
return [name_dict[res["type"]](**res["args"]) for res in results]
if self.first_tool_only:
return (
name_dict[parsed_result["type"]](**parsed_result["args"])
if parsed_result
else None
)
return [name_dict[res["type"]](**res["args"]) for res in parsed_result]

Loading…
Cancel
Save