# langchain/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/base.py

"""Agent for working with pandas objects."""
2024-01-30 17:39:46 +00:00
import warnings
from typing import Any, Dict, List, Literal, Optional, Sequence, Union
from langchain.agents import AgentType, create_openai_tools_agent, create_react_agent
from langchain.agents.agent import (
AgentExecutor,
BaseMultiActionAgent,
BaseSingleActionAgent,
RunnableAgent,
RunnableMultiActionAgent,
)
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
2024-01-30 17:39:46 +00:00
from langchain.agents.openai_functions_agent.base import (
OpenAIFunctionsAgent,
create_openai_functions_agent,
)
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import LanguageModelLike
from langchain_core.messages import SystemMessage
from langchain_core.prompts import (
BasePromptTemplate,
ChatPromptTemplate,
PromptTemplate,
)
2024-01-30 17:39:46 +00:00
from langchain_core.tools import BaseTool
from langchain_core.utils.interactive_env import is_interactive_env
from langchain_experimental.agents.agent_toolkits.pandas.prompt import (
FUNCTIONS_WITH_DF,
FUNCTIONS_WITH_MULTI_DF,
MULTI_DF_PREFIX,
MULTI_DF_PREFIX_FUNCTIONS,
PREFIX,
PREFIX_FUNCTIONS,
SUFFIX_NO_DF,
SUFFIX_WITH_DF,
SUFFIX_WITH_MULTI_DF,
)
from langchain_experimental.tools.python.tool import PythonAstREPLTool
def _get_multi_prompt(
    dfs: List[Any],
    *,
    prefix: Optional[str] = None,
    suffix: Optional[str] = None,
    include_df_in_prompt: Optional[bool] = True,
    number_of_head_rows: int = 5,
) -> BasePromptTemplate:
    """Assemble the text prompt for an agent working on several dataframes.

    The template is ``prefix``, a ``{tools}`` placeholder, ``FORMAT_INSTRUCTIONS``
    and the suffix, joined by blank lines.

    Args:
        dfs: The dataframes the agent will operate on.
        prefix: Text placed first in the template; ``MULTI_DF_PREFIX`` when None.
        suffix: Text placed last in the template. When None,
            ``SUFFIX_WITH_MULTI_DF`` is used if ``include_df_in_prompt`` is
            truthy, otherwise ``SUFFIX_NO_DF``.
        include_df_in_prompt: Only consulted when ``suffix`` is None (see above).
        number_of_head_rows: Rows taken from the head of each dataframe when the
            template has a ``dfs_head`` variable.

    Returns:
        The prompt with ``dfs_head`` and ``num_dfs`` pre-filled whenever the
        template declares them as input variables.
    """
    if suffix is None:
        suffix = SUFFIX_WITH_MULTI_DF if include_df_in_prompt else SUFFIX_NO_DF
    if prefix is None:
        prefix = MULTI_DF_PREFIX

    sections = [prefix, "{tools}", FORMAT_INSTRUCTIONS, suffix]
    result = PromptTemplate.from_template("\n\n".join(sections)).partial()

    # Only render the dataframe previews when the template actually asks for them.
    if "dfs_head" in result.input_variables:
        previews = [frame.head(number_of_head_rows).to_markdown() for frame in dfs]
        result = result.partial(dfs_head="\n\n".join(previews))
    if "num_dfs" in result.input_variables:
        result = result.partial(num_dfs=str(len(dfs)))
    return result
def _get_single_prompt(
    df: Any,
    *,
    prefix: Optional[str] = None,
    suffix: Optional[str] = None,
    include_df_in_prompt: Optional[bool] = True,
    number_of_head_rows: int = 5,
) -> BasePromptTemplate:
    """Assemble the text prompt for an agent working on a single dataframe.

    The template is ``prefix``, a ``{tools}`` placeholder, ``FORMAT_INSTRUCTIONS``
    and the suffix, joined by blank lines.

    Args:
        df: The dataframe the agent will operate on.
        prefix: Text placed first in the template; ``PREFIX`` when None.
        suffix: Text placed last in the template. When None, ``SUFFIX_WITH_DF``
            is used if ``include_df_in_prompt`` is truthy, otherwise
            ``SUFFIX_NO_DF``.
        include_df_in_prompt: Only consulted when ``suffix`` is None (see above).
        number_of_head_rows: Rows taken from the head of ``df`` when the template
            has a ``df_head`` variable.

    Returns:
        The prompt with ``df_head`` pre-filled whenever the template declares it
        as an input variable.
    """
    if suffix is None:
        suffix = SUFFIX_WITH_DF if include_df_in_prompt else SUFFIX_NO_DF
    if prefix is None:
        prefix = PREFIX

    sections = [prefix, "{tools}", FORMAT_INSTRUCTIONS, suffix]
    result = PromptTemplate.from_template("\n\n".join(sections)).partial()

    # Only render the dataframe preview when the template actually asks for it.
    if "df_head" in result.input_variables:
        preview = str(df.head(number_of_head_rows).to_markdown())
        result = result.partial(df_head=preview)
    return result
2024-01-30 17:39:46 +00:00
def _get_prompt(df: Any, **kwargs: Any) -> BasePromptTemplate:
    """Build the text prompt, dispatching on whether ``df`` is a list.

    A ``list`` is handed to ``_get_multi_prompt``; anything else goes to
    ``_get_single_prompt``. ``kwargs`` are forwarded unchanged.
    """
    if isinstance(df, list):
        return _get_multi_prompt(df, **kwargs)
    return _get_single_prompt(df, **kwargs)
def _get_functions_single_prompt(
    df: Any,
    *,
    prefix: Optional[str] = None,
    suffix: Optional[str] = "",
    include_df_in_prompt: Optional[bool] = True,
    number_of_head_rows: int = 5,
) -> ChatPromptTemplate:
    """Build the chat prompt for a functions/tools agent on a single dataframe.

    The system message is ``prefix + suffix``.

    Args:
        df: The dataframe the agent will operate on.
        prefix: Start of the system message; ``PREFIX_FUNCTIONS`` when None.
        suffix: End of the system message. When ``include_df_in_prompt`` is
            truthy, an empty or None suffix falls back to ``FUNCTIONS_WITH_DF``
            and the chosen text is formatted with ``df_head``. Otherwise the
            suffix is used verbatim, with None treated as the empty string.
        include_df_in_prompt: Whether to render the head of ``df`` into the suffix.
        number_of_head_rows: Rows taken from the head of ``df`` for the preview.

    Returns:
        The prompt produced by ``OpenAIFunctionsAgent.create_prompt`` for the
        assembled system message.
    """
    if include_df_in_prompt:
        df_head = str(df.head(number_of_head_rows).to_markdown())
        suffix = (suffix or FUNCTIONS_WITH_DF).format(df_head=df_head)
    elif suffix is None:
        # Callers forward ``suffix=None`` for "not provided"; without this the
        # concatenation below raises TypeError (str + None).
        suffix = ""
    prefix = prefix if prefix is not None else PREFIX_FUNCTIONS
    system_message = SystemMessage(content=prefix + suffix)
    return OpenAIFunctionsAgent.create_prompt(system_message=system_message)
def _get_functions_multi_prompt(
    dfs: Any,
    *,
    prefix: Optional[str] = "",
    suffix: Optional[str] = "",
    include_df_in_prompt: Optional[bool] = True,
    number_of_head_rows: int = 5,
) -> ChatPromptTemplate:
    """Build the chat prompt for a functions/tools agent on several dataframes.

    The system message is ``prefix + suffix``.

    Args:
        dfs: The dataframes the agent will operate on.
        prefix: Start of the system message. An empty or None prefix falls back
            to ``MULTI_DF_PREFIX_FUNCTIONS``; the chosen text is formatted with
            ``num_dfs`` (the number of dataframes).
        suffix: End of the system message. When ``include_df_in_prompt`` is
            truthy, an empty or None suffix falls back to
            ``FUNCTIONS_WITH_MULTI_DF`` and the chosen text is formatted with
            ``dfs_head``. Otherwise the suffix is used verbatim, with None
            treated as the empty string.
        include_df_in_prompt: Whether to render each dataframe's head into the
            suffix.
        number_of_head_rows: Rows taken from the head of each dataframe.

    Returns:
        The prompt produced by ``OpenAIFunctionsAgent.create_prompt`` for the
        assembled system message.
    """
    if include_df_in_prompt:
        dfs_head = "\n\n".join(
            [d.head(number_of_head_rows).to_markdown() for d in dfs]
        )
        suffix = (suffix or FUNCTIONS_WITH_MULTI_DF).format(dfs_head=dfs_head)
    elif suffix is None:
        # Callers forward ``suffix=None`` for "not provided"; without this the
        # concatenation below raises TypeError (str + None).
        suffix = ""
    prefix = (prefix or MULTI_DF_PREFIX_FUNCTIONS).format(num_dfs=str(len(dfs)))
    system_message = SystemMessage(content=prefix + suffix)
    return OpenAIFunctionsAgent.create_prompt(system_message=system_message)
2024-01-30 17:39:46 +00:00
def _get_functions_prompt(df: Any, **kwargs: Any) -> ChatPromptTemplate:
    """Build the functions/tools chat prompt, dispatching on whether ``df`` is a list.

    A ``list`` is handed to ``_get_functions_multi_prompt``; anything else goes
    to ``_get_functions_single_prompt``. ``kwargs`` are forwarded unchanged.
    """
    if isinstance(df, list):
        return _get_functions_multi_prompt(df, **kwargs)
    return _get_functions_single_prompt(df, **kwargs)
def create_pandas_dataframe_agent(
    llm: LanguageModelLike,
    df: Any,
    agent_type: Union[
        AgentType, Literal["openai-tools"]
    ] = AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    callback_manager: Optional[BaseCallbackManager] = None,
    prefix: Optional[str] = None,
    suffix: Optional[str] = None,
    input_variables: Optional[List[str]] = None,
    verbose: bool = False,
    return_intermediate_steps: bool = False,
    max_iterations: Optional[int] = 15,
    max_execution_time: Optional[float] = None,
    early_stopping_method: str = "force",
    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
    include_df_in_prompt: Optional[bool] = True,
    number_of_head_rows: int = 5,
    extra_tools: Sequence[BaseTool] = (),
    **kwargs: Any,
) -> AgentExecutor:
    """Construct a Pandas agent from an LLM and dataframe(s).

    Args:
        llm: Language model to use for the agent.
        df: Pandas dataframe or list of Pandas dataframes.
        agent_type: One of "openai-tools", "openai-functions", or
            "zero-shot-react-description". Defaults to "zero-shot-react-description".
            "openai-tools" is recommended over "openai-functions".
        callback_manager: DEPRECATED. Pass "callbacks" key into 'agent_executor_kwargs'
            instead to pass constructor callbacks to AgentExecutor.
        prefix: Prompt prefix string.
        suffix: Prompt suffix string.
        input_variables: DEPRECATED. Input variables automatically inferred from
            constructed prompt.
        verbose: AgentExecutor verbosity.
        return_intermediate_steps: Passed to AgentExecutor init.
        max_iterations: Passed to AgentExecutor init.
        max_execution_time: Passed to AgentExecutor init.
        early_stopping_method: Passed to AgentExecutor init.
        agent_executor_kwargs: Arbitrary additional AgentExecutor args.
        include_df_in_prompt: Whether to include the first number_of_head_rows in the
            prompt. For the "zero-shot-react-description" agent this must be
            explicitly set to None whenever suffix is given (the default of True
            raises ValueError in that case).
        number_of_head_rows: Number of initial rows to include in prompt if
            include_df_in_prompt is True.
        extra_tools: Additional tools to give to agent on top of a PythonAstREPLTool.
        **kwargs: DEPRECATED. Not used, kept for backwards compatibility.

    Returns:
        An AgentExecutor with the specified agent_type agent and access to
        a PythonAstREPLTool with the DataFrame(s) and any user-provided extra_tools.

    Raises:
        ImportError: If pandas is not installed.
        ValueError: If df (or any element of a df list) is not a pandas DataFrame,
            if both suffix and include_df_in_prompt are set for the
            "zero-shot-react-description" agent, or if agent_type is unsupported.

    Example:

        .. code-block:: python

            from langchain_openai import ChatOpenAI
            from langchain_experimental.agents import create_pandas_dataframe_agent
            import pandas as pd

            df = pd.read_csv("titanic.csv")
            llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
            agent_executor = create_pandas_dataframe_agent(
                llm,
                df,
                agent_type="openai-tools",
                verbose=True
            )

    """  # noqa: E501
    try:
        import pandas as pd
    except ImportError as e:
        raise ImportError(
            "pandas package not found, please install with `pip install pandas`"
        ) from e

    if is_interactive_env():
        pd.set_option("display.max_columns", None)

    for _df in df if isinstance(df, list) else [df]:
        if not isinstance(_df, pd.DataFrame):
            raise ValueError(f"Expected pandas DataFrame, got {type(_df)}")

    # Deprecated inputs are only reported, never used.
    if input_variables:
        kwargs["input_variables"] = input_variables
    if kwargs:
        warnings.warn(
            f"Received additional kwargs {kwargs} which are no longer supported.",
            stacklevel=2,  # attribute the warning to the caller's line
        )

    # Names under which the dataframe(s) are exposed to the Python tool:
    # "df" for a single dataframe, "df1", "df2", ... for a list.
    df_locals: Dict[str, Any]
    if isinstance(df, list):
        df_locals = {f"df{i}": frame for i, frame in enumerate(df, start=1)}
    else:
        df_locals = {"df": df}
    # NOTE(review): PythonAstREPLTool presumably executes model-generated Python
    # against these locals -- confirm the deployment sandboxes it appropriately.
    tools = [PythonAstREPLTool(locals=df_locals)] + list(extra_tools)

    if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
        # include_df_in_prompt defaults to True, so a caller supplying a custom
        # suffix has to pass include_df_in_prompt=None explicitly.
        if include_df_in_prompt is not None and suffix is not None:
            raise ValueError(
                "If suffix is specified, include_df_in_prompt should not be."
            )
        prompt = _get_prompt(
            df,
            prefix=prefix,
            suffix=suffix,
            include_df_in_prompt=include_df_in_prompt,
            number_of_head_rows=number_of_head_rows,
        )
        agent: Union[BaseSingleActionAgent, BaseMultiActionAgent] = RunnableAgent(
            runnable=create_react_agent(llm, tools, prompt),  # type: ignore
            input_keys_arg=["input"],
            return_keys_arg=["output"],
        )
    elif agent_type in (AgentType.OPENAI_FUNCTIONS, "openai-tools"):
        prompt = _get_functions_prompt(
            df,
            prefix=prefix,
            # The functions prompt builders concatenate prefix + suffix, so a
            # missing suffix must be "" rather than None; otherwise
            # include_df_in_prompt=False/None with no suffix raises TypeError.
            suffix=suffix if suffix is not None else "",
            include_df_in_prompt=include_df_in_prompt,
            number_of_head_rows=number_of_head_rows,
        )
        if agent_type == AgentType.OPENAI_FUNCTIONS:
            agent = RunnableAgent(
                runnable=create_openai_functions_agent(llm, tools, prompt),  # type: ignore
                input_keys_arg=["input"],
                return_keys_arg=["output"],
            )
        else:
            agent = RunnableMultiActionAgent(
                runnable=create_openai_tools_agent(llm, tools, prompt),  # type: ignore
                input_keys_arg=["input"],
                return_keys_arg=["output"],
            )
    else:
        raise ValueError(
            f"Agent type {agent_type} not supported at the moment. Must be one of "
            "'openai-tools', 'openai-functions', or 'zero-shot-react-description'."
        )

    return AgentExecutor(
        agent=agent,
        tools=tools,
        callback_manager=callback_manager,
        verbose=verbose,
        return_intermediate_steps=return_intermediate_steps,
        max_iterations=max_iterations,
        max_execution_time=max_execution_time,
        early_stopping_method=early_stopping_method,
        **(agent_executor_kwargs or {}),
    )