langchain/templates/csv-agent/csv_agent/agent.py

from pathlib import Path

import pandas as pd
from langchain.agents import AgentExecutor, OpenAIFunctionsAgent
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.tools.retriever import create_retriever_tool
from langchain.vectorstores import FAISS
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_experimental.tools import PythonAstREPLTool

# Directory two levels above this file; the data files are resolved against it.
MAIN_DIR = Path(__file__).parents[1]

# Cap both display dimensions at the same limit.
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, 20)

# Retriever tool backed by the FAISS index stored in the `titanic_data` folder.
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.load_local(MAIN_DIR / "titanic_data", embedding_model)
retriever_tool = create_retriever_tool(
    retriever=vectorstore.as_retriever(),
    name="person_name_search",
    description="Search for a person by name",
)


# System prompt for the agent. `{dhead}` is the only placeholder; it is filled
# with the markdown rendering of `df.head()`. The tool names mentioned in the
# text (`person_name_search`, `python_repl`) must match the names given to the
# tools when they are constructed.
TEMPLATE = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
It is important to understand the attributes of the dataframe before working with it. This is the result of running `df.head().to_markdown()`

<df>
{dhead}
</df>

You are not meant to use only these rows to answer questions - they are meant as a way of telling you about the shape and schema of the dataframe.
You also do not have to use only the information here to answer questions - you can run intermediate queries to do exploratory data analysis to give you more information as needed.

You have a tool called `person_name_search` through which you can look up a person by name and find the records corresponding to people with similar name as the query.
You should only really use this if your search term contains a person's name. Otherwise, try to solve it with code.

For example:

<question>How old is Jane?</question>
<logic>Use `person_name_search` since you can use the query `Jane`</logic>

<question>Who has id 320</question>
<logic>Use `python_repl` since even though the question is about a person, you don't know their name so you can't include it.</logic>
"""  # noqa: E501


# Argument schema for the `python_repl` tool (supplied to it as `args_schema`).
# Documented with comments rather than a docstring so the generated schema
# carries no extra description.
class PythonInputs(BaseModel):
    # The code to execute, passed as a single required string.
    query: str = Field(..., description="code snippet to run")


# Load the dataset once at import time; the same dataframe object is handed to
# the REPL tool below.
df = pd.read_csv(MAIN_DIR / "titanic.csv")
_df_preview = df.head().to_markdown()

# Fully rendered system prompt text. The prompt below no longer consumes this
# string; the name is kept so code importing `template` keeps working.
template = TEMPLATE.format(dhead=_df_preview)

# Hand the raw TEMPLATE to the prompt and bind `dhead` as a partial variable.
# Passing the already-formatted text as ("system", ...) would make the prompt
# parse it as a template a second time, so any "{" or "}" in the dataframe
# preview would be misread as a prompt variable.
# NOTE(review): the scratchpad placeholder precedes the human message; confirm
# this ordering is intended.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", TEMPLATE),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
        ("human", "{input}"),
    ]
).partial(dhead=_df_preview)

# Python tool with `df` pre-bound in its locals; its name must match the
# `python_repl` name referenced in TEMPLATE.
repl = PythonAstREPLTool(
    locals={"df": df},
    name="python_repl",
    description="Runs code and returns the output of the final line",
    args_schema=PythonInputs,
)
tools = [repl, retriever_tool]
agent = OpenAIFunctionsAgent(
    llm=ChatOpenAI(temperature=0, model="gpt-4"), prompt=prompt, tools=tools
)
# Pipe the executor's result through a function that keeps only the value
# stored under the "output" key.
agent_executor = AgentExecutor(
    agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate"
) | (lambda x: x["output"])

# Typing for playground inputs


# Input schema for the chain: one string field, later supplied to `with_types`.
class AgentInputs(BaseModel):
    input: str  # matches the `{input}` variable used in the prompt's human message


# Rebind the module-level name to the result of `with_types`, declaring
# AgentInputs as the chain's input type.
agent_executor = agent_executor.with_types(input_type=AgentInputs)
Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`from pathlib import Path`

Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00			`import pandas as pd`
Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`from langchain.agents import AgentExecutor, OpenAIFunctionsAgent`
Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00			`from langchain.chat_models import ChatOpenAI`
			`from langchain.embeddings import OpenAIEmbeddings`
Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder`
Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00			`from langchain.tools.retriever import create_retriever_tool`
Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`from langchain.vectorstores import FAISS`
docs[patch], templates[patch]: Import from core (#14575) Update imports to use core for the low-hanging fruit changes. Ran following ```bash git grep -l 'langchain.schema.runnable' {docs,templates,cookbook} \| xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g' git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} \| xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g' git grep -l 'langchain.schema.messages' {docs,templates,cookbook} \| xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g' git grep -l 'langchain.schema.chat_histry' {docs,templates,cookbook} \| xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g' git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} \| xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g' git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g' git grep -l 'from langchain.tools.base' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g' git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g' git grep -l 'from langchain.llms.base' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g' git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g' git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g' git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.agents\.tools/from 
langchain_core.tools/g' git grep -l 'from langchain.schema.output' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g' git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g' git grep -l 'from langchain.schema.document' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g' git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g' git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g' git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} \| xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g' ``` 2023-12-12 00:49:10 +00:00			`from langchain_core.pydantic_v1 import BaseModel, Field`
Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`from langchain_experimental.tools import PythonAstREPLTool`
Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00
			`MAIN_DIR = Path(__file__).parents[1]`

Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`pd.set_option("display.max_rows", 20)`
			`pd.set_option("display.max_columns", 20)`
Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00
			`embedding_model = OpenAIEmbeddings()`
			`vectorstore = FAISS.load_local(MAIN_DIR / "titanic_data", embedding_model)`
Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`retriever_tool = create_retriever_tool(`
			`vectorstore.as_retriever(), "person_name_search", "Search for a person by name"`
			`)`
Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00

			TEMPLATE = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
			It is important to understand the attributes of the dataframe before working with it. This is the result of running `df.head().to_markdown()`

			`<df>`
			`{dhead}`
			`</df>`

			`You are not meant to use only these rows to answer questions - they are meant as a way of telling you about the shape and schema of the dataframe.`
			`You also do not have use only the information here to answer questions - you can run intermediate queries to do exporatory data analysis to give you more information as needed.`

			You have a tool called `person_name_search` through which you can lookup a person by name and find the records corresponding to people with similar name as the query.
			`You should only really use this if your search term contains a persons name. Otherwise, try to solve it with code.`

			`For example:`

			`<question>How old is Jane?</question>`
			<logic>Use `person_name_search` since you can use the query `Jane`</logic>

			`<question>Who has id 320</question>`
			<logic>Use `python_repl` since even though the question is about a person, you don't know their name so you can't include it.</logic>
Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`""" # noqa: E501`
Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00

			`class PythonInputs(BaseModel):`
			`query: str = Field(description="code snippet to run")`


various templates improvements (#12500) 2023-10-29 05:13:22 +00:00			`df = pd.read_csv(MAIN_DIR / "titanic.csv")`
Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00			`template = TEMPLATE.format(dhead=df.head().to_markdown())`

Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`prompt = ChatPromptTemplate.from_messages(`
			`[`
			`("system", template),`
			`MessagesPlaceholder(variable_name="agent_scratchpad"),`
			`("human", "{input}"),`
			`]`
			`)`

			`repl = PythonAstREPLTool(`
			`locals={"df": df},`
			`name="python_repl",`
			`description="Runs code and returns the output of the final line",`
			`args_schema=PythonInputs,`
			`)`
Templates (#12294) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com> 2023-10-26 01:47:42 +00:00			`tools = [repl, retriever_tool]`
Format Templates (#12396) 2023-10-27 02:44:30 +00:00			`agent = OpenAIFunctionsAgent(`
			`llm=ChatOpenAI(temperature=0, model="gpt-4"), prompt=prompt, tools=tools`
			`)`
			`agent_executor = AgentExecutor(`
			`agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate"`
notebook fmt (#12498) 2023-10-29 22:50:09 +00:00			`) \| (lambda x: x["output"])`
various templates improvements (#12500) 2023-10-29 05:13:22 +00:00
			`# Typing for playground inputs`


			`class AgentInputs(BaseModel):`
			`input: str`


			`agent_executor = agent_executor.with_types(input_type=AgentInputs)`