From 568c4f0d81d8ac332ba31143c59bc5160f47abc9 Mon Sep 17 00:00:00 2001 From: Bhupendra Aole Date: Wed, 26 Apr 2023 19:05:53 -0400 Subject: [PATCH] Close dataframe column names are being treated as one by the LLM (#3611) We send a sample of the dataframe to the LLM using df.head(). If column names appear close together in that output, the LLM treats two column names as one, returning incorrect results. ![image](https://user-images.githubusercontent.com/4707543/234678692-97851fa0-9e12-44db-92ec-9ad9f3545ae2.png) In the above case, the LLM uses **Org Week** as the column name instead of **Week** when asked about a specific week. Returning head() as Markdown separates the column names, so the LLM uses the correct column name. ![image](https://user-images.githubusercontent.com/4707543/234678945-c6d7b218-143e-4e70-9e17-77dc64841a49.png) --- langchain/agents/agent_toolkits/pandas/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/agents/agent_toolkits/pandas/base.py b/langchain/agents/agent_toolkits/pandas/base.py index 2d4b9fb5..e6a4326e 100644 --- a/langchain/agents/agent_toolkits/pandas/base.py +++ b/langchain/agents/agent_toolkits/pandas/base.py @@ -35,7 +35,7 @@ def create_pandas_dataframe_agent( prompt = ZeroShotAgent.create_prompt( tools, prefix=prefix, suffix=suffix, input_variables=input_variables ) - partial_prompt = prompt.partial(df=str(df.head())) + partial_prompt = prompt.partial(df=str(df.head().to_markdown())) llm_chain = LLMChain( llm=llm, prompt=partial_prompt,