Update marathon notebook (#3408)

Fixes #3404
This commit is contained in:
Zander Chase 2023-04-23 18:14:11 -07:00 committed by GitHub
parent b4de839ed8
commit 73bc70b4fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 199 additions and 262 deletions

View File

@ -13,7 +13,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"id": "ef972313-c05a-4c49-8fd1-03e599e21033", "id": "ef972313-c05a-4c49-8fd1-03e599e21033",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -26,7 +26,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "1cff42fd", "id": "1cff42fd",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -34,13 +34,13 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# General \n", "# General \n",
"import os\n",
"import pandas as pd\n", "import pandas as pd\n",
"from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT\n", "from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT\n",
"from langchain.chat_models import ChatOpenAI\n", "from langchain.chat_models import ChatOpenAI\n",
"\n", "\n",
"from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent\n", "from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent\n",
"from langchain.docstore.document import Document\n", "from langchain.docstore.document import Document\n",
"from langchain.chains import RetrievalQA\n",
"import asyncio\n", "import asyncio\n",
"import nest_asyncio\n", "import nest_asyncio\n",
"\n", "\n",
@ -51,14 +51,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"id": "01283ac7-1da0-41ba-8011-bd455d21dd82", "id": "01283ac7-1da0-41ba-8011-bd455d21dd82",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=1.0)" "llm = ChatOpenAI(model_name=\"gpt-4\", temperature=1.0)"
] ]
}, },
{ {
@ -68,7 +68,7 @@
"source": [ "source": [
"### Set up tools\n", "### Set up tools\n",
"\n", "\n",
"* We'll set up an AutoGPT with a `search` tool, and `write-file` tool, and a `read-file` tool, and a web browsing tool" "* We'll set up an AutoGPT with a `search` tool, and `write-file` tool, and a `read-file` tool, a web browsing tool, and a tool to interact with a CSV file via a python REPL"
] ]
}, },
{ {
@ -76,12 +76,12 @@
"id": "708a426f", "id": "708a426f",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Define any other `tools` you want to use here" "Define any other `tools` you want to use below:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"id": "cef4c150-0ef1-4a33-836b-01062fec134e", "id": "cef4c150-0ef1-4a33-836b-01062fec134e",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -89,86 +89,46 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Tools\n", "# Tools\n",
"import os\n",
"from contextlib import contextmanager\n",
"from typing import Optional\n", "from typing import Optional\n",
"from langchain.agents import tool\n", "from langchain.agents import tool\n",
"from langchain.tools.file_management.read import ReadFileTool\n", "from langchain.tools.file_management.read import ReadFileTool\n",
"from langchain.tools.file_management.write import WriteFileTool\n", "from langchain.tools.file_management.write import WriteFileTool\n",
"\n", "\n",
"@tool\n", "ROOT_DIR = \"./data/\"\n",
"def process_csv(csv_file_path: str, instructions: str, output_path: Optional[str] = None) -> str:\n", "\n",
" \"\"\"Process a CSV by with pandas in a limited REPL. Only use this after writing data to disk as a csv file. Any figures must be saved to disk to be viewed by the human. Instructions should be written in natural language, not code. Assume the dataframe is already loaded.\"\"\"\n", "@contextmanager\n",
"def pushd(new_dir):\n",
" \"\"\"Context manager for changing the current working directory.\"\"\"\n",
" prev_dir = os.getcwd()\n",
" os.chdir(new_dir)\n",
" try:\n", " try:\n",
" df = pd.read_csv(csv_file_path)\n", " yield\n",
" except Exception as e:\n", " finally:\n",
" return f\"Error: {e}\"\n", " os.chdir(prev_dir)\n",
" agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True)\n",
" if output_path is not None:\n",
" instructions += f\" Save output to disk at {output_path}\"\n",
" try:\n",
" return agent.run(instructions)\n",
" except Exception as e:\n",
" return f\"Error: {e}\"\n"
]
},
{
"cell_type": "markdown",
"id": "51c07298-00e0-42d6-8aff-bd2e6bbd35a3",
"metadata": {},
"source": [
"**Web Search Tool**\n",
"\n", "\n",
"No need for API Tokens to use this tool, but it will require an optional dependency"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4afdedb2-f295-4ab8-9397-3640f5eeeed3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# !pip install duckduckgo_search"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "45f143de-e49e-4e27-88eb-ee44a4fdf933",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import json\n",
"from duckduckgo_search import ddg"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e2e799f4-86fb-4190-a298-4ae5c7b7a540",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"@tool\n", "@tool\n",
"def web_search(query: str, num_results: int = 8) -> str:\n", "def process_csv(\n",
" \"\"\"Useful for general internet search queries.\"\"\"\n", " csv_file_path: str, instructions: str, output_path: Optional[str] = None\n",
" search_results = []\n", ") -> str:\n",
" if not query:\n", " \"\"\"Process a CSV by with pandas in a limited REPL.\\\n",
" return json.dumps(search_results)\n", " Only use this after writing data to disk as a csv file.\\\n",
"\n", " Any figures must be saved to disk to be viewed by the human.\\\n",
" results = ddg(query, max_results=num_results)\n", " Instructions should be written in natural language, not code. Assume the dataframe is already loaded.\"\"\"\n",
" if not results:\n", " with pushd(ROOT_DIR):\n",
" return json.dumps(search_results)\n", " try:\n",
"\n", " df = pd.read_csv(csv_file_path)\n",
" for j in results:\n", " except Exception as e:\n",
" search_results.append(j)\n", " return f\"Error: {e}\"\n",
"\n", " agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True)\n",
" return json.dumps(search_results, ensure_ascii=False, indent=4)" " if output_path is not None:\n",
" instructions += f\" Save output to disk at {output_path}\"\n",
" try:\n",
" result = agent.run(instructions)\n",
" return result\n",
" except Exception as e:\n",
" return f\"Error: {e}\""
] ]
}, },
{ {
@ -183,7 +143,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 5,
"id": "6bb5e47b-0f54-4faa-ae42-49a28fa5497b", "id": "6bb5e47b-0f54-4faa-ae42-49a28fa5497b",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -196,7 +156,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 6,
"id": "26b497d7-8e52-4c7f-8e7e-da0a48820a3c", "id": "26b497d7-8e52-4c7f-8e7e-da0a48820a3c",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -252,17 +212,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 7,
"id": "1842929d-f18d-4edc-9fdd-82c929181141", "id": "1842929d-f18d-4edc-9fdd-82c929181141",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.tools.base import BaseTool\n", "from langchain.tools import BaseTool, DuckDuckGoSearchTool\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"from langchain.document_loaders import WebBaseLoader\n",
"from pydantic import Field\n", "from pydantic import Field\n",
"from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain, BaseCombineDocumentsChain\n", "from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain, BaseCombineDocumentsChain\n",
"\n", "\n",
@ -302,7 +261,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 8,
"id": "e6f72bd0", "id": "e6f72bd0",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -324,7 +283,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 9,
"id": "1df7b724", "id": "1df7b724",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -356,7 +315,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 10,
"id": "1233caf3-fbc9-4acb-9faa-01008200633d",
"metadata": {},
"outputs": [],
"source": [
"# !pip install duckduckgo_search\n",
"web_search = DuckDuckGoSearchTool()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "88c8b184-67d7-4c35-84ae-9b14bef8c4e3", "id": "88c8b184-67d7-4c35-84ae-9b14bef8c4e3",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -365,8 +335,8 @@
"source": [ "source": [
"tools = [\n", "tools = [\n",
" web_search,\n", " web_search,\n",
" WriteFileTool(),\n", " WriteFileTool(root_dir=\"./data\"),\n",
" ReadFileTool(),\n", " ReadFileTool(root_dir=\"./data\"),\n",
" process_csv,\n", " process_csv,\n",
" query_website_tool,\n", " query_website_tool,\n",
" # HumanInputRun(), # Activate if you want the permit asking for help from the human\n", " # HumanInputRun(), # Activate if you want the permit asking for help from the human\n",
@ -375,7 +345,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 12,
"id": "709c08c2", "id": "709c08c2",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -398,248 +368,211 @@
"id": "fc9b51ba", "id": "fc9b51ba",
"metadata": {}, "metadata": {},
"source": [ "source": [
"### AutoGPT as a research / data munger \n", "### AutoGPT for Querying the Web\n",
"\n",
"#### `inflation` and `college tuition`\n",
" \n", " \n",
"Let's use AutoGPT as researcher and data munger / cleaner.\n",
" \n", " \n",
"I spent a lot of time over the years crawling data sources and cleaning data. \n", "I've spent a lot of time over the years crawling data sources and cleaning data. Let's see if AutoGPT can help with this!\n",
"\n", "\n",
"Let's see if AutoGPT can do all of this for us!\n", "Here is the prompt for looking up recent boston marathon times and converting them to tabular form."
"\n",
"Here is the prompt comparing `inflation` and `college tuition`."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 13,
"id": "64455d70-a134-4d11-826a-33e34c2ce287", "id": "64455d70-a134-4d11-826a-33e34c2ce287",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
]
},
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"{\n", "{\n",
" \"thoughts\": {\n", " \"thoughts\": {\n",
" \"text\": \"I need to find the winning Boston Marathon times for the past 5 years.\",\n", " \"text\": \"I need to find the winning Boston Marathon times for the past 5 years. I can use the DuckDuckGo Search command to search for this information.\",\n",
" \"reasoning\": \"I'll start by conducting a web search for the requested information.\",\n", " \"reasoning\": \"Using DuckDuckGo Search will help me gather information on the winning times without complications.\",\n",
" \"plan\": \"- Conduct a web search\\n- Query relevant webpage\\n- Generate table\\n- Save data to file\",\n", " \"plan\": \"- Use DuckDuckGo Search to find the winning Boston Marathon times\\n- Generate a table with the year, name, country of origin, and times\\n- Ensure there are no legal complications\",\n",
" \"criticism\": \"None\",\n", " \"criticism\": \"None\",\n",
" \"speak\": \"I will begin by conducting a web search to find the past 5 years' Boston Marathon winning times.\"\n", " \"speak\": \"I will use the DuckDuckGo Search command to find the winning Boston Marathon times for the past 5 years.\"\n",
" },\n", " },\n",
" \"command\": {\n", " \"command\": {\n",
" \"name\": \"web_search\",\n", " \"name\": \"DuckDuckGo Search\",\n",
" \"args\": {\n", " \"args\": {\n",
" \"query\": \"winning Boston Marathon times for the past 5 years\"\n", " \"query\": \"winning Boston Marathon times for the past 5 years ending in 2022\"\n",
" }\n", " }\n",
" }\n", " }\n",
"}\n" "}\n",
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n", "{\n",
" \"thoughts\": {\n", " \"thoughts\": {\n",
" \"text\": \"I found several relevant search results, and I will use the query_webpage command on a specific URL to gather the information.\",\n", " \"text\": \"The DuckDuckGo Search command did not provide the specific information I need. I must switch my approach and use query_webpage command to browse a webpage containing the Boston Marathon winning times for the past 5 years.\",\n",
" \"reasoning\": \"The Boston Athletic Association's official website (www.baa.org) is likely the most accurate source.\",\n", " \"reasoning\": \"The query_webpage command may give me more accurate and comprehensive results compared to the search command.\",\n",
" \"plan\": \"- Query the Boston Athletic Association webpage\\n- Filter and parse the data\\n- Generate table and save to file\",\n", " \"plan\": \"- Use query_webpage command to find the winning Boston Marathon times\\n- Generate a table with the year, name, country of origin, and times\\n- Ensure there are no legal complications\",\n",
" \"criticism\": \"None\",\n", " \"criticism\": \"I may face difficulty in finding the right webpage with the desired information.\",\n",
" \"speak\": \"I will now query the Boston Athletic Association webpage to retrieve the information on the past 5 years' winning times.\"\n", " \"speak\": \"I will use the query_webpage command to find the winning Boston Marathon times for the past 5 years.\"\n",
" },\n",
" \"command\": {\n",
" \"name\": \"DuckDuckGo Search\",\n",
" \"args\": {\n",
" \"query\": \"site with winning Boston Marathon times for the past 5 years ending in 2022\"\n",
" }\n",
" }\n",
"}\n",
"{\n",
" \"thoughts\": {\n",
" \"text\": \"I need to use the query_webpage command to find the information about the winning Boston Marathon times for the past 5 years.\",\n",
" \"reasoning\": \"The previous DuckDuckGo Search command did not provide specific enough results. The query_webpage command might give more accurate and comprehensive results.\",\n",
" \"plan\": \"- Use query_webpage command to find the winning Boston Marathon times\\\\n- Generate a table with the year, name, country of origin, and times\\\\n- Ensure there are no legal complications\",\n",
" \"criticism\": \"I may face difficulty in finding the right webpage with the desired information.\",\n",
" \"speak\": \"I will use the query_webpage command to find the winning Boston Marathon times for the past 5 years.\"\n",
" },\n", " },\n",
" \"command\": {\n", " \"command\": {\n",
" \"name\": \"query_webpage\",\n", " \"name\": \"query_webpage\",\n",
" \"args\": {\n", " \"args\": {\n",
" \"url\": \"https://www.baa.org/races/boston-marathon/results/champions\",\n", " \"url\": \"https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\",\n",
" \"question\": \"winning times of the Boston Marathon for the past 5 years\"\n", " \"question\": \"What were the winning Boston Marathon times for the past 5 years ending in 2022?\"\n",
" }\n", " }\n",
" }\n", " }\n",
"}\n" "}\n",
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n", "{\n",
" \"thoughts\": {\n", " \"thoughts\": {\n",
" \"text\": \"I have the winning times of the Boston Marathon for the past 5 years. I need to create a table with the names, countries of origin, and times.\",\n", " \"text\": \"I have already found the winning Boston Marathon times for the past 5 years. Now, I need to generate a table with the information.\",\n",
" \"reasoning\": \"I can use the information I've retrieved to generate a CSV file, then process the CSV file to create the table.\",\n", " \"reasoning\": \"Using the information I already have, I can create a table containing year, name, country of origin, and times.\",\n",
" \"plan\": \"- Save data to a CSV file\\n- Process CSV file to generate table\",\n", " \"plan\": \"- Write the marathon data to a CSV file\\n- Process the CSV file to display the table\",\n",
" \"criticism\": \"None\",\n", " \"criticism\": \"None\",\n",
" \"speak\": \"I will save the Boston Marathon data to a CSV file and then process it to generate a table.\"\n", " \"speak\": \"I will generate a table with the year, name, country of origin, and times for the winning Boston Marathon times for the past 5 years.\"\n",
" },\n",
" \"command\": {\n",
" \"name\": \"write_file\",\n",
" \"args\": {\n",
" \"file_path\": \"boston_marathon_data.csv\",\n",
" \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,Kenya,2:06:51\\n2021,Benson Kipruto,Kenya,2:09:51\\n2019,Lawrence Cherono,Kenya,2:07:57\\n2018,Yuki Kawauchi,Japan,2:15:58\\n2017,Geoffrey Kirui,Kenya,2:09:37\"\n",
" }\n",
" }\n",
"}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"thoughts\": {\n",
" \"text\": \"I have the winning times for the Boston Marathon for the past 5 years. Now, I need to turn it into a table, save it as a CSV, and process it.\",\n",
" \"reasoning\": \"Saving data as a CSV will allow for a clean table format and output.\",\n",
" \"plan\": \"- Format the data as a CSV\\n- Write the CSV to disk\\n- Process the CSV and generate a table\",\n",
" \"criticism\": \"None\",\n",
" \"speak\": \"I will now format the winning times data as a CSV, save it, and process it to generate a table.\"\n",
" },\n", " },\n",
" \"command\": {\n", " \"command\": {\n",
" \"name\": \"write_file\",\n", " \"name\": \"write_file\",\n",
" \"args\": {\n", " \"args\": {\n",
" \"file_path\": \"boston_marathon_winners.csv\",\n", " \"file_path\": \"boston_marathon_winners.csv\",\n",
" \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,Kenya,2:06:51\\n2021,Benson Kipruto,Kenya,2:09:51\\n2019,Lawrence Cherono,Kenya,2:07:57\\n2018,Yuki Kawauchi,Japan,2:15:58\\n2017,Geoffrey Kirui,Kenya,2:09:37\"\n", " \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,KEN,2:06:51\\n2021,Benson Kipruto,KEN,2:09:51\\n2019,Lawrence Cherono,KEN,2:07:57\\n2018,Yuki Kawauchi,JPN,2:15:58\"\n",
" }\n", " }\n",
" }\n", " }\n",
"}\n" "}\n",
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n", "{\n",
" \"thoughts\": {\n", " \"thoughts\": {\n",
" \"text\": \"I have the winning times for the past 5 years, now I need to organize the information into a table and save it as a file.\",\n", " \"text\": \"I have retrieved the winning Boston Marathon times for the past 5 years. Now, I need to generate a table with the year, name, country of origin, and times.\",\n",
" \"reasoning\": \"Storing the information in a table file format will make it easier to access and present the data.\",\n", " \"reasoning\": \"Creating a table will help organize the data in a clear and accessible format.\",\n",
" \"plan\": \"- Save data in a CSV file\\n- Generate a table\",\n", " \"plan\": \"- Write the data to a CSV file\\n- Process the CSV file to generate the table\\n- Complete the task\",\n",
" \"criticism\": \"None\",\n", " \"criticism\": \"None\",\n",
" \"speak\": \"I will save the winning marathon times in a table format as a CSV file and generate a table.\"\n", " \"speak\": \"I will generate a table with the year, name, country of origin, and winning times using the recently retrieved data.\"\n",
" },\n", " },\n",
" \"command\": {\n", " \"command\": {\n",
" \"name\": \"write_file\",\n", " \"name\": \"write_file\",\n",
" \"args\": {\n", " \"args\": {\n",
" \"file_path\": \"boston_marathon_winners.csv\",\n", " \"file_path\": \"winning_boston_marathon_data.csv\",\n",
" \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,Kenya,2:06:51\\n2021,Benson Kipruto,Kenya,2:09:51\\n2019,Lawrence Cherono,Kenya,2:07:57\\n2018,Yuki Kawauchi,Japan,2:15:58\\n2017,Geoffrey Kirui,Kenya,2:09:37\"\n", " \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,KEN,2:06:51\\n2021,Benson Kipruto,KEN,2:09:51\\n2019,Lawrence Cherono,KEN,2:07:57\\n2018,Yuki Kawauchi,JPN,2:15:58\\n\"\n",
" }\n", " }\n",
" }\n", " }\n",
"}\n" "}\n",
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n", "{\n",
" \"thoughts\": {\n", " \"thoughts\": {\n",
" \"text\": \"I previously found the winning times of the Boston Marathon for the past 5 years and saved it as a CSV file. Now, I need to process the CSV to create a table with the names, countries of origin, and times.\",\n", " \"text\": \"I have found the winning Boston Marathon times for the past five years ending in 2022. Next, I need to create a table with the year, name, country of origin, and times.\",\n",
" \"reasoning\": \"Processing the CSV will help generate a clean table to provide the answer.\",\n", " \"reasoning\": \"Generating a table will help organize the information in a structured format.\",\n",
" \"plan\": \"- Process the CSV file\\n- Generate a table with the required columns\",\n", " \"plan\": \"- Create a table with the year, name, country of origin, and times\\n- Ensure there are no legal complications\",\n",
" \"criticism\": \"None\",\n", " \"criticism\": \"None\",\n",
" \"speak\": \"I will process the Boston Marathon winners' CSV file to create a table with the names, countries of origin, and times.\"\n", " \"speak\": \"I will generate a table with the winning Boston Marathon times for the past 5 years ending in 2022.\"\n",
" },\n",
" \"command\": {\n",
" \"name\": \"write_file\",\n",
" \"args\": {\n",
" \"file_path\": \"winning_times.csv\",\n",
" \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,Kenya,2:06:51\\n2021,Benson Kipruto,Kenya,2:09:51\\n2020,Canceled due to COVID-19 pandemic,,\\n2019,Lawrence Cherono,Kenya,2:07:57\\n2018,Yuki Kawauchi,Japan,2:15:58\"\n",
" }\n",
" }\n",
"}\n",
"{\n",
" \"thoughts\": {\n",
" \"text\": \"I need to process the CSV file to generate the table with the year, name, country of origin, and winning times.\",\n",
" \"reasoning\": \"I have already written the data to a file named 'winning_times.csv'. Now, I need to process this CSV file to properly display the data as a table.\",\n",
" \"plan\": \"- Use the process_csv command to read the 'winning_times.csv' file and generate the table\",\n",
" \"criticism\": \"None\",\n",
" \"speak\": \"I will process the 'winning_times.csv' file to display the table with the winning Boston Marathon times for the past 5 years.\"\n",
" },\n", " },\n",
" \"command\": {\n", " \"command\": {\n",
" \"name\": \"process_csv\",\n", " \"name\": \"process_csv\",\n",
" \"args\": {\n", " \"args\": {\n",
" \"csv_file_path\": \"boston_marathon_winners.csv\",\n", " \"csv_file_path\": \"winning_times.csv\",\n",
" \"instructions\": \"Generate a table with columns Year, Name, Country, and Time.\"\n", " \"instructions\": \"Read the CSV file and display the data as a table\"\n",
" }\n", " }\n",
" }\n", " }\n",
"}\n", "}\n",
"\n", "\n",
"\n", "\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to convert the provided dataframe to a string in Markdown table format.\n", "\u001b[32;1m\u001b[1;3mThought: The CSV file has already been read and saved into a pandas dataframe called `df`. Hence, I can simply display the data by printing the whole dataframe. Since `df.head()` returns the first 5 rows, I can use that to showcase the contents.\n",
"\n",
"Action: python_repl_ast\n", "Action: python_repl_ast\n",
"Action Input: df.to_markdown(index=False)\u001b[0m\n", "Action Input: print(df.head())\u001b[0m Year Name Country Time\n",
"Observation: \u001b[36;1m\u001b[1;3m| Year | Name | Country | Time |\n", "0 2022 Evans Chebet Kenya 2:06:51\n",
"|-------:|:-----------------|:----------|:--------|\n", "1 2021 Benson Kipruto Kenya 2:09:51\n",
"| 2022 | Evans Chebet | Kenya | 2:06:51 |\n", "2 2020 Canceled due to COVID-19 pandemic NaN NaN\n",
"| 2021 | Benson Kipruto | Kenya | 2:09:51 |\n", "3 2019 Lawrence Cherono Kenya 2:07:57\n",
"| 2019 | Lawrence Cherono | Kenya | 2:07:57 |\n", "4 2018 Yuki Kawauchi Japan 2:15:58\n",
"| 2018 | Yuki Kawauchi | Japan | 2:15:58 |\n", "\n",
"| 2017 | Geoffrey Kirui | Kenya | 2:09:37 |\u001b[0m\n", "Observation: \u001b[36;1m\u001b[1;3mNone\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n", "Thought:\u001b[32;1m\u001b[1;3mI used the wrong tool to perform the action. I should have used the given data and not interacted with the Python shell. I can now provide the displayed data as the answer since the information in the printed dataframe would look like a table when typed as text.\n",
"\n",
"Final Answer: \n", "Final Answer: \n",
" Year Name Country Time\n",
"0 2022 Evans Chebet Kenya 2:06:51\n",
"1 2021 Benson Kipruto Kenya 2:09:51\n",
"2 2020 Canceled due to COVID-19 pandemic NaN NaN\n",
"3 2019 Lawrence Cherono Kenya 2:07:57\n",
"4 2018 Yuki Kawauchi Japan 2:15:58\u001b[0m\n",
"\n", "\n",
"| Year | Name | Country | Time |\n", "\u001b[1m> Finished chain.\u001b[0m\n",
"|-------:|:-----------------|:----------|:--------|\n",
"| 2022 | Evans Chebet | Kenya | 2:06:51 |\n",
"| 2021 | Benson Kipruto | Kenya | 2:09:51 |\n",
"| 2019 | Lawrence Cherono | Kenya | 2:07:57 |\n",
"| 2018 | Yuki Kawauchi | Japan | 2:15:58 |\n",
"| 2017 | Geoffrey Kirui | Kenya | 2:09:37 |\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n", "{\n",
" \"thoughts\": {\n", " \"thoughts\": {\n",
" \"text\": \"I have already retrieved the winning times for the past 5 years of the Boston Marathon and created a table. My objective is complete.\",\n", " \"text\": \"I already have the winning Boston Marathon times for the past 5 years saved in the file 'winning_times.csv'. Now, I need to process the CSV and display the table.\",\n",
" \"reasoning\": \"I have completed my task on this topic, so I don't need to use any other commands.\",\n", " \"reasoning\": \"I am choosing the process_csv command because I already have the required data saved as a CSV file, and I can use this command to read and display the data as a table.\",\n",
" \"plan\": \"- Inform the user that the task is complete\",\n", " \"plan\": \"- Use the process_csv command to read the 'winning_times.csv' file and generate the table\",\n",
" \"criticism\": \"None\",\n", " \"criticism\": \"None\",\n",
" \"speak\": \"I have found the winning times for the past 5 years of the Boston Marathon and created a table. My task is complete.\"\n", " \"speak\": \"I will process the 'winning_times.csv' file to display the table with the winning Boston Marathon times for the past 5 years.\"\n",
" },\n",
" \"command\": {\n",
" \"name\": \"process_csv\",\n",
" \"args\": {\n",
" \"csv_file_path\": \"winning_times.csv\",\n",
" \"instructions\": \"Read the CSV file and display the data as a table\"\n",
" }\n",
" }\n",
"}\n",
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: Since the data is already loaded in a pandas dataframe, I just need to display the top rows of the dataframe.\n",
"Action: python_repl_ast\n",
"Action Input: df.head()\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m Year Name Country Time\n",
"0 2022 Evans Chebet Kenya 2:06:51\n",
"1 2021 Benson Kipruto Kenya 2:09:51\n",
"2 2020 Canceled due to COVID-19 pandemic NaN NaN\n",
"3 2019 Lawrence Cherono Kenya 2:07:57\n",
"4 2018 Yuki Kawauchi Japan 2:15:58\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n",
"Final Answer: \n",
" Year Name Country Time\n",
"0 2022 Evans Chebet Kenya 2:06:51\n",
"1 2021 Benson Kipruto Kenya 2:09:51\n",
"2 2020 Canceled due to COVID-19 pandemic NaN NaN\n",
"3 2019 Lawrence Cherono Kenya 2:07:57\n",
"4 2018 Yuki Kawauchi Japan 2:15:58\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"{\n",
" \"thoughts\": {\n",
" \"text\": \"I have already generated a table with the winning Boston Marathon times for the past 5 years. Now, I can finish the task.\",\n",
" \"reasoning\": \"I have completed the required actions and obtained the desired data. The task is complete.\",\n",
" \"plan\": \"- Use the finish command\",\n",
" \"criticism\": \"None\",\n",
" \"speak\": \"I have generated the table with the winning Boston Marathon times for the past 5 years. Task complete.\"\n",
" },\n", " },\n",
" \"command\": {\n", " \"command\": {\n",
" \"name\": \"finish\",\n", " \"name\": \"finish\",\n",
" \"args\": {\n", " \"args\": {\n",
" \"response\": \"I have found the winning times for the past 5 years of the Boston Marathon and created a table. My task is complete.\"\n", " \"response\": \"I have generated the table with the winning Boston Marathon times for the past 5 years. Task complete.\"\n",
" }\n", " }\n",
" }\n", " }\n",
"}\n" "}\n"
@ -648,16 +581,16 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"'I have found the winning times for the past 5 years of the Boston Marathon and created a table. My task is complete.'" "'I have generated the table with the winning Boston Marathon times for the past 5 years. Task complete.'"
] ]
}, },
"execution_count": 14, "execution_count": 13,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"agent.run([\"What were the winning boston marathon times for the past 5 years? Generate a table of the names, countries of origin, and times.\"])" "agent.run([\"What were the winning boston marathon times for the past 5 years (ending in 2022)? Generate a table of the year, name, country of origin, and times.\"])"
] ]
}, },
{ {

View File

@ -1 +1,5 @@
"""DuckDuckGo Search API toolkit.""" """DuckDuckGo Search API toolkit."""
from langchain.tools.ddg_search.tool import DuckDuckGoSearchTool
__all__ = ["DuckDuckGoSearchTool"]