diff --git a/docs/modules/chains/examples/baby_agi.ipynb b/docs/modules/chains/examples/baby_agi.ipynb new file mode 100644 index 00000000..db4384c2 --- /dev/null +++ b/docs/modules/chains/examples/baby_agi.ipynb @@ -0,0 +1,596 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "517a9fd4", + "metadata": {}, + "source": [ + "# BabyAGI User Guide\n", + "\n", + "This notebook demonstrates how to implement [BabyAGI](https://github.com/yoheinakajima/babyagi/tree/main) by [Yohei Nakajima](https://twitter.com/yoheinakajima). BabyAGI is an AI agent that can generate and pretend to execute tasks based on a given objective.\n", + "\n", + "This guide will help you understand the components to create your own recursive agents.\n" + ] + }, + { + "cell_type": "markdown", + "id": "556af556", + "metadata": {}, + "source": [ + "## Install and Import Required Modules" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "33a0c80a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install pinecone-client > /dev/null" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c8a354b6", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages/pinecone/index.py:4: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", + " from tqdm.autonotebook import tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "from collections import deque\n", + "from typing import Dict, List, Optional\n", + "\n", + "import pinecone\n", + "from langchain import LLMChain, OpenAI, PromptTemplate\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.llms import BaseLLM\n", + "from langchain.vectorstores import Pinecone\n", + "from langchain.vectorstores.base import VectorStore\n", + "from pydantic import BaseModel, Field\n" + ] + }, + { + "cell_type": "markdown", + "id": "09f70772", + "metadata": {}, + "source": [ + "## Connect to the Vector Index\n", + "\n", + "Define the required environment, and connect to the vector index.\n", + "\n", + "See [Pinecone](https://app.pinecone.io/) to create your API key." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "794045d4", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the parameters for this run.\n", + "index_name = \"langchain-baby-agi\"\n", + "OBJECTIVE = \"Interstellar Travel\" # Ultimate goal\n", + "YOUR_FIRST_TASK = \"brush my teeth\" # Where to start" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "802b797e", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI() # The underlying LLM\n", + "\n", + "# Get Pinecone environment\n", + "pinecone.init(\n", + " api_key=os.environ[\"PINECONE_API_KEY\"], # find at app.pinecone.io\n", + " environment=os.environ[\"PINECONE_ENVIRONMENT\"] # next to api key in console\n", + ")\n", + "\n", + "# Define your embedding model\n", + "embeddings_model = OpenAIEmbeddings()\n", + "\n", + "# Create Pinecone index if it doesn't exist\n", + "dimension = 1536 # dimension of the embedding space\n", + "metric = \"cosine\" # distance function\n", + "pod_type = \"p1\" # performance-optimized\n", + "if index_name not in pinecone.list_indexes():\n", + " pinecone.create_index(index_name, dimension=dimension, metric=metric, pod_type=pod_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "30e8ecc4", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Fetch the index\n", + "text_key = \"page_content\"\n", + "if index_name in pinecone.list_indexes():\n", + " index = pinecone.Index(index_name)\n", + "else:\n", + " raise ValueError(f\"Index '{index_name}' not found in your Pinecone project.\")\n", + "\n", + "vectorstore = Pinecone(index, embeddings_model.embed_query, text_key)" + ] + }, + { + "cell_type": "markdown", + "id": "0f3b72bf", + "metadata": {}, + "source": [ + "## Define the Chains\n", + "\n", + "BabyAGI relies on three LLM chains:\n", + "- Task creation chain to select new tasks to add to the list\n", + "- Task prioritization chain to re-prioritize tasks\n", + "- Execution Chain to execute the tasks" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bf4bd5cd", + "metadata": {}, + "outputs": [], + "source": [ + "class TaskCreationChain(LLMChain):\n", + " \"\"\"Chain to generates tasks.\"\"\"\n", + "\n", + " @classmethod\n", + " def from_llm(cls, llm: BaseLLM, objective: str, verbose: bool = True) -> LLMChain:\n", + " \"\"\"Get the response parser.\"\"\"\n", + " task_creation_template = (\n", + " \"You are an task creation AI that uses the result of an execution agent\"\n", + " \" to create new tasks with the following objective: {objective},\"\n", + " \" The last completed task has the result: {result}.\"\n", + " \" This result was based on this task description: {task_description}.\"\n", + " \" These are incomplete tasks: {incomplete_tasks}.\"\n", + " \" Based on the result, create new tasks to be completed\"\n", + " \" by the AI system that do not overlap with incomplete tasks.\"\n", + " \" Return the tasks as an array.\"\n", + " )\n", + " prompt = PromptTemplate(\n", + " template=task_creation_template,\n", + " partial_variables={\"objective\": objective},\n", + " input_variables=[\"result\", \"task_description\", \"incomplete_tasks\"],\n", + " )\n", + " return cls(prompt=prompt, llm=llm, verbose=verbose)\n", + " \n", + " def get_next_task(self, result: Dict, task_description: str, task_list: List[str]) -> List[Dict]:\n", + " \"\"\"Get the next task.\"\"\"\n", + " incomplete_tasks = \", \".join(task_list)\n", + " response = self.run(result=result, task_description=task_description, incomplete_tasks=incomplete_tasks)\n", + " new_tasks = response.split('\\n')\n", + " return [{\"task_name\": task_name} for task_name in new_tasks if task_name.strip()]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b6488ffe", + "metadata": {}, + "outputs": [], + "source": [ + "class TaskPrioritizationChain(LLMChain):\n", + " \"\"\"Chain to prioritize tasks.\"\"\"\n", + "\n", + " @classmethod\n", + " def from_llm(cls, llm: BaseLLM, objective: str, verbose: bool = True) -> LLMChain:\n", + " \"\"\"Get the response parser.\"\"\"\n", + " task_prioritization_template = (\n", + " \"You are an task prioritization AI tasked with cleaning the formatting of and reprioritizing\"\n", + " \" the following tasks: {task_names}.\"\n", + " \" Consider the ultimate objective of your team: {objective}.\"\n", + " \" Do not remove any tasks. Return the result as a numbered list, like:\"\n", + " \" #. First task\"\n", + " \" #. Second task\"\n", + " \" Start the task list with number {next_task_id}.\"\n", + " )\n", + " prompt = PromptTemplate(\n", + " template=task_prioritization_template,\n", + " partial_variables={\"objective\": objective},\n", + " input_variables=[\"task_names\", \"next_task_id\"],\n", + " )\n", + " return cls(prompt=prompt, llm=llm, verbose=verbose)\n", + "\n", + " def prioritize_tasks(self, this_task_id: int, task_list: List[Dict]) -> List[Dict]:\n", + " \"\"\"Prioritize tasks.\"\"\"\n", + " task_names = [t[\"task_name\"] for t in task_list]\n", + " next_task_id = int(this_task_id) + 1\n", + " response = self.run(task_names=task_names, next_task_id=next_task_id)\n", + " new_tasks = response.split('\\n')\n", + " prioritized_task_list = []\n", + " for task_string in new_tasks:\n", + " if not task_string.strip():\n", + " continue\n", + " task_parts = task_string.strip().split(\".\", 1)\n", + " if len(task_parts) == 2:\n", + " task_id = task_parts[0].strip()\n", + " task_name = task_parts[1].strip()\n", + " prioritized_task_list.append({\"task_id\": task_id, \"task_name\": task_name})\n", + " return prioritized_task_list\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b43cd580", + "metadata": {}, + "outputs": [], + "source": [ + "class ExecutionChain(LLMChain):\n", + " \"\"\"Chain to execute tasks.\"\"\"\n", + " \n", + " vectorstore: VectorStore = Field(init=False)\n", + "\n", + " @classmethod\n", + " def from_llm(cls, llm: BaseLLM, vectorstore: VectorStore, verbose: bool = True) -> LLMChain:\n", + " \"\"\"Get the response parser.\"\"\"\n", + " execution_template = (\n", + " \"You are an AI who performs one task based on the following objective: {objective}.\"\n", + " \" Take into account these previously completed tasks: {context}.\"\n", + " \" Your task: {task}.\"\n", + " \" Response:\"\n", + " )\n", + " prompt = PromptTemplate(\n", + " template=execution_template,\n", + " input_variables=[\"objective\", \"context\", \"task\"],\n", + " )\n", + " return cls(prompt=prompt, llm=llm, verbose=verbose, vectorstore=vectorstore)\n", + " \n", + " def _get_top_tasks(self, query: str, k: int) -> List[str]:\n", + " \"\"\"Get the top k tasks based on the query.\"\"\"\n", + " results = self.vectorstore.similarity_search_with_score(query, k=k)\n", + " if not results:\n", + " return []\n", + " sorted_results, _ = zip(*sorted(results, key=lambda x: x[1], reverse=True))\n", + " return [str(item.metadata['task']) for item in sorted_results]\n", + " \n", + " def execute_task(self, objective: str, task: str, k: int = 5) -> str:\n", + " \"\"\"Execute a task.\"\"\"\n", + " context = self._get_top_tasks(query=objective, k=k)\n", + " return self.run(objective=objective, context=context, task=task)\n" + ] + }, + { + "cell_type": "markdown", + "id": "3ad996c5", + "metadata": {}, + "source": [ + "### Define the BabyAGI Controller\n", + "\n", + "BabyAGI composes the chains defined above in a (potentially-)infinite loop." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1e978938", + "metadata": {}, + "outputs": [], + "source": [ + "class BabyAGI(BaseModel):\n", + " \"\"\"Controller model for the BabyAGI agent.\"\"\"\n", + "\n", + " objective: str = Field(alias=\"objective\")\n", + " task_list: deque = Field(default_factory=deque)\n", + " task_creation_chain: TaskCreationChain = Field(...)\n", + " task_prioritization_chain: TaskPrioritizationChain = Field(...)\n", + " execution_chain: ExecutionChain = Field(...)\n", + " task_id_counter: int = Field(1)\n", + "\n", + " def add_task(self, task: Dict):\n", + " self.task_list.append(task)\n", + "\n", + " def print_task_list(self):\n", + " print(\"\\033[95m\\033[1m\" + \"\\n*****TASK LIST*****\\n\" + \"\\033[0m\\033[0m\")\n", + " for t in self.task_list:\n", + " print(str(t[\"task_id\"]) + \": \" + t[\"task_name\"])\n", + "\n", + " def print_next_task(self, task: Dict):\n", + " print(\"\\033[92m\\033[1m\" + \"\\n*****NEXT TASK*****\\n\" + \"\\033[0m\\033[0m\")\n", + " print(str(task[\"task_id\"]) + \": \" + task[\"task_name\"])\n", + "\n", + " def print_task_result(self, result: str):\n", + " print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n", + " print(result)\n", + "\n", + " def run(self, max_iterations: Optional[int] = None):\n", + " \"\"\"Run the agent.\"\"\"\n", + " num_iters = 0\n", + " while True:\n", + " if self.task_list:\n", + " self.print_task_list()\n", + "\n", + " # Step 1: Pull the first task\n", + " task = self.task_list.popleft()\n", + " self.print_next_task(task)\n", + "\n", + " # Step 2: Execute the task\n", + " result = self.execution_chain.execute_task(\n", + " self.objective, task[\"task_name\"]\n", + " )\n", + " this_task_id = int(task[\"task_id\"])\n", + " self.print_task_result(result)\n", + "\n", + " # Step 3: Store the result in Pinecone\n", + " result_id = f\"result_{task['task_id']}\"\n", + " self.execution_chain.vectorstore.add_texts(\n", + " texts=[result],\n", + " metadatas=[{\"task\": task[\"task_name\"]}],\n", + " ids=[result_id],\n", + " )\n", + "\n", + " # Step 4: Create new tasks and reprioritize task list\n", + " new_tasks = self.task_creation_chain.get_next_task(\n", + " result, task[\"task_name\"], [t[\"task_name\"] for t in self.task_list]\n", + " )\n", + " for new_task in new_tasks:\n", + " self.task_id_counter += 1\n", + " new_task.update({\"task_id\": self.task_id_counter})\n", + " self.add_task(new_task)\n", + " self.task_list = deque(\n", + " self.task_prioritization_chain.prioritize_tasks(\n", + " this_task_id, list(self.task_list)\n", + " )\n", + " )\n", + " num_iters += 1\n", + " if max_iterations is not None and num_iters == max_iterations:\n", + " print(\"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\")\n", + " break\n", + "\n", + " @classmethod\n", + " def from_llm_and_objectives(\n", + " cls,\n", + " llm: BaseLLM,\n", + " vectorstore: VectorStore,\n", + " objective: str,\n", + " first_task: str,\n", + " verbose: bool = False,\n", + " ) -> \"BabyAGI\":\n", + " \"\"\"Initialize the BabyAGI Controller.\"\"\"\n", + " task_creation_chain = TaskCreationChain.from_llm(\n", + " llm, objective, verbose=verbose\n", + " )\n", + " task_prioritization_chain = TaskPrioritizationChain.from_llm(\n", + " llm, objective, verbose=verbose\n", + " )\n", + " execution_chain = ExecutionChain.from_llm(llm, vectorstore, verbose=verbose)\n", + " controller = cls(\n", + " objective=objective,\n", + " task_creation_chain=task_creation_chain,\n", + " task_prioritization_chain=task_prioritization_chain,\n", + " execution_chain=execution_chain,\n", + " )\n", + " controller.add_task({\"task_id\": 1, \"task_name\": first_task})\n", + " return controller" + ] + }, + { + "cell_type": "markdown", + "id": "05ba762e", + "metadata": {}, + "source": [ + "### Run the BabyAGI\n", + "\n", + "Now it's time to create the BabyAGI controller and watch it try to accomplish your objective." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3d69899b", + "metadata": {}, + "outputs": [], + "source": [ + "# Run the agent\n", + "verbose=False\n", + "\n", + "baby_agi = BabyAGI.from_llm_and_objectives(\n", + " llm=llm,\n", + " vectorstore=vectorstore,\n", + " objective=OBJECTIVE,\n", + " first_task=YOUR_FIRST_TASK,\n", + " verbose=verbose\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f7957b51", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[95m\u001b[1m\n", + "*****TASK LIST*****\n", + "\u001b[0m\u001b[0m\n", + "1: brush my teeth\n", + "\u001b[92m\u001b[1m\n", + "*****NEXT TASK*****\n", + "\u001b[0m\u001b[0m\n", + "1: brush my teeth\n", + "\u001b[93m\u001b[1m\n", + "*****TASK RESULT*****\n", + "\u001b[0m\u001b[0m\n", + " The objective of this task does not align with the objective of Interstellar Travel. Please provide a task that is related to Interstellar Travel.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "68b489b48aad4ce886088bd8be140bfa", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Upserted vectors: 0%| | 0/1 [00:00