feat: agent manager manage memory by key

1 year ago · 28ed048240
parent 7242023eaa
commit 28ed048240
8 changed files with 277 additions and 110 deletions
--- a/agents/builder.py
+++ b/agents/builder.py
@ -1,38 +1,26 @@
-from typing import Dict, Tuple
-
-from llm import ChatOpenAI
-from langchain.agents.agent import AgentExecutor
-from langchain.agents.initialize import initialize_agent
-from langchain.chains.conversation.memory import ConversationBufferMemory
 from langchain.chat_models.base import BaseChatModel
-from langchain.memory.chat_memory import BaseChatMemory
 from langchain.output_parsers.base import BaseOutputParser

 from prompts.input import EVAL_PREFIX, EVAL_SUFFIX
 from env import settings

-from agents.parser import EvalOutputParser
 from tools.base import BaseToolSet
 from tools.factory import ToolsFactory
-from handlers.base import BaseHandler, FileHandler, FileType
+
+from .llm import ChatOpenAI
+from .chat_agent import ConversationalChatAgent
+from .parser import EvalOutputParser


 class AgentBuilder:
    def __init__(self):
        self.llm: BaseChatModel = None
-        self.memory: BaseChatMemory = None
        self.parser: BaseOutputParser = None
        self.tools: list = None
-        self.handler: FileHandler = None

    def build_llm(self):
        self.llm = ChatOpenAI(temperature=0)

-    def build_memory(self):
-        self.memory = ConversationBufferMemory(
-            memory_key="chat_history", return_messages=True
-        )
-
    def build_parser(self):
        self.parser = EvalOutputParser()

@ -52,55 +40,26 @@ class AgentBuilder:
            *ToolsFactory.from_toolsets(toolsets),
        ]

-    def build_handler(self, handlers: Dict[FileType, BaseHandler]):
-        self.handler = FileHandler(handlers)
+    def get_tools(self):
+        if self.tools is None:
+            raise ValueError("Tools must be initialized before agent")

-    def get_agent(self):
-        print(f"Initializing {settings['BOT_NAME']}")
+        return self.tools

+    def get_agent(self):
        if self.llm is None:
            raise ValueError("LLM must be initialized before agent")

-        if self.memory is None:
-            raise ValueError("Memory must be initialized before agent")
-
        if self.parser is None:
            raise ValueError("Parser must be initialized before agent")

        if self.tools is None:
            raise ValueError("Tools must be initialized before agent")

-        return initialize_agent(
-            self.tools,
-            self.llm,
-            agent="chat-conversational-react-description",
-            verbose=True,
-            memory=self.memory,
-            agent_kwargs={
-                "system_message": EVAL_PREFIX.format(bot_name=settings["BOT_NAME"]),
-                "human_message": EVAL_SUFFIX.format(bot_name=settings["BOT_NAME"]),
-                "output_parser": self.parser,
-            },
+        return ConversationalChatAgent.from_llm_and_tools(
+            llm=self.llm,
+            tools=self.tools,
+            system_message=EVAL_PREFIX.format(bot_name=settings["BOT_NAME"]),
+            human_message=EVAL_SUFFIX.format(bot_name=settings["BOT_NAME"]),
+            output_parser=self.parser,
        )
-
-    def get_handler(self):
-        if self.handler is None:
-            raise ValueError("Handler must be initialized before returning")
-
-        return self.handler
-
-    @staticmethod
-    def get_agent_and_handler(
-        toolsets: list[BaseToolSet], handlers: Dict[FileType, BaseHandler]
-    ) -> Tuple[AgentExecutor, FileHandler]:
-        builder = AgentBuilder()
-        builder.build_llm()
-        builder.build_memory()
-        builder.build_parser()
-        builder.build_tools(toolsets)
-        builder.build_handler(handlers)
-
-        agent = builder.get_agent()
-        handler = builder.get_handler()
-
-        return (agent, handler)
--- a/agents/chat_agent.py
+++ b/agents/chat_agent.py
@ -0,0 +1,126 @@
+from typing import Any, List, Optional, Sequence, Tuple
+
+from langchain.agents.agent import Agent
+from langchain.callbacks.base import BaseCallbackManager
+from langchain.chains import LLMChain
+from langchain.output_parsers.base import BaseOutputParser
+from langchain.prompts.base import BasePromptTemplate
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    MessagesPlaceholder,
+    SystemMessagePromptTemplate,
+)
+from langchain.schema import (
+    AgentAction,
+    AIMessage,
+    BaseLanguageModel,
+    BaseMessage,
+    HumanMessage,
+)
+from langchain.tools.base import BaseTool
+
+from prompts.input import EVAL_TOOL_RESPONSE
+
+
+class ConversationalChatAgent(Agent):
+    """An agent designed to hold a conversation in addition to using tools.
+
+    The prompt is built from a system message, the ``chat_history``
+    messages, a human message carrying the tool list and the parser's
+    format instructions, and the ``agent_scratchpad`` messages.
+    """
+
+    # Parser used by ``_extract_tool_and_input``; the value returned by its
+    # ``parse`` must be subscriptable by "action" and "action_input".
+    output_parser: BaseOutputParser
+
+    @property
+    def _agent_type(self) -> str:
+        """Not provided by this agent; accessing it always raises."""
+        raise NotImplementedError
+
+    @property
+    def observation_prefix(self) -> str:
+        """Prefix to append the observation with."""
+        return "Observation: "
+
+    @property
+    def llm_prefix(self) -> str:
+        """Prefix to append the llm call with."""
+        return "Thought: "
+
+    @classmethod
+    def create_prompt(
+        cls,
+        tools: Sequence[BaseTool],
+        system_message: str,
+        human_message: str,
+        output_parser: BaseOutputParser,
+        input_variables: Optional[List[str]] = None,
+    ) -> BasePromptTemplate:
+        """Build the chat prompt template for this agent.
+
+        ``human_message`` is formatted twice: the first pass fills in
+        ``format_instructions`` from ``output_parser``, the second fills in
+        ``tool_names`` and ``tools``. Placeholders meant for a later pass
+        must therefore be brace-escaped in the template.
+
+        Args:
+            tools: Tools whose names and descriptions are listed in the prompt.
+            system_message: Template text for the system message.
+            human_message: Template text for the human message.
+            output_parser: Supplies the format instructions.
+            input_variables: Prompt input names; defaults to ``input``,
+                ``chat_history`` and ``agent_scratchpad``.
+
+        Returns:
+            A ``ChatPromptTemplate`` holding the system message, the chat
+            history placeholder, the human message and the scratchpad
+            placeholder, in that order.
+        """
+        tool_strings = "\n".join(
+            [f"> {tool.name}: {tool.description}" for tool in tools]
+        )
+        tool_names = ", ".join([tool.name for tool in tools])
+        # First pass: inject the parser's format instructions.
+        human_with_instructions = human_message.format(
+            format_instructions=output_parser.get_format_instructions()
+        )
+        # Second pass: inject the tool listing into the result of the first.
+        final_prompt = human_with_instructions.format(
+            tool_names=tool_names, tools=tool_strings
+        )
+        if input_variables is None:
+            input_variables = ["input", "chat_history", "agent_scratchpad"]
+        messages = [
+            SystemMessagePromptTemplate.from_template(system_message),
+            MessagesPlaceholder(variable_name="chat_history"),
+            HumanMessagePromptTemplate.from_template(final_prompt),
+            MessagesPlaceholder(variable_name="agent_scratchpad"),
+        ]
+        return ChatPromptTemplate(input_variables=input_variables, messages=messages)
+
+    def _extract_tool_and_input(self, llm_output: str) -> Optional[Tuple[str, str]]:
+        """Parse ``llm_output`` into an ``(action, action_input)`` pair.
+
+        Raises:
+            ValueError: If the output parser fails or its result lacks the
+                "action" / "action_input" keys. The underlying error is
+                chained as ``__cause__`` so the root cause stays visible.
+        """
+        try:
+            response = self.output_parser.parse(llm_output)
+            return response["action"], response["action_input"]
+        except Exception as e:
+            raise ValueError(f"Could not parse LLM output: {llm_output}") from e
+
+    def _construct_scratchpad(
+        self, intermediate_steps: List[Tuple[AgentAction, str]]
+    ) -> List[BaseMessage]:
+        """Construct the scratchpad that lets the agent continue its thought process.
+
+        Each step contributes two messages: the action's log as an AI
+        message, then the observation rendered through
+        ``EVAL_TOOL_RESPONSE`` as a human message.
+        """
+        thoughts: List[BaseMessage] = []
+        for action, observation in intermediate_steps:
+            thoughts.append(AIMessage(content=action.log))
+            human_message = HumanMessage(
+                content=EVAL_TOOL_RESPONSE.format(observation=observation)
+            )
+            thoughts.append(human_message)
+        return thoughts
+
+    @classmethod
+    def from_llm_and_tools(
+        cls,
+        llm: BaseLanguageModel,
+        tools: Sequence[BaseTool],
+        system_message: str,
+        human_message: str,
+        output_parser: BaseOutputParser,
+        callback_manager: Optional[BaseCallbackManager] = None,
+        input_variables: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> Agent:
+        """Construct an agent from an LLM and tools.
+
+        Validates the tools, builds the prompt with ``create_prompt``, wraps
+        it with ``llm`` in an ``LLMChain`` and returns an instance whose
+        ``allowed_tools`` are the tool names. Extra ``kwargs`` are passed to
+        the class constructor.
+        """
+        cls._validate_tools(tools)
+        prompt = cls.create_prompt(
+            tools,
+            system_message=system_message,
+            human_message=human_message,
+            input_variables=input_variables,
+            output_parser=output_parser,
+        )
+        llm_chain = LLMChain(
+            llm=llm,
+            prompt=prompt,
+            callback_manager=callback_manager,
+        )
+        tool_names = [tool.name for tool in tools]
+        return cls(
+            llm_chain=llm_chain,
+            allowed_tools=tool_names,
+            output_parser=output_parser,
+            **kwargs,
+        )
--- a/agents/llm.py
+++ b/agents/llm.py
@ -217,7 +217,9 @@ class ChatOpenAI(BaseChatModel, BaseModel):

        @retry_decorator
        def _completion_with_retry(**kwargs: Any) -> Any:
-            return self.client.create(**kwargs)
+            response = self.client.create(**kwargs)
+            print(response)
+            return response

        return _completion_with_retry(**kwargs)

@ -226,11 +228,11 @@ class ChatOpenAI(BaseChatModel, BaseModel):
    ) -> ChatResult:

        message_dicts, params = self._create_message_dicts(messages, stop)
-        # for item in message_dicts:
-        #     for k, v in item.items():
-        #         print(f"{k}: {v}")
-        #     print("-------")
-        # print("===========")
+        for item in message_dicts:
+            for k, v in item.items():
+                print(f"{k}: {v}")
+            print("-------")
+        print("===========")

        if self.streaming:
            inner_completion = ""
--- a/agents/manager.py
+++ b/agents/manager.py
@ -0,0 +1,47 @@
+from typing import Dict
+
+from langchain.agents.agent import Agent, AgentExecutor
+from langchain.chains.conversation.memory import ConversationBufferMemory
+from langchain.memory.chat_memory import BaseChatMemory
+
+from tools.base import BaseToolSet
+
+from .builder import AgentBuilder
+
+
+class AgentManager:
+    """Keep one ``AgentExecutor`` per key, each with its own chat memory.
+
+    The agent and the tool list are shared by every executor; each
+    executor gets a fresh memory object when it is created.
+    """
+
+    def __init__(self, agent: Agent, tools: list[BaseToolSet]):
+        self.agent: Agent = agent
+        self.tools: list[BaseToolSet] = tools
+        # Executors created so far, indexed by the caller-supplied key.
+        self.executors: Dict[str, AgentExecutor] = {}
+
+    def create_memory(self) -> BaseChatMemory:
+        """Return a new buffer memory that uses the ``chat_history`` key."""
+        return ConversationBufferMemory(
+            memory_key="chat_history",
+            return_messages=True,
+        )
+
+    def create_executor(self) -> AgentExecutor:
+        """Build a new executor around the shared agent with fresh memory."""
+        return AgentExecutor.from_agent_and_tools(
+            agent=self.agent,
+            tools=self.tools,
+            memory=self.create_memory(),
+        )
+
+    def remove_executor(self, key: str) -> None:
+        """Drop the executor stored under ``key``; unknown keys are ignored."""
+        self.executors.pop(key, None)
+
+    def get_or_create_executor(self, key: str) -> AgentExecutor:
+        """Return the executor for ``key``, creating and storing it if absent."""
+        try:
+            return self.executors[key]
+        except KeyError:
+            executor = self.executors[key] = self.create_executor()
+            return executor
+
+    @staticmethod
+    def create(toolsets: list[BaseToolSet]) -> "AgentManager":
+        """Build the LLM, parser and tools via ``AgentBuilder`` and wrap them."""
+        builder = AgentBuilder()
+        builder.build_llm()
+        builder.build_parser()
+        builder.build_tools(toolsets)
+        # Arguments evaluate left to right: agent first, then tools.
+        return AgentManager(builder.get_agent(), builder.get_tools())
--- a/agents/parser.py
+++ b/agents/parser.py
@ -1,4 +1,5 @@
 import json
+import re
 from typing import Dict

 from langchain.output_parsers.base import BaseOutputParser
@ -11,15 +12,10 @@ class EvalOutputParser(BaseOutputParser):
        return EVAL_FORMAT_INSTRUCTIONS

    def parse(self, text: str) -> Dict[str, str]:
-        cleaned_output = text.strip()
-        if "```json" in cleaned_output:
-            _, cleaned_output = cleaned_output.split("```json")
-        if cleaned_output.startswith("```json"):
-            cleaned_output = cleaned_output[len("```json") :]
-        if cleaned_output.startswith("```"):
-            cleaned_output = cleaned_output[len("```") :]
-        if cleaned_output.endswith("```"):
-            cleaned_output = cleaned_output[: -len("```")]
-        cleaned_output = cleaned_output.strip()
-        response = json.loads(cleaned_output)
-        return {"action": response["action"], "action_input": response["action_input"]}
+        regex = r"Action: (.*?)[\n]*Action Input: (.*)"
+        match = re.search(regex, text, re.DOTALL)
+        if not match:
+            raise ValueError(f"Could not parse LLM output: `{text}`")
+        action = match.group(1).strip()
+        action_input = match.group(2)
+        return {"action": action, "action_input": action_input.strip(" ").strip('"')}
--- a/main.py
+++ b/main.py
@ -8,10 +8,11 @@ from s3 import upload
 from env import settings

 from prompts.error import ERROR_PROMPT
-from agents.builder import AgentBuilder
+from agents.manager import AgentManager
 from tools.base import BaseToolSet
 from tools.cpu import (
    Terminal,
+    CodeEditor,
    RequestsGet,
    WineDB,
    ExitConversation,
@ -22,16 +23,19 @@ from tools.gpu import (
    Text2Image,
    VisualQuestionAnswering,
 )
-from handlers.base import BaseHandler, FileType
+from handlers.base import BaseHandler, FileHandler, FileType
 from handlers.image import ImageCaptioning
 from handlers.dataframe import CsvToDataframe

 app = FastAPI()

+agent_manager: AgentManager = None
+
 toolsets: List[BaseToolSet] = [
    Terminal(),
+    CodeEditor(),
    RequestsGet(),
-    ExitConversation(),
+    ExitConversation(agent_manager),
    Text2Image("cuda"),
    ImageEditing("cuda"),
    InstructPix2Pix("cuda"),
@ -46,9 +50,8 @@ handlers: Dict[FileType, BaseHandler] = {
 if settings["WINEDB_HOST"] and settings["WINEDB_PASSWORD"]:
    toolsets.append(WineDB())

-agent, handler = AgentBuilder.get_agent_and_handler(
-    toolsets=toolsets, handlers=handlers
-)
+agent_manager = AgentManager.create(toolsets=toolsets)
+file_handler = FileHandler(handlers=handlers)


 class Request(BaseModel):
@ -77,17 +80,19 @@ async def command(request: Request) -> Response:
    print("Inputs:", query, files)
    # TODO - add state to memory (use key)

-    print("======>Previous memory:\n %s" % agent.memory)
+    executor = agent_manager.get_or_create_executor(key)
+
+    print("======>Previous memory:\n %s" % executor.memory)

-    promptedQuery = "\n".join([handler.handle(file) for file in files])
+    promptedQuery = "\n".join([file_handler.handle(file) for file in files])
    promptedQuery += query
    print("======>Prompted Text:\n %s" % promptedQuery)

    try:
-        res = agent({"input": promptedQuery})
+        res = executor({"input": promptedQuery})
    except Exception as e:
        try:
-            res = agent(
+            res = executor(
                {
                    "input": ERROR_PROMPT.format(promptedQuery=promptedQuery, e=str(e)),
                }
--- a/prompts/input.py
+++ b/prompts/input.py
@ -14,7 +14,8 @@ When using tools to generate new files, {bot_name} is also known that the file(i
 {bot_name} is able to use tools in a sequence, and is loyal to the tool observation outputs rather than faking the file content and file name. It will remember to provide the file name from the last tool observation, if a new file is generated.
 Human may provide new figures to {bot_name} with a description. The description helps {bot_name} to understand this file, but {bot_name} should use tools to finish following tasks, rather than directly imagine from the description.

-Overall, {bot_name} is a powerful visual dialogue assistant tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics."""
+Overall, {bot_name} is a powerful visual dialogue assistant tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics.
+"""

 EVAL_FORMAT_INSTRUCTIONS = """RESPONSE FORMAT INSTRUCTIONS
 ----------------------------
@ -23,27 +24,19 @@ When responding to me please, please output a response in one of two formats:

 **Option 1:**
 Use this if you want the human to use a tool.
-Markdown code snippet formatted in the following schema:
+Your response should be in the following schema:

-```json
-{{{{
-    "action": string \\ The action to take. Must be one of {tool_names}
-    "action_input": string \\ The input to the action
-}}}}
-```
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action

 **Option #2:**
 Use this if you want to respond directly to the human.
 You should replace sensitive data or encrypted data with "d1dy0uth1nk7hat1t1s7haAAat3aSy?" in action_input.
+Your response should be in the following schema:

-Markdown code snippet formatted in the following schema:
-
-```json
-{{{{
-    "action": "Final Answer",
-    "action_input": string \\ You should put what you want to return to use here.
-}}}}
-```"""
+Action: Final Answer
+Action Input: string \\ You should put what you want to return to use here.
+"""

 EVAL_SUFFIX = """TOOLS
 ------
@ -59,6 +52,15 @@ The tools the human can use are:

 USER'S INPUT
 --------------------
-Here is the user's input (remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else):
+Here is the user's input:

 {{{{{{{{input}}}}}}}}"""
+
+EVAL_TOOL_RESPONSE = """TOOL RESPONSE: 
+---------------------
+{observation}
+
+USER'S INPUT
+--------------------
+
+"""
--- a/tools/cpu.py
+++ b/tools/cpu.py
@ -8,15 +8,13 @@ from llama_index import GPTSimpleVectorIndex
 from bs4 import BeautifulSoup
 from langchain.memory.chat_memory import BaseChatMemory

-"""Wrapper around subprocess to run commands."""
 import subprocess

+from agents.manager import AgentManager
 from .base import tool, BaseToolSet


 class Terminal(BaseToolSet):
-    """Executes bash commands and returns the output."""
-
    @tool(
        name="Terminal",
        description="Executes commands in a terminal."
@ -43,6 +41,35 @@ class Terminal(BaseToolSet):
        return output


+class CodeEditor(BaseToolSet):
+    """Tool set that appends code to files on behalf of the agent."""
+
+    @tool(
+        name="CodeEditor.WRITE",
+        description="Writes and appends code."
+        "It can be used to write or append code in any language. "
+        "If the code is completed, use the Terminal tool to execute it, if not, append the code through the CodeEditor tool."
+        "Input should be filename, status, code. Status will be 'complete' or 'incomplete'. ex. 'test.py|complete\nprint('hello world')\n"
+        "and the output will be status and last line. status will be 'complete' or 'incomplete' or 'error'.",
+    )
+    def write(self, inputs: str) -> str:
+        """Append code to a file and report the outcome as a string.
+
+        Args:
+            inputs: The filename, a ``|`` separator, the status
+                (``complete`` or ``incomplete``), a newline, then the code
+                to append. If there is no newline after the status, the
+                code is treated as empty.
+
+        Returns:
+            The status followed by the last line of the appended code, or
+            a string starting with ``error`` when the input is malformed
+            or the file cannot be written. This method reports problems in
+            its return value instead of raising, so the agent always
+            receives an observation.
+        """
+        # partition() never raises on a missing separator, unlike
+        # tuple-unpacking the result of split().
+        filename, separator, status_and_code = inputs.partition("|")
+        if not separator:
+            return "error: input must be filename|status followed by code"
+        status, _, code = status_and_code.partition("\n")
+
+        if status not in ("complete", "incomplete"):
+            return "error: status must be complete or incomplete"
+
+        try:
+            with open(filename, "a") as f:
+                f.write(code)
+            output = status + "\nLast line was:" + code.split("\n")[-1]
+        except Exception as e:
+            # Tool boundary: keep the failure reason so the agent can react
+            # to it rather than seeing a bare "error".
+            output = f"error: {e}"
+        print(
+            f"\nProcessed CodeEditor, Input Codes: {code} " f"Output Answer: {output}"
+        )
+        return output
+
+
 class RequestsGet(BaseToolSet):
    @tool(
        name="requests_get",
@ -95,7 +122,7 @@ class WineDB(BaseToolSet):
        self.index = GPTSimpleVectorIndex(documents)

    @tool(
-        name="Wine Recommendataion",
+        name="Wine Recommendation",
        description="A tool to recommend wines based on a user's input. "
        "Inputs are necessary factors for wine recommendations, such as the user's mood today, side dishes to eat with wine, people to drink wine with, what things you want to do, the scent and taste of their favorite wine."
        "The output will be a list of recommended wines."
@ -120,17 +147,20 @@ class WineDB(BaseToolSet):


 class ExitConversation(BaseToolSet):
+    def __init__(self, agent_manager: AgentManager):
+        self.agent_manager = agent_manager
+
    @tool(
        name="exit_conversation",
        description="A tool to exit the conversation. "
        "Use this when you want to end the conversation. "
-        "Input should be a user's query."
+        "Input should be a user's key."
        "The output will be a message that the conversation is over.",
    )
-    def inference(self, query: str) -> str:
+    def exit(self, key: str) -> str:
        """Run the tool."""
-        # session.clear() # TODO
+        self.agent_manager.remove_executor(key)

-        print(f"\nProcessed ExitConversation, Input Query: {query} ")
+        print(f"\nProcessed ExitConversation.")

-        return f"My original question was: {query}"
+        return f"End conversation."