MultiOn client toolkit update 2.0 (#8750)

- Updated to use the newer, improved function interaction
 - The previous version had only one callback
 - @hinthornw @hwchase17  Can you look into this
 -  Shout out to @MultiON_AI @DivGarg9 on twitter

---------

Co-authored-by: Naman Garg <ngarg3@binghamton.edu>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
Karthik Raja A 2023-08-07 10:54:10 +05:30 committed by GitHub
parent 454998c1fb
commit 5a9765b1b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 186 additions and 210 deletions

View File

@ -5,7 +5,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multion Toolkit\n",
"# MultiOn Toolkit\n",
"\n",
"This notebook walks you through connecting LangChain to the MultiOn Client in your browser\n",
"\n",
@ -18,7 +18,32 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade multion > /dev/null"
"!pip install --upgrade multion langchain -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import MultionToolkit\n",
"import os\n",
"\n",
"\n",
"toolkit = MultionToolkit()\n",
"\n",
"toolkit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tools = toolkit.get_tools()\n",
"tools"
]
},
{
@ -38,8 +63,9 @@
"outputs": [],
"source": [
"# Authorize connection to your Browser extension\n",
"import multion \n",
"multion.login()\n"
"import multion\n",
"multion.login()\n",
"\n"
]
},
{
@ -57,38 +83,18 @@
},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import create_multion_agent\n",
"from langchain.tools.multion.tool import MultionClientTool\n",
"from langchain.agents.agent_types import AgentType\n",
"from langchain.chat_models import ChatOpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"\n",
"agent_executor = create_multion_agent(\n",
" llm=ChatOpenAI(temperature=0),\n",
" tool=MultionClientTool(),\n",
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
" verbose=True\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.run(\"show me the weather today\")"
"from langchain import OpenAI\n",
"from langchain.agents import initialize_agent, AgentType\n",
"llm = OpenAI(temperature=0)\n",
"from langchain.agents.agent_toolkits import MultionToolkit\n",
"toolkit = MultionToolkit()\n",
"tools=toolkit.get_tools()\n",
"agent = initialize_agent(\n",
" tools=toolkit.get_tools(),\n",
" llm=llm,\n",
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
" verbose = True\n",
")"
]
},
{
@ -100,7 +106,7 @@
"outputs": [],
"source": [
"agent.run(\n",
" \"Tweet about Elon Musk\"\n",
" \"Tweet 'Hi from MultiOn'\"\n",
")"
]
}

View File

@ -17,7 +17,7 @@ from langchain.agents.agent_toolkits.gmail.toolkit import GmailToolkit
from langchain.agents.agent_toolkits.jira.toolkit import JiraToolkit
from langchain.agents.agent_toolkits.json.base import create_json_agent
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
from langchain.agents.agent_toolkits.multion.base import create_multion_agent
from langchain.agents.agent_toolkits.multion.toolkit import MultionToolkit
from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
from langchain.agents.agent_toolkits.office365.toolkit import O365Toolkit
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
@ -52,6 +52,7 @@ __all__ = [
"GmailToolkit",
"JiraToolkit",
"JsonToolkit",
"MultionToolkit",
"NLAToolkit",
"O365Toolkit",
"OpenAPIToolkit",
@ -65,7 +66,6 @@ __all__ = [
"ZapierToolkit",
"create_csv_agent",
"create_json_agent",
"create_multion_agent",
"create_openapi_agent",
"create_pandas_dataframe_agent",
"create_pbi_agent",

View File

@ -1,58 +0,0 @@
"""MultiOn agent."""
from typing import Any, Dict, Optional
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
from langchain.agents.agent_toolkits.python.prompt import PREFIX
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.agents.types import AgentType
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema import SystemMessage
from langchain.tools.multion.tool import MultionClientTool
def create_multion_agent(
    llm: BaseLanguageModel,
    tool: MultionClientTool,
    agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    callback_manager: Optional[BaseCallbackManager] = None,
    verbose: bool = False,
    prefix: str = PREFIX,
    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Dict[str, Any],
) -> AgentExecutor:
    """Build an agent executor whose only tool is the given MultiOn tool.

    Args:
        llm: Language model that drives the agent.
        tool: MultiOn client tool; it becomes the executor's single tool.
        agent_type: Either ``AgentType.ZERO_SHOT_REACT_DESCRIPTION`` (the
            default) or ``AgentType.OPENAI_FUNCTIONS``.
        callback_manager: Optional callback manager handed to the LLM chain
            or agent, and to the executor.
        verbose: Forwarded to the executor.
        prefix: Prompt prefix for the zero-shot agent, or the system message
            content for the OpenAI-functions agent.
        agent_executor_kwargs: Extra keyword arguments for
            ``AgentExecutor.from_agent_and_tools``.
        **kwargs: Extra keyword arguments for the agent constructor.

    Returns:
        The executor wrapping the constructed agent.

    Raises:
        ValueError: If ``agent_type`` is neither of the two supported types.
    """
    toolset = [tool]
    executor_options = agent_executor_kwargs or {}

    built_agent: BaseSingleActionAgent
    if agent_type == AgentType.OPENAI_FUNCTIONS:
        functions_prompt = OpenAIFunctionsAgent.create_prompt(
            system_message=SystemMessage(content=prefix)
        )
        built_agent = OpenAIFunctionsAgent(
            llm=llm,
            prompt=functions_prompt,
            tools=[tool],
            callback_manager=callback_manager,
            **kwargs,
        )
    elif agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
        chain = LLMChain(
            llm=llm,
            prompt=ZeroShotAgent.create_prompt(toolset, prefix=prefix),
            callback_manager=callback_manager,
        )
        built_agent = ZeroShotAgent(
            llm_chain=chain,
            allowed_tools=[entry.name for entry in toolset],
            **kwargs,
        )
    else:
        raise ValueError(f"Agent type {agent_type} not supported at the moment.")

    return AgentExecutor.from_agent_and_tools(
        agent=built_agent,
        tools=toolset,
        callback_manager=callback_manager,
        verbose=verbose,
        **executor_options,
    )

View File

@ -0,0 +1,22 @@
"""MultiOn toolkit."""
from __future__ import annotations
from typing import List
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.tools import BaseTool
from langchain.tools.multion.create_session import MultionCreateSession
from langchain.tools.multion.update_session import MultionUpdateSession
class MultionToolkit(BaseToolkit):
    """Toolkit that bundles the MultiOn browser-session tools."""

    class Config:
        """Pydantic config."""

        arbitrary_types_allowed = True

    def get_tools(self) -> List[BaseTool]:
        """Return new instances of the create-session and update-session tools.

        The create-session tool comes first, followed by the update-session
        tool; both are instantiated on every call.
        """
        session_tools: List[BaseTool] = [MultionCreateSession()]
        session_tools.append(MultionUpdateSession())
        return session_tools

View File

@ -1 +1,6 @@
"""MutliOn Client API toolkit."""
"""MultiOn Client API tools."""
from langchain.tools.multion.create_session import MultionCreateSession
from langchain.tools.multion.update_session import MultionUpdateSession
__all__ = ["MultionCreateSession", "MultionUpdateSession"]

View File

@ -0,0 +1,50 @@
from typing import TYPE_CHECKING, Optional, Type
from pydantic import BaseModel, Field
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.tools.base import BaseTool
if TYPE_CHECKING:
# This is for linting and IDE typehints
import multion
else:
try:
# We do this so pydantic can resolve the types when instantiating
import multion
except ImportError:
pass
class CreateSessionSchema(BaseModel):
    """Input for CreateSessionTool."""

    # Required: the instruction passed to MultiOn as the session's input.
    query: str = Field(default=..., description="The query to run in multion agent.")
    # Optional start page; defaults to Google's home page when omitted.
    url: str = Field(
        default="https://www.google.com/",
        description=(
            "The Url to run the agent at. Note: accepts only secure "
            "links having https://"
        ),
    )
class MultionCreateSession(BaseTool):
    """Tool that opens a new MultiOn browser session.

    The query and start URL are forwarded to ``multion.new_session``; the
    tool reports the tab id of the opened session together with the message
    returned by MultiOn.
    """

    name: str = "create_multion_session"
    description: str = """Use this tool to create a new Multion Browser Window \
with provided fields.Always the first step to run \
any activities that can be done using browser."""
    args_schema: Type[CreateSessionSchema] = CreateSessionSchema

    def _run(
        self,
        query: str,
        url: Optional[str] = "https://www.google.com/",
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> dict:
        """Create a new MultiOn session and return its tab id and message.

        Args:
            query: Instruction sent to MultiOn as the session's ``input``.
            url: Page the session starts at.
            run_manager: Accepted for interface compatibility; not used here.

        Returns:
            A dict with ``"tabId"`` (taken from the MultiOn response) and
            ``"Response"`` (the response's ``"message"``).

        Raises:
            Exception: If the MultiOn call fails or its response lacks the
                expected keys; the original error is attached as the cause.
        """
        try:
            response = multion.new_session({"input": query, "url": url})
            return {"tabId": response["tabId"], "Response": response["message"]}
        except Exception as e:
            # Chain explicitly so the underlying failure (including a missing
            # ``multion`` module) stays visible as ``__cause__``.
            raise Exception(f"An error occurred: {e}") from e

View File

@ -1,37 +0,0 @@
"""Tool for MultiOn Extension API"""
from typing import Any, Optional
from pydantic import Field
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.tools.base import BaseTool
from langchain.utilities.multion import MultionClientAPIWrapper
def _get_default_multion_client() -> MultionClientAPIWrapper:
    """Create a MultiOn client wrapper with its default settings."""
    default_wrapper = MultionClientAPIWrapper()
    return default_wrapper
class MultionClientTool(BaseTool):
    """Tool that hands a task and URL to the MultiOn client wrapper."""

    name = "Multion_Client"
    description = (
        "A api to communicate with browser extension multion "
        "Useful for automating tasks and actions in the browser "
        "Input should be a task and a url."
        "The result is text form of action that was executed in the given url."
    )
    # Wrapper that performs the MultiOn calls; a default one is created per tool.
    api_wrapper: MultionClientAPIWrapper = Field(
        default_factory=_get_default_multion_client
    )

    def _run(
        self,
        task: str,
        url: str = "https://www.google.com/",
        tabId: Optional[Any] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Delegate the task to the API wrapper and return its message.

        Args:
            task: Action to perform in the browser.
            url: Page to perform the action at.
            tabId: Passed through to the wrapper unchanged.
            run_manager: Accepted for interface compatibility; not used here.
        """
        wrapper = self.api_wrapper
        outcome = wrapper.run(task, url, tabId)
        return outcome

View File

@ -0,0 +1,63 @@
from typing import TYPE_CHECKING, Optional, Type
from pydantic import BaseModel, Field
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.tools.base import BaseTool
if TYPE_CHECKING:
# This is for linting and IDE typehints
import multion
else:
try:
# We do this so pydantic can resolve the types when instantiating
import multion
except ImportError:
pass
class UpdateSessionSchema(BaseModel):
    """Input for UpdateSessionTool."""

    # Required: identifies the session to update.
    tabId: str = Field(
        default=...,
        description="The tabID, received from one of the createSessions run before",
    )
    # Required: the instruction passed to MultiOn as the session's input.
    query: str = Field(default=..., description="The query to run in multion agent.")
    # Optional page to act on; defaults to Google's home page when omitted.
    url: str = Field(
        default="https://www.google.com/",
        description=(
            "The Url to run the agent at. "
            "Note: accepts only secure links having https://"
        ),
    )
class MultionUpdateSession(BaseTool):
    """Tool that sends a follow-up query to an existing MultiOn session.

    If updating the given tab fails, the tool falls back to opening a new
    session with the same query and URL.
    """

    name: str = "update_multion_session"
    description: str = """Use this tool to update \
a existing corresponding \
Multion Browser Window with provided fields. \
Note:TabId is got from one of the previous Browser window creation."""
    args_schema: Type[UpdateSessionSchema] = UpdateSessionSchema

    def _run(
        self,
        tabId: str,
        query: str,
        url: Optional[str] = "https://www.google.com/",
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> dict:
        """Update the session in ``tabId``, or open a new one if that fails.

        Args:
            tabId: Tab id of the session to update.
            query: Instruction sent to MultiOn as the session's ``input``.
            url: Page to act on.
            run_manager: Accepted for interface compatibility; not used here.

        Returns:
            A dict with ``"tabId"`` and ``"Response"``. After a successful
            update ``"tabId"`` is the id that was passed in; after the
            fallback it is the id of the newly created session.

        Raises:
            Exception: If the fallback session creation also fails; the
                original error is attached as the cause.
        """
        # The tab id is no longer stored on the instance. The old code wrote
        # it to two differently spelled attributes (``tabId`` / ``tabID``)
        # that are neither declared on this class nor read by it. Because
        # those writes sat inside the ``try`` blocks, a failing assignment
        # after a successful update was treated as a failed update and
        # opened a second session.
        # NOTE(review): BaseTool presumably rejects undeclared attributes,
        # which would make those writes fail every time - confirm.
        try:
            try:
                response = multion.update_session(tabId, {"input": query, "url": url})
                return {"tabId": tabId, "Response": response["message"]}
            except Exception as e:
                print(f"{e}, creating a new session")
                response = multion.new_session({"input": query, "url": url})
                return {"tabId": response["tabId"], "Response": response["message"]}
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible.
            raise Exception(f"An error occurred: {e}") from e

View File

@ -1,75 +0,0 @@
"""Util that calls MultiOn Client.
In order to set this up, follow instructions at:
https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5
"""
from typing import Any, Optional
from pydantic import BaseModel
class MultionAPI:
    """Small stateful client around the ``multion`` package.

    It remembers the tab id of the most recently created session and counts
    how many sessions it has created.
    """

    def __init__(self) -> None:
        self.tabId = None
        self.new_session_count = 0

    def create_session(self, query: str, url: str) -> str:
        """Open a browser session; always the first step of any browser activity.

        Args:
            query: The action to perform at ``url``; use ``open`` when there
                is no specific action.
            url: The base url of a site.

        Returns:
            The message from MultiOn, or a fixed notice telling the caller to
            continue with ``update_session`` once the creation limit is hit.
        """
        import multion

        # NOTE(review): the original comment said a session is created only
        # once, but this guard allows two creations - confirm the intent.
        if self.new_session_count >= 2:
            return "Continue using update session"

        reply = multion.new_session({"input": query, "url": url})
        self.new_session_count += 1
        self.tabId = reply["tabId"]
        return reply["message"]

    def update_session(self, query: str, url: str) -> str:
        """Send a further action to the session stored in ``self.tabId``.

        Used for subsequent browser activity after a session was created.

        Args:
            query: The action to perform at ``url``; use ``open`` when there
                is no specific action.
            url: The base url of a site.

        Returns:
            The message from MultiOn.
        """
        import multion

        payload = {"input": query, "url": url}
        reply = multion.update_session(self.tabId, payload)
        return reply["message"]
class MultionClientAPIWrapper(BaseModel):
    """Wrapper that routes a task to a ``MultionAPI`` client.

    It decides between creating a session and updating the current one.
    """

    client: Any = MultionAPI()

    def run(self, task: str, url: str, tabId: Optional[Any]) -> str:
        """Run a task through the MultiOn client and return its message.

        A fresh client is created, and a new session started, whenever the
        current client has no tab id or the caller passes no ``tabId``;
        otherwise the current session is updated.

        Args:
            task: Action to perform in the browser.
            url: Page to perform the action at.
            tabId: Only checked for ``None``; the client's own stored tab id
                is what gets used for updates.

        Returns:
            The message produced by the client.
        """
        has_session = self.client.tabId is not None and tabId is not None
        if has_session:
            return self.client.update_session(task, url)

        self.client = MultionAPI()
        return self.client.create_session(task, url)