MultiOn client toolkit update 2.0 (#8750)

- Updated to use a newer, better function interaction
 - Previous version had only one callback
 - @hinthornw @hwchase17  Can you look into this
 -  Shout out to @MultiON_AI @DivGarg9 on twitter

---------

Co-authored-by: Naman Garg <ngarg3@binghamton.edu>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
Karthik Raja A 2023-08-07 10:54:10 +05:30 committed by GitHub
parent 454998c1fb
commit 5a9765b1b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 186 additions and 210 deletions

View File

@ -5,7 +5,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Multion Toolkit\n", "# MultiOn Toolkit\n",
"\n", "\n",
"This notebook walks you through connecting LangChain to the MultiOn Client in your browser\n", "This notebook walks you through connecting LangChain to the MultiOn Client in your browser\n",
"\n", "\n",
@ -18,7 +18,32 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install --upgrade multion > /dev/null" "!pip install --upgrade multion langchain -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import MultionToolkit\n",
"import os\n",
"\n",
"\n",
"toolkit = MultionToolkit()\n",
"\n",
"toolkit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tools = toolkit.get_tools()\n",
"tools"
] ]
}, },
{ {
@ -38,8 +63,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Authorize connection to your Browser extension\n", "# Authorize connection to your Browser extension\n",
"import multion \n", "import multion\n",
"multion.login()\n" "multion.login()\n",
"\n"
] ]
}, },
{ {
@ -57,38 +83,18 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.agents.agent_toolkits import create_multion_agent\n", "from langchain import OpenAI\n",
"from langchain.tools.multion.tool import MultionClientTool\n", "from langchain.agents import initialize_agent, AgentType\n",
"from langchain.agents.agent_types import AgentType\n", "llm = OpenAI(temperature=0)\n",
"from langchain.chat_models import ChatOpenAI" "from langchain.agents.agent_toolkits import MultionToolkit\n",
] "toolkit = MultionToolkit()\n",
}, "tools=toolkit.get_tools()\n",
{ "agent = initialize_agent(\n",
"cell_type": "code", " tools=toolkit.get_tools(),\n",
"execution_count": null, " llm=llm,\n",
"metadata": { " agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
"tags": [] " verbose = True\n",
}, ")"
"outputs": [],
"source": [
"\n",
"agent_executor = create_multion_agent(\n",
" llm=ChatOpenAI(temperature=0),\n",
" tool=MultionClientTool(),\n",
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
" verbose=True\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.run(\"show me the weather today\")"
] ]
}, },
{ {
@ -100,7 +106,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"agent.run(\n", "agent.run(\n",
" \"Tweet about Elon Musk\"\n", " \"Tweet 'Hi from MultiOn'\"\n",
")" ")"
] ]
} }

View File

@ -17,7 +17,7 @@ from langchain.agents.agent_toolkits.gmail.toolkit import GmailToolkit
from langchain.agents.agent_toolkits.jira.toolkit import JiraToolkit from langchain.agents.agent_toolkits.jira.toolkit import JiraToolkit
from langchain.agents.agent_toolkits.json.base import create_json_agent from langchain.agents.agent_toolkits.json.base import create_json_agent
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
from langchain.agents.agent_toolkits.multion.base import create_multion_agent from langchain.agents.agent_toolkits.multion.toolkit import MultionToolkit
from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
from langchain.agents.agent_toolkits.office365.toolkit import O365Toolkit from langchain.agents.agent_toolkits.office365.toolkit import O365Toolkit
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
@ -52,6 +52,7 @@ __all__ = [
"GmailToolkit", "GmailToolkit",
"JiraToolkit", "JiraToolkit",
"JsonToolkit", "JsonToolkit",
"MultionToolkit",
"NLAToolkit", "NLAToolkit",
"O365Toolkit", "O365Toolkit",
"OpenAPIToolkit", "OpenAPIToolkit",
@ -65,7 +66,6 @@ __all__ = [
"ZapierToolkit", "ZapierToolkit",
"create_csv_agent", "create_csv_agent",
"create_json_agent", "create_json_agent",
"create_multion_agent",
"create_openapi_agent", "create_openapi_agent",
"create_pandas_dataframe_agent", "create_pandas_dataframe_agent",
"create_pbi_agent", "create_pbi_agent",

View File

@ -1,58 +0,0 @@
"""MultiOn agent."""
from typing import Any, Dict, Optional
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
from langchain.agents.agent_toolkits.python.prompt import PREFIX
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.agents.types import AgentType
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema import SystemMessage
from langchain.tools.multion.tool import MultionClientTool
def create_multion_agent(
    llm: BaseLanguageModel,
    tool: MultionClientTool,
    agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    callback_manager: Optional[BaseCallbackManager] = None,
    verbose: bool = False,
    prefix: str = PREFIX,
    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Dict[str, Any],
) -> AgentExecutor:
    """Build an ``AgentExecutor`` that drives a single MultiOn client tool.

    Args:
        llm: Language model backing the agent.
        tool: The MultiOn client tool; it is the only tool the agent gets.
        agent_type: ``ZERO_SHOT_REACT_DESCRIPTION`` or ``OPENAI_FUNCTIONS``.
        callback_manager: Passed to the chain/agent and to the executor.
        verbose: Passed to the executor.
        prefix: Prompt prefix (zero-shot) or system message content
            (OpenAI functions).
        agent_executor_kwargs: Extra keyword arguments for the executor.
        **kwargs: Extra keyword arguments for the agent constructor.

    Returns:
        The executor wrapping the constructed agent and the tool.

    Raises:
        ValueError: If ``agent_type`` is neither of the two supported types.
    """
    toolbox = [tool]
    executor_options = agent_executor_kwargs or {}

    agent: BaseSingleActionAgent
    if agent_type == AgentType.OPENAI_FUNCTIONS:
        functions_prompt = OpenAIFunctionsAgent.create_prompt(
            system_message=SystemMessage(content=prefix)
        )
        agent = OpenAIFunctionsAgent(
            llm=llm,
            prompt=functions_prompt,
            tools=[tool],
            callback_manager=callback_manager,
            **kwargs,
        )
    elif agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
        react_chain = LLMChain(
            llm=llm,
            prompt=ZeroShotAgent.create_prompt(toolbox, prefix=prefix),
            callback_manager=callback_manager,
        )
        allowed = [member.name for member in toolbox]
        agent = ZeroShotAgent(llm_chain=react_chain, allowed_tools=allowed, **kwargs)
    else:
        raise ValueError(f"Agent type {agent_type} not supported at the moment.")

    return AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=toolbox,
        callback_manager=callback_manager,
        verbose=verbose,
        **executor_options,
    )

View File

@ -0,0 +1,22 @@
"""MultiOn toolkit."""
from __future__ import annotations
from typing import List
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.tools import BaseTool
from langchain.tools.multion.create_session import MultionCreateSession
from langchain.tools.multion.update_session import MultionUpdateSession
class MultionToolkit(BaseToolkit):
    """Toolkit exposing the MultiOn session tools (create and update)."""

    class Config:
        """Pydantic config."""

        arbitrary_types_allowed = True

    def get_tools(self) -> List[BaseTool]:
        """Return fresh instances of the create-session and update-session tools."""
        # Order matters to callers that index the list: create first, then update.
        tool_classes = (MultionCreateSession, MultionUpdateSession)
        return [tool_cls() for tool_cls in tool_classes]

View File

@ -1 +1,6 @@
"""MultiOn Client API toolkit.""" """MultiOn Client API tools."""
from langchain.tools.multion.create_session import MultionCreateSession
from langchain.tools.multion.update_session import MultionUpdateSession
__all__ = ["MultionCreateSession", "MultionUpdateSession"]

View File

@ -0,0 +1,50 @@
from typing import TYPE_CHECKING, Optional, Type
from pydantic import BaseModel, Field
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.tools.base import BaseTool
if TYPE_CHECKING:
# This is for linting and IDE typehints
import multion
else:
try:
# We do this so pydantic can resolve the types when instantiating
import multion
except ImportError:
pass
class CreateSessionSchema(BaseModel):
    """Input for CreateSessionTool."""

    # Required: the instruction to hand to the MultiOn agent.
    query: str = Field(..., description="The query to run in multion agent.")
    # Optional: page to run on; falls back to the Google home page.
    url: str = Field(
        "https://www.google.com/",
        description=(
            "The Url to run the agent at. Note: accepts only secure "
            "links having https://"
        ),
    )
class MultionCreateSession(BaseTool):
    """Tool that opens a new MultiOn browser session and runs a query in it.

    Its arguments are described by ``CreateSessionSchema``.
    """

    name: str = "create_multion_session"
    description: str = (
        "Use this tool to create a new Multion Browser Window "
        "with provided fields.Always the first step to run "
        "any activities that can be done using browser."
    )
    args_schema: Type[CreateSessionSchema] = CreateSessionSchema

    def _run(
        self,
        query: str,
        url: Optional[str] = "https://www.google.com/",
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> dict:
        """Start a new MultiOn session.

        Args:
            query: Sent to MultiOn as the ``input`` value.
            url: Sent to MultiOn as the ``url`` value.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A dict holding the new session's ``tabId`` and MultiOn's reply
            under ``Response``.

        Raises:
            Exception: If the MultiOn call fails or the response lacks the
                expected keys. The underlying error is chained as the cause.
        """
        try:
            response = multion.new_session({"input": query, "url": url})
            return {"tabId": response["tabId"], "Response": response["message"]}
        except Exception as e:
            # Chain explicitly so the root cause stays attached to the wrapper.
            raise Exception(f"An error occurred: {e}") from e

View File

@ -1,37 +0,0 @@
"""Tool for MultiOn Extension API"""
from typing import Any, Optional
from pydantic import Field
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.tools.base import BaseTool
from langchain.utilities.multion import MultionClientAPIWrapper
def _get_default_multion_client() -> MultionClientAPIWrapper:
    """Create a new ``MultionClientAPIWrapper`` with default settings."""
    default_wrapper = MultionClientAPIWrapper()
    return default_wrapper
class MultionClientTool(BaseTool):
    """Tool that hands a browser task to the MultiOn client API wrapper."""

    name = "Multion_Client"
    description = (
        "A api to communicate with browser extension multion Useful for "
        "automating tasks and actions in the browser Input should be a task "
        "and a url.The result is text form of action that was executed in "
        "the given url."
    )
    # A wrapper is created on demand when the caller does not pass one.
    api_wrapper: MultionClientAPIWrapper = Field(
        default_factory=_get_default_multion_client
    )

    def _run(
        self,
        task: str,
        url: str = "https://www.google.com/",
        tabId: Optional[Any] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Forward the task, url and tab id to the wrapper and return its reply."""
        outcome = self.api_wrapper.run(task, url, tabId)
        return outcome

View File

@ -0,0 +1,63 @@
from typing import TYPE_CHECKING, Optional, Type
from pydantic import BaseModel, Field
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.tools.base import BaseTool
if TYPE_CHECKING:
# This is for linting and IDE typehints
import multion
else:
try:
# We do this so pydantic can resolve the types when instantiating
import multion
except ImportError:
pass
class UpdateSessionSchema(BaseModel):
    """Input for UpdateSessionTool."""

    # Required: identifies the browser tab to act on.
    tabId: str = Field(
        ...,
        description="The tabID, received from one of the createSessions run before",
    )
    # Required: the instruction to hand to the MultiOn agent.
    query: str = Field(..., description="The query to run in multion agent.")
    # Optional: page to run on; falls back to the Google home page.
    url: str = Field(
        "https://www.google.com/",
        description=(
            "The Url to run the agent at. "
            "Note: accepts only secure links having https://"
        ),
    )
class MultionUpdateSession(BaseTool):
    """Tool that sends a follow-up query to an existing MultiOn browser session.

    Its arguments are described by ``UpdateSessionSchema``. If the update call
    fails, the tool falls back to opening a new session with the same query.
    """

    name: str = "update_multion_session"
    description: str = (
        "Use this tool to update "
        "a existing corresponding "
        "Multion Browser Window with provided fields. "
        "Note:TabId is got from one of the previous Browser window creation."
    )
    args_schema: Type[UpdateSessionSchema] = UpdateSessionSchema

    def _run(
        self,
        tabId: str,
        query: str,
        url: Optional[str] = "https://www.google.com/",
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> dict:
        """Update the session identified by ``tabId``, or start a new one.

        Args:
            tabId: Identifier of the session to update.
            query: Sent to MultiOn as the ``input`` value.
            url: Sent to MultiOn as the ``url`` value.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A dict with the ``tabId`` that was acted on (the given one, or the
            new session's id after a fallback) and MultiOn's reply under
            ``Response``.

        Raises:
            Exception: If the fallback session creation also fails. The
                underlying error is chained as the cause.
        """
        payload = {"input": query, "url": url}
        try:
            try:
                response = multion.update_session(tabId, payload)
                # The tab id is returned to the caller instead of being stored
                # on the tool: this class declares no field to hold it, and
                # the value was never read back.
                return {"tabId": tabId, "Response": response["message"]}
            except Exception as e:
                print(f"{e}, creating a new session")
                response = multion.new_session(payload)
                return {"tabId": response["tabId"], "Response": response["message"]}
        except Exception as e:
            # Chain explicitly so the root cause stays attached to the wrapper.
            raise Exception(f"An error occurred: {e}") from e

View File

@ -1,75 +0,0 @@
"""Util that calls MultiOn Client.
In order to set this up, follow instructions at:
https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5
"""
from typing import Any, Optional
from pydantic import BaseModel
class MultionAPI:
    """Stateful helper around the ``multion`` package's session calls.

    Remembers the tab id of the most recently created session and how many
    sessions this instance has created.
    """

    def __init__(self) -> None:
        # Nothing has been created yet: no sessions counted, no tab recorded.
        self.new_session_count = 0
        self.tabId = None

    def create_session(self, query: str, url: str) -> str:
        """Open a new browser session, unless two were already created.

        Args:
            query: The action to perform at ``url``.
            url: The base url of the site.

        Returns:
            The ``message`` from MultiOn's response, or a fixed hint to use
            ``update_session`` once this instance has created two sessions.
        """
        import multion

        # Cap reached: do not open another window.
        if self.new_session_count >= 2:
            return "Continue using update session"

        reply = multion.new_session({"input": query, "url": url})
        self.new_session_count += 1
        self.tabId = reply["tabId"]
        return reply["message"]

    def update_session(self, query: str, url: str) -> str:
        """Run a query in the session recorded by ``create_session``.

        Args:
            query: The action to perform at ``url``.
            url: The base url of the site.

        Returns:
            The ``message`` from MultiOn's response.
        """
        import multion

        payload = {"input": query, "url": url}
        reply = multion.update_session(self.tabId, payload)
        return reply["message"]
class MultionClientAPIWrapper(BaseModel):
    """Pydantic wrapper that routes a task to a ``MultionAPI`` client.

    Setup instructions: NEED TO ADD
    """

    client: Any = MultionAPI()

    def run(self, task: str, url: str, tabId: Optional[Any]) -> str:
        """Send a task to MultiOn and return its reply message.

        Args:
            task: The action to perform.
            url: The url to perform it at.
            tabId: Tab id supplied by the caller; ``None`` forces a new session.

        Returns:
            The message returned by the client's create or update call.
        """
        # Reuse the session only when both the client and the caller know a tab.
        has_session = self.client.tabId is not None and tabId is not None
        if has_session:
            return self.client.update_session(task, url)

        # Otherwise start over with a fresh client and a new session.
        self.client = MultionAPI()
        return self.client.create_session(task, url)