MultiOn client toolkit (#8110)

Addition of MultiOn Client Agent Toolkit
Dependencies: multion pip package
This PR consists of the following:
- MultiOn utility, tools, and integration with agent
- sample jupyter notebook.
Request @hwchase17 , @hinthornw

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
Karthik Raja A 2023-07-22 20:49:01 +05:30 committed by GitHub
parent aa0e69bc98
commit 8b08687fc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 314 additions and 0 deletions

View File

@ -0,0 +1,129 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multion Toolkit\n",
"\n",
"This notebook walks you through connecting LangChain to the MultiOn Client in your browser.\n",
"\n",
"To use this toolkit, you will need to add the MultiOn Extension to your browser as explained in [MultiOn for Chrome](https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade multion > /dev/null"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## MultiOn Setup\n",
"\n",
"Login to establish connection with your extension."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Authorize connection to your Browser extension\n",
"import multion \n",
"multion.login()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use Multion Toolkit within an Agent"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import create_multion_agent\n",
"from langchain.tools.multion.tool import MultionClientTool\n",
"from langchain.agents.agent_types import AgentType\n",
"from langchain.chat_models import ChatOpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"\n",
"agent_executor = create_multion_agent(\n",
" llm=ChatOpenAI(temperature=0),\n",
" tool=MultionClientTool(),\n",
" agent_type=AgentType.OPENAI_FUNCTIONS,\n",
" verbose=True\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent_executor.run(\"show me the weather today\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent_executor.run(\n",
" \"Tweet about Elon Musk\"\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1 @@
"""MultiOn Toolkit."""

View File

@ -0,0 +1,58 @@
"""MultiOn agent."""
from typing import Any, Dict, Optional
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
from langchain.agents.agent_toolkits.python.prompt import PREFIX
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.agents.types import AgentType
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema import SystemMessage
from langchain.tools.multion.tool import MultionClientTool
def create_multion_agent(
    llm: BaseLanguageModel,
    tool: MultionClientTool,
    agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    callback_manager: Optional[BaseCallbackManager] = None,
    verbose: bool = False,
    prefix: str = PREFIX,
    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> AgentExecutor:
    """Construct a MultiOn agent from an LLM and a MultiOn client tool.

    Args:
        llm: Language model that drives the agent.
        tool: The MultiOn client tool; it is the only tool given to the agent.
        agent_type: Which agent implementation to build. Only
            ``AgentType.ZERO_SHOT_REACT_DESCRIPTION`` and
            ``AgentType.OPENAI_FUNCTIONS`` are handled.
        callback_manager: Optional callback manager passed to the LLM chain
            or agent, and to the executor.
        verbose: Forwarded to ``AgentExecutor.from_agent_and_tools``.
        prefix: Text used as the prompt prefix (zero-shot agent) or as the
            system message content (OpenAI functions agent). The default is
            the ``PREFIX`` imported from the Python agent toolkit prompt
            module.
        agent_executor_kwargs: Extra keyword arguments for
            ``AgentExecutor.from_agent_and_tools``.
        **kwargs: Extra keyword arguments forwarded to the agent constructor.

    Returns:
        An ``AgentExecutor`` wrapping the constructed agent and the tool.

    Raises:
        ValueError: If ``agent_type`` is not one of the supported types.
    """
    tools = [tool]
    agent: BaseSingleActionAgent

    if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
        prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix)
        llm_chain = LLMChain(
            llm=llm,
            prompt=prompt,
            callback_manager=callback_manager,
        )
        # The agent may only call the tools it was prompted with.
        tool_names = [t.name for t in tools]
        agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
    elif agent_type == AgentType.OPENAI_FUNCTIONS:
        system_message = SystemMessage(content=prefix)
        _prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)
        agent = OpenAIFunctionsAgent(
            llm=llm,
            prompt=_prompt,
            tools=tools,
            callback_manager=callback_manager,
            **kwargs,
        )
    else:
        raise ValueError(f"Agent type {agent_type} not supported at the moment.")

    return AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        callback_manager=callback_manager,
        verbose=verbose,
        **(agent_executor_kwargs or {}),
    )

View File

@ -0,0 +1 @@
"""MultiOn Client API toolkit."""

View File

@ -0,0 +1,50 @@
"""Tool for MultiOn Extension API"""
from typing import Any, Optional
from pydantic import Field
from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.tools.base import BaseTool
from langchain.utilities.multion import MultionClientAPIWrapper
def _get_default_multion_client() -> MultionClientAPIWrapper:
    """Return a freshly constructed ``MultionClientAPIWrapper``."""
    default_wrapper = MultionClientAPIWrapper()
    return default_wrapper
class MultionClientTool(BaseTool):
    """Simulates a Browser interacting agent.

    Each call is delegated to ``api_wrapper.run`` with the task, the url and
    the optional tab id.
    """

    name = "Multion_Client"
    # Each fragment ends with ". " so the concatenated description reads as
    # separate sentences instead of running together.
    description = (
        "A api to communicate with browser extension multion. "
        "Useful for automating tasks and actions in the browser. "
        "Input should be a task and a url. "
        "The result is text form of action that was executed in the given url."
    )
    api_wrapper: MultionClientAPIWrapper = Field(
        default_factory=_get_default_multion_client
    )

    def _run(
        self,
        task: str,
        url: str = "https://www.google.com/",
        tabId: Optional[Any] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Args:
            task: The task to perform in the browser.
            url: The url to perform the task on.
            tabId: Optional tab identifier forwarded to the API wrapper.
            run_manager: Optional callback manager for this run (unused here).

        Returns:
            The string returned by ``api_wrapper.run``.
        """
        return self.api_wrapper.run(task, url, tabId)

    async def _arun(
        self,
        task: str,
        url: str = "https://www.google.com/",
        tabId: Optional[Any] = None,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        The signature mirrors ``_run`` (including the default ``url``).

        Raises:
            NotImplementedError: Always; async execution is not supported.
        """
        raise NotImplementedError("Multion Client does not support async yet")

View File

@ -0,0 +1,75 @@
"""Util that calls MultiOn Client.
In order to set this up, follow instructions at:
https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5
"""
from typing import Any, Optional
from pydantic import BaseModel
class MultionAPI:
    """Stateful helper around the ``multion`` package's session calls.

    ``tabId`` stores the tab id taken from the latest ``new_session``
    response, and ``new_session_count`` counts the sessions this object
    has created.
    """

    def __init__(self) -> None:
        self.new_session_count = 0
        self.tabId = None

    def create_session(self, query: str, url: str) -> str:
        """Always the first step to run any activities that can be done using browser.

        Args:
            query: the query that you need to perform in the given url.
                If there is no 'query' set it as open.
            url: the base url of a site.

        Returns:
            The ``"message"`` entry of the ``new_session`` response, or a
            fixed hint string once the session limit has been reached.
        """
        import multion

        # Stop creating sessions once the limit is hit; callers should then
        # continue with update_session.
        # NOTE(review): the limit allows two new sessions, although the
        # original comment said "once" -- confirm the intended count.
        if self.new_session_count >= 2:
            return "Continue using update session"

        reply = multion.new_session({"input": query, "url": url})
        self.new_session_count += 1
        self.tabId = reply["tabId"]
        return reply["message"]

    def update_session(self, query: str, url: str) -> str:
        """Updates the existing browser session.

        Sends the given action and url to the session identified by the
        stored ``tabId``; meant to be used after a session has been created.

        Args:
            query: the query that you need to perform in the given url.
                If there is no 'query' set it as open.
            url: the base url of a site.

        Returns:
            The ``"message"`` entry of the ``update_session`` response.
        """
        import multion

        payload = {"input": query, "url": url}
        reply = multion.update_session(self.tabId, payload)
        return reply["message"]
class MultionClientAPIWrapper(BaseModel):
    """Wrapper for Multion Client API.

    In order to set this up, follow instructions at:
    https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5
    """

    client: Any = MultionAPI()

    def run(self, task: str, url: str, tabId: Optional[Any]) -> str:
        """Run a task through the Multion client and respond with its message.

        Args:
            task: The task to carry out in the browser.
            url: The url passed along with the task.
            tabId: Tab identifier supplied by the caller. When it is ``None``,
                or the current client has no tab id yet, a fresh
                ``MultionAPI`` replaces the client and a new session is
                created; otherwise the existing session is updated.

        Returns:
            The message returned by the create/update session call.
        """
        session_is_live = self.client.tabId is not None and tabId is not None
        if session_is_live:
            return self.client.update_session(task, url)

        # No usable session: start over with a brand-new client.
        self.client = MultionAPI()
        return self.client.create_session(task, url)