PlayWright Web Browser Toolkit (#3262)

Adds a PlayWright web browser toolkit with the following tools:

- NavigateTool (navigate_browser) - navigate to a URL
- NavigateBackTool (previous_webpage) - navigate back to the previous page in the browser history
- ClickTool (click_element) - click on an element (specified by
selector)
- ExtractTextTool (extract_text) - use beautiful soup to extract text
from the current web page
- ExtractHyperlinksTool (extract_hyperlinks) - use beautiful soup to
extract hyperlinks from the current web page
- GetElementsTool (get_elements) - select elements by CSS selector
- CurrentWebPageTool (current_webpage) - get the current page URL
This commit is contained in:
Zander Chase 2023-04-28 10:42:44 -07:00 committed by GitHub
parent da7b51455c
commit 491c27f861
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 642 additions and 2 deletions

File diff suppressed because one or more lines are too long

View File

@ -8,6 +8,7 @@ from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.agents.agent_toolkits.playwright.toolkit import PlayWrightBrowserToolkit
from langchain.agents.agent_toolkits.powerbi.base import create_pbi_agent from langchain.agents.agent_toolkits.powerbi.base import create_pbi_agent
from langchain.agents.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent from langchain.agents.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
@ -46,4 +47,5 @@ __all__ = [
"create_csv_agent", "create_csv_agent",
"ZapierToolkit", "ZapierToolkit",
"JiraToolkit", "JiraToolkit",
"PlayWrightBrowserToolkit",
] ]

View File

@ -0,0 +1,4 @@
"""Playwright browser toolkit."""
from langchain.agents.agent_toolkits.playwright.toolkit import PlayWrightBrowserToolkit
__all__ = ["PlayWrightBrowserToolkit"]

View File

@ -0,0 +1,66 @@
"""Playwright web browser toolkit."""
from __future__ import annotations
from typing import TYPE_CHECKING, List, Type
from pydantic import Extra, Field, root_validator
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.tools.base import BaseTool
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.click import ClickTool
from langchain.tools.playwright.current_page import CurrentWebPageTool
from langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool
from langchain.tools.playwright.extract_text import ExtractTextTool
from langchain.tools.playwright.get_elements import GetElementsTool
from langchain.tools.playwright.navigate import NavigateTool
from langchain.tools.playwright.navigate_back import NavigateBackTool
from langchain.tools.playwright.utils import create_playwright_browser
if TYPE_CHECKING:
from playwright.async_api import Browser as AsyncBrowser
class PlayWrightBrowserToolkit(BaseToolkit):
    """Bundle of Playwright-backed browser tools that share one browser.

    The ``browser`` field is filled by ``create_playwright_browser`` when no
    value is supplied.
    """

    browser: AsyncBrowser = Field(default_factory=create_playwright_browser)

    class Config:
        """Pydantic settings: allow arbitrary field types, forbid extra fields."""

        arbitrary_types_allowed = True
        extra = Extra.forbid

    @root_validator
    def check_args(cls, values: dict) -> dict:
        """Verify that the optional ``playwright`` dependency is importable.

        Args:
            values: The field values being validated; returned untouched.

        Returns:
            The same ``values`` mapping.

        Raises:
            ValueError: If ``playwright`` cannot be imported.
        """
        try:
            from playwright.async_api import Browser as _Browser  # noqa: F401
        except ImportError:
            raise ValueError(
                "The 'playwright' package is required to use this tool."
                " Please install it with 'pip install playwright'."
            )
        else:
            return values

    def get_tools(self) -> List[BaseTool]:
        """Instantiate every browser tool, each bound to ``self.browser``.

        Returns:
            One tool instance per class, in the order listed below.
        """
        tool_types: List[Type[BaseBrowserTool]] = [
            ClickTool,
            NavigateTool,
            NavigateBackTool,
            ExtractTextTool,
            ExtractHyperlinksTool,
            GetElementsTool,
            CurrentWebPageTool,
        ]
        tools: List[BaseTool] = []
        for tool_type in tool_types:
            tools.append(tool_type.from_browser(self.browser))
        return tools

    @classmethod
    def from_browser(cls, browser: AsyncBrowser) -> PlayWrightBrowserToolkit:
        """Create a toolkit around an already-launched browser.

        Args:
            browser: The browser instance the tools should operate on.

        Returns:
            A toolkit whose ``browser`` field is the given instance.
        """
        # ``AsyncBrowser`` is only imported under TYPE_CHECKING at module level,
        # so the real class is handed to pydantic before instantiating.
        from playwright.async_api import Browser

        cls.update_forward_refs(AsyncBrowser=Browser)
        return cls(browser=browser)

View File

@ -8,20 +8,38 @@ from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearch
from langchain.tools.ifttt import IFTTTWebhook from langchain.tools.ifttt import IFTTTWebhook
from langchain.tools.openapi.utils.api_models import APIOperation from langchain.tools.openapi.utils.api_models import APIOperation
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
from langchain.tools.playwright import (
BaseBrowserTool,
ClickTool,
CurrentWebPageTool,
ExtractHyperlinksTool,
ExtractTextTool,
GetElementsTool,
NavigateBackTool,
NavigateTool,
)
from langchain.tools.plugin import AIPluginTool from langchain.tools.plugin import AIPluginTool
__all__ = [ __all__ = [
"AIPluginTool",
"APIOperation", "APIOperation",
"BaseBrowserTool",
"BaseTool",
"BingSearchResults", "BingSearchResults",
"BingSearchRun", "BingSearchRun",
"ClickTool",
"CurrentWebPageTool",
"DuckDuckGoSearchResults", "DuckDuckGoSearchResults",
"DuckDuckGoSearchRun", "DuckDuckGoSearchRun",
"DuckDuckGoSearchRun", "DuckDuckGoSearchRun",
"ExtractHyperlinksTool",
"ExtractTextTool",
"GetElementsTool",
"GooglePlacesTool", "GooglePlacesTool",
"GoogleSearchResults", "GoogleSearchResults",
"GoogleSearchRun", "GoogleSearchRun",
"IFTTTWebhook", "IFTTTWebhook",
"NavigateBackTool",
"NavigateTool",
"OpenAPISpec", "OpenAPISpec",
"BaseTool", "AIPluginTool",
] ]

View File

@ -0,0 +1,21 @@
"""Browser tools and toolkit."""
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.click import ClickTool
from langchain.tools.playwright.current_page import CurrentWebPageTool
from langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool
from langchain.tools.playwright.extract_text import ExtractTextTool
from langchain.tools.playwright.get_elements import GetElementsTool
from langchain.tools.playwright.navigate import NavigateTool
from langchain.tools.playwright.navigate_back import NavigateBackTool
# Public names re-exported by this package: the browser tool base class and
# the concrete browser tools imported above.
__all__ = [
"NavigateTool",
"NavigateBackTool",
"ExtractTextTool",
"ExtractHyperlinksTool",
"GetElementsTool",
"BaseBrowserTool",
"ClickTool",
"CurrentWebPageTool",
]

View File

@ -0,0 +1,40 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from pydantic import Field, root_validator
from langchain.tools.base import BaseTool
from langchain.tools.playwright.utils import create_playwright_browser, run_async
if TYPE_CHECKING:
from playwright.async_api import Browser as AsyncBrowser
class BaseBrowserTool(BaseTool):
    """Common base for tools that operate on a Playwright browser.

    Subclasses implement the asynchronous ``_arun``; the synchronous ``_run``
    defined here drives that coroutine through ``run_async``.
    """

    browser: AsyncBrowser = Field(default_factory=create_playwright_browser)

    @root_validator
    def check_args(cls, values: dict) -> dict:
        """Verify that the optional ``playwright`` dependency is importable.

        Args:
            values: The field values being validated; returned untouched.

        Returns:
            The same ``values`` mapping.

        Raises:
            ValueError: If ``playwright`` cannot be imported.
        """
        try:
            from playwright.async_api import Browser as _Browser  # noqa: F401
        except ImportError:
            raise ValueError(
                "The 'playwright' package is required to use this tool."
                " Please install it with 'pip install playwright'."
            )
        else:
            return values

    def _run(self, *args: Any, **kwargs: Any) -> str:
        """Run the tool synchronously by executing ``_arun`` via ``run_async``."""
        pending = self._arun(*args, **kwargs)
        return run_async(pending)

    @classmethod
    def from_browser(cls, browser: AsyncBrowser) -> BaseBrowserTool:
        """Create the tool around an already-launched browser.

        Args:
            browser: The browser instance the tool should operate on.

        Returns:
            An instance of ``cls`` whose ``browser`` field is the given instance.
        """
        # ``AsyncBrowser`` is only imported under TYPE_CHECKING at module level,
        # so the real class is handed to pydantic before instantiating.
        from playwright.async_api import Browser

        cls.update_forward_refs(AsyncBrowser=Browser)
        return cls(browser=browser)

View File

@ -0,0 +1,29 @@
from __future__ import annotations
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import (
get_current_page,
)
class ClickToolInput(BaseModel):
    """Argument schema for ClickTool: the CSS selector of the click target."""

    selector: str = Field(
        ...,
        description="CSS selector for the element to click",
    )
class ClickTool(BaseBrowserTool):
    """Browser tool that clicks the element matching a CSS selector."""

    name: str = "click_element"
    description: str = "Click on an element with the given CSS selector"
    args_schema: Type[BaseModel] = ClickToolInput

    async def _arun(self, selector: str) -> str:
        """Click the element matching ``selector`` on the current page.

        Args:
            selector: CSS selector identifying the element to click.

        Returns:
            A confirmation message naming the selector that was clicked.
        """
        # The click is issued against whatever page get_current_page returns,
        # so the caller should have navigated to the target page beforehand.
        active_page = await get_current_page(self.browser)
        await active_page.click(selector)
        confirmation = f"Clicked element '{selector}'"
        return confirmation

View File

@ -0,0 +1,21 @@
from __future__ import annotations
from typing import Type
from pydantic import BaseModel
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import (
get_current_page,
)
class CurrentWebPageTool(BaseBrowserTool):
    """Browser tool that reports the URL of the current page."""

    name: str = "current_webpage"
    description: str = "Returns the URL of the current page"
    args_schema: Type[BaseModel] = BaseModel

    async def _arun(self) -> str:
        """Return the current page's URL as a string."""
        active_page = await get_current_page(self.browser)
        current_url = active_page.url
        return str(current_url)

View File

@ -0,0 +1,64 @@
from __future__ import annotations
import json
from typing import TYPE_CHECKING, Type
from pydantic import BaseModel, Field, root_validator
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import get_current_page
if TYPE_CHECKING:
pass
class ExtractHyperlinksToolInput(BaseModel):
    """Argument schema for ExtractHyperlinksTool."""

    # When true, each link is resolved against the current page URL.
    absolute_urls: bool = Field(
        description="Return absolute URLs instead of relative URLs",
        default=False,
    )
class ExtractHyperlinksTool(BaseBrowserTool):
    """Browser tool that lists the ``href`` of every anchor on the page."""

    name: str = "extract_hyperlinks"
    description: str = "Extract all hyperlinks on the current webpage"
    args_schema: Type[BaseModel] = ExtractHyperlinksToolInput

    @root_validator
    def check_args(cls, values: dict) -> dict:
        """Verify that the optional ``bs4`` dependency is importable.

        Args:
            values: The field values being validated; returned untouched.

        Returns:
            The same ``values`` mapping.

        Raises:
            ValueError: If ``bs4`` cannot be imported.
        """
        try:
            from bs4 import BeautifulSoup as _BeautifulSoup  # noqa: F401
        except ImportError:
            raise ValueError(
                "The 'beautifulsoup4' package is required to use this tool."
                " Please install it with 'pip install beautifulsoup4'."
            )
        else:
            return values

    async def _arun(self, absolute_urls: bool = False) -> str:
        """Collect the hyperlinks of the current page.

        Args:
            absolute_urls: When true, join each ``href`` with the page URL.

        Returns:
            A JSON-encoded list of link strings, one per ``<a>`` element;
            anchors without an ``href`` contribute an empty string before
            any URL joining.
        """
        from urllib.parse import urljoin

        from bs4 import BeautifulSoup

        active_page = await get_current_page(self.browser)
        markup = await active_page.content()

        # NOTE(review): the "lxml" parser is requested here but check_args only
        # verifies bs4 - confirm lxml is installed wherever this tool runs.
        document = BeautifulSoup(markup, "lxml")

        hrefs = [tag.get("href", "") for tag in document.find_all("a")]
        if absolute_urls:
            page_url = active_page.url
            hrefs = [urljoin(page_url, href) for href in hrefs]

        return json.dumps(hrefs)

View File

@ -0,0 +1,39 @@
from __future__ import annotations
from typing import Type
from pydantic import BaseModel, root_validator
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import get_current_page
class ExtractTextTool(BaseBrowserTool):
    """Browser tool that returns the visible text of the current page."""

    name: str = "extract_text"
    description: str = "Extract all the text on the current webpage"
    args_schema: Type[BaseModel] = BaseModel

    @root_validator
    def check_args(cls, values: dict) -> dict:
        """Verify that the optional ``bs4`` dependency is importable.

        Args:
            values: The field values being validated; returned untouched.

        Returns:
            The same ``values`` mapping.

        Raises:
            ValueError: If ``bs4`` cannot be imported.
        """
        try:
            from bs4 import BeautifulSoup as _BeautifulSoup  # noqa: F401
        except ImportError:
            raise ValueError(
                "The 'beautifulsoup4' package is required to use this tool."
                " Please install it with 'pip install beautifulsoup4'."
            )
        else:
            return values

    async def _arun(self) -> str:
        """Return the page's text fragments joined by single spaces."""
        # The HTML is parsed once with Beautiful Soup rather than querying the
        # browser element by element.
        from bs4 import BeautifulSoup

        active_page = await get_current_page(self.browser)
        markup = await active_page.content()

        # NOTE(review): the "lxml" parser is requested here but check_args only
        # verifies bs4 - confirm lxml is installed wherever this tool runs.
        document = BeautifulSoup(markup, "lxml")
        fragments = document.stripped_strings
        return " ".join(fragments)

View File

@ -0,0 +1,62 @@
from __future__ import annotations
import json
from typing import TYPE_CHECKING, List, Optional, Sequence, Type
from pydantic import BaseModel, Field
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import get_current_page
if TYPE_CHECKING:
from playwright.async_api import Page as AsyncPage
class GetElementsToolInput(BaseModel):
    """Argument schema for GetElementsTool."""

    # Required: which elements to look up.
    selector: str = Field(
        ...,
        description="CSS selector, such as '*', 'div', 'p', 'a', #id, .classname",
    )
    # Optional: which values to read from each match; a fresh list holding
    # only "innerText" is built when the caller omits it.
    attributes: List[str] = Field(
        description="Set of attributes to retrieve for each element",
        default_factory=lambda: ["innerText"],
    )
async def _get_elements(
    page: AsyncPage, selector: str, attributes: Sequence[str]
) -> List[dict]:
    """Read the requested attributes from every element matching ``selector``.

    Args:
        page: Page to query via ``query_selector_all``.
        selector: CSS selector choosing the elements.
        attributes: Names to read from each element. ``"innerText"`` is read
            with ``inner_text()``; any other name with ``get_attribute()``.

    Returns:
        One dict per element mapping attribute name to value. Values that are
        ``None`` or blank are left out, and elements for which nothing was
        collected are omitted entirely.
    """
    collected: List[dict] = []
    for handle in await page.query_selector_all(selector):
        found = {}
        for name in attributes:
            text: Optional[str] = (
                await handle.inner_text()
                if name == "innerText"
                else await handle.get_attribute(name)
            )
            # Missing or whitespace-only values carry no information.
            if text is None or not text.strip():
                continue
            found[name] = text
        if found:
            collected.append(found)
    return collected
class GetElementsTool(BaseBrowserTool):
    """Browser tool that returns attributes of elements matching a selector."""

    name: str = "get_elements"
    description: str = (
        "Retrieve elements in the current web page matching the given CSS selector"
    )
    args_schema: Type[BaseModel] = GetElementsToolInput

    async def _arun(
        self, selector: str, attributes: Sequence[str] = ("innerText",)
    ) -> str:
        """Look up elements on the current page and serialize their attributes.

        Args:
            selector: CSS selector choosing the elements.
            attributes: Names of the values to read from each element. The
                default is an immutable tuple so that no mutable object is
                shared between calls.

        Returns:
            The list produced by ``_get_elements``, encoded as a JSON string.
        """
        # The lookup runs against whatever page get_current_page returns, so
        # the caller should have navigated to the target page beforehand.
        page = await get_current_page(self.browser)
        results = await _get_elements(page, selector, attributes)
        return json.dumps(results)

View File

@ -0,0 +1,29 @@
from __future__ import annotations
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import (
get_current_page,
)
class NavigateToolInput(BaseModel):
    """Argument schema for NavigateTool: the URL to open."""

    url: str = Field(
        ...,
        description="url to navigate to",
    )
class NavigateTool(BaseBrowserTool):
    """Browser tool that loads a URL in the current page."""

    name: str = "navigate_browser"
    description: str = "Navigate a browser to the specified URL"
    args_schema: Type[BaseModel] = NavigateToolInput

    async def _arun(self, url: str) -> str:
        """Navigate the current page to ``url`` and report the outcome.

        Args:
            url: Address to load.

        Returns:
            A message containing the URL and the response status code, or
            ``unknown`` when the navigation produced no response object.
        """
        active_page = await get_current_page(self.browser)
        result = await active_page.goto(url)
        if result:
            status = result.status
        else:
            status = "unknown"
        return f"Navigating to {url} returned status code {status}"

View File

@ -0,0 +1,31 @@
from __future__ import annotations
from typing import Type
from pydantic import BaseModel
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import (
get_current_page,
)
class NavigateBackTool(BaseBrowserTool):
    """Navigate back to the previous page in the browser history."""

    name: str = "previous_webpage"
    description: str = "Navigate back to the previous page in the browser history"
    args_schema: Type[BaseModel] = BaseModel

    async def _arun(self) -> str:
        """Go one step back in the current page's history.

        Returns:
            A message with the URL and status code of the page navigated to,
            or a notice that navigation was not possible when ``go_back``
            yields no response.
        """
        page = await get_current_page(self.browser)
        response = await page.go_back()
        if response:
            # Both fragments must be f-strings; otherwise the status
            # placeholder is emitted literally instead of being interpolated.
            return (
                f"Navigated back to the previous page with URL '{response.url}'."
                f" Status code {response.status}"
            )
        return "Unable to navigate back; no previous page in the history"

View File

@ -0,0 +1,35 @@
"""Utilities for the Playwright browser tools."""
from __future__ import annotations
import asyncio
from typing import TYPE_CHECKING, Any, Coroutine, TypeVar
if TYPE_CHECKING:
from playwright.async_api import Browser as AsyncBrowser
from playwright.async_api import Page as AsyncPage
async def get_current_page(browser: AsyncBrowser) -> AsyncPage:
    """Return the page that browser tools should act on, creating one if needed.

    Args:
        browser: Browser whose contexts and pages are inspected.

    Returns:
        The last page of the browser's first context. When the browser has no
        context, a new context and page are created; when the first context
        has no pages, a new page is opened in it.
    """
    contexts = browser.contexts
    if contexts:
        # Only the first context is considered; it is assumed to be the
        # default one.
        first_context = contexts[0]
        open_pages = first_context.pages
        if open_pages:
            # The most recently listed page is assumed to be the active one.
            return open_pages[-1]
        return await first_context.new_page()

    fresh_context = await browser.new_context()
    return await fresh_context.new_page()
def create_playwright_browser() -> AsyncBrowser:
    """Start Playwright and launch a headless Chromium browser.

    Both asynchronous steps are executed synchronously through ``run_async``.

    Returns:
        The launched browser instance.
    """
    from playwright.async_api import async_playwright

    # ``start()`` yields the Playwright driver object, which exposes the
    # individual browser types such as ``chromium``.
    playwright_driver = run_async(async_playwright().start())
    launch = playwright_driver.chromium.launch(headless=True)
    return run_async(launch)
T = TypeVar("T")


def run_async(coro: Coroutine[Any, Any, T]) -> T:
    """Run ``coro`` to completion on the current thread and return its result.

    The thread's current event loop is reused when one is available. If
    ``asyncio.get_event_loop()`` raises ``RuntimeError`` (for example in a
    non-main thread, or after ``asyncio.run()`` has cleared the current loop),
    or if the current loop is already closed, a new loop is created and
    installed as the current one.

    Args:
        coro: The coroutine to execute.

    Returns:
        Whatever ``coro`` returns.

    Raises:
        RuntimeError: Propagated from ``run_until_complete`` when the loop is
            already running; this helper does not support being called from
            inside a running event loop.
    """
    try:
        event_loop = asyncio.get_event_loop()
    except RuntimeError:
        # No usable current loop for this thread.
        event_loop = None

    if event_loop is None or event_loop.is_closed():
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)

    return event_loop.run_until_complete(coro)