Synchronous Browser (#3745)

Split out sync methods in playwright
fix_agent_callbacks
Zander Chase 1 year ago committed by GitHub
parent 6c2b16e465
commit a46f1d830e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

File diff suppressed because one or more lines are too long

@ -1,13 +1,16 @@
"""Playwright web browser toolkit."""
from __future__ import annotations
from typing import TYPE_CHECKING, List, Type
from typing import TYPE_CHECKING, List, Optional, Type, cast
from pydantic import Extra, Field, root_validator
from pydantic import Extra, root_validator
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.tools.base import BaseTool
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.base import (
BaseBrowserTool,
lazy_import_playwright_browsers,
)
from langchain.tools.playwright.click import ClickTool
from langchain.tools.playwright.current_page import CurrentWebPageTool
from langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool
@ -15,16 +18,24 @@ from langchain.tools.playwright.extract_text import ExtractTextTool
from langchain.tools.playwright.get_elements import GetElementsTool
from langchain.tools.playwright.navigate import NavigateTool
from langchain.tools.playwright.navigate_back import NavigateBackTool
from langchain.tools.playwright.utils import create_playwright_browser
if TYPE_CHECKING:
from playwright.async_api import Browser as AsyncBrowser
from playwright.sync_api import Browser as SyncBrowser
else:
try:
# We do this so pydantic can resolve the types when instantiating
from playwright.async_api import Browser as AsyncBrowser
from playwright.sync_api import Browser as SyncBrowser
except ImportError:
pass
class PlayWrightBrowserToolkit(BaseToolkit):
"""Toolkit for web browser tools."""
browser: AsyncBrowser = Field(default_factory=create_playwright_browser)
sync_browser: Optional["SyncBrowser"] = None
async_browser: Optional["AsyncBrowser"] = None
class Config:
"""Configuration for this pydantic object."""
@ -33,15 +44,11 @@ class PlayWrightBrowserToolkit(BaseToolkit):
arbitrary_types_allowed = True
@root_validator
def validate_imports_and_browser_provided(cls, values: dict) -> dict:
    """Check that playwright is importable and that a browser was supplied.

    Args:
        values: Field values gathered by pydantic for the toolkit.

    Returns:
        The ``values`` mapping, unchanged.

    Raises:
        ValueError: If ``lazy_import_playwright_browsers`` cannot import
            playwright, or if both ``async_browser`` and ``sync_browser``
            are ``None``.
    """
    # The shared helper performs the import check; the superseded inline
    # try/except duplicated it and has been dropped.
    lazy_import_playwright_browsers()
    if values.get("async_browser") is None and values.get("sync_browser") is None:
        raise ValueError("Either async_browser or sync_browser must be specified.")
    return values
def get_tools(self) -> List[BaseTool]:
@ -56,11 +63,21 @@ class PlayWrightBrowserToolkit(BaseToolkit):
CurrentWebPageTool,
]
return [tool_cls.from_browser(self.browser) for tool_cls in tool_classes]
tools = [
tool_cls.from_browser(
sync_browser=self.sync_browser, async_browser=self.async_browser
)
for tool_cls in tool_classes
]
return cast(List[BaseTool], tools)
@classmethod
def from_browser(
    cls,
    sync_browser: Optional[SyncBrowser] = None,
    async_browser: Optional[AsyncBrowser] = None,
) -> PlayWrightBrowserToolkit:
    """Instantiate the toolkit from a synchronous and/or asynchronous browser.

    Args:
        sync_browser: Browser handed to each tool for synchronous use.
        async_browser: Browser handed to each tool for asynchronous use.

    Returns:
        A toolkit holding the given browsers.

    Raises:
        ValueError: If playwright cannot be imported, or (from the root
            validator) if both browsers are ``None``.
    """
    # This is to raise a better error than the forward ref ones Pydantic would have
    lazy_import_playwright_browsers()
    return cls(sync_browser=sync_browser, async_browser=async_browser)

@ -16,7 +16,6 @@ from langchain.tools.ifttt import IFTTTWebhook
from langchain.tools.openapi.utils.api_models import APIOperation
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
from langchain.tools.playwright import (
BaseBrowserTool,
ClickTool,
CurrentWebPageTool,
ExtractHyperlinksTool,
@ -32,7 +31,6 @@ from langchain.tools.shell.tool import ShellTool
__all__ = [
"AIPluginTool",
"APIOperation",
"BaseBrowserTool",
"BaseTool",
"BaseTool",
"BingSearchResults",

@ -1,6 +1,5 @@
"""Browser tools and toolkit."""
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.click import ClickTool
from langchain.tools.playwright.current_page import CurrentWebPageTool
from langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool
@ -15,7 +14,6 @@ __all__ = [
"ExtractTextTool",
"ExtractHyperlinksTool",
"GetElementsTool",
"BaseBrowserTool",
"ClickTool",
"CurrentWebPageTool",
]

@ -1,40 +1,55 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Optional, Tuple, Type
from pydantic import Field, root_validator
from pydantic import root_validator
from langchain.tools.base import BaseTool
from langchain.tools.playwright.utils import create_playwright_browser, run_async
if TYPE_CHECKING:
from playwright.async_api import Browser as AsyncBrowser
from playwright.sync_api import Browser as SyncBrowser
else:
try:
# We do this so pydantic can resolve the types when instantiating
from playwright.async_api import Browser as AsyncBrowser
from playwright.sync_api import Browser as SyncBrowser
except ImportError:
pass
def lazy_import_playwright_browsers() -> Tuple[Type[AsyncBrowser], Type[SyncBrowser]]:
    """Import the playwright ``Browser`` classes on demand.

    Returns:
        The ``(AsyncBrowser, SyncBrowser)`` classes from
        ``playwright.async_api`` and ``playwright.sync_api``.

    Raises:
        ValueError: If the ``playwright`` package cannot be imported. The
            original ``ImportError`` is attached as ``__cause__``.
    """
    try:
        from playwright.async_api import Browser as AsyncBrowser  # noqa: F401
        from playwright.sync_api import Browser as SyncBrowser  # noqa: F401
    except ImportError as err:
        # Chain explicitly so the traceback shows which import failed.
        raise ValueError(
            "The 'playwright' package is required to use the playwright tools."
            " Please install it with 'pip install playwright'."
        ) from err
    return AsyncBrowser, SyncBrowser
class BaseBrowserTool(BaseTool):
    """Base class for browser tools.

    Stores the playwright browser handle(s) that concrete tools read in
    their own ``_run`` / ``_arun`` implementations. At least one of the two
    browsers must be provided.
    """

    # Browser for synchronous execution; ``None`` when not supplied.
    sync_browser: Optional["SyncBrowser"] = None
    # Browser for asynchronous execution; ``None`` when not supplied.
    async_browser: Optional["AsyncBrowser"] = None

    @root_validator
    def validate_browser_provided(cls, values: dict) -> dict:
        """Check that playwright is importable and a browser was supplied.

        Args:
            values: Field values gathered by pydantic for the tool.

        Returns:
            The ``values`` mapping, unchanged.

        Raises:
            ValueError: If playwright cannot be imported, or if both
                ``async_browser`` and ``sync_browser`` are ``None``.
        """
        lazy_import_playwright_browsers()
        if values.get("async_browser") is None and values.get("sync_browser") is None:
            raise ValueError("Either async_browser or sync_browser must be specified.")
        return values

    @classmethod
    def from_browser(
        cls,
        sync_browser: Optional[SyncBrowser] = None,
        async_browser: Optional[AsyncBrowser] = None,
    ) -> BaseBrowserTool:
        """Instantiate the tool from a synchronous and/or asynchronous browser.

        Args:
            sync_browser: Browser to use for synchronous runs.
            async_browser: Browser to use for asynchronous runs.

        Returns:
            A tool instance holding the given browsers.

        Raises:
            ValueError: If playwright cannot be imported, or (from the
                root validator) if both browsers are ``None``.
        """
        # Import check first so a missing dependency gives a clear message.
        lazy_import_playwright_browsers()
        return cls(sync_browser=sync_browser, async_browser=async_browser)

@ -6,6 +6,7 @@ from pydantic import BaseModel, Field
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import (
aget_current_page,
get_current_page,
)
@ -21,9 +22,20 @@ class ClickTool(BaseBrowserTool):
description: str = "Click on an element with the given CSS selector"
args_schema: Type[BaseModel] = ClickToolInput
def _run(self, selector: str) -> str:
    """Click the element matching ``selector`` on the current page.

    Raises:
        ValueError: If no synchronous browser was provided.
    """
    browser = self.sync_browser
    if browser is None:
        raise ValueError(f"Synchronous browser not provided to {self.name}")
    # Navigate to the desired webpage before using this tool
    current_page = get_current_page(browser)
    current_page.click(selector)
    return f"Clicked element '{selector}'"
async def _arun(self, selector: str) -> str:
    """Asynchronously click the element matching ``selector``.

    Raises:
        ValueError: If no asynchronous browser was provided.
    """
    # Guard first: the superseded lookup through ``self.browser`` ran before
    # this check and used the synchronous page helper.
    if self.async_browser is None:
        raise ValueError(f"Asynchronous browser not provided to {self.name}")
    page = await aget_current_page(self.async_browser)
    # Navigate to the desired webpage before using this tool
    await page.click(selector)
    return f"Clicked element '{selector}'"

@ -6,6 +6,7 @@ from pydantic import BaseModel
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import (
aget_current_page,
get_current_page,
)
@ -15,7 +16,16 @@ class CurrentWebPageTool(BaseBrowserTool):
description: str = "Returns the URL of the current page"
args_schema: Type[BaseModel] = BaseModel
def _run(self) -> str:
    """Return the URL of the current page as a string.

    Raises:
        ValueError: If no synchronous browser was provided.
    """
    browser = self.sync_browser
    if browser is None:
        raise ValueError(f"Synchronous browser not provided to {self.name}")
    current_page = get_current_page(browser)
    return str(current_page.url)
async def _arun(self) -> str:
    """Asynchronously return the URL of the current page as a string.

    Raises:
        ValueError: If no asynchronous browser was provided.
    """
    # Guard first: the superseded lookup through ``self.browser`` ran before
    # this check and used the synchronous page helper.
    if self.async_browser is None:
        raise ValueError(f"Asynchronous browser not provided to {self.name}")
    page = await aget_current_page(self.async_browser)
    return str(page.url)

@ -1,12 +1,12 @@
from __future__ import annotations
import json
from typing import TYPE_CHECKING, Type
from typing import TYPE_CHECKING, Any, Type
from pydantic import BaseModel, Field, root_validator
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import get_current_page
from langchain.tools.playwright.utils import aget_current_page, get_current_page
if TYPE_CHECKING:
pass
@ -29,7 +29,7 @@ class ExtractHyperlinksTool(BaseBrowserTool):
args_schema: Type[BaseModel] = ExtractHyperlinksToolInput
@root_validator
def check_args(cls, values: dict) -> dict:
def check_bs_import(cls, values: dict) -> dict:
"""Check that the arguments are valid."""
try:
from bs4 import BeautifulSoup # noqa: F401
@ -40,15 +40,12 @@ class ExtractHyperlinksTool(BaseBrowserTool):
)
return values
async def _arun(self, absolute_urls: bool = False) -> str:
"""Use the tool."""
@staticmethod
def scrape_page(page: Any, html_content: str, absolute_urls: bool) -> str:
from urllib.parse import urljoin
from bs4 import BeautifulSoup
page = await get_current_page(self.browser)
html_content = await page.content()
# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(html_content, "lxml")
@ -59,6 +56,21 @@ class ExtractHyperlinksTool(BaseBrowserTool):
links = [urljoin(base_url, anchor.get("href", "")) for anchor in anchors]
else:
links = [anchor.get("href", "") for anchor in anchors]
# Return the list of links as a JSON string
return json.dumps(links)
def _run(self, absolute_urls: bool = False) -> str:
    """Collect the current page's hyperlinks via ``scrape_page``.

    Args:
        absolute_urls: Forwarded unchanged to ``scrape_page``.

    Raises:
        ValueError: If no synchronous browser was provided.
    """
    browser = self.sync_browser
    if browser is None:
        raise ValueError(f"Synchronous browser not provided to {self.name}")
    current_page = get_current_page(browser)
    markup = current_page.content()
    return self.scrape_page(current_page, markup, absolute_urls)
async def _arun(self, absolute_urls: bool = False) -> str:
    """Asynchronously collect the current page's hyperlinks via ``scrape_page``.

    Args:
        absolute_urls: Forwarded unchanged to ``scrape_page``.

    Raises:
        ValueError: If no asynchronous browser was provided.
    """
    browser = self.async_browser
    if browser is None:
        raise ValueError(f"Asynchronous browser not provided to {self.name}")
    current_page = await aget_current_page(browser)
    markup = await current_page.content()
    return self.scrape_page(current_page, markup, absolute_urls)

@ -5,7 +5,7 @@ from typing import Type
from pydantic import BaseModel, root_validator
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import get_current_page
from langchain.tools.playwright.utils import aget_current_page, get_current_page
class ExtractTextTool(BaseBrowserTool):
@ -14,7 +14,7 @@ class ExtractTextTool(BaseBrowserTool):
args_schema: Type[BaseModel] = BaseModel
@root_validator
def check_args(cls, values: dict) -> dict:
def check_bs_import(cls, values: dict) -> dict:
"""Check that the arguments are valid."""
try:
from bs4 import BeautifulSoup # noqa: F401
@ -25,12 +25,30 @@ class ExtractTextTool(BaseBrowserTool):
)
return values
def _run(self) -> str:
    """Return the current page's text, joined with single spaces.

    Raises:
        ValueError: If no synchronous browser was provided.
    """
    # Use Beautiful Soup since it's faster than looping through the elements
    from bs4 import BeautifulSoup

    browser = self.sync_browser
    if browser is None:
        raise ValueError(f"Synchronous browser not provided to {self.name}")

    markup = get_current_page(browser).content()

    # Parse the HTML content with BeautifulSoup
    parsed = BeautifulSoup(markup, "lxml")
    return " ".join(parsed.stripped_strings)
async def _arun(self) -> str:
"""Use the tool."""
if self.async_browser is None:
raise ValueError(f"Asynchronous browser not provided to {self.name}")
# Use Beautiful Soup since it's faster than looping through the elements
from bs4 import BeautifulSoup
page = await get_current_page(self.browser)
page = await aget_current_page(self.async_browser)
html_content = await page.content()
# Parse the HTML content with BeautifulSoup

@ -6,10 +6,11 @@ from typing import TYPE_CHECKING, List, Optional, Sequence, Type
from pydantic import BaseModel, Field
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import get_current_page
from langchain.tools.playwright.utils import aget_current_page, get_current_page
if TYPE_CHECKING:
from playwright.async_api import Page as AsyncPage
from playwright.sync_api import Page as SyncPage
class GetElementsToolInput(BaseModel):
@ -25,7 +26,7 @@ class GetElementsToolInput(BaseModel):
)
async def _get_elements(
async def _aget_elements(
page: AsyncPage, selector: str, attributes: Sequence[str]
) -> List[dict]:
"""Get elements matching the given CSS selector."""
@ -45,6 +46,26 @@ async def _get_elements(
return results
def _get_elements(
    page: SyncPage, selector: str, attributes: Sequence[str]
) -> List[dict]:
    """Get elements matching the given CSS selector.

    For each matched element, the requested attributes are read
    (``"innerText"`` via ``inner_text()``, anything else via
    ``get_attribute``). Values that are ``None`` or blank after stripping are
    omitted, and elements left with no values are dropped.

    Args:
        page: Page to query.
        selector: CSS selector passed to ``query_selector_all``.
        attributes: Attribute names to read from each element.

    Returns:
        One dict per element that yielded at least one non-blank value.
    """

    def _read(node, attr_name: str) -> Optional[str]:
        # "innerText" is not an HTML attribute; it has a dedicated accessor.
        if attr_name == "innerText":
            return node.inner_text()
        return node.get_attribute(attr_name)

    matches: List[dict] = []
    for node in page.query_selector_all(selector):
        found = {}
        for attr_name in attributes:
            text = _read(node, attr_name)
            if text is None or text.strip() == "":
                continue
            found[attr_name] = text
        if found:
            matches.append(found)
    return matches
class GetElementsTool(BaseBrowserTool):
name: str = "get_elements"
description: str = (
@ -52,11 +73,22 @@ class GetElementsTool(BaseBrowserTool):
)
args_schema: Type[BaseModel] = GetElementsToolInput
def _run(self, selector: str, attributes: Sequence[str] = ("innerText",)) -> str:
    """Return the elements matching ``selector`` as a JSON string.

    Args:
        selector: CSS selector to match.
        attributes: Attribute names to read from each element. The default
            is an immutable tuple so no mutable list is shared across calls.

    Raises:
        ValueError: If no synchronous browser was provided.
    """
    if self.sync_browser is None:
        raise ValueError(f"Synchronous browser not provided to {self.name}")
    page = get_current_page(self.sync_browser)
    # Navigate to the desired webpage before using this tool
    results = _get_elements(page, selector, attributes)
    return json.dumps(results)
async def _arun(
    self, selector: str, attributes: Sequence[str] = ("innerText",)
) -> str:
    """Asynchronously return the elements matching ``selector`` as a JSON string.

    Args:
        selector: CSS selector to match.
        attributes: Attribute names to read from each element. The default
            is an immutable tuple so no mutable list is shared across calls.

    Raises:
        ValueError: If no asynchronous browser was provided.
    """
    # Guard first; the superseded lookup through ``self.browser`` and the
    # awaited call to the synchronous ``_get_elements`` have been dropped.
    if self.async_browser is None:
        raise ValueError(f"Asynchronous browser not provided to {self.name}")
    page = await aget_current_page(self.async_browser)
    # Navigate to the desired webpage before using this tool
    results = await _aget_elements(page, selector, attributes)
    return json.dumps(results)

@ -6,6 +6,7 @@ from pydantic import BaseModel, Field
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import (
aget_current_page,
get_current_page,
)
@ -21,9 +22,20 @@ class NavigateTool(BaseBrowserTool):
description: str = "Navigate a browser to the specified URL"
args_schema: Type[BaseModel] = NavigateToolInput
def _run(self, url: str) -> str:
    """Navigate the current page to ``url`` and report the status code.

    The status is reported as ``"unknown"`` when ``goto`` yields a falsy
    response.

    Raises:
        ValueError: If no synchronous browser was provided.
    """
    browser = self.sync_browser
    if browser is None:
        raise ValueError(f"Synchronous browser not provided to {self.name}")
    current_page = get_current_page(browser)
    response = current_page.goto(url)
    if response:
        status = response.status
    else:
        status = "unknown"
    return f"Navigating to {url} returned status code {status}"
async def _arun(self, url: str) -> str:
    """Asynchronously navigate to ``url`` and report the status code.

    The status is reported as ``"unknown"`` when ``goto`` yields a falsy
    response.

    Raises:
        ValueError: If no asynchronous browser was provided.
    """
    # Guard first: the superseded lookup through ``self.browser`` ran before
    # this check and used the synchronous page helper.
    if self.async_browser is None:
        raise ValueError(f"Asynchronous browser not provided to {self.name}")
    page = await aget_current_page(self.async_browser)
    response = await page.goto(url)
    status = response.status if response else "unknown"
    return f"Navigating to {url} returned status code {status}"

@ -6,6 +6,7 @@ from pydantic import BaseModel
from langchain.tools.playwright.base import BaseBrowserTool
from langchain.tools.playwright.utils import (
aget_current_page,
get_current_page,
)
@ -17,15 +18,32 @@ class NavigateBackTool(BaseBrowserTool):
description: str = "Navigate back to the previous page in the browser history"
args_schema: Type[BaseModel] = BaseModel
def _run(self) -> str:
    """Go back one entry in the browser history and describe the result.

    Returns:
        A message with the URL and status code of the page navigated to, or
        a notice that there was no previous page when ``go_back`` yields a
        falsy response.

    Raises:
        ValueError: If no synchronous browser was provided.
    """
    browser = self.sync_browser
    if browser is None:
        raise ValueError(f"Synchronous browser not provided to {self.name}")
    response = get_current_page(browser).go_back()
    if not response:
        return "Unable to navigate back; no previous page in the history"
    return (
        f"Navigated back to the previous page with URL '{response.url}'."
        f" Status code {response.status}"
    )
async def _arun(self) -> str:
    """Asynchronously go back one history entry and describe the result.

    Returns:
        A message with the URL and status code of the page navigated to, or
        a notice that there was no previous page when ``go_back`` yields a
        falsy response.

    Raises:
        ValueError: If no asynchronous browser was provided.
    """
    # Guard first: the superseded lookup through ``self.browser`` ran before
    # this check and used the synchronous page helper.
    if self.async_browser is None:
        raise ValueError(f"Asynchronous browser not provided to {self.name}")
    page = await aget_current_page(self.async_browser)
    response = await page.go_back()
    if response:
        # Both fragments must be f-strings; the superseded plain-string
        # fragment emitted the literal text "{response.status}".
        return (
            f"Navigated back to the previous page with URL '{response.url}'."
            f" Status code {response.status}"
        )
    else:
        return "Unable to navigate back; no previous page in the history"

@ -7,9 +7,11 @@ from typing import TYPE_CHECKING, Any, Coroutine, TypeVar
if TYPE_CHECKING:
from playwright.async_api import Browser as AsyncBrowser
from playwright.async_api import Page as AsyncPage
from playwright.sync_api import Browser as SyncBrowser
from playwright.sync_api import Page as SyncPage
async def get_current_page(browser: AsyncBrowser) -> AsyncPage:
async def aget_current_page(browser: AsyncBrowser) -> AsyncPage:
if not browser.contexts:
context = await browser.new_context()
return await context.new_page()
@ -20,13 +22,31 @@ async def get_current_page(browser: AsyncBrowser) -> AsyncPage:
return context.pages[-1]
def create_playwright_browser() -> AsyncBrowser:
def get_current_page(browser: SyncBrowser) -> SyncPage:
    """Return the page to operate on, creating a context/page when needed.

    - No contexts: create a context and return a new page in it.
    - First context has no pages: return a new page in that context.
    - Otherwise: return the last page of the first context.
    """
    contexts = browser.contexts
    if contexts:
        # Assuming you're using the default browser context
        context = contexts[0]
        pages = context.pages
        if pages:
            # Assuming the last page in the list is the active one
            return pages[-1]
    else:
        context = browser.new_context()
    return context.new_page()
def create_async_playwright_browser() -> AsyncBrowser:
    """Start playwright's async API and launch a headless Chromium browser.

    Both coroutines are driven through ``run_async``.
    NOTE(review): ``run_async`` is defined outside this view; presumably it
    runs a coroutine to completion and returns its result -- confirm.
    """
    from playwright.async_api import async_playwright

    # ``start()`` yields the Playwright driver object, not a browser.
    playwright = run_async(async_playwright().start())
    launch_coro = playwright.chromium.launch(headless=True)
    return run_async(launch_coro)
def create_sync_playwright_browser() -> SyncBrowser:
    """Start playwright's sync API and launch a headless Chromium browser."""
    from playwright.sync_api import sync_playwright

    # ``start()`` yields the Playwright driver object, not a browser.
    playwright = sync_playwright().start()
    chromium = playwright.chromium
    return chromium.launch(headless=True)
T = TypeVar("T")

Loading…
Cancel
Save