forked from Archives/langchain
parent
6c2b16e465
commit
a46f1d830e
File diff suppressed because one or more lines are too long
@ -1,13 +1,16 @@
|
|||||||
"""Playwright web browser toolkit."""
|
"""Playwright web browser toolkit."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import TYPE_CHECKING, List, Type
|
from typing import TYPE_CHECKING, List, Optional, Type, cast
|
||||||
|
|
||||||
from pydantic import Extra, Field, root_validator
|
from pydantic import Extra, root_validator
|
||||||
|
|
||||||
from langchain.agents.agent_toolkits.base import BaseToolkit
|
from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||||
from langchain.tools.base import BaseTool
|
from langchain.tools.base import BaseTool
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
from langchain.tools.playwright.base import (
|
||||||
|
BaseBrowserTool,
|
||||||
|
lazy_import_playwright_browsers,
|
||||||
|
)
|
||||||
from langchain.tools.playwright.click import ClickTool
|
from langchain.tools.playwright.click import ClickTool
|
||||||
from langchain.tools.playwright.current_page import CurrentWebPageTool
|
from langchain.tools.playwright.current_page import CurrentWebPageTool
|
||||||
from langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool
|
from langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool
|
||||||
@ -15,16 +18,24 @@ from langchain.tools.playwright.extract_text import ExtractTextTool
|
|||||||
from langchain.tools.playwright.get_elements import GetElementsTool
|
from langchain.tools.playwright.get_elements import GetElementsTool
|
||||||
from langchain.tools.playwright.navigate import NavigateTool
|
from langchain.tools.playwright.navigate import NavigateTool
|
||||||
from langchain.tools.playwright.navigate_back import NavigateBackTool
|
from langchain.tools.playwright.navigate_back import NavigateBackTool
|
||||||
from langchain.tools.playwright.utils import create_playwright_browser
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from playwright.async_api import Browser as AsyncBrowser
|
from playwright.async_api import Browser as AsyncBrowser
|
||||||
|
from playwright.sync_api import Browser as SyncBrowser
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
# We do this so pydantic can resolve the types when instantiating
|
||||||
|
from playwright.async_api import Browser as AsyncBrowser
|
||||||
|
from playwright.sync_api import Browser as SyncBrowser
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class PlayWrightBrowserToolkit(BaseToolkit):
|
class PlayWrightBrowserToolkit(BaseToolkit):
|
||||||
"""Toolkit for web browser tools."""
|
"""Toolkit for web browser tools."""
|
||||||
|
|
||||||
browser: AsyncBrowser = Field(default_factory=create_playwright_browser)
|
sync_browser: Optional["SyncBrowser"] = None
|
||||||
|
async_browser: Optional["AsyncBrowser"] = None
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
"""Configuration for this pydantic object."""
|
"""Configuration for this pydantic object."""
|
||||||
@ -33,15 +44,11 @@ class PlayWrightBrowserToolkit(BaseToolkit):
|
|||||||
arbitrary_types_allowed = True
|
arbitrary_types_allowed = True
|
||||||
|
|
||||||
@root_validator
|
@root_validator
|
||||||
def check_args(cls, values: dict) -> dict:
|
def validate_imports_and_browser_provided(cls, values: dict) -> dict:
|
||||||
"""Check that the arguments are valid."""
|
"""Check that the arguments are valid."""
|
||||||
try:
|
lazy_import_playwright_browsers()
|
||||||
from playwright.async_api import Browser as AsyncBrowser # noqa: F401
|
if values.get("async_browser") is None and values.get("sync_browser") is None:
|
||||||
except ImportError:
|
raise ValueError("Either async_browser or sync_browser must be specified.")
|
||||||
raise ValueError(
|
|
||||||
"The 'playwright' package is required to use this tool."
|
|
||||||
" Please install it with 'pip install playwright'."
|
|
||||||
)
|
|
||||||
return values
|
return values
|
||||||
|
|
||||||
def get_tools(self) -> List[BaseTool]:
|
def get_tools(self) -> List[BaseTool]:
|
||||||
@ -56,11 +63,21 @@ class PlayWrightBrowserToolkit(BaseToolkit):
|
|||||||
CurrentWebPageTool,
|
CurrentWebPageTool,
|
||||||
]
|
]
|
||||||
|
|
||||||
return [tool_cls.from_browser(self.browser) for tool_cls in tool_classes]
|
tools = [
|
||||||
|
tool_cls.from_browser(
|
||||||
|
sync_browser=self.sync_browser, async_browser=self.async_browser
|
||||||
|
)
|
||||||
|
for tool_cls in tool_classes
|
||||||
|
]
|
||||||
|
return cast(List[BaseTool], tools)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_browser(cls, browser: AsyncBrowser) -> PlayWrightBrowserToolkit:
|
def from_browser(
|
||||||
from playwright.async_api import Browser as AsyncBrowser
|
cls,
|
||||||
|
sync_browser: Optional[SyncBrowser] = None,
|
||||||
cls.update_forward_refs(AsyncBrowser=AsyncBrowser)
|
async_browser: Optional[AsyncBrowser] = None,
|
||||||
return cls(browser=browser)
|
) -> PlayWrightBrowserToolkit:
|
||||||
|
"""Instantiate the toolkit."""
|
||||||
|
# Import playwright eagerly so a missing install raises a clear error instead of the forward-ref error Pydantic would otherwise raise
|
||||||
|
lazy_import_playwright_browsers()
|
||||||
|
return cls(sync_browser=sync_browser, async_browser=async_browser)
|
||||||
|
@ -16,7 +16,6 @@ from langchain.tools.ifttt import IFTTTWebhook
|
|||||||
from langchain.tools.openapi.utils.api_models import APIOperation
|
from langchain.tools.openapi.utils.api_models import APIOperation
|
||||||
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
|
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
|
||||||
from langchain.tools.playwright import (
|
from langchain.tools.playwright import (
|
||||||
BaseBrowserTool,
|
|
||||||
ClickTool,
|
ClickTool,
|
||||||
CurrentWebPageTool,
|
CurrentWebPageTool,
|
||||||
ExtractHyperlinksTool,
|
ExtractHyperlinksTool,
|
||||||
@ -32,7 +31,6 @@ from langchain.tools.shell.tool import ShellTool
|
|||||||
__all__ = [
|
__all__ = [
|
||||||
"AIPluginTool",
|
"AIPluginTool",
|
||||||
"APIOperation",
|
"APIOperation",
|
||||||
"BaseBrowserTool",
|
|
||||||
"BaseTool",
|
"BaseTool",
|
||||||
"BaseTool",
|
"BaseTool",
|
||||||
"BingSearchResults",
|
"BingSearchResults",
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
"""Browser tools and toolkit."""
|
"""Browser tools and toolkit."""
|
||||||
|
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
|
||||||
from langchain.tools.playwright.click import ClickTool
|
from langchain.tools.playwright.click import ClickTool
|
||||||
from langchain.tools.playwright.current_page import CurrentWebPageTool
|
from langchain.tools.playwright.current_page import CurrentWebPageTool
|
||||||
from langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool
|
from langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool
|
||||||
@ -15,7 +14,6 @@ __all__ = [
|
|||||||
"ExtractTextTool",
|
"ExtractTextTool",
|
||||||
"ExtractHyperlinksTool",
|
"ExtractHyperlinksTool",
|
||||||
"GetElementsTool",
|
"GetElementsTool",
|
||||||
"BaseBrowserTool",
|
|
||||||
"ClickTool",
|
"ClickTool",
|
||||||
"CurrentWebPageTool",
|
"CurrentWebPageTool",
|
||||||
]
|
]
|
||||||
|
@ -1,40 +1,55 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Optional, Tuple, Type
|
||||||
|
|
||||||
from pydantic import Field, root_validator
|
from pydantic import root_validator
|
||||||
|
|
||||||
from langchain.tools.base import BaseTool
|
from langchain.tools.base import BaseTool
|
||||||
from langchain.tools.playwright.utils import create_playwright_browser, run_async
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from playwright.async_api import Browser as AsyncBrowser
|
from playwright.async_api import Browser as AsyncBrowser
|
||||||
|
from playwright.sync_api import Browser as SyncBrowser
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
# We do this so pydantic can resolve the types when instantiating
|
||||||
|
from playwright.async_api import Browser as AsyncBrowser
|
||||||
|
from playwright.sync_api import Browser as SyncBrowser
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def lazy_import_playwright_browsers() -> Tuple[Type[AsyncBrowser], Type[SyncBrowser]]:
|
||||||
|
try:
|
||||||
|
from playwright.async_api import Browser as AsyncBrowser # noqa: F401
|
||||||
|
from playwright.sync_api import Browser as SyncBrowser # noqa: F401
|
||||||
|
except ImportError:
|
||||||
|
raise ValueError(
|
||||||
|
"The 'playwright' package is required to use the playwright tools."
|
||||||
|
" Please install it with 'pip install playwright'."
|
||||||
|
)
|
||||||
|
return AsyncBrowser, SyncBrowser
|
||||||
|
|
||||||
|
|
||||||
class BaseBrowserTool(BaseTool):
|
class BaseBrowserTool(BaseTool):
|
||||||
"""Base class for browser tools."""
|
"""Base class for browser tools."""
|
||||||
|
|
||||||
browser: AsyncBrowser = Field(default_factory=create_playwright_browser)
|
sync_browser: Optional["SyncBrowser"] = None
|
||||||
|
async_browser: Optional["AsyncBrowser"] = None
|
||||||
|
|
||||||
@root_validator
|
@root_validator
|
||||||
def check_args(cls, values: dict) -> dict:
|
def validate_browser_provided(cls, values: dict) -> dict:
|
||||||
"""Check that the arguments are valid."""
|
"""Check that the arguments are valid."""
|
||||||
try:
|
lazy_import_playwright_browsers()
|
||||||
from playwright.async_api import Browser as AsyncBrowser # noqa: F401
|
if values.get("async_browser") is None and values.get("sync_browser") is None:
|
||||||
except ImportError:
|
raise ValueError("Either async_browser or sync_browser must be specified.")
|
||||||
raise ValueError(
|
|
||||||
"The 'playwright' package is required to use this tool."
|
|
||||||
" Please install it with 'pip install playwright'."
|
|
||||||
)
|
|
||||||
return values
|
return values
|
||||||
|
|
||||||
def _run(self, *args: Any, **kwargs: Any) -> str:
|
|
||||||
"""Use the tool."""
|
|
||||||
return run_async(self._arun(*args, **kwargs))
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_browser(cls, browser: AsyncBrowser) -> BaseBrowserTool:
|
def from_browser(
|
||||||
from playwright.async_api import Browser as AsyncBrowser
|
cls,
|
||||||
|
sync_browser: Optional[SyncBrowser] = None,
|
||||||
cls.update_forward_refs(AsyncBrowser=AsyncBrowser)
|
async_browser: Optional[AsyncBrowser] = None,
|
||||||
return cls(browser=browser)
|
) -> BaseBrowserTool:
|
||||||
|
"""Instantiate the tool."""
|
||||||
|
lazy_import_playwright_browsers()
|
||||||
|
return cls(sync_browser=sync_browser, async_browser=async_browser)
|
||||||
|
@ -6,6 +6,7 @@ from pydantic import BaseModel, Field
|
|||||||
|
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
from langchain.tools.playwright.base import BaseBrowserTool
|
||||||
from langchain.tools.playwright.utils import (
|
from langchain.tools.playwright.utils import (
|
||||||
|
aget_current_page,
|
||||||
get_current_page,
|
get_current_page,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -21,9 +22,20 @@ class ClickTool(BaseBrowserTool):
|
|||||||
description: str = "Click on an element with the given CSS selector"
|
description: str = "Click on an element with the given CSS selector"
|
||||||
args_schema: Type[BaseModel] = ClickToolInput
|
args_schema: Type[BaseModel] = ClickToolInput
|
||||||
|
|
||||||
|
def _run(self, selector: str) -> str:
|
||||||
|
"""Use the tool."""
|
||||||
|
if self.sync_browser is None:
|
||||||
|
raise ValueError(f"Synchronous browser not provided to {self.name}")
|
||||||
|
page = get_current_page(self.sync_browser)
|
||||||
|
# Navigate to the desired webpage before using this tool
|
||||||
|
page.click(selector)
|
||||||
|
return f"Clicked element '{selector}'"
|
||||||
|
|
||||||
async def _arun(self, selector: str) -> str:
|
async def _arun(self, selector: str) -> str:
|
||||||
"""Use the tool."""
|
"""Use the tool."""
|
||||||
page = await get_current_page(self.browser)
|
if self.async_browser is None:
|
||||||
|
raise ValueError(f"Asynchronous browser not provided to {self.name}")
|
||||||
|
page = await aget_current_page(self.async_browser)
|
||||||
# Navigate to the desired webpage before using this tool
|
# Navigate to the desired webpage before using this tool
|
||||||
await page.click(selector)
|
await page.click(selector)
|
||||||
return f"Clicked element '{selector}'"
|
return f"Clicked element '{selector}'"
|
||||||
|
@ -6,6 +6,7 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
from langchain.tools.playwright.base import BaseBrowserTool
|
||||||
from langchain.tools.playwright.utils import (
|
from langchain.tools.playwright.utils import (
|
||||||
|
aget_current_page,
|
||||||
get_current_page,
|
get_current_page,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -15,7 +16,16 @@ class CurrentWebPageTool(BaseBrowserTool):
|
|||||||
description: str = "Returns the URL of the current page"
|
description: str = "Returns the URL of the current page"
|
||||||
args_schema: Type[BaseModel] = BaseModel
|
args_schema: Type[BaseModel] = BaseModel
|
||||||
|
|
||||||
|
def _run(self) -> str:
|
||||||
|
"""Use the tool."""
|
||||||
|
if self.sync_browser is None:
|
||||||
|
raise ValueError(f"Synchronous browser not provided to {self.name}")
|
||||||
|
page = get_current_page(self.sync_browser)
|
||||||
|
return str(page.url)
|
||||||
|
|
||||||
async def _arun(self) -> str:
|
async def _arun(self) -> str:
|
||||||
"""Use the tool."""
|
"""Use the tool."""
|
||||||
page = await get_current_page(self.browser)
|
if self.async_browser is None:
|
||||||
|
raise ValueError(f"Asynchronous browser not provided to {self.name}")
|
||||||
|
page = await aget_current_page(self.async_browser)
|
||||||
return str(page.url)
|
return str(page.url)
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from typing import TYPE_CHECKING, Type
|
from typing import TYPE_CHECKING, Any, Type
|
||||||
|
|
||||||
from pydantic import BaseModel, Field, root_validator
|
from pydantic import BaseModel, Field, root_validator
|
||||||
|
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
from langchain.tools.playwright.base import BaseBrowserTool
|
||||||
from langchain.tools.playwright.utils import get_current_page
|
from langchain.tools.playwright.utils import aget_current_page, get_current_page
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
pass
|
pass
|
||||||
@ -29,7 +29,7 @@ class ExtractHyperlinksTool(BaseBrowserTool):
|
|||||||
args_schema: Type[BaseModel] = ExtractHyperlinksToolInput
|
args_schema: Type[BaseModel] = ExtractHyperlinksToolInput
|
||||||
|
|
||||||
@root_validator
|
@root_validator
|
||||||
def check_args(cls, values: dict) -> dict:
|
def check_bs_import(cls, values: dict) -> dict:
|
||||||
"""Check that the arguments are valid."""
|
"""Check that the arguments are valid."""
|
||||||
try:
|
try:
|
||||||
from bs4 import BeautifulSoup # noqa: F401
|
from bs4 import BeautifulSoup # noqa: F401
|
||||||
@ -40,15 +40,12 @@ class ExtractHyperlinksTool(BaseBrowserTool):
|
|||||||
)
|
)
|
||||||
return values
|
return values
|
||||||
|
|
||||||
async def _arun(self, absolute_urls: bool = False) -> str:
|
@staticmethod
|
||||||
"""Use the tool."""
|
def scrape_page(page: Any, html_content: str, absolute_urls: bool) -> str:
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
page = await get_current_page(self.browser)
|
|
||||||
html_content = await page.content()
|
|
||||||
|
|
||||||
# Parse the HTML content with BeautifulSoup
|
# Parse the HTML content with BeautifulSoup
|
||||||
soup = BeautifulSoup(html_content, "lxml")
|
soup = BeautifulSoup(html_content, "lxml")
|
||||||
|
|
||||||
@ -59,6 +56,21 @@ class ExtractHyperlinksTool(BaseBrowserTool):
|
|||||||
links = [urljoin(base_url, anchor.get("href", "")) for anchor in anchors]
|
links = [urljoin(base_url, anchor.get("href", "")) for anchor in anchors]
|
||||||
else:
|
else:
|
||||||
links = [anchor.get("href", "") for anchor in anchors]
|
links = [anchor.get("href", "") for anchor in anchors]
|
||||||
|
|
||||||
# Return the list of links as a JSON string
|
# Return the list of links as a JSON string
|
||||||
return json.dumps(links)
|
return json.dumps(links)
|
||||||
|
|
||||||
|
def _run(self, absolute_urls: bool = False) -> str:
|
||||||
|
"""Use the tool."""
|
||||||
|
if self.sync_browser is None:
|
||||||
|
raise ValueError(f"Synchronous browser not provided to {self.name}")
|
||||||
|
page = get_current_page(self.sync_browser)
|
||||||
|
html_content = page.content()
|
||||||
|
return self.scrape_page(page, html_content, absolute_urls)
|
||||||
|
|
||||||
|
async def _arun(self, absolute_urls: bool = False) -> str:
|
||||||
|
"""Use the tool asynchronously."""
|
||||||
|
if self.async_browser is None:
|
||||||
|
raise ValueError(f"Asynchronous browser not provided to {self.name}")
|
||||||
|
page = await aget_current_page(self.async_browser)
|
||||||
|
html_content = await page.content()
|
||||||
|
return self.scrape_page(page, html_content, absolute_urls)
|
||||||
|
@ -5,7 +5,7 @@ from typing import Type
|
|||||||
from pydantic import BaseModel, root_validator
|
from pydantic import BaseModel, root_validator
|
||||||
|
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
from langchain.tools.playwright.base import BaseBrowserTool
|
||||||
from langchain.tools.playwright.utils import get_current_page
|
from langchain.tools.playwright.utils import aget_current_page, get_current_page
|
||||||
|
|
||||||
|
|
||||||
class ExtractTextTool(BaseBrowserTool):
|
class ExtractTextTool(BaseBrowserTool):
|
||||||
@ -14,7 +14,7 @@ class ExtractTextTool(BaseBrowserTool):
|
|||||||
args_schema: Type[BaseModel] = BaseModel
|
args_schema: Type[BaseModel] = BaseModel
|
||||||
|
|
||||||
@root_validator
|
@root_validator
|
||||||
def check_args(cls, values: dict) -> dict:
|
def check_bs_import(cls, values: dict) -> dict:
|
||||||
"""Check that the arguments are valid."""
|
"""Check that the arguments are valid."""
|
||||||
try:
|
try:
|
||||||
from bs4 import BeautifulSoup # noqa: F401
|
from bs4 import BeautifulSoup # noqa: F401
|
||||||
@ -25,12 +25,30 @@ class ExtractTextTool(BaseBrowserTool):
|
|||||||
)
|
)
|
||||||
return values
|
return values
|
||||||
|
|
||||||
async def _arun(self) -> str:
|
def _run(self) -> str:
|
||||||
"""Use the tool."""
|
"""Use the tool."""
|
||||||
# Use Beautiful Soup since it's faster than looping through the elements
|
# Use Beautiful Soup since it's faster than looping through the elements
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
page = await get_current_page(self.browser)
|
if self.sync_browser is None:
|
||||||
|
raise ValueError(f"Synchronous browser not provided to {self.name}")
|
||||||
|
|
||||||
|
page = get_current_page(self.sync_browser)
|
||||||
|
html_content = page.content()
|
||||||
|
|
||||||
|
# Parse the HTML content with BeautifulSoup
|
||||||
|
soup = BeautifulSoup(html_content, "lxml")
|
||||||
|
|
||||||
|
return " ".join(text for text in soup.stripped_strings)
|
||||||
|
|
||||||
|
async def _arun(self) -> str:
|
||||||
|
"""Use the tool."""
|
||||||
|
if self.async_browser is None:
|
||||||
|
raise ValueError(f"Asynchronous browser not provided to {self.name}")
|
||||||
|
# Use Beautiful Soup since it's faster than looping through the elements
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
page = await aget_current_page(self.async_browser)
|
||||||
html_content = await page.content()
|
html_content = await page.content()
|
||||||
|
|
||||||
# Parse the HTML content with BeautifulSoup
|
# Parse the HTML content with BeautifulSoup
|
||||||
|
@ -6,10 +6,11 @@ from typing import TYPE_CHECKING, List, Optional, Sequence, Type
|
|||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
from langchain.tools.playwright.base import BaseBrowserTool
|
||||||
from langchain.tools.playwright.utils import get_current_page
|
from langchain.tools.playwright.utils import aget_current_page, get_current_page
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from playwright.async_api import Page as AsyncPage
|
from playwright.async_api import Page as AsyncPage
|
||||||
|
from playwright.sync_api import Page as SyncPage
|
||||||
|
|
||||||
|
|
||||||
class GetElementsToolInput(BaseModel):
|
class GetElementsToolInput(BaseModel):
|
||||||
@ -25,7 +26,7 @@ class GetElementsToolInput(BaseModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _get_elements(
|
async def _aget_elements(
|
||||||
page: AsyncPage, selector: str, attributes: Sequence[str]
|
page: AsyncPage, selector: str, attributes: Sequence[str]
|
||||||
) -> List[dict]:
|
) -> List[dict]:
|
||||||
"""Get elements matching the given CSS selector."""
|
"""Get elements matching the given CSS selector."""
|
||||||
@ -45,6 +46,26 @@ async def _get_elements(
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def _get_elements(
|
||||||
|
page: SyncPage, selector: str, attributes: Sequence[str]
|
||||||
|
) -> List[dict]:
|
||||||
|
"""Get elements matching the given CSS selector."""
|
||||||
|
elements = page.query_selector_all(selector)
|
||||||
|
results = []
|
||||||
|
for element in elements:
|
||||||
|
result = {}
|
||||||
|
for attribute in attributes:
|
||||||
|
if attribute == "innerText":
|
||||||
|
val: Optional[str] = element.inner_text()
|
||||||
|
else:
|
||||||
|
val = element.get_attribute(attribute)
|
||||||
|
if val is not None and val.strip() != "":
|
||||||
|
result[attribute] = val
|
||||||
|
if result:
|
||||||
|
results.append(result)
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
class GetElementsTool(BaseBrowserTool):
|
class GetElementsTool(BaseBrowserTool):
|
||||||
name: str = "get_elements"
|
name: str = "get_elements"
|
||||||
description: str = (
|
description: str = (
|
||||||
@ -52,11 +73,22 @@ class GetElementsTool(BaseBrowserTool):
|
|||||||
)
|
)
|
||||||
args_schema: Type[BaseModel] = GetElementsToolInput
|
args_schema: Type[BaseModel] = GetElementsToolInput
|
||||||
|
|
||||||
|
def _run(self, selector: str, attributes: Sequence[str] = ["innerText"]) -> str:
|
||||||
|
"""Use the tool."""
|
||||||
|
if self.sync_browser is None:
|
||||||
|
raise ValueError(f"Synchronous browser not provided to {self.name}")
|
||||||
|
page = get_current_page(self.sync_browser)
|
||||||
|
# Navigate to the desired webpage before using this tool
|
||||||
|
results = _get_elements(page, selector, attributes)
|
||||||
|
return json.dumps(results)
|
||||||
|
|
||||||
async def _arun(
|
async def _arun(
|
||||||
self, selector: str, attributes: Sequence[str] = ["innerText"]
|
self, selector: str, attributes: Sequence[str] = ["innerText"]
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Use the tool."""
|
"""Use the tool."""
|
||||||
page = await get_current_page(self.browser)
|
if self.async_browser is None:
|
||||||
|
raise ValueError(f"Asynchronous browser not provided to {self.name}")
|
||||||
|
page = await aget_current_page(self.async_browser)
|
||||||
# Navigate to the desired webpage before using this tool
|
# Navigate to the desired webpage before using this tool
|
||||||
results = await _get_elements(page, selector, attributes)
|
results = await _aget_elements(page, selector, attributes)
|
||||||
return json.dumps(results)
|
return json.dumps(results)
|
||||||
|
@ -6,6 +6,7 @@ from pydantic import BaseModel, Field
|
|||||||
|
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
from langchain.tools.playwright.base import BaseBrowserTool
|
||||||
from langchain.tools.playwright.utils import (
|
from langchain.tools.playwright.utils import (
|
||||||
|
aget_current_page,
|
||||||
get_current_page,
|
get_current_page,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -21,9 +22,20 @@ class NavigateTool(BaseBrowserTool):
|
|||||||
description: str = "Navigate a browser to the specified URL"
|
description: str = "Navigate a browser to the specified URL"
|
||||||
args_schema: Type[BaseModel] = NavigateToolInput
|
args_schema: Type[BaseModel] = NavigateToolInput
|
||||||
|
|
||||||
|
def _run(self, url: str) -> str:
|
||||||
|
"""Use the tool."""
|
||||||
|
if self.sync_browser is None:
|
||||||
|
raise ValueError(f"Synchronous browser not provided to {self.name}")
|
||||||
|
page = get_current_page(self.sync_browser)
|
||||||
|
response = page.goto(url)
|
||||||
|
status = response.status if response else "unknown"
|
||||||
|
return f"Navigating to {url} returned status code {status}"
|
||||||
|
|
||||||
async def _arun(self, url: str) -> str:
|
async def _arun(self, url: str) -> str:
|
||||||
"""Use the tool."""
|
"""Use the tool."""
|
||||||
page = await get_current_page(self.browser)
|
if self.async_browser is None:
|
||||||
|
raise ValueError(f"Asynchronous browser not provided to {self.name}")
|
||||||
|
page = await aget_current_page(self.async_browser)
|
||||||
response = await page.goto(url)
|
response = await page.goto(url)
|
||||||
status = response.status if response else "unknown"
|
status = response.status if response else "unknown"
|
||||||
return f"Navigating to {url} returned status code {status}"
|
return f"Navigating to {url} returned status code {status}"
|
||||||
|
@ -6,6 +6,7 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from langchain.tools.playwright.base import BaseBrowserTool
|
from langchain.tools.playwright.base import BaseBrowserTool
|
||||||
from langchain.tools.playwright.utils import (
|
from langchain.tools.playwright.utils import (
|
||||||
|
aget_current_page,
|
||||||
get_current_page,
|
get_current_page,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -17,15 +18,32 @@ class NavigateBackTool(BaseBrowserTool):
|
|||||||
description: str = "Navigate back to the previous page in the browser history"
|
description: str = "Navigate back to the previous page in the browser history"
|
||||||
args_schema: Type[BaseModel] = BaseModel
|
args_schema: Type[BaseModel] = BaseModel
|
||||||
|
|
||||||
|
def _run(self) -> str:
|
||||||
|
"""Use the tool."""
|
||||||
|
if self.sync_browser is None:
|
||||||
|
raise ValueError(f"Synchronous browser not provided to {self.name}")
|
||||||
|
page = get_current_page(self.sync_browser)
|
||||||
|
response = page.go_back()
|
||||||
|
|
||||||
|
if response:
|
||||||
|
return (
|
||||||
|
f"Navigated back to the previous page with URL '{response.url}'."
|
||||||
|
f" Status code {response.status}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return "Unable to navigate back; no previous page in the history"
|
||||||
|
|
||||||
async def _arun(self) -> str:
|
async def _arun(self) -> str:
|
||||||
"""Use the tool."""
|
"""Use the tool."""
|
||||||
page = await get_current_page(self.browser)
|
if self.async_browser is None:
|
||||||
|
raise ValueError(f"Asynchronous browser not provided to {self.name}")
|
||||||
|
page = await aget_current_page(self.async_browser)
|
||||||
response = await page.go_back()
|
response = await page.go_back()
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
return (
|
return (
|
||||||
f"Navigated back to the previous page with URL '{response.url}'."
|
f"Navigated back to the previous page with URL '{response.url}'."
|
||||||
" Status code {response.status}"
|
f" Status code {response.status}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return "Unable to navigate back; no previous page in the history"
|
return "Unable to navigate back; no previous page in the history"
|
||||||
|
@ -7,9 +7,11 @@ from typing import TYPE_CHECKING, Any, Coroutine, TypeVar
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from playwright.async_api import Browser as AsyncBrowser
|
from playwright.async_api import Browser as AsyncBrowser
|
||||||
from playwright.async_api import Page as AsyncPage
|
from playwright.async_api import Page as AsyncPage
|
||||||
|
from playwright.sync_api import Browser as SyncBrowser
|
||||||
|
from playwright.sync_api import Page as SyncPage
|
||||||
|
|
||||||
|
|
||||||
async def get_current_page(browser: AsyncBrowser) -> AsyncPage:
|
async def aget_current_page(browser: AsyncBrowser) -> AsyncPage:
|
||||||
if not browser.contexts:
|
if not browser.contexts:
|
||||||
context = await browser.new_context()
|
context = await browser.new_context()
|
||||||
return await context.new_page()
|
return await context.new_page()
|
||||||
@ -20,13 +22,31 @@ async def get_current_page(browser: AsyncBrowser) -> AsyncPage:
|
|||||||
return context.pages[-1]
|
return context.pages[-1]
|
||||||
|
|
||||||
|
|
||||||
def create_playwright_browser() -> AsyncBrowser:
|
def get_current_page(browser: SyncBrowser) -> SyncPage:
|
||||||
|
if not browser.contexts:
|
||||||
|
context = browser.new_context()
|
||||||
|
return context.new_page()
|
||||||
|
context = browser.contexts[0] # Assuming you're using the default browser context
|
||||||
|
if not context.pages:
|
||||||
|
return context.new_page()
|
||||||
|
# Assuming the last page in the list is the active one
|
||||||
|
return context.pages[-1]
|
||||||
|
|
||||||
|
|
||||||
|
def create_async_playwright_browser() -> AsyncBrowser:
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
browser = run_async(async_playwright().start())
|
browser = run_async(async_playwright().start())
|
||||||
return run_async(browser.chromium.launch(headless=True))
|
return run_async(browser.chromium.launch(headless=True))
|
||||||
|
|
||||||
|
|
||||||
|
def create_sync_playwright_browser() -> SyncBrowser:
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
browser = sync_playwright().start()
|
||||||
|
return browser.chromium.launch(headless=True)
|
||||||
|
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user