langchain/docs/modules/agents/toolkits/examples/playwright.ipynb
Zander Chase 491c27f861
PlayWright Web Browser Toolkit (#3262)
Adds a PlayWright web browser toolkit with the following tools:

- NavigateTool (navigate_browser) - navigate to a URL
- NavigateBackTool (previous_webpage) - navigate back to the previous page
- ClickTool (click_element) - click on an element (specified by
selector)
- ExtractTextTool (extract_text) - use beautiful soup to extract text
from the current web page
- ExtractHyperlinksTool (extract_hyperlinks) - use beautiful soup to
extract hyperlinks from the current web page
- GetElementsTool (get_elements) - select elements by CSS selector
- CurrentWebPageTool (current_webpage) - get the current page URL
2023-04-28 10:42:44 -07:00

180 lines
17 KiB
Plaintext

{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# PlayWright Browser Toolkit\n",
"\n",
"This toolkit is used to interact with the browser. While other tools (like the Requests tools) are fine for static sites, Browser toolkits let your agent navigate the web and interact with dynamically rendered sites. Some tools bundled within the Browser toolkit include:\n",
"\n",
"- NavigateTool (navigate_browser) - navigate to a URL\n",
"- NavigateBackTool (previous_page) - wait for an element to appear\n",
"- ClickTool (click_element) - click on an element (specified by selector)\n",
"- ExtractTextTool (extract_text) - use beautiful soup to extract text from the current web page\n",
"- ExtractHyperlinksTool (extract_hyperlinks) - use beautiful soup to extract hyperlinks from the current web page\n",
"- GetElementsTool (get_elements) - select elements by CSS selector\n",
"- CurrentPageTool (current_page) - get the current page URL\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# !pip install playwright > /dev/null\n",
"# !pip install lxml\n",
"\n",
"# If this is your first time using playwright, you'll have to install a browser executable.\n",
"# Running `playwright install` by default installs a chromium browser executable.\n",
"# playwright install"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[ClickTool(name='click_element', description='Click on an element with the given CSS selector', args_schema=<class 'langchain.tools.playwright.click.ClickToolInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x11cd0b790>, browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" NavigateTool(name='navigate_browser', description='Navigate a browser to the specified URL', args_schema=<class 'langchain.tools.playwright.navigate.NavigateToolInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x11cd0b790>, browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" NavigateBackTool(name='previous_webpage', description='Navigate back to the previous page in the browser history', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x11cd0b790>, browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" ExtractTextTool(name='extract_text', description='Extract all the text on the current webpage', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x11cd0b790>, browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" ExtractHyperlinksTool(name='extract_hyperlinks', description='Extract all hyperlinks on the current webpage', args_schema=<class 'langchain.tools.playwright.extract_hyperlinks.ExtractHyperlinksToolInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x11cd0b790>, browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" GetElementsTool(name='get_elements', description='Retrieve elements in the current web page matching the given CSS selector', args_schema=<class 'langchain.tools.playwright.get_elements.GetElementsToolInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x11cd0b790>, browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" CurrentWebPageTool(name='current_webpage', description='Returns the URL of the current page', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x11cd0b790>, browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>)]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This import is required only for jupyter notebooks, since they have their own eventloop\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"toolkit = PlayWrightBrowserToolkit()\n",
"tools = toolkit.get_tools()\n",
"tools"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"tools_by_name = {tool.name: tool for tool in tools}\n",
"navigate_tool = tools_by_name[\"navigate_browser\"]\n",
"get_elements_tool = tools_by_name[\"get_elements\"]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Navigating to https://web.archive.org/web/20230428131116/https://www.cnn.com/world returned status code 200'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"navigate_tool.run({\"url\": \"https://web.archive.org/web/20230428131116/https://www.cnn.com/world\"})"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'[{\"innerText\": \"As US and Philippine defense ties grow, China warns over Taiwan tensions\"}, {\"innerText\": \"Almost two-thirds of elephant habitat lost across Asia, study finds\"}, {\"innerText\": \"\\\\u2018We don\\\\u2019t sleep \\\\u2026 I would call it fainting\\\\u2019: Working as a doctor in Sudan\\\\u2019s crisis\"}, {\"innerText\": \"Kenya arrests second pastor to face criminal charges \\\\u2018related to mass killing of his followers\\\\u2019\"}, {\"innerText\": \"Ocean census aims to discover 100,000 previously unknown marine species\"}, {\"innerText\": \"Iran\\\\u2019s Navy seizes Marshall Islands-flagged ship\"}, {\"innerText\": \"German landlord wins right to sunbathe naked despite complaints from tenants\"}, {\"innerText\": \"Single people should be \\\\u2018valued\\\\u2019 as Jesus was single, Church of England says\"}, {\"innerText\": \"Turkey\\\\u2019s Erdogan cancels public appearances after falling ill as election nears\"}, {\"innerText\": \"Drought-stricken Spain braces for exceptionally high temperatures expected to break April records\"}, {\"innerText\": \"With Zelensky call, Xi Jinping steps up bid to broker peace \\\\u2013 but does he have a plan?\"}, {\"innerText\": \"Indian and Chinese defense ministers to meet face to face\"}, {\"innerText\": \"Pope to allow women to vote at global bishops meeting\"}, {\"innerText\": \"Catastrophic drought that\\\\u2019s pushed millions into crisis made 100 times more likely by climate change, analysis finds\"}, {\"innerText\": \"\\\\u2018Bring Ya Ya home\\\\u2019: How a panda in the US turbocharged Chinese nationalist sentiment\"}, {\"innerText\": \"\\\\u2018Often they shoot at each other\\\\u2019: Ukrainian drone operator details chaos in Russian ranks\"}, {\"innerText\": \"U.S. 
talk show host Jerry Springer dies at 79\"}, {\"innerText\": \"Girl to get life-saving treatment for rare immune disease\"}, {\"innerText\": \"Wall Street Journal editor discusses reporter\\\\u2019s arrest in Moscow\"}, {\"innerText\": \"Belgium destroys shipment of American beer after taking issue with \\\\u2018Champagne of Beer\\\\u2019 slogan\"}, {\"innerText\": \"UK Prime Minister Rishi Sunak rocked by resignation of top ally Raab over bullying allegations\"}, {\"innerText\": \"Coronation mishaps King Charles III will want to avoid\"}, {\"innerText\": \"Russian jet accidentally drops bomb on Russian city of Belgorod, state media says\"}, {\"innerText\": \"Queen Camilla\\\\u2019s son, Tom Parker Bowles, says his mother \\\\u2018married the person she loved\\\\u2019\"}, {\"innerText\": \"These Iranian activists fled for freedom. The regime still managed to find them\"}, {\"innerText\": \"A divided Israel stands at a perilous crossroads on its 75th birthday\"}, {\"innerText\": \"Palestinian reporter breaks barriers by reporting in Hebrew on Israeli TV\"}, {\"innerText\": \"One-fifth of water pollution comes from textile dyes. 
But a shellfish-inspired solution could clean it up\"}, {\"innerText\": \"\\\\u2018People sacrificed their lives for just\\\\u00a010 dollars\\\\u2019: At least 78 killed in Yemen crowd surge\"}, {\"innerText\": \"Israeli police say two men shot near Jewish tomb in Jerusalem in suspected \\\\u2018terror attack\\\\u2019\"}, {\"innerText\": \"Houthis try to reassure skeptics they won\\\\u2019t seek full control of Yemen, as Saudis eye exit\"}, {\"innerText\": \"The week in 33 photos\"}, {\"innerText\": \"Hong Kong\\\\u2019s endangered turtles\"}, {\"innerText\": \"In pictures: Britain\\\\u2019s Queen Camilla\"}, {\"innerText\": \"In pictures: Charles and Camilla\"}, {\"innerText\": \"For years, a UK mining giant was untouchable in Zambia for pollution until a former miner\\\\u2019s son took them on\"}, {\"innerText\": \"Former Sudanese minister Ahmed Haroun wanted on war crimes charges freed from Khartoum prison\"}, {\"innerText\": \"WHO warns of \\\\u2018biological risk\\\\u2019 after Sudan fighters seize lab, as violence mars US-brokered ceasefire\"}, {\"innerText\": \"Rival generals are battling for control in Sudan. Here\\\\u2019s a simple guide to the fighting\"}, {\"innerText\": \"How Colombia\\\\u2019s Petro, a former leftwing guerrilla, found his opening in Washington\"}, {\"innerText\": \"Bolsonaro accidentally created Facebook post questioning Brazil election results, say his attorneys\"}, {\"innerText\": \"Crowd kills over a dozen suspected gang members in Haiti\"}, {\"innerText\": \"Thousands of tequila bottles containing liquid meth seized\"}, {\"innerText\": \"Why send a US stealth submarine to South Korea \\\\u2013 and tell the world about it?\"}, {\"innerText\": \"Fukushima\\\\u2019s fishing industry survived a nuclear disaster. 
12 years on, it fears Tokyo\\\\u2019s next move may finish it off\"}, {\"innerText\": \"Singapore executes man for trafficking two pounds of cannabis\"}, {\"innerText\": \"Conservative Thai party looks to woo voters with promise to legalize sex toys\"}, {\"innerText\": \"Watch planes take off in Japan \\\\u2014 from an onsen\"}, {\"innerText\": \"Bilt\\\\u2019s May Rent Day promotion: Fly to Europe for as few as 6,000 Bilt points\"}, {\"innerText\": \"Cabeau just dropped the Evolution Earth, a new eco-minded travel pillow\"}, {\"innerText\": \"Nemo\\\\u2019s Garden: The future of farming could be under the sea\"}, {\"innerText\": \"Cadence\\\\u2019s cult-favorite travel capsules are now available in more sizes\"}, {\"innerText\": \"Judy Blume\\\\u2019s books were formative for generations of readers. Here\\\\u2019s why they endure\"}, {\"innerText\": \"Craft, salvage and sustainability take center stage at Milan Design Week\"}, {\"innerText\": \"Life-sized chocolate King Charles III sculpture unveiled to celebrate coronation\"}, {\"innerText\": \"Rock legend Freddie Mercury\\\\u2019s personal possessions are going up for auction\"}, {\"innerText\": \"John Travolta\\\\u2019s white \\\\u2018Saturday Night Fever\\\\u2019 suit fetches $260K at auction\"}, {\"innerText\": \"The South is in the crosshairs of severe weather again, as the multi-day threat of large hail and tornadoes continues\"}, {\"innerText\": \"Spring snowmelt has cities along the Mississippi bracing for flooding in homes and businesses\"}, {\"innerText\": \"Know the difference between a tornado watch, a tornado warning and a tornado emergency\"}, {\"innerText\": \"Large hail drops on parts of Texas and Florida as South remains at risk of severe storms\"}, {\"innerText\": \"House Republicans adopt bill raising U.S. 
debt limit and cutting spending\"}, {\"innerText\": \"Judge puts hold on Missouri rule limiting gender-affirming care\"}, {\"innerText\": \"Eleven people killed in suspected Maoist militant attack in central India\"}, {\"innerText\": \"Prosecutors tell judge intel the Air National Guardsman took \\\\u2018far exceeds\\\\u2019 what has been reported\"}, {\"innerText\": \"The son of a Sudanese doctor killed in a mortar attack speaks with Rosemary Church\"}, {\"innerText\": \"Melting snow worsens flooding along the Mississippi River\"}, {\"innerText\": \"Writer E. Jean Carroll testifies in civil suit against Donald Trump\"}, {\"innerText\": \"Nepalese authorities issue record number of Everest permits\"}, {\"innerText\": \"Cruise passenger disappears overboard during trip from Australia to Hawaii\"}, {\"innerText\": \"Watch South Korean president sing \\\\u2018American Pie\\\\u2019 for Biden\"}, {\"innerText\": \"See Russian fighter jet on fire after blowing up mid-flight\"}, {\"innerText\": \"Disney Sues Florida Governor Ron DeSantis\"}, {\"innerText\": \"Yasmeen Lari, \\\\u2018starchitect\\\\u2019 turned social engineer, wins one of architecture\\\\u2019s most coveted prizes\"}, {\"innerText\": \"A massive, newly restored Frank Lloyd Wright mansion is up for sale\"}, {\"innerText\": \"Are these the most sustainable architectural projects in the world?\"}, {\"innerText\": \"Step inside a $72 million London townhouse in a converted army barracks\"}, {\"innerText\": \"A 3D-printing company is preparing to build on the lunar surface. 
But first, a moonshot at home\"}, {\"innerText\": \"Carolina Panthers select QB Bryce Young with first pick of NFL Draft\"}, {\"innerText\": \"Brittney Griner says she\\\\u2019ll \\\\u2018never go overseas again\\\\u2019 to play unless it\\\\u2019s for the Olympics after being detained in Russia\"}, {\"innerText\": \"Pel\\\\u00e9 added to Portuguese dictionary as an adjective for \\\\u2018out of the ordinary\\\\u2019\"}, {\"innerText\": \"Players reimbursing fans and the interim manager getting sacked: How Tottenham Hotspur fell into disrepair\"}, {\"innerText\": \"This CNN Hero is recruiting recreational divers to help rebuild reefs in Florida one coral at a time\"}, {\"innerText\": \"This CNN Hero offers judgment-free veterinary care for the pets of those experiencing homelessness\"}, {\"innerText\": \"Don\\\\u2019t give up on milestones: A CNN Hero\\\\u2019s message for Autism Awareness Month\"}, {\"innerText\": \"CNN Hero of the Year Nelly Cheboi returned to Kenya with plans to lift more students out of poverty\"}]'"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The browser is shared across tools, so the agent can interact in a stateful manner\n",
"get_elements_tool.run({\"selector\": \".container__headline\", \"attributes\": [\"innerText\"]})"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'https://web.archive.org/web/20230428033754/https://www.cnn.com/world'"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# If the agent wants to remember the current webpage, it can use the `current_webpage` tool\n",
"tools_by_name['current_webpage'].run({})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}