mirror of https://github.com/hwchase17/langchain
Harrison/playwright selector (#3185)
Co-authored-by: zhyuri <4649294+zhyuri@users.noreply.github.com>
pull/2885/head^2
parent
68cd37175e
commit
9181cd9b22
@ -0,0 +1,21 @@
|
||||
"""Tests for the Playwright URL loader"""
|
||||
|
||||
from langchain.document_loaders import PlaywrightURLLoader
|
||||
|
||||
|
||||
def test_playwright_url_loader() -> None:
    """Check that PlaywrightURLLoader returns documents for a set of URLs.

    The loader is built with ``remove_selectors=["header", "footer"]``,
    ``continue_on_failure=False`` and ``headless=True``. The only assertion
    is that ``load()`` yields at least one document.
    """
    # NOTE(review): these are live public sites, so this presumably needs
    # network access and a working Playwright browser install -- confirm.
    target_urls = [
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        "https://goo.gl/maps/NDSHwePEyaHMFGwh8",
        "https://techmeme.com",
        "https://techcrunch.com",
    ]
    selectors_to_strip = ["header", "footer"]

    playwright_loader = PlaywrightURLLoader(
        urls=target_urls,
        remove_selectors=selectors_to_strip,
        continue_on_failure=False,
        headless=True,
    )
    loaded_docs = playwright_loader.load()

    assert len(loaded_docs) > 0
|
Loading…
Reference in New Issue