mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
Recursive url loader w/ test (#8813)
Description: Due to some issue on the test, this is a separate PR with the test for #8502 Tag maintainer: @rlancemartin --------- Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
cb5fb751e9
commit
6221eb5974
@ -0,0 +1,30 @@
|
||||
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
|
||||
|
||||
|
||||
def test_async_recursive_url_loader() -> None:
    """Load the Python 3.9 docs root with ``use_async=True`` at depth 1.

    The extractor is stubbed to return a constant string, so the assertions
    only cover how many documents come back and that the stubbed text ends
    up as the page content of the first one.
    """
    # NOTE(review): presumably hits the live docs.python.org site, so the
    # expected count is tied to that site's current link structure — confirm.
    crawler = RecursiveUrlLoader(
        url="https://docs.python.org/3.9/",
        extractor=lambda _: "placeholder",
        use_async=True,
        max_depth=1,
    )
    documents = crawler.load()

    assert len(documents) == 24
    first_document = documents[0]
    assert first_document.page_content == "placeholder"
|
||||
|
||||
|
||||
def test_sync_recursive_url_loader() -> None:
    """Load the Python 3.9 docs root with ``use_async=False`` at depth 1.

    The extractor is stubbed to return a constant string, so the assertions
    only cover how many documents come back and that the stubbed text ends
    up as the page content of the first one.
    """
    # NOTE(review): presumably hits the live docs.python.org site, so the
    # expected count is tied to that site's current link structure — confirm.
    crawler = RecursiveUrlLoader(
        url="https://docs.python.org/3.9/",
        extractor=lambda _: "placeholder",
        use_async=False,
        max_depth=1,
    )
    documents = crawler.load()

    assert len(documents) == 24
    first_document = documents[0]
    assert first_document.page_content == "placeholder"
|
||||
|
||||
|
||||
def test_loading_invalid_url() -> None:
    """Check that loading from an invalid URL yields no documents.

    The loader is run with ``use_async=False`` at depth 1 and a stubbed
    extractor; the test expects ``load()`` to return an empty result rather
    than raise.
    """
    invalid_url = "https://this.url.is.invalid/this/is/a/test"
    crawler = RecursiveUrlLoader(
        url=invalid_url,
        max_depth=1,
        extractor=lambda _: "placeholder",
        use_async=False,
    )
    documents = crawler.load()

    assert len(documents) == 0
|
Loading…
Reference in New Issue
Block a user