You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/libs/community/tests/integration_tests/document_loaders/test_docusaurus.py

44 lines
1.3 KiB
Python

from pathlib import Path
from langchain_community.document_loaders import DocusaurusLoader
# String path to the sitemap XML fixture, located in the "examples" folder of
# the directory one level above the one that contains this file.
DOCS_URL = str(
    Path(__file__).parent.parent.joinpath("examples", "docusaurus-sitemap.xml")
)
def test_docusarus() -> None:
    """Load the local sitemap fixture and check the first document's text."""
    docs = DocusaurusLoader(DOCS_URL, is_local=True).load()

    # The fixture is expected to yield several pages, not just one.
    assert len(docs) > 1
    first_page_text = docs[0].page_content
    assert "🦜️🔗 Langchain" in first_page_text
def test_filter_docusaurus_sitemap() -> None:
    """Restrict loading to a single sitemap URL and expect one document."""
    wanted_urls = [
        "https://python.langchain.com/docs/integrations/document_loaders/sitemap"
    ]
    loader = DocusaurusLoader(DOCS_URL, is_local=True, filter_urls=wanted_urls)
    docs = loader.load()

    # Only the page matching the filter should have been loaded.
    assert len(docs) == 1
    assert "SitemapLoader" in docs[0].page_content
def test_docusarus_metadata() -> None:
    """Test that a key added by a custom ``meta_function`` reaches metadata."""

    def sitemap_metadata_one(meta: dict, _content: None) -> dict:
        # Keep the incoming metadata and add one marker key to assert on below.
        # NOTE(review): `_content` is annotated as None; presumably the loader
        # passes page content here -- confirm against the loader's signature.
        return {**meta, "mykey": "Super Important Metadata"}

    loader = DocusaurusLoader(
        DOCS_URL,
        is_local=True,
        meta_function=sitemap_metadata_one,
    )
    documents = loader.load()
    assert len(documents) > 1
    assert "mykey" in documents[0].metadata
    assert "Super Important Metadata" in documents[0].metadata["mykey"]