You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/libs/community/tests/unit_tests/document_loaders/test_mhtml.py

26 lines
664 B
Python

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

from pathlib import Path
import pytest
from langchain_community.document_loaders.mhtml import MHTMLLoader
# Directory that contains this test module.
HERE = Path(__file__).parent
# Example fixtures: two directory levels above HERE, then
# "integration_tests/examples".
EXAMPLES = HERE.parent.parent.joinpath("integration_tests", "examples")
@pytest.mark.requires("bs4", "lxml")
def test_mhtml_loader() -> None:
    """Load ``example.mht`` with ``MHTMLLoader`` and check the result.

    Asserts that exactly one document is produced, that its metadata holds
    the expected ``title`` and a ``source`` equal to the path string given to
    the loader, and that the page content contains the expected text snippet.
    """
    mht_path = EXAMPLES / "example.mht"
    # The loader receives the path as a string; the same string is later
    # compared against the ``source`` metadata entry.
    source = str(mht_path)

    loaded = MHTMLLoader(source).load()
    assert len(loaded) == 1

    document = loaded[0]
    meta, text = document.metadata, document.page_content

    assert meta["title"] == "LangChain"
    assert meta["source"] == source
    assert "LANG CHAIN 🦜🔗Official Home Page" in text