langchain/libs/community/tests/unit_tests/document_loaders/test_bibtex.py

62 lines
1.7 KiB
Python
Raw Normal View History

from pathlib import Path
import pytest
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv 
langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
2023-12-11 21:53:30 +00:00
from langchain_community.document_loaders.bibtex import BibtexLoader
# Sample BibTeX fixture, located relative to this test module's directory.
BIBTEX_EXAMPLE_FILE = Path(__file__).parent.joinpath("sample_documents", "bibtex.bib")
@pytest.mark.requires("fitz", "bibtexparser")
def test_load_success() -> None:
    """Load the sample BibTeX file and check the single document it yields.

    Asserts that exactly one document is returned, that its page content is
    non-empty, and that its metadata keys are exactly the expected set.
    """
    expected_metadata_keys = {
        "id",
        "published_year",
        "title",
        "publication",
        "authors",
        "abstract",
    }

    documents = BibtexLoader(file_path=str(BIBTEX_EXAMPLE_FILE)).load()
    assert len(documents) == 1

    document = documents[0]
    assert document.page_content
    assert set(document.metadata) == expected_metadata_keys
@pytest.mark.requires("fitz", "bibtexparser")
def test_load_max_content_chars() -> None:
    """Check that page content length equals the max_content_chars given."""
    char_limit = 10
    loader = BibtexLoader(
        file_path=str(BIBTEX_EXAMPLE_FILE),
        max_content_chars=char_limit,
    )
    first_document = loader.load()[0]
    assert len(first_document.page_content) == char_limit
@pytest.mark.requires("fitz", "bibtexparser")
def test_load_load_extra_metadata() -> None:
    """Check the metadata keys produced when load_extra_metadata=True.

    The first loaded document must carry exactly the base keys plus the
    extra ones listed below.
    """
    base_keys = {
        "id",
        "published_year",
        "title",
        "publication",
        "authors",
        "abstract",
    }
    extra_keys = {
        "booktitle",
        "editor",
        "organization",
    }

    loader = BibtexLoader(
        file_path=str(BIBTEX_EXAMPLE_FILE),
        load_extra_metadata=True,
    )
    first_document = loader.load()[0]
    assert set(first_document.metadata) == base_keys | extra_keys
@pytest.mark.requires("fitz", "bibtexparser")
def test_load_file_pattern() -> None:
    """Check that no documents are returned with a ``.json`` file_pattern."""
    json_only_pattern = r"[^:]+\.json"
    loader = BibtexLoader(
        file_path=str(BIBTEX_EXAMPLE_FILE),
        file_pattern=json_only_pattern,
    )
    assert len(loader.load()) == 0