forked from Archives/langchain
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
26 lines
733 B
Python
26 lines
733 B
Python
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from langchain.document_loaders import DirectoryLoader, TextLoader
|
|
|
|
|
|
@pytest.mark.requires("chardet")
def test_text_loader_detect_encodings() -> None:
    """Verify that encoding autodetection lets the directory loader succeed.

    Two ``DirectoryLoader`` instances are built over the same ``examples``
    directory (two levels up from this file) with the same ``**/*.txt``
    pattern and ``TextLoader`` as the per-file loader:

    * the plain one is expected to raise ``UnicodeDecodeError`` or
      ``RuntimeError`` when loading;
    * the one configured with ``autodetect_encoding=True`` is expected to
      return exactly one document per matched ``.txt`` file.
    """
    txt_pattern = "**/*.txt"
    examples_dir = Path(__file__).parent.parent / "examples"
    # Lazy iterator; it is only consumed by the final count below.
    matching_files = examples_dir.glob(txt_pattern)
    root = str(examples_dir)

    strict_loader = DirectoryLoader(root, glob=txt_pattern, loader_cls=TextLoader)
    autodetect_loader = DirectoryLoader(
        root,
        glob=txt_pattern,
        loader_kwargs={"autodetect_encoding": True},
        loader_cls=TextLoader,
    )

    # Without autodetection the load is expected to fail.
    # NOTE(review): presumably at least one example file is not decodable
    # with the default encoding -- confirm against the examples directory.
    expected_errors = (UnicodeDecodeError, RuntimeError)
    with pytest.raises(expected_errors):
        strict_loader.load()

    loaded_docs = autodetect_loader.load()
    expected_count = len(list(matching_files))
    assert len(loaded_docs) == expected_count
|