From 191da06d9bf9854c3e75b95b0bbead9f486d2778 Mon Sep 17 00:00:00 2001 From: blob42 Date: Fri, 12 May 2023 19:51:11 +0200 Subject: [PATCH] move tests to unit tests (#4479) --- pyproject.toml | 2 +- .../document_loaders/test_dir_text_loader.py | 25 ------------------ .../document_loader/test_text_loader.py | 26 +++++++++++++++++++ .../examples/example-non-utf8.txt | 0 .../examples/example-utf8.txt | 0 5 files changed, 27 insertions(+), 26 deletions(-) delete mode 100644 tests/integration_tests/document_loaders/test_dir_text_loader.py create mode 100644 tests/unit_tests/document_loader/test_text_loader.py rename tests/{integration_tests => unit_tests}/examples/example-non-utf8.txt (100%) rename tests/{integration_tests => unit_tests}/examples/example-utf8.txt (100%) diff --git a/pyproject.toml b/pyproject.toml index ea43cda9..91297924 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -174,7 +174,7 @@ azure = ["azure-identity", "azure-cosmos", "openai", "azure-core"] all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "protobuf", "hnswlib", "chardet"] # An extra used to be able to add extended testing. extended_testing = [ - "pypdf", "pdfminer.six", "tqdm" + "pypdf", "pdfminer.six", "tqdm", "chardet" ] [tool.ruff] diff --git a/tests/integration_tests/document_loaders/test_dir_text_loader.py b/tests/integration_tests/document_loaders/test_dir_text_loader.py deleted file mode 100644 index 3f179ce3..00000000 --- a/tests/integration_tests/document_loaders/test_dir_text_loader.py +++ /dev/null @@ -1,25 +0,0 @@ -from pathlib import Path - -import pytest - -from langchain.document_loaders import DirectoryLoader, TextLoader - - -@pytest.mark.requires("chardet") -def test_text_loader() -> None: - """Test text loader.""" - path = Path(__file__).parent.parent / "examples" - files = path.glob("**/*.txt") - loader = DirectoryLoader(str(path), glob="**/*.txt", loader_cls=TextLoader) - loader_detect_encoding = DirectoryLoader( - str(path), - glob="**/*.txt", - loader_kwargs={"autodetect_encoding": True}, - loader_cls=TextLoader, - ) - - with pytest.raises((UnicodeDecodeError, RuntimeError)): - loader.load() - - docs = loader_detect_encoding.load() - assert len(docs) == len(list(files)) diff --git a/tests/unit_tests/document_loader/test_text_loader.py b/tests/unit_tests/document_loader/test_text_loader.py new file mode 100644 index 00000000..79187ed9 --- /dev/null +++ b/tests/unit_tests/document_loader/test_text_loader.py @@ -0,0 +1,26 @@ +from pathlib import Path + +import pytest + +from langchain.document_loaders import DirectoryLoader, TextLoader + +class TestTextLoader: + + @pytest.mark.requires("chardet") + def test_load_directory(self) -> None: + """Test text loader.""" + path = Path(__file__).parent.parent / "examples" + files = path.glob("**/*.txt") + loader = DirectoryLoader(str(path), glob="**/*.txt", loader_cls=TextLoader) + loader_detect_encoding = DirectoryLoader( + str(path), + glob="**/*.txt", + loader_kwargs={"autodetect_encoding": True}, + loader_cls=TextLoader, + ) + + with pytest.raises((UnicodeDecodeError, RuntimeError)): + loader.load() + + docs = loader_detect_encoding.load() + assert len(docs) == len(list(files)) diff --git a/tests/integration_tests/examples/example-non-utf8.txt b/tests/unit_tests/examples/example-non-utf8.txt similarity index 100% rename from tests/integration_tests/examples/example-non-utf8.txt rename to tests/unit_tests/examples/example-non-utf8.txt diff --git a/tests/integration_tests/examples/example-utf8.txt b/tests/unit_tests/examples/example-utf8.txt similarity index 100% rename from tests/integration_tests/examples/example-utf8.txt rename to tests/unit_tests/examples/example-utf8.txt