Turn on extended tests (#4588)

# Turn on strict extended tests

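This PR turns on strict testing for extended tests: CI now runs `make test` only for the `core` test type and `make extended_tests` (`poetry run pytest --only-extended tests/unit_tests`) for every other test type.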
parallel_dir_loader
Eugene Yurtsev 1 year ago committed by GitHub
parent d96f6a106b
commit 08ed927c32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -40,5 +40,9 @@ jobs:
fi
- name: Run ${{matrix.test_type}} tests
run: |
make test
if [ "${{ matrix.test_type }}" == "core" ]; then
make test
else
make extended_tests
fi
shell: bash

@ -1,4 +1,4 @@
.PHONY: all clean format lint test tests test_watch integration_tests docker_tests help
.PHONY: all clean format lint test tests test_watch integration_tests docker_tests help extended_tests
all: help
@ -40,6 +40,9 @@ test:
tests:
poetry run pytest $(TEST_FILE)
extended_tests:
poetry run pytest --only-extended tests/unit_tests
test_watch:
poetry run ptw --now . -- tests/unit_tests
@ -59,7 +62,9 @@ help:
@echo 'format - run code formatters'
@echo 'lint - run linters'
@echo 'test - run unit tests'
@echo 'test - run unit tests'
@echo 'test TEST_FILE=<test_file> - run all tests in file'
@echo 'extended_tests - run only extended unit tests'
@echo 'test_watch - run unit tests in watch mode'
@echo 'integration_tests - run integration tests'
@echo 'docker_tests - run unit tests in docker'

12
poetry.lock generated

@ -419,7 +419,7 @@ name = "arxiv"
version = "1.4.7"
description = "Python wrapper for the arXiv API: http://arxiv.org/help/api/"
category = "main"
optional = true
optional = false
python-versions = ">=3.7"
files = [
{file = "arxiv-1.4.7-py3-none-any.whl", hash = "sha256:22b8f610957bb6859a25fac9dc205ab6ba76d521791119a5762ea52625e398a0"},
@ -1896,7 +1896,7 @@ name = "feedparser"
version = "6.0.10"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
category = "main"
optional = true
optional = false
python-versions = ">=3.6"
files = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
@ -7656,7 +7656,7 @@ name = "sgmllib3k"
version = "1.0.0"
description = "Py3k port of sgmllib."
category = "main"
optional = true
optional = false
python-versions = "*"
files = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
@ -9998,14 +9998,14 @@ all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api
azure = ["azure-core", "azure-cosmos", "azure-identity", "openai"]
cohere = ["cohere"]
embeddings = ["sentence-transformers"]
extended-testing = ["pdfminer-six", "pypdf"]
extended-testing = ["pdfminer-six", "pypdf", "tqdm"]
hnswlib = ["docarray", "hnswlib", "protobuf"]
in-memory-store = ["docarray"]
llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"]
openai = ["openai"]
openai = ["openai", "tiktoken"]
qdrant = ["qdrant-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "62b7e066979d91e6baf921af79ac1fd0f44d9c0809b697dd511ac7c0fb3a09cc"
content-hash = "6d5c4aa06539e6f7c7531c30d73cbf08fbdea75486bf4b81c106b9e678a13b45"

@ -170,7 +170,9 @@ embeddings = ["sentence-transformers"]
azure = ["azure-identity", "azure-cosmos", "openai", "azure-core"]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "protobuf", "hnswlib"]
# An extra used to be able to add extended testing.
extended_testing = ["pypdf", "pdfminer.six"]
extended_testing = [
"pypdf", "pdfminer.six", "tqdm"
]
[tool.ruff]
select = [

@ -7,8 +7,6 @@ from langchain.document_loaders.base import BaseBlobParser
from langchain.document_loaders.blob_loaders import Blob
from langchain.document_loaders.parsers.pdf import (
PDFMinerParser,
PyMuPDFParser,
PyPDFium2Parser,
PyPDFParser,
)
from tests.data import HELLO_PDF, LAYOUT_PARSER_PAPER_PDF
@ -53,12 +51,6 @@ def _assert_with_parser(parser: BaseBlobParser, splits_by_page: bool = True) ->
assert metadata["page"] == 0
@pytest.mark.requires("fitz")
def test_pymupdf_loader() -> None:
"""Test PyMuPDF loader."""
_assert_with_parser(PyMuPDFParser())
@pytest.mark.requires("pypdf")
def test_pypdf_parser() -> None:
"""Test PyPDF parser."""
@ -70,10 +62,3 @@ def test_pdfminer_parser() -> None:
"""Test PDFMiner parser."""
# Does not follow defaults to split by page.
_assert_with_parser(PDFMinerParser(), splits_by_page=False)
@pytest.mark.requires("pypdfium2")
def test_pypdfium2_parser() -> None:
"""Test PyPDFium2 parser."""
# Does not follow defaults to split by page.
_assert_with_parser(PyPDFium2Parser())

Loading…
Cancel
Save