mirror of https://github.com/hwchase17/langchain
standard-tests[minor]: Add standard read write test suite for vectorstores (#23355)
Add standard read write test suite for vectorstores
pull/20260/head
parent
3b3ed72d35
commit
1e750f12f6
@ -0,0 +1,301 @@
|
|||||||
|
"""Test suite to test vectostores."""
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from langchain_core.documents import Document
|
||||||
|
from langchain_core.embeddings.fake import DeterministicFakeEmbedding, Embeddings
|
||||||
|
from langchain_core.vectorstores import VectorStore
|
||||||
|
|
||||||
|
# Dimension of the fake embedding vectors used by the suites below.
# The value itself is arbitrary; it is kept small so that tests run
# faster and are easier to debug.
EMBEDDING_SIZE = 6
|
||||||
|
|
||||||
|
|
||||||
|
class ReadWriteTestSuite(ABC):
    """Standard checks for the synchronous read-write API of a vectorstore.

    The suite exercises the basic operations of a vectorstore: adding
    documents, searching for them and deleting them.

    To use it, subclass the suite and supply a ``vectorstore`` fixture that
    hands every test a fresh, EMPTY vectorstore.

    The fixture should build the store with the embeddings model returned
    by ``get_embeddings``, which is the pre-defined model for this suite.
    """

    @abstractmethod
    @pytest.fixture
    def vectorstore(self) -> VectorStore:
        """Provide the vectorstore under test.

        Implementations must return a vectorstore that is EMPTY.
        """

    @staticmethod
    def get_embeddings() -> Embeddings:
        """Return the pre-defined embeddings model to use with this suite."""
        return DeterministicFakeEmbedding(size=EMBEDDING_SIZE)

    def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
        """Searching the freshly provided vectorstore yields nothing."""
        hits = vectorstore.similarity_search("foo", k=1)
        assert hits == []

    def test_add_documents(self, vectorstore: VectorStore) -> None:
        """Documents that were added can be found through a search."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        vectorstore.add_documents(seed_docs)

        hits = vectorstore.similarity_search("bar", k=2)
        expected = [
            Document(page_content="bar", metadata={"id": 2}),
            Document(page_content="foo", metadata={"id": 1}),
        ]
        assert hits == expected

    def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None:
        """Check that the fixture hands out an empty store after each test.

        This test is meant to run after a test that adds documents, which
        is why it is defined right after one.
        """
        hits = vectorstore.similarity_search("foo", k=1)
        assert hits == []

    def test_deleting_documents(self, vectorstore: VectorStore) -> None:
        """A document deleted by ID no longer shows up in search results."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        vectorstore.add_documents(seed_docs, ids=["1", "2"])
        vectorstore.delete(["1"])

        # "foo" was stored under ID "1", so only "bar" is left to match.
        hits = vectorstore.similarity_search("foo", k=1)
        expected = [Document(page_content="bar", metadata={"id": 2})]
        assert hits == expected

    def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
        """Several documents can be deleted with a single call."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
            Document(page_content="baz", metadata={"id": 3}),
        ]
        vectorstore.add_documents(seed_docs, ids=["1", "2", "3"])
        vectorstore.delete(["1", "2"])

        hits = vectorstore.similarity_search("foo", k=1)
        expected = [Document(page_content="baz", metadata={"id": 3})]
        assert hits == expected

    def test_delete_missing_content(self, vectorstore: VectorStore) -> None:
        """Deleting IDs that were never added must not raise."""
        for missing_ids in (["1"], ["1", "2", "3"]):
            vectorstore.delete(missing_ids)

    def test_add_documents_with_ids_is_idempotent(
        self, vectorstore: VectorStore
    ) -> None:
        """Adding the same documents twice under the same IDs stores them once."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        # Same documents, same IDs, added two times in a row.
        vectorstore.add_documents(seed_docs, ids=["1", "2"])
        vectorstore.add_documents(seed_docs, ids=["1", "2"])

        hits = vectorstore.similarity_search("bar", k=2)
        expected = [
            Document(page_content="bar", metadata={"id": 2}),
            Document(page_content="foo", metadata={"id": 1}),
        ]
        assert hits == expected

    def test_add_documents_without_ids_gets_duplicated(
        self, vectorstore: VectorStore
    ) -> None:
        """Adding the same documents twice without IDs duplicates the content."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        vectorstore.add_documents(seed_docs)
        vectorstore.add_documents(seed_docs)

        # Both of the top two matches are expected to be copies of "bar".
        hits = vectorstore.similarity_search("bar", k=2)
        expected = [
            Document(page_content="bar", metadata={"id": 2}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        assert hits == expected

    def test_add_documents_by_id_with_mutation(self, vectorstore: VectorStore) -> None:
        """Adding a document under an existing ID overwrites the old content."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        vectorstore.add_documents(documents=seed_docs, ids=["1", "2"])

        # Replace whatever is stored under ID "1" with new content/metadata.
        replacement_docs = [
            Document(
                page_content="new foo", metadata={"id": 1, "some_other_field": "foo"}
            ),
        ]
        vectorstore.add_documents(documents=replacement_docs, ids=["1"])

        # The search must now return the replacement instead of the original.
        hits = vectorstore.similarity_search("new foo", k=2)
        expected = [
            Document(
                page_content="new foo", metadata={"id": 1, "some_other_field": "foo"}
            ),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        assert hits == expected
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncReadWriteTestSuite(ABC):
    """Standard checks for the **async** read-write API of a vectorstore.

    The suite exercises the basic operations of a vectorstore through its
    asynchronous methods: adding documents, searching for them and
    deleting them.

    To use it, subclass the suite and supply a ``vectorstore`` fixture that
    hands every test a fresh, EMPTY vectorstore.

    The fixture should build the store with the embeddings model returned
    by ``get_embeddings``, which is the pre-defined model for this suite.
    """

    @abstractmethod
    @pytest.fixture
    async def vectorstore(self) -> VectorStore:
        """Provide the vectorstore under test.

        Implementations must return a vectorstore that is EMPTY.
        """

    @staticmethod
    def get_embeddings() -> Embeddings:
        """Return the pre-defined embeddings model to use with this suite."""
        return DeterministicFakeEmbedding(size=EMBEDDING_SIZE)

    async def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
        """Searching the freshly provided vectorstore yields nothing."""
        hits = await vectorstore.asimilarity_search("foo", k=1)
        assert hits == []

    async def test_add_documents(self, vectorstore: VectorStore) -> None:
        """Documents that were added can be found through a search."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        await vectorstore.aadd_documents(seed_docs)

        hits = await vectorstore.asimilarity_search("bar", k=2)
        expected = [
            Document(page_content="bar", metadata={"id": 2}),
            Document(page_content="foo", metadata={"id": 1}),
        ]
        assert hits == expected

    async def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None:
        """Check that the fixture hands out an empty store after each test.

        This test is meant to run after a test that adds documents, which
        is why it is defined right after one.
        """
        hits = await vectorstore.asimilarity_search("foo", k=1)
        assert hits == []

    async def test_deleting_documents(self, vectorstore: VectorStore) -> None:
        """A document deleted by ID no longer shows up in search results."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        await vectorstore.aadd_documents(seed_docs, ids=["1", "2"])
        await vectorstore.adelete(["1"])

        # "foo" was stored under ID "1", so only "bar" is left to match.
        hits = await vectorstore.asimilarity_search("foo", k=1)
        expected = [Document(page_content="bar", metadata={"id": 2})]
        assert hits == expected

    async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
        """Several documents can be deleted with a single call."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
            Document(page_content="baz", metadata={"id": 3}),
        ]
        await vectorstore.aadd_documents(seed_docs, ids=["1", "2", "3"])
        await vectorstore.adelete(["1", "2"])

        hits = await vectorstore.asimilarity_search("foo", k=1)
        expected = [Document(page_content="baz", metadata={"id": 3})]
        assert hits == expected

    async def test_delete_missing_content(self, vectorstore: VectorStore) -> None:
        """Deleting IDs that were never added must not raise."""
        for missing_ids in (["1"], ["1", "2", "3"]):
            await vectorstore.adelete(missing_ids)

    async def test_add_documents_with_ids_is_idempotent(
        self, vectorstore: VectorStore
    ) -> None:
        """Adding the same documents twice under the same IDs stores them once."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        # Same documents, same IDs, added two times in a row.
        await vectorstore.aadd_documents(seed_docs, ids=["1", "2"])
        await vectorstore.aadd_documents(seed_docs, ids=["1", "2"])

        hits = await vectorstore.asimilarity_search("bar", k=2)
        expected = [
            Document(page_content="bar", metadata={"id": 2}),
            Document(page_content="foo", metadata={"id": 1}),
        ]
        assert hits == expected

    async def test_add_documents_without_ids_gets_duplicated(
        self, vectorstore: VectorStore
    ) -> None:
        """Adding the same documents twice without IDs duplicates the content."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        await vectorstore.aadd_documents(seed_docs)
        await vectorstore.aadd_documents(seed_docs)

        # Both of the top two matches are expected to be copies of "bar".
        hits = await vectorstore.asimilarity_search("bar", k=2)
        expected = [
            Document(page_content="bar", metadata={"id": 2}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        assert hits == expected

    async def test_add_documents_by_id_with_mutation(
        self, vectorstore: VectorStore
    ) -> None:
        """Adding a document under an existing ID overwrites the old content."""
        seed_docs = [
            Document(page_content="foo", metadata={"id": 1}),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        await vectorstore.aadd_documents(documents=seed_docs, ids=["1", "2"])

        # Replace whatever is stored under ID "1" with new content/metadata.
        replacement_docs = [
            Document(
                page_content="new foo", metadata={"id": 1, "some_other_field": "foo"}
            ),
        ]
        await vectorstore.aadd_documents(documents=replacement_docs, ids=["1"])

        # The search must now return the replacement instead of the original.
        hits = await vectorstore.asimilarity_search("new foo", k=2)
        expected = [
            Document(
                page_content="new foo", metadata={"id": 1, "some_other_field": "foo"}
            ),
            Document(page_content="bar", metadata={"id": 2}),
        ]
        assert hits == expected
|
@ -0,0 +1,28 @@
|
|||||||
|
import pytest
|
||||||
|
from langchain_core.vectorstores import VectorStore
|
||||||
|
|
||||||
|
from langchain_standard_tests.integration_tests.vectorstores import (
|
||||||
|
AsyncReadWriteTestSuite,
|
||||||
|
ReadWriteTestSuite,
|
||||||
|
)
|
||||||
|
|
||||||
|
# We'll need to move this dependency to core
|
||||||
|
pytest.importorskip("langchain_community")
|
||||||
|
|
||||||
|
from langchain_community.vectorstores.inmemory import ( # type: ignore # noqa
|
||||||
|
InMemoryVectorStore,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestInMemoryVectorStore(ReadWriteTestSuite):
    """Run the synchronous read-write suite against ``InMemoryVectorStore``."""

    @pytest.fixture
    def vectorstore(self) -> VectorStore:
        """Create a new ``InMemoryVectorStore`` using the suite's embeddings."""
        return InMemoryVectorStore(embedding=self.get_embeddings())
|
||||||
|
|
||||||
|
|
||||||
|
class TestAysncInMemoryVectorStore(AsyncReadWriteTestSuite):
    """Run the async read-write suite against ``InMemoryVectorStore``."""

    @pytest.fixture
    async def vectorstore(self) -> VectorStore:
        """Create a new ``InMemoryVectorStore`` using the suite's embeddings."""
        return InMemoryVectorStore(embedding=self.get_embeddings())
|
Loading…
Reference in New Issue