langchain/tests/integration_tests/vectorstores/test_chroma.py

231 lines
8.1 KiB
Python
Raw Normal View History

"""Test Chroma functionality."""
import pytest
from langchain.docstore.document import Document
from langchain.vectorstores import Chroma
from tests.integration_tests.vectorstores.fake_embeddings import (
ConsistentFakeEmbeddings,
FakeEmbeddings,
)
def test_chroma() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
@pytest.mark.asyncio
async def test_chroma_async() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
)
output = await docsearch.asimilarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_chroma_with_metadatas() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Chroma.from_texts(
collection_name="test_collection",
texts=texts,
embedding=FakeEmbeddings(),
metadatas=metadatas,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"page": "0"})]
def test_chroma_with_metadatas_with_scores() -> None:
"""Test end to end construction and scored search."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Chroma.from_texts(
collection_name="test_collection",
texts=texts,
embedding=FakeEmbeddings(),
metadatas=metadatas,
)
output = docsearch.similarity_search_with_score("foo", k=1)
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
def test_chroma_with_metadatas_with_scores_using_vector() -> None:
"""Test end to end construction and scored search, using embedding vector."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
embeddings = FakeEmbeddings()
docsearch = Chroma.from_texts(
collection_name="test_collection",
texts=texts,
embedding=embeddings,
metadatas=metadatas,
)
embedded_query = embeddings.embed_query("foo")
output = docsearch.similarity_search_by_vector_with_relevance_scores(
embedding=embedded_query, k=1
)
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
def test_chroma_search_filter() -> None:
"""Test end to end construction and search with metadata filtering."""
texts = ["far", "bar", "baz"]
metadatas = [{"first_letter": "{}".format(text[0])} for text in texts]
docsearch = Chroma.from_texts(
collection_name="test_collection",
texts=texts,
embedding=FakeEmbeddings(),
metadatas=metadatas,
)
output = docsearch.similarity_search("far", k=1, filter={"first_letter": "f"})
assert output == [Document(page_content="far", metadata={"first_letter": "f"})]
output = docsearch.similarity_search("far", k=1, filter={"first_letter": "b"})
assert output == [Document(page_content="bar", metadata={"first_letter": "b"})]
def test_chroma_search_filter_with_scores() -> None:
"""Test end to end construction and scored search with metadata filtering."""
texts = ["far", "bar", "baz"]
metadatas = [{"first_letter": "{}".format(text[0])} for text in texts]
docsearch = Chroma.from_texts(
collection_name="test_collection",
texts=texts,
embedding=FakeEmbeddings(),
metadatas=metadatas,
)
output = docsearch.similarity_search_with_score(
"far", k=1, filter={"first_letter": "f"}
)
assert output == [
(Document(page_content="far", metadata={"first_letter": "f"}), 0.0)
]
output = docsearch.similarity_search_with_score(
"far", k=1, filter={"first_letter": "b"}
)
assert output == [
(Document(page_content="bar", metadata={"first_letter": "b"}), 1.0)
]
def test_chroma_with_persistence() -> None:
"""Test end to end construction and search, with persistence."""
chroma_persist_dir = "./tests/persist_dir"
collection_name = "test_collection"
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name=collection_name,
texts=texts,
embedding=FakeEmbeddings(),
persist_directory=chroma_persist_dir,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
docsearch.persist()
# Get a new VectorStore from the persisted directory
docsearch = Chroma(
collection_name=collection_name,
embedding_function=FakeEmbeddings(),
persist_directory=chroma_persist_dir,
)
output = docsearch.similarity_search("foo", k=1)
# Clean up
docsearch.delete_collection()
# Persist doesn't need to be called again
# Data will be automatically persisted on object deletion
# Or on program exit
def test_chroma_mmr() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
)
output = docsearch.max_marginal_relevance_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_chroma_mmr_by_vector() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
embeddings = FakeEmbeddings()
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=embeddings
)
embedded_query = embeddings.embed_query("foo")
output = docsearch.max_marginal_relevance_search_by_vector(embedded_query, k=1)
assert output == [Document(page_content="foo")]
def test_chroma_with_include_parameter() -> None:
"""Test end to end construction and include parameter."""
texts = ["foo", "bar", "baz"]
docsearch = Chroma.from_texts(
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
)
output = docsearch.get(include=["embeddings"])
assert output["embeddings"] is not None
output = docsearch.get()
assert output["embeddings"] is None
Fix update_document function, add test and documentation. (#5359) # Fix for `update_document` Function in Chroma ## Summary This pull request addresses an issue with the `update_document` function in the Chroma class, as described in [#5031](https://github.com/hwchase17/langchain/issues/5031#issuecomment-1562577947). The issue was identified as an `AttributeError` raised when calling `update_document` due to a missing corresponding method in the `Collection` object. This fix refactors the `update_document` method in `Chroma` to correctly interact with the `Collection` object. ## Changes 1. Fixed the `update_document` method in the `Chroma` class to correctly call methods on the `Collection` object. 2. Added the corresponding test `test_chroma_update_document` in `tests/integration_tests/vectorstores/test_chroma.py` to reflect the updated method call. 3. Added an example and explanation of how to use the `update_document` function in the Jupyter notebook tutorial for Chroma. ## Test Plan All existing tests pass after this change. In addition, the `test_chroma_update_document` test case now correctly checks the functionality of `update_document`, ensuring that the function works as expected and updates the content of documents correctly. ## Reviewers @dev2049 This fix will ensure that users are able to use the `update_document` function as expected, without encountering the previous `AttributeError`. This will enhance the usability and reliability of the Chroma class for all users. Thank you for considering this pull request. I look forward to your feedback and suggestions.
2023-05-29 13:39:25 +00:00
def test_chroma_update_document() -> None:
"""Test the update_document function in the Chroma class."""
# Make a consistent embedding
embedding = ConsistentFakeEmbeddings()
Fix update_document function, add test and documentation. (#5359) # Fix for `update_document` Function in Chroma ## Summary This pull request addresses an issue with the `update_document` function in the Chroma class, as described in [#5031](https://github.com/hwchase17/langchain/issues/5031#issuecomment-1562577947). The issue was identified as an `AttributeError` raised when calling `update_document` due to a missing corresponding method in the `Collection` object. This fix refactors the `update_document` method in `Chroma` to correctly interact with the `Collection` object. ## Changes 1. Fixed the `update_document` method in the `Chroma` class to correctly call methods on the `Collection` object. 2. Added the corresponding test `test_chroma_update_document` in `tests/integration_tests/vectorstores/test_chroma.py` to reflect the updated method call. 3. Added an example and explanation of how to use the `update_document` function in the Jupyter notebook tutorial for Chroma. ## Test Plan All existing tests pass after this change. In addition, the `test_chroma_update_document` test case now correctly checks the functionality of `update_document`, ensuring that the function works as expected and updates the content of documents correctly. ## Reviewers @dev2049 This fix will ensure that users are able to use the `update_document` function as expected, without encountering the previous `AttributeError`. This will enhance the usability and reliability of the Chroma class for all users. Thank you for considering this pull request. I look forward to your feedback and suggestions.
2023-05-29 13:39:25 +00:00
# Initial document content and id
initial_content = "foo"
document_id = "doc1"
# Create an instance of Document with initial content and metadata
original_doc = Document(page_content=initial_content, metadata={"page": "0"})
# Initialize a Chroma instance with the original document
docsearch = Chroma.from_documents(
collection_name="test_collection",
documents=[original_doc],
embedding=embedding,
Fix update_document function, add test and documentation. (#5359) # Fix for `update_document` Function in Chroma ## Summary This pull request addresses an issue with the `update_document` function in the Chroma class, as described in [#5031](https://github.com/hwchase17/langchain/issues/5031#issuecomment-1562577947). The issue was identified as an `AttributeError` raised when calling `update_document` due to a missing corresponding method in the `Collection` object. This fix refactors the `update_document` method in `Chroma` to correctly interact with the `Collection` object. ## Changes 1. Fixed the `update_document` method in the `Chroma` class to correctly call methods on the `Collection` object. 2. Added the corresponding test `test_chroma_update_document` in `tests/integration_tests/vectorstores/test_chroma.py` to reflect the updated method call. 3. Added an example and explanation of how to use the `update_document` function in the Jupyter notebook tutorial for Chroma. ## Test Plan All existing tests pass after this change. In addition, the `test_chroma_update_document` test case now correctly checks the functionality of `update_document`, ensuring that the function works as expected and updates the content of documents correctly. ## Reviewers @dev2049 This fix will ensure that users are able to use the `update_document` function as expected, without encountering the previous `AttributeError`. This will enhance the usability and reliability of the Chroma class for all users. Thank you for considering this pull request. I look forward to your feedback and suggestions.
2023-05-29 13:39:25 +00:00
ids=[document_id],
)
old_embedding = docsearch._collection.peek()["embeddings"][
docsearch._collection.peek()["ids"].index(document_id)
]
Fix update_document function, add test and documentation. (#5359) # Fix for `update_document` Function in Chroma ## Summary This pull request addresses an issue with the `update_document` function in the Chroma class, as described in [#5031](https://github.com/hwchase17/langchain/issues/5031#issuecomment-1562577947). The issue was identified as an `AttributeError` raised when calling `update_document` due to a missing corresponding method in the `Collection` object. This fix refactors the `update_document` method in `Chroma` to correctly interact with the `Collection` object. ## Changes 1. Fixed the `update_document` method in the `Chroma` class to correctly call methods on the `Collection` object. 2. Added the corresponding test `test_chroma_update_document` in `tests/integration_tests/vectorstores/test_chroma.py` to reflect the updated method call. 3. Added an example and explanation of how to use the `update_document` function in the Jupyter notebook tutorial for Chroma. ## Test Plan All existing tests pass after this change. In addition, the `test_chroma_update_document` test case now correctly checks the functionality of `update_document`, ensuring that the function works as expected and updates the content of documents correctly. ## Reviewers @dev2049 This fix will ensure that users are able to use the `update_document` function as expected, without encountering the previous `AttributeError`. This will enhance the usability and reliability of the Chroma class for all users. Thank you for considering this pull request. I look forward to your feedback and suggestions.
2023-05-29 13:39:25 +00:00
# Define updated content for the document
updated_content = "updated foo"
# Create a new Document instance with the updated content and the same id
updated_doc = Document(page_content=updated_content, metadata={"page": "0"})
# Update the document in the Chroma instance
docsearch.update_document(document_id=document_id, document=updated_doc)
# Perform a similarity search with the updated content
output = docsearch.similarity_search(updated_content, k=1)
# Assert that the updated document is returned by the search
assert output == [Document(page_content=updated_content, metadata={"page": "0"})]
# Assert that the new embedding is correct
new_embedding = docsearch._collection.peek()["embeddings"][
docsearch._collection.peek()["ids"].index(document_id)
]
assert new_embedding == embedding.embed_documents([updated_content])[0]
assert new_embedding != old_embedding