Refactor Milvus/Zilliz (#3047)

Refactor the Milvus and Zilliz vector stores to clean up the code and provide
a more consistent experience.

Signed-off-by: Filip Haltmayer <filip.haltmayer@zilliz.com>
fix_agent_callbacks
Filip Haltmayer 1 year ago committed by GitHub
parent 8191c6b81a
commit 215dcc2d26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -13,6 +13,7 @@ from langchain.vectorstores.pinecone import Pinecone
from langchain.vectorstores.qdrant import Qdrant from langchain.vectorstores.qdrant import Qdrant
from langchain.vectorstores.supabase import SupabaseVectorStore from langchain.vectorstores.supabase import SupabaseVectorStore
from langchain.vectorstores.weaviate import Weaviate from langchain.vectorstores.weaviate import Weaviate
from langchain.vectorstores.zilliz import Zilliz
__all__ = [ __all__ = [
"ElasticVectorSearch", "ElasticVectorSearch",
@ -22,6 +23,7 @@ __all__ = [
"Weaviate", "Weaviate",
"Qdrant", "Qdrant",
"Milvus", "Milvus",
"Zilliz",
"Chroma", "Chroma",
"OpenSearchVectorSearch", "OpenSearchVectorSearch",
"AtlasDB", "AtlasDB",

File diff suppressed because it is too large Load Diff

@ -0,0 +1,106 @@
from __future__ import annotations
import logging
from typing import Any, List, Optional
from langchain.embeddings.base import Embeddings
from langchain.vectorstores.milvus import Milvus
logger = logging.getLogger(__name__)
class Zilliz(Milvus):
    """Vector store for Zilliz, built on top of the ``Milvus`` vector store.

    Overrides index creation and the ``from_texts`` constructor; everything
    else is inherited from ``Milvus``.
    """

    def _create_index(self) -> None:
        """Create an index on the collection if it does not have one yet.

        Nothing happens unless ``self.col`` is a ``Collection`` and
        ``self._get_index()`` returns ``None``. When ``self.index_params`` is
        unset, an ``AUTOINDEX`` configuration with the ``L2`` metric is used.
        If index creation fails, it is retried once with an HNSW configuration.

        Raises:
            MilvusException: If the index cannot be created even with the
                HNSW fallback.
        """
        from pymilvus import Collection, MilvusException

        if isinstance(self.col, Collection) and self._get_index() is None:
            try:
                # If no index params, use a default AutoIndex based one
                if self.index_params is None:
                    self.index_params = {
                        "metric_type": "L2",
                        "index_type": "AUTOINDEX",
                        "params": {},
                    }

                try:
                    self.col.create_index(
                        self._vector_field,
                        index_params=self.index_params,
                        using=self.alias,
                    )

                # If default did not work, most likely Milvus self-hosted
                # NOTE(review): this fallback also replaces caller-supplied
                # index_params when the first attempt fails -- confirm intended.
                except MilvusException:
                    # Use HNSW based index
                    self.index_params = {
                        "metric_type": "L2",
                        "index_type": "HNSW",
                        "params": {"M": 8, "efConstruction": 64},
                    }
                    self.col.create_index(
                        self._vector_field,
                        index_params=self.index_params,
                        using=self.alias,
                    )
                logger.debug(
                    "Successfully created an index on collection: %s",
                    self.collection_name,
                )

            except MilvusException:
                logger.error(
                    "Failed to create an index on collection: %s", self.collection_name
                )
                # Bare raise keeps the original exception and traceback intact.
                raise

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        collection_name: str = "LangChainCollection",
        connection_args: Optional[dict[str, Any]] = None,
        consistency_level: str = "Session",
        index_params: Optional[dict] = None,
        search_params: Optional[dict] = None,
        drop_old: bool = False,
        **kwargs: Any,
    ) -> Zilliz:
        """Create a Zilliz vector store and insert the given texts.

        Args:
            texts (List[str]): Text data.
            embedding (Embeddings): Embedding function.
            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
                Defaults to None.
            collection_name (str, optional): Collection name to use. Defaults to
                "LangChainCollection".
            connection_args (Optional[dict[str, Any]], optional): Connection args
                to use. Defaults to None, which is passed on as a new empty dict.
            consistency_level (str, optional): Which consistency level to use.
                Defaults to "Session".
            index_params (Optional[dict], optional): Which index_params to use.
                Defaults to None.
            search_params (Optional[dict], optional): Which search params to use.
                Defaults to None.
            drop_old (Optional[bool], optional): Whether to drop the collection with
                that name if it exists. Defaults to False.
            **kwargs (Any): Extra keyword arguments forwarded to the constructor.

        Returns:
            Zilliz: Zilliz Vector Store
        """
        # Build a fresh dict per call instead of sharing one mutable default
        # object across every invocation.
        if connection_args is None:
            connection_args = {}

        vector_db = cls(
            embedding_function=embedding,
            collection_name=collection_name,
            connection_args=connection_args,
            consistency_level=consistency_level,
            index_params=index_params,
            search_params=search_params,
            drop_old=drop_old,
            **kwargs,
        )
        vector_db.add_texts(texts=texts, metadatas=metadatas)
        return vector_db

@ -9,12 +9,15 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
) )
def _milvus_from_texts(
    metadatas: Optional[List[dict]] = None, drop: bool = True
) -> Milvus:
    """Build a Milvus store from the shared fake texts and fake embeddings.

    Connects to 127.0.0.1:19530.

    Args:
        metadatas: Optional metadata passed to ``Milvus.from_texts``.
        drop: Forwarded as ``drop_old``.

    Returns:
        The ``Milvus`` store returned by ``Milvus.from_texts``.
    """
    return Milvus.from_texts(
        fake_texts,
        FakeEmbeddings(),
        metadatas=metadatas,
        connection_args={"host": "127.0.0.1", "port": "19530"},
        drop_old=drop,
    )
@ -51,3 +54,36 @@ def test_milvus_max_marginal_relevance_search() -> None:
Document(page_content="foo", metadata={"page": 0}), Document(page_content="foo", metadata={"page": 0}),
Document(page_content="baz", metadata={"page": 2}), Document(page_content="baz", metadata={"page": 2}),
] ]
def test_milvus_add_extra() -> None:
    """Test that texts added after construction are returned by a search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx, _ in enumerate(texts)]

    store = _milvus_from_texts(metadatas=metadatas)
    # Insert a second batch on top of what the helper already stored.
    store.add_texts(texts, metadatas)

    hits = store.similarity_search("foo", k=10)
    assert len(hits) == 6
def test_milvus_no_drop() -> None:
    """Test that rebuilding with drop=False keeps earlier documents."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx, _ in enumerate(texts)]

    # First build uses the helper's default drop=True.
    docsearch = _milvus_from_texts(metadatas=metadatas)
    del docsearch

    # Second build must not drop the existing collection.
    docsearch = _milvus_from_texts(metadatas=metadatas, drop=False)
    hits = docsearch.similarity_search("foo", k=10)
    assert len(hits) == 6
# if __name__ == "__main__":
# test_milvus()
# test_milvus_with_score()
# test_milvus_max_marginal_relevance_search()
# test_milvus_add_extra()
# test_milvus_no_drop()

@ -0,0 +1,94 @@
"""Test Zilliz functionality."""
from typing import List, Optional
from langchain.docstore.document import Document
from langchain.vectorstores import Zilliz
from tests.integration_tests.vectorstores.fake_embeddings import (
FakeEmbeddings,
fake_texts,
)
def _zilliz_from_texts(
    metadatas: Optional[List[dict]] = None, drop: bool = True
) -> Zilliz:
    """Build a Zilliz store from the shared fake texts and fake embeddings.

    Args:
        metadatas: Optional metadata passed to ``Zilliz.from_texts``.
        drop: Forwarded as ``drop_old``.

    Returns:
        The ``Zilliz`` store returned by ``Zilliz.from_texts``.
    """
    # The uri/user/password values are blank here; presumably they must be
    # filled in before running against a real instance -- TODO confirm.
    connection_args = {
        "uri": "",
        "user": "",
        "password": "",
        "secure": True,
    }
    embeddings = FakeEmbeddings()
    return Zilliz.from_texts(
        fake_texts,
        embeddings,
        metadatas=metadatas,
        connection_args=connection_args,
        drop_old=drop,
    )
def test_zilliz() -> None:
    """Test construction followed by a single-result similarity search."""
    store = _zilliz_from_texts()
    top_hits = store.similarity_search("foo", k=1)
    expected = [Document(page_content="foo")]
    assert top_hits == expected
def test_zilliz_with_score() -> None:
    """Test construction followed by a similarity search that returns scores."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx, _ in enumerate(texts)]
    store = _zilliz_from_texts(metadatas=metadatas)

    scored_hits = store.similarity_search_with_score("foo", k=3)
    # Each hit is indexed as (document, score).
    docs = [hit[0] for hit in scored_hits]
    scores = [hit[1] for hit in scored_hits]

    expected_docs = [
        Document(page_content="foo", metadata={"page": 0}),
        Document(page_content="bar", metadata={"page": 1}),
        Document(page_content="baz", metadata={"page": 2}),
    ]
    assert docs == expected_docs
    # Scores must be strictly increasing in result order.
    assert scores[0] < scores[1] and scores[1] < scores[2]
def test_zilliz_max_marginal_relevance_search() -> None:
    """Test construction followed by a max marginal relevance search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx, _ in enumerate(texts)]
    store = _zilliz_from_texts(metadatas=metadatas)

    selected = store.max_marginal_relevance_search("foo", k=2, fetch_k=3)

    expected = [
        Document(page_content="foo", metadata={"page": 0}),
        Document(page_content="baz", metadata={"page": 2}),
    ]
    assert selected == expected
def test_zilliz_add_extra() -> None:
    """Test that texts added after construction are returned by a search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx, _ in enumerate(texts)]

    store = _zilliz_from_texts(metadatas=metadatas)
    # Insert a second batch on top of what the helper already stored.
    store.add_texts(texts, metadatas)

    hits = store.similarity_search("foo", k=10)
    assert len(hits) == 6
def test_zilliz_no_drop() -> None:
    """Test that rebuilding with drop=False keeps earlier documents."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx, _ in enumerate(texts)]

    # First build uses the helper's default drop=True.
    docsearch = _zilliz_from_texts(metadatas=metadatas)
    del docsearch

    # Second build must not drop the existing collection.
    docsearch = _zilliz_from_texts(metadatas=metadatas, drop=False)
    hits = docsearch.similarity_search("foo", k=10)
    assert len(hits) == 6
# if __name__ == "__main__":
# test_zilliz()
# test_zilliz_with_score()
# test_zilliz_max_marginal_relevance_search()
# test_zilliz_add_extra()
# test_zilliz_no_drop()
Loading…
Cancel
Save