forked from Archives/langchain
Refactor Milvus/Zilliz (#3047)
Refactoring milvus/zilliz to clean up and have a more consistent experience. Signed-off-by: Filip Haltmayer <filip.haltmayer@zilliz.com> (branch: fix_agent_callbacks)
parent
8191c6b81a
commit
215dcc2d26
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,106 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
|
from langchain.embeddings.base import Embeddings
|
||||||
|
from langchain.vectorstores.milvus import Milvus
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Zilliz(Milvus):
    """Zilliz vector store, a hosted variant of Milvus.

    Overrides index creation to prefer Zilliz Cloud's AutoIndex, falling
    back to an HNSW index when AutoIndex is rejected (most likely because
    the server is a self-hosted Milvus instance).
    """

    def _create_index(self) -> None:
        """Create an index on the collection's vector field.

        First tries an AutoIndex-based index (Zilliz Cloud). If the server
        raises a ``MilvusException`` for it, retries with an HNSW index.
        Does nothing when there is no collection or an index already exists.

        Raises:
            MilvusException: If index creation fails with both index types.
        """
        from pymilvus import Collection, MilvusException

        # Only act when connected to a real collection that is not indexed yet.
        if isinstance(self.col, Collection) and self._get_index() is None:
            try:
                # If no index params were supplied, use a default
                # AutoIndex-based one.
                if self.index_params is None:
                    self.index_params = {
                        "metric_type": "L2",
                        "index_type": "AUTOINDEX",
                        "params": {},
                    }

                try:
                    self.col.create_index(
                        self._vector_field,
                        index_params=self.index_params,
                        using=self.alias,
                    )

                # If the default did not work, this is most likely a
                # self-hosted Milvus; fall back to an HNSW-based index.
                except MilvusException:
                    self.index_params = {
                        "metric_type": "L2",
                        "index_type": "HNSW",
                        "params": {"M": 8, "efConstruction": 64},
                    }
                    self.col.create_index(
                        self._vector_field,
                        index_params=self.index_params,
                        using=self.alias,
                    )
                logger.debug(
                    "Successfully created an index on collection: %s",
                    self.collection_name,
                )

            except MilvusException:
                logger.error(
                    "Failed to create an index on collection: %s", self.collection_name
                )
                # Bare raise keeps the original exception and traceback intact.
                raise

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        collection_name: str = "LangChainCollection",
        connection_args: Optional[dict[str, Any]] = None,
        consistency_level: str = "Session",
        index_params: Optional[dict] = None,
        search_params: Optional[dict] = None,
        drop_old: bool = False,
        **kwargs: Any,
    ) -> Zilliz:
        """Create a Zilliz collection, index it, and insert data.

        Args:
            texts (List[str]): Text data.
            embedding (Embeddings): Embedding function.
            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
                Defaults to None.
            collection_name (str, optional): Collection name to use. Defaults to
                "LangChainCollection".
            connection_args (Optional[dict[str, Any]], optional): Connection args
                to use. Defaults to None, which is treated as an empty dict.
            consistency_level (str, optional): Which consistency level to use. Defaults
                to "Session".
            index_params (Optional[dict], optional): Which index_params to use.
                Defaults to None.
            search_params (Optional[dict], optional): Which search params to use.
                Defaults to None.
            drop_old (Optional[bool], optional): Whether to drop the collection with
                that name if it exists. Defaults to False.

        Returns:
            Zilliz: Zilliz Vector Store
        """
        # Avoid a mutable default argument; an empty dict preserves the
        # previous default behavior.
        if connection_args is None:
            connection_args = {}
        vector_db = cls(
            embedding_function=embedding,
            collection_name=collection_name,
            connection_args=connection_args,
            consistency_level=consistency_level,
            index_params=index_params,
            search_params=search_params,
            drop_old=drop_old,
            **kwargs,
        )
        vector_db.add_texts(texts=texts, metadatas=metadatas)
        return vector_db
|
@ -0,0 +1,94 @@
|
|||||||
|
"""Test Zilliz functionality."""
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from langchain.docstore.document import Document
|
||||||
|
from langchain.vectorstores import Zilliz
|
||||||
|
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||||
|
FakeEmbeddings,
|
||||||
|
fake_texts,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _zilliz_from_texts(
    metadatas: Optional[List[dict]] = None, drop: bool = True
) -> Zilliz:
    """Build a Zilliz store from the shared fake texts and embeddings."""
    connection = {
        "uri": "",
        "user": "",
        "password": "",
        "secure": True,
    }
    return Zilliz.from_texts(
        fake_texts,
        FakeEmbeddings(),
        metadatas=metadatas,
        connection_args=connection,
        drop_old=drop,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_zilliz() -> None:
    """Test end to end construction and search."""
    store = _zilliz_from_texts()
    results = store.similarity_search("foo", k=1)
    assert results == [Document(page_content="foo")]
|
||||||
|
|
||||||
|
|
||||||
|
def test_zilliz_with_score() -> None:
    """Test end to end construction and search with scores and IDs."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx in range(len(texts))]
    store = _zilliz_from_texts(metadatas=metadatas)
    results = store.similarity_search_with_score("foo", k=3)
    docs = [doc for doc, _ in results]
    scores = [score for _, score in results]
    expected = [
        Document(page_content="foo", metadata={"page": 0}),
        Document(page_content="bar", metadata={"page": 1}),
        Document(page_content="baz", metadata={"page": 2}),
    ]
    assert docs == expected
    # Scores are distances (L2): closer matches come first.
    assert scores[0] < scores[1] < scores[2]
|
||||||
|
|
||||||
|
|
||||||
|
def test_zilliz_max_marginal_relevance_search() -> None:
    """Test end to end construction and MMR (max marginal relevance) search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = _zilliz_from_texts(metadatas=metadatas)
    output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3)
    assert output == [
        Document(page_content="foo", metadata={"page": 0}),
        Document(page_content="baz", metadata={"page": 2}),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def test_zilliz_add_extra() -> None:
    """Test adding extra texts to an existing collection."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = _zilliz_from_texts(metadatas=metadatas)

    # Insert the same three texts a second time.
    docsearch.add_texts(texts, metadatas)

    # Both batches should be searchable: 3 original + 3 extra.
    output = docsearch.similarity_search("foo", k=10)
    assert len(output) == 6
|
||||||
|
|
||||||
|
|
||||||
|
def test_zilliz_no_drop() -> None:
    """Test reconnecting with drop_old=False keeps previously inserted data."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = _zilliz_from_texts(metadatas=metadatas)
    del docsearch

    # Reconnect without dropping; the second insert adds on top of the first.
    docsearch = _zilliz_from_texts(metadatas=metadatas, drop=False)

    output = docsearch.similarity_search("foo", k=10)
    assert len(output) == 6
|
||||||
|
|
||||||
|
|
||||||
|
# if __name__ == "__main__":
|
||||||
|
# test_zilliz()
|
||||||
|
# test_zilliz_with_score()
|
||||||
|
# test_zilliz_max_marginal_relevance_search()
|
||||||
|
# test_zilliz_add_extra()
|
||||||
|
# test_zilliz_no_drop()
|
Loading…
Reference in New Issue