from __future__ import annotations import logging from typing import Any, Dict, List, Optional from langchain_core.embeddings import Embeddings from langchain_community.vectorstores.milvus import Milvus logger = logging.getLogger(__name__) class Zilliz(Milvus): """`Zilliz` vector store. You need to have `pymilvus` installed and a running Zilliz database. See the following documentation for how to run a Zilliz instance: https://docs.zilliz.com/docs/create-cluster IF USING L2/IP metric IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA. Args: embedding_function (Embeddings): Function used to embed the text. collection_name (str): Which Zilliz collection to use. Defaults to "LangChainCollection". connection_args (Optional[dict[str, any]]): The connection args used for this class comes in the form of a dict. consistency_level (str): The consistency level to use for a collection. Defaults to "Session". index_params (Optional[dict]): Which index params to use. Defaults to HNSW/AUTOINDEX depending on service. search_params (Optional[dict]): Which search params to use. Defaults to default of index. drop_old (Optional[bool]): Whether to drop the current collection. Defaults to False. auto_id (bool): Whether to enable auto id for primary key. Defaults to False. If False, you needs to provide text ids (string less than 65535 bytes). If True, Milvus will generate unique integers as primary keys. The connection args used for this class comes in the form of a dict, here are a few of the options: address (str): The actual address of Zilliz instance. Example address: "localhost:19530" uri (str): The uri of Zilliz instance. Example uri: "https://in03-ba4234asae.api.gcp-us-west1.zillizcloud.com", host (str): The host of Zilliz instance. Default at "localhost", PyMilvus will fill in the default host if only port is provided. port (str/int): The port of Zilliz instance. Default at 19530, PyMilvus will fill in the default port if only host is provided. user (str): Use which user to connect to Zilliz instance. If user and password are provided, we will add related header in every RPC call. password (str): Required when user is provided. The password corresponding to the user. token (str): API key, for serverless clusters which can be used as replacements for user and password. secure (bool): Default is false. If set to true, tls will be enabled. client_key_path (str): If use tls two-way authentication, need to write the client.key path. client_pem_path (str): If use tls two-way authentication, need to write the client.pem path. ca_pem_path (str): If use tls two-way authentication, need to write the ca.pem path. server_pem_path (str): If use tls one-way authentication, need to write the server.pem path. server_name (str): If use tls, need to write the common name. Example: .. code-block:: python from langchain_community.vectorstores import Zilliz from langchain_community.embeddings import OpenAIEmbeddings embedding = OpenAIEmbeddings() # Connect to a Zilliz instance milvus_store = Milvus( embedding_function = embedding, collection_name = "LangChainCollection", connection_args = { "uri": "https://in03-ba4234asae.api.gcp-us-west1.zillizcloud.com", "user": "temp", "password": "temp", "token": "temp", # API key as replacements for user and password "secure": True } drop_old: True, ) Raises: ValueError: If the pymilvus python package is not installed. """ def _create_index(self) -> None: """Create a index on the collection""" from pymilvus import Collection, MilvusException if isinstance(self.col, Collection) and self._get_index() is None: try: # If no index params, use a default AutoIndex based one if self.index_params is None: self.index_params = { "metric_type": "L2", "index_type": "AUTOINDEX", "params": {}, } try: self.col.create_index( self._vector_field, index_params=self.index_params, using=self.alias, ) # If default did not work, most likely Milvus self-hosted except MilvusException: # Use HNSW based index self.index_params = { "metric_type": "L2", "index_type": "HNSW", "params": {"M": 8, "efConstruction": 64}, } self.col.create_index( self._vector_field, index_params=self.index_params, using=self.alias, ) logger.debug( "Successfully created an index on collection: %s", self.collection_name, ) except MilvusException as e: logger.error( "Failed to create an index on collection: %s", self.collection_name ) raise e @classmethod def from_texts( cls, texts: List[str], embedding: Embeddings, metadatas: Optional[List[dict]] = None, collection_name: str = "LangChainCollection", connection_args: Optional[Dict[str, Any]] = None, consistency_level: str = "Session", index_params: Optional[dict] = None, search_params: Optional[dict] = None, drop_old: bool = False, *, ids: Optional[List[str]] = None, auto_id: bool = False, **kwargs: Any, ) -> Zilliz: """Create a Zilliz collection, indexes it with HNSW, and insert data. Args: texts (List[str]): Text data. embedding (Embeddings): Embedding function. metadatas (Optional[List[dict]]): Metadata for each text if it exists. Defaults to None. collection_name (str, optional): Collection name to use. Defaults to "LangChainCollection". connection_args (dict[str, Any], optional): Connection args to use. Defaults to DEFAULT_MILVUS_CONNECTION. consistency_level (str, optional): Which consistency level to use. Defaults to "Session". index_params (Optional[dict], optional): Which index_params to use. Defaults to None. search_params (Optional[dict], optional): Which search params to use. Defaults to None. drop_old (Optional[bool], optional): Whether to drop the collection with that name if it exists. Defaults to False. ids (Optional[List[str]]): List of text ids. auto_id (bool): Whether to enable auto id for primary key. Defaults to False. If False, you needs to provide text ids (string less than 65535 bytes). If True, Milvus will generate unique integers as primary keys. Returns: Zilliz: Zilliz Vector Store """ vector_db = cls( embedding_function=embedding, collection_name=collection_name, connection_args=connection_args or {}, consistency_level=consistency_level, index_params=index_params, search_params=search_params, drop_old=drop_old, auto_id=auto_id, **kwargs, ) vector_db.add_texts(texts=texts, metadatas=metadatas, ids=ids) return vector_db