langchain/libs/community/langchain_community/vectorstores/vearch.py

from __future__ import annotations

import os
import time
import uuid
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type

import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

if TYPE_CHECKING:
    import vearch

DEFAULT_TOPN = 4


class Vearch(VectorStore):
    _DEFAULT_TABLE_NAME = "langchain_vearch"
    _DEFAULT_CLUSTER_DB_NAME = "cluster_client_db"
    _DEFAULT_VERSION = 1

    def __init__(
        self,
        embedding_function: Embeddings,
        path_or_url: Optional[str] = None,
        table_name: str = _DEFAULT_TABLE_NAME,
        db_name: str = _DEFAULT_CLUSTER_DB_NAME,
        flag: int = _DEFAULT_VERSION,
        **kwargs: Any,
    ) -> None:
        """Initialize vearch vector store
        flag 1 for cluster,0 for standalone
        """
        try:
            if flag:
                import vearch_cluster
            else:
                import vearch
        except ImportError:
            raise ValueError(
                "Could not import suitable python package. "
                "Please install it with `pip install vearch or vearch_cluster`."
            )

        if flag:
            if path_or_url is None:
                raise ValueError("Please input url of cluster")
            if not db_name:
                db_name = self._DEFAULT_CLUSTER_DB_NAME
                db_name += "_"
                db_name += str(uuid.uuid4()).split("-")[-1]
            self.using_db_name = db_name
            self.url = path_or_url
            self.vearch = vearch_cluster.VearchCluster(path_or_url)

        else:
            if path_or_url is None:
                metadata_path = os.getcwd().replace("\\", "/")
            else:
                metadata_path = path_or_url
            if not os.path.isdir(metadata_path):
                os.makedirs(metadata_path)
            log_path = os.path.join(metadata_path, "log")
            if not os.path.isdir(log_path):
                os.makedirs(log_path)
            self.vearch = vearch.Engine(metadata_path, log_path)
            self.using_metapath = metadata_path
        if not table_name:
            table_name = self._DEFAULT_TABLE_NAME
            table_name += "_"
            table_name += str(uuid.uuid4()).split("-")[-1]
        self.using_table_name = table_name
        self.embedding_func = embedding_function
        self.flag = flag

    @property
    def embeddings(self) -> Optional[Embeddings]:
        return self.embedding_func

    @classmethod
    def from_documents(
        cls: Type[Vearch],
        documents: List[Document],
        embedding: Embeddings,
        path_or_url: Optional[str] = None,
        table_name: str = _DEFAULT_TABLE_NAME,
        db_name: str = _DEFAULT_CLUSTER_DB_NAME,
        flag: int = _DEFAULT_VERSION,
        **kwargs: Any,
    ) -> Vearch:
        """Return Vearch VectorStore"""

        texts = [d.page_content for d in documents]
        metadatas = [d.metadata for d in documents]

        return cls.from_texts(
            texts=texts,
            embedding=embedding,
            metadatas=metadatas,
            path_or_url=path_or_url,
            table_name=table_name,
            db_name=db_name,
            flag=flag,
            **kwargs,
        )

    @classmethod
    def from_texts(
        cls: Type[Vearch],
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        path_or_url: Optional[str] = None,
        table_name: str = _DEFAULT_TABLE_NAME,
        db_name: str = _DEFAULT_CLUSTER_DB_NAME,
        flag: int = _DEFAULT_VERSION,
        **kwargs: Any,
    ) -> Vearch:
        """Return Vearch VectorStore"""

        vearch_db = cls(
            embedding_function=embedding,
            embedding=embedding,
            path_or_url=path_or_url,
            db_name=db_name,
            table_name=table_name,
            flag=flag,
        )
        vearch_db.add_texts(texts=texts, metadatas=metadatas)
        return vearch_db

    def _create_table(
        self,
        dim: int = 1024,
        field_list: List[dict] = [
            {"field": "text", "type": "str"},
            {"field": "metadata", "type": "str"},
        ],
    ) -> int:
        """
        Create VectorStore Table
        Args:
            dim:dimension of vector
            fields_list: the field you want to store
        Return:
            code,0 for success,1 for failed
        """

        type_dict = {"int": vearch.dataType.INT, "str": vearch.dataType.STRING}
        engine_info = {
            "index_size": 10000,
            "retrieval_type": "IVFPQ",
            "retrieval_param": {"ncentroids": 2048, "nsubvector": 32},
        }
        fields = [
            vearch.GammaFieldInfo(fi["field"], type_dict[fi["type"]])
            for fi in field_list
        ]
        vector_field = vearch.GammaVectorInfo(
            name="text_embedding",
            type=vearch.dataType.VECTOR,
            is_index=True,
            dimension=dim,
            model_id="",
            store_type="MemoryOnly",
            store_param={"cache_size": 10000},
            has_source=False,
        )
        response_code = self.vearch.create_table(
            engine_info,
            name=self.using_table_name,
            fields=fields,
            vector_field=vector_field,
        )
        return response_code

    def _create_space(
        self,
        dim: int = 1024,
    ) -> int:
        """
        Create VectorStore space
        Args:
            dim:dimension of vector
        Return:
            code,0 failed for ,1 for success
        """
        space_config = {
            "name": self.using_table_name,
            "partition_num": 1,
            "replica_num": 1,
            "engine": {
                "name": "gamma",
                "index_size": 1,
                "retrieval_type": "FLAT",
                "retrieval_param": {
                    "metric_type": "L2",
                },
            },
            "properties": {
                "text": {
                    "type": "string",
                },
                "metadata": {
                    "type": "string",
                },
                "text_embedding": {
                    "type": "vector",
                    "index": True,
                    "dimension": dim,
                    "store_type": "MemoryOnly",
                },
            },
        }
        response_code = self.vearch.create_space(self.using_db_name, space_config)

        return response_code

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """
        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        embeddings = None
        if self.embedding_func is not None:
            embeddings = self.embedding_func.embed_documents(list(texts))
        if embeddings is None:
            raise ValueError("embeddings is None")
        if self.flag:
            dbs_list = self.vearch.list_dbs()
            if self.using_db_name not in dbs_list:
                create_db_code = self.vearch.create_db(self.using_db_name)
                if not create_db_code:
                    raise ValueError("create db failed!!!")
            space_list = self.vearch.list_spaces(self.using_db_name)
            if self.using_table_name not in space_list:
                create_space_code = self._create_space(len(embeddings[0]))
                if not create_space_code:
                    raise ValueError("create space failed!!!")
            docid = []
            if embeddings is not None and metadatas is not None:
                for text, metadata, embed in zip(texts, metadatas, embeddings):
                    profiles: dict[str, Any] = {}
                    profiles["text"] = text
                    profiles["metadata"] = metadata["source"]
                    embed_np = np.array(embed)
                    profiles["text_embedding"] = {
                        "feature": (embed_np / np.linalg.norm(embed_np)).tolist()
                    }
                    insert_res = self.vearch.insert_one(
                        self.using_db_name, self.using_table_name, profiles
                    )
                    if insert_res["status"] == 200:
                        docid.append(insert_res["_id"])
                        continue
                    else:
                        retry_insert = self.vearch.insert_one(
                            self.using_db_name, self.using_table_name, profiles
                        )
                        docid.append(retry_insert["_id"])
                        continue
        else:
            table_path = os.path.join(
                self.using_metapath, self.using_table_name + ".schema"
            )
            if not os.path.exists(table_path):
                dim = len(embeddings[0])
                response_code = self._create_table(dim)
                if response_code:
                    raise ValueError("create table failed!!!")
            if embeddings is not None and metadatas is not None:
                doc_items = []
                for text, metadata, embed in zip(texts, metadatas, embeddings):
                    profiles_v: dict[str, Any] = {}
                    profiles_v["text"] = text
                    profiles_v["metadata"] = metadata["source"]
                    embed_np = np.array(embed)
                    profiles_v["text_embedding"] = embed_np / np.linalg.norm(embed_np)
                    doc_items.append(profiles_v)

                docid = self.vearch.add(doc_items)
                t_time = 0
                while len(docid) != len(embeddings):
                    time.sleep(0.5)
                    if t_time > 6:
                        break
                    t_time += 1
                self.vearch.dump()
        return docid

    def _load(self) -> None:
        """
        load vearch engine for standalone vearch
        """
        self.vearch.load()

    @classmethod
    def load_local(
        cls,
        embedding: Embeddings,
        path_or_url: Optional[str] = None,
        table_name: str = _DEFAULT_TABLE_NAME,
        db_name: str = _DEFAULT_CLUSTER_DB_NAME,
        flag: int = _DEFAULT_VERSION,
        **kwargs: Any,
    ) -> Vearch:
        """Load the local specified table of standalone vearch.
        Returns:
            Success or failure of loading the local specified table
        """
        if not path_or_url:
            raise ValueError("No metadata path!!!")
        if not table_name:
            raise ValueError("No table name!!!")
        table_path = os.path.join(path_or_url, table_name + ".schema")
        if not os.path.exists(table_path):
            raise ValueError("vearch vectorbase table not exist!!!")

        vearch_db = cls(
            embedding_function=embedding,
            path_or_url=path_or_url,
            table_name=table_name,
            db_name=db_name,
            flag=flag,
        )
        vearch_db._load()
        return vearch_db

    def similarity_search(
        self,
        query: str,
        k: int = DEFAULT_TOPN,
        **kwargs: Any,
    ) -> List[Document]:
        """
        Return docs most similar to query.

        """
        if self.embedding_func is None:
            raise ValueError("embedding_func is None!!!")
        embeddings = self.embedding_func.embed_query(query)
        docs = self.similarity_search_by_vector(embeddings, k)
        return docs

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = DEFAULT_TOPN,
        **kwargs: Any,
    ) -> List[Document]:
        """Return the k documents closest to the given embedding vector.

        The vector is L2-normalized before it is sent to the engine.

        Args:
            embedding: Embedding vector of the query.
            k: Number of documents to request.

        Returns:
            One Document per hit, with the stored ``text`` as page content and
            the stored ``metadata`` value under the ``"source"`` key.
        """
        raw_vector = np.array(embedding)
        unit_vector = raw_vector / np.linalg.norm(raw_vector)
        cluster_mode = self.flag

        if cluster_mode:
            request = {
                "query": {
                    "sum": [
                        {
                            "field": "text_embedding",
                            "feature": unit_vector.tolist(),
                        }
                    ],
                },
                "size": k,
                "fields": ["text", "metadata"],
            }
            response = self.vearch.search(
                self.using_db_name, self.using_table_name, request
            )
            hits = response["hits"]["hits"]
        else:
            request = {
                "vector": [
                    {
                        "field": "text_embedding",
                        "feature": unit_vector,
                    }
                ],
                "fields": [],
                "is_brute_search": 1,
                "retrieval_param": {"metric_type": "InnerProduct", "nprobe": 20},
                "topn": k,
            }
            response = self.vearch.search(request)
            hits = response[0]["result_items"]

        documents = []
        for hit in hits:
            # Cluster hits nest the stored fields under "_source".
            stored = hit["_source"] if cluster_mode else hit
            page_text = ""
            meta: Dict[str, Any] = {}
            for name in stored:
                if name == "text":
                    page_text = stored[name]
                elif name == "metadata":
                    meta["source"] = stored[name]
            documents.append(Document(page_content=page_text, metadata=meta))
        return documents

    def similarity_search_with_score(
        self,
        query: str,
        k: int = DEFAULT_TOPN,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return the k documents most similar to the query, with scores.

        The query is embedded and L2-normalized before it is sent to the
        engine.

        Args:
            query: Text to search for.
            k: Number of documents to request.

        Returns:
            ``(Document, score)`` pairs, one per hit. In cluster mode the
            score is the hit's ``"_score"``; in standalone mode it is the
            hit's ``"score"`` entry, or 0.0 when the engine returns none.

        Raises:
            ValueError: If no embedding function is set.
        """
        if self.embedding_func is None:
            raise ValueError("embedding_func is None!!!")
        embed = np.array(self.embedding_func.embed_query(query))
        unit_vector = embed / np.linalg.norm(embed)
        if self.flag:
            query_data = {
                "query": {
                    "sum": [
                        {
                            "field": "text_embedding",
                            "feature": unit_vector.tolist(),
                        }
                    ],
                },
                "size": k,
                "fields": ["text_embedding", "text", "metadata"],
            }
            query_result = self.vearch.search(
                self.using_db_name, self.using_table_name, query_data
            )
            res = query_result["hits"]["hits"]
        else:
            query_data = {
                "vector": [
                    {
                        "field": "text_embedding",
                        "feature": unit_vector,
                    }
                ],
                "fields": [],
                "is_brute_search": 1,
                "retrieval_param": {"metric_type": "InnerProduct", "nprobe": 20},
                "topn": k,
            }
            query_result = self.vearch.search(query_data)
            res = query_result[0]["result_items"]

        results: List[Tuple[Document, float]] = []
        for item in res:
            if self.flag:
                score = item["_score"]
                fields = item["_source"]
            else:
                fields = item
                # Reset per hit so a missing "score" can neither raise
                # UnboundLocalError nor reuse the previous hit's score.
                score = fields["score"] if "score" in fields else 0.0
            content = fields["text"] if "text" in fields else ""
            meta_data: Dict[str, Any] = {}
            if "metadata" in fields:
                meta_data["source"] = fields["metadata"]
            results.append(
                (Document(page_content=content, metadata=meta_data), score)
            )
        return results

    def _similarity_search_with_relevance_scores(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        return self.similarity_search_with_score(query, k, **kwargs)

    def delete(
        self,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Optional[bool]:
        """Delete the documents which have the specified ids.

        Args:
            ids: The ids of the embedding vectors.
            **kwargs: Other keyword arguments that subclasses might use.
        Returns:
            Optional[bool]: True if deletion is successful.
            False otherwise, None if not implemented.
        """

        ret: Optional[bool] = None
        tmp_res = []
        if ids is None or ids.__len__() == 0:
            return ret
        for _id in ids:
            if self.flag:
                ret = self.vearch.delete(self.using_db_name, self.using_table_name, _id)
            else:
                ret = self.vearch.del_doc(_id)
            tmp_res.append(ret)
        ret = all(i == 0 for i in tmp_res)
        return ret

    def get(
        self,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Dict[str, Document]:
        """Return the stored documents for the given ids.

        Args:
            ids: Ids of the documents to fetch.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Mapping from document id to Document. Ids that are not found are
            omitted; the mapping is empty when ``ids`` is None or empty.
        """
        found: Dict[str, Document] = {}
        if not ids:
            return found

        if self.flag:
            # Cluster mode: one batched lookup for all ids.
            records = self.vearch.mget_by_ids(
                self.using_db_name,
                self.using_table_name,
                {"query": {"ids": ids}},
            )
            for record in records:
                if record["found"] is False:
                    continue
                stored = record["_source"]
                page_text = stored["text"] if "text" in stored else ""
                meta: Dict[str, Any] = {}
                if "metadata" in stored:
                    meta["source"] = stored["metadata"]
                found[record["_id"]] = Document(page_content=page_text, metadata=meta)
            return found

        # Standalone mode: one lookup per id.
        for doc_id in ids:
            detail = self.vearch.get_doc_by_id(doc_id)
            if detail == {}:
                continue
            page_text = detail["text"] if "text" in detail else ""
            meta = {}
            if "metadata" in detail:
                meta["source"] = detail["metadata"]
            found[detail["_id"]] = Document(page_content=page_text, metadata=meta)
        return found
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv 
langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes 2023-12-11 21:53:30 +00:00			`from __future__ import annotations`

			`import os`
			`import time`
			`import uuid`
			`from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type`

			`import numpy as np`
			`from langchain_core.documents import Document`
			`from langchain_core.embeddings import Embeddings`
			`from langchain_core.vectorstores import VectorStore`

			`if TYPE_CHECKING:`
			`import vearch`

			`DEFAULT_TOPN = 4`


			`class Vearch(VectorStore):`
			`_DEFAULT_TABLE_NAME = "langchain_vearch"`
			`_DEFAULT_CLUSTER_DB_NAME = "cluster_client_db"`
			`_DEFAULT_VERSION = 1`

			`def __init__(`
			`self,`
			`embedding_function: Embeddings,`
			`path_or_url: Optional[str] = None,`
			`table_name: str = _DEFAULT_TABLE_NAME,`
			`db_name: str = _DEFAULT_CLUSTER_DB_NAME,`
			`flag: int = _DEFAULT_VERSION,`
			`**kwargs: Any,`
			`) -> None:`
			`"""Initialize vearch vector store`
			`flag 1 for cluster,0 for standalone`
			`"""`
			`try:`
			`if flag:`
			`import vearch_cluster`
			`else:`
			`import vearch`
			`except ImportError:`
			`raise ValueError(`
			`"Could not import suitable python package. "`
			"Please install it with `pip install vearch or vearch_cluster`."
			`)`

			`if flag:`
			`if path_or_url is None:`
			`raise ValueError("Please input url of cluster")`
			`if not db_name:`
			`db_name = self._DEFAULT_CLUSTER_DB_NAME`
			`db_name += "_"`
			`db_name += str(uuid.uuid4()).split("-")[-1]`
			`self.using_db_name = db_name`
			`self.url = path_or_url`
			`self.vearch = vearch_cluster.VearchCluster(path_or_url)`

			`else:`
			`if path_or_url is None:`
			`metadata_path = os.getcwd().replace("\\", "/")`
			`else:`
			`metadata_path = path_or_url`
			`if not os.path.isdir(metadata_path):`
			`os.makedirs(metadata_path)`
			`log_path = os.path.join(metadata_path, "log")`
			`if not os.path.isdir(log_path):`
			`os.makedirs(log_path)`
			`self.vearch = vearch.Engine(metadata_path, log_path)`
			`self.using_metapath = metadata_path`
			`if not table_name:`
			`table_name = self._DEFAULT_TABLE_NAME`
			`table_name += "_"`
			`table_name += str(uuid.uuid4()).split("-")[-1]`
			`self.using_table_name = table_name`
			`self.embedding_func = embedding_function`
			`self.flag = flag`

			`@property`
			`def embeddings(self) -> Optional[Embeddings]:`
			`return self.embedding_func`

			`@classmethod`
			`def from_documents(`
			`cls: Type[Vearch],`
			`documents: List[Document],`
			`embedding: Embeddings,`
			`path_or_url: Optional[str] = None,`
			`table_name: str = _DEFAULT_TABLE_NAME,`
			`db_name: str = _DEFAULT_CLUSTER_DB_NAME,`
			`flag: int = _DEFAULT_VERSION,`
			`**kwargs: Any,`
			`) -> Vearch:`
			`"""Return Vearch VectorStore"""`

			`texts = [d.page_content for d in documents]`
			`metadatas = [d.metadata for d in documents]`

			`return cls.from_texts(`
			`texts=texts,`
			`embedding=embedding,`
			`metadatas=metadatas,`
			`path_or_url=path_or_url,`
			`table_name=table_name,`
			`db_name=db_name,`
			`flag=flag,`
			`**kwargs,`
			`)`

			`@classmethod`
			`def from_texts(`
			`cls: Type[Vearch],`
			`texts: List[str],`
			`embedding: Embeddings,`
			`metadatas: Optional[List[dict]] = None,`
			`path_or_url: Optional[str] = None,`
			`table_name: str = _DEFAULT_TABLE_NAME,`
			`db_name: str = _DEFAULT_CLUSTER_DB_NAME,`
			`flag: int = _DEFAULT_VERSION,`
			`**kwargs: Any,`
			`) -> Vearch:`
			`"""Return Vearch VectorStore"""`

			`vearch_db = cls(`
			`embedding_function=embedding,`
			`embedding=embedding,`
			`path_or_url=path_or_url,`
			`db_name=db_name,`
			`table_name=table_name,`
			`flag=flag,`
			`)`
			`vearch_db.add_texts(texts=texts, metadatas=metadatas)`
			`return vearch_db`

			`def _create_table(`
			`self,`
			`dim: int = 1024,`
			`field_list: List[dict] = [`
			`{"field": "text", "type": "str"},`
			`{"field": "metadata", "type": "str"},`
			`],`
			`) -> int:`
			`"""`
			`Create VectorStore Table`
			`Args:`
			`dim:dimension of vector`
			`fields_list: the field you want to store`
			`Return:`
			`code,0 for success,1 for failed`
			`"""`

			`type_dict = {"int": vearch.dataType.INT, "str": vearch.dataType.STRING}`
			`engine_info = {`
			`"index_size": 10000,`
			`"retrieval_type": "IVFPQ",`
			`"retrieval_param": {"ncentroids": 2048, "nsubvector": 32},`
			`}`
			`fields = [`
			`vearch.GammaFieldInfo(fi["field"], type_dict[fi["type"]])`
			`for fi in field_list`
			`]`
			`vector_field = vearch.GammaVectorInfo(`
			`name="text_embedding",`
			`type=vearch.dataType.VECTOR,`
			`is_index=True,`
			`dimension=dim,`
			`model_id="",`
			`store_type="MemoryOnly",`
			`store_param={"cache_size": 10000},`
			`has_source=False,`
			`)`
			`response_code = self.vearch.create_table(`
			`engine_info,`
			`name=self.using_table_name,`
			`fields=fields,`
			`vector_field=vector_field,`
			`)`
			`return response_code`

			`def _create_space(`
			`self,`
			`dim: int = 1024,`
			`) -> int:`
			`"""`
			`Create VectorStore space`
			`Args:`
			`dim:dimension of vector`
			`Return:`
			`code,0 failed for ,1 for success`
			`"""`
			`space_config = {`
			`"name": self.using_table_name,`
			`"partition_num": 1,`
			`"replica_num": 1,`
			`"engine": {`
			`"name": "gamma",`
			`"index_size": 1,`
			`"retrieval_type": "FLAT",`
			`"retrieval_param": {`
			`"metric_type": "L2",`
			`},`
			`},`
			`"properties": {`
			`"text": {`
			`"type": "string",`
			`},`
			`"metadata": {`
			`"type": "string",`
			`},`
			`"text_embedding": {`
			`"type": "vector",`
			`"index": True,`
			`"dimension": dim,`
			`"store_type": "MemoryOnly",`
			`},`
			`},`
			`}`
			`response_code = self.vearch.create_space(self.using_db_name, space_config)`

			`return response_code`

			`def add_texts(`
			`self,`
			`texts: Iterable[str],`
			`metadatas: Optional[List[dict]] = None,`
			`**kwargs: Any,`
			`) -> List[str]:`
			`"""`
			`Returns:`
			`List of ids from adding the texts into the vectorstore.`
			`"""`
			`embeddings = None`
			`if self.embedding_func is not None:`
			`embeddings = self.embedding_func.embed_documents(list(texts))`
			`if embeddings is None:`
			`raise ValueError("embeddings is None")`
			`if self.flag:`
			`dbs_list = self.vearch.list_dbs()`
			`if self.using_db_name not in dbs_list:`
			`create_db_code = self.vearch.create_db(self.using_db_name)`
			`if not create_db_code:`
			`raise ValueError("create db failed!!!")`
			`space_list = self.vearch.list_spaces(self.using_db_name)`
			`if self.using_table_name not in space_list:`
			`create_space_code = self._create_space(len(embeddings[0]))`
			`if not create_space_code:`
			`raise ValueError("create space failed!!!")`
			`docid = []`
			`if embeddings is not None and metadatas is not None:`
			`for text, metadata, embed in zip(texts, metadatas, embeddings):`
			`profiles: dict[str, Any] = {}`
			`profiles["text"] = text`
			`profiles["metadata"] = metadata["source"]`
			`embed_np = np.array(embed)`
			`profiles["text_embedding"] = {`
			`"feature": (embed_np / np.linalg.norm(embed_np)).tolist()`
			`}`
			`insert_res = self.vearch.insert_one(`
			`self.using_db_name, self.using_table_name, profiles`
			`)`
			`if insert_res["status"] == 200:`
			`docid.append(insert_res["_id"])`
			`continue`
			`else:`
			`retry_insert = self.vearch.insert_one(`
			`self.using_db_name, self.using_table_name, profiles`
			`)`
			`docid.append(retry_insert["_id"])`
			`continue`
			`else:`
			`table_path = os.path.join(`
			`self.using_metapath, self.using_table_name + ".schema"`
			`)`
			`if not os.path.exists(table_path):`
			`dim = len(embeddings[0])`
			`response_code = self._create_table(dim)`
			`if response_code:`
			`raise ValueError("create table failed!!!")`
			`if embeddings is not None and metadatas is not None:`
			`doc_items = []`
			`for text, metadata, embed in zip(texts, metadatas, embeddings):`
			`profiles_v: dict[str, Any] = {}`
			`profiles_v["text"] = text`
			`profiles_v["metadata"] = metadata["source"]`
			`embed_np = np.array(embed)`
			`profiles_v["text_embedding"] = embed_np / np.linalg.norm(embed_np)`
			`doc_items.append(profiles_v)`

			`docid = self.vearch.add(doc_items)`
			`t_time = 0`
			`while len(docid) != len(embeddings):`
			`time.sleep(0.5)`
			`if t_time > 6:`
			`break`
			`t_time += 1`
			`self.vearch.dump()`
			`return docid`

			`def _load(self) -> None:`
			`"""`
			`load vearch engine for standalone vearch`
			`"""`
			`self.vearch.load()`

			@classmethod
			def load_local(
			    cls,
			    embedding: Embeddings,
			    path_or_url: Optional[str] = None,
			    table_name: str = _DEFAULT_TABLE_NAME,
			    db_name: str = _DEFAULT_CLUSTER_DB_NAME,
			    flag: int = _DEFAULT_VERSION,
			    **kwargs: Any,
			) -> Vearch:
			    """Open an existing local table and return a store bound to it.

			    Args:
			        embedding: Embedding model handed to the constructor.
			        path_or_url: Directory expected to contain ``<table_name>.schema``.
			        table_name: Name of the table to load.
			        db_name: Database name handed to the constructor.
			        flag: Mode flag handed to the constructor.
			        **kwargs: Accepted for interface compatibility; not used here.

			    Returns:
			        A new instance of this class on which ``_load()`` has been called.

			    Raises:
			        ValueError: If ``path_or_url`` or ``table_name`` is empty, or if
			            the schema file does not exist.
			    """
			    if not path_or_url:
			        raise ValueError("No metadata path!!!")
			    if not table_name:
			        raise ValueError("No table name!!!")

			    # The table is considered present only if its schema file exists.
			    schema_file = os.path.join(path_or_url, f"{table_name}.schema")
			    if not os.path.exists(schema_file):
			        raise ValueError("vearch vectorbase table not exist!!!")

			    store = cls(
			        embedding_function=embedding,
			        path_or_url=path_or_url,
			        table_name=table_name,
			        db_name=db_name,
			        flag=flag,
			    )
			    store._load()
			    return store

			def similarity_search(
			    self,
			    query: str,
			    k: int = DEFAULT_TOPN,
			    **kwargs: Any,
			) -> List[Document]:
			    """Return the documents most similar to a text query.

			    The query is embedded with ``self.embedding_func`` and the resulting
			    vector is handed to ``similarity_search_by_vector``.

			    Args:
			        query: Text to search for.
			        k: Number of documents to return.
			        **kwargs: Extra keyword arguments, forwarded unchanged to
			            ``similarity_search_by_vector``.

			    Returns:
			        The documents returned by ``similarity_search_by_vector``.

			    Raises:
			        ValueError: If ``self.embedding_func`` is None.
			    """
			    if self.embedding_func is None:
			        raise ValueError("embedding_func is None!!!")
			    query_vector = self.embedding_func.embed_query(query)
			    # Forward kwargs instead of dropping them, matching how
			    # _similarity_search_with_relevance_scores delegates.
			    return self.similarity_search_by_vector(query_vector, k, **kwargs)

			def similarity_search_by_vector(
			    self,
			    embedding: List[float],
			    k: int = DEFAULT_TOPN,
			    **kwargs: Any,
			) -> List[Document]:
			    """Return the ``k`` documents most similar to an embedding vector.

			    The query vector is scaled to unit length before it is sent to the
			    engine. When ``self.flag`` is truthy the three-argument
			    ``search(db, table, query)`` call is used, otherwise the
			    single-argument ``search(query)`` call.

			    Args:
			        embedding: Embedding vector of the query.
			        k: Number of documents to request from the engine.
			        **kwargs: Accepted for interface compatibility; not used.

			    Returns:
			        One ``Document`` per hit. ``page_content`` is the hit's "text"
			        field (empty string if absent); the hit's "metadata" field, if
			        present, is stored under ``metadata["source"]``.
			    """
			    vector = np.array(embedding)
			    norm = np.linalg.norm(vector)
			    # An all-zero vector has norm 0; dividing by it would fill the query
			    # with NaN, so such a vector is sent unchanged.
			    unit_vector = vector / (norm if norm > 0 else 1.0)

			    if self.flag:
			        query_data = {
			            "query": {
			                "sum": [
			                    {
			                        "field": "text_embedding",
			                        "feature": unit_vector.tolist(),
			                    }
			                ],
			            },
			            "size": k,
			            "fields": ["text", "metadata"],
			        }
			        query_result = self.vearch.search(
			            self.using_db_name, self.using_table_name, query_data
			        )
			        hits = query_result["hits"]["hits"]
			    else:
			        query_data = {
			            "vector": [
			                {
			                    "field": "text_embedding",
			                    "feature": unit_vector,
			                }
			            ],
			            "fields": [],
			            "is_brute_search": 1,
			            "retrieval_param": {"metric_type": "InnerProduct", "nprobe": 20},
			            "topn": k,
			        }
			        query_result = self.vearch.search(query_data)
			        hits = query_result[0]["result_items"]

			    docs = []
			    for hit in hits:
			        # With a truthy flag the stored fields are nested under "_source".
			        fields = hit["_source"] if self.flag else hit
			        content = fields["text"] if "text" in fields else ""
			        meta_data = {}
			        if "metadata" in fields:
			            meta_data["source"] = fields["metadata"]
			        docs.append(Document(page_content=content, metadata=meta_data))
			    return docs

			def similarity_search_with_score(
			    self,
			    query: str,
			    k: int = DEFAULT_TOPN,
			    **kwargs: Any,
			) -> List[Tuple[Document, float]]:
			    """Return the ``k`` documents most similar to a text query, with scores.

			    The query is embedded with ``self.embedding_func`` and the vector is
			    scaled to unit length before it is sent to the engine. When
			    ``self.flag`` is truthy the three-argument
			    ``search(db, table, query)`` call is used, otherwise the
			    single-argument ``search(query)`` call.

			    Args:
			        query: Text to search for.
			        k: Number of documents to request from the engine.
			        **kwargs: Accepted for interface compatibility; not used.

			    Returns:
			        One ``(Document, score)`` tuple per hit. ``page_content`` is the
			        hit's "text" field (empty string if absent); the hit's "metadata"
			        field, if present, is stored under ``metadata["source"]``. The
			        score is read from the hit's "_score" (truthy flag) or "score"
			        field, and is 0.0 when the engine reports none.

			    Raises:
			        ValueError: If ``self.embedding_func`` is None.
			    """
			    if self.embedding_func is None:
			        raise ValueError("embedding_func is None!!!")
			    vector = np.array(self.embedding_func.embed_query(query))
			    norm = np.linalg.norm(vector)
			    # An all-zero vector has norm 0; dividing by it would fill the query
			    # with NaN, so such a vector is sent unchanged.
			    unit_vector = vector / (norm if norm > 0 else 1.0)

			    if self.flag:
			        query_data = {
			            "query": {
			                "sum": [
			                    {
			                        "field": "text_embedding",
			                        "feature": unit_vector.tolist(),
			                    }
			                ],
			            },
			            "size": k,
			            "fields": ["text_embedding", "text", "metadata"],
			        }
			        query_result = self.vearch.search(
			            self.using_db_name, self.using_table_name, query_data
			        )
			        hits = query_result["hits"]["hits"]
			    else:
			        query_data = {
			            "vector": [
			                {
			                    "field": "text_embedding",
			                    "feature": unit_vector,
			                }
			            ],
			            "fields": [],
			            "is_brute_search": 1,
			            "retrieval_param": {"metric_type": "InnerProduct", "nprobe": 20},
			            "topn": k,
			        }
			        query_result = self.vearch.search(query_data)
			        hits = query_result[0]["result_items"]

			    results: List[Tuple[Document, float]] = []
			    for hit in hits:
			        # Reset per hit so a hit without a score can neither raise
			        # UnboundLocalError nor inherit the previous hit's score.
			        score = 0.0
			        if self.flag:
			            score = hit["_score"]
			            fields = hit["_source"]
			        else:
			            fields = hit
			        content = fields["text"] if "text" in fields else ""
			        meta_data = {}
			        if "metadata" in fields:
			            meta_data["source"] = fields["metadata"]
			        if self.flag != 1 and "score" in fields:
			            score = fields["score"]
			        results.append(
			            (Document(page_content=content, metadata=meta_data), score)
			        )
			    return results

			def _similarity_search_with_relevance_scores(
			    self,
			    query: str,
			    k: int = 4,
			    **kwargs: Any,
			) -> List[Tuple[Document, float]]:
			    """Return documents and scores for ``query``.

			    Thin wrapper: forwards ``query``, ``k`` and any extra keyword
			    arguments to ``similarity_search_with_score`` and returns its result
			    unchanged.
			    """
			    scored_docs = self.similarity_search_with_score(query, k, **kwargs)
			    return scored_docs

			def delete(
			    self,
			    ids: Optional[List[str]] = None,
			    **kwargs: Any,
			) -> Optional[bool]:
			    """Delete the documents that have the given ids.

			    Args:
			        ids: Ids of the documents to delete.
			        **kwargs: Accepted for interface compatibility; not used.

			    Returns:
			        None when ``ids`` is None or empty. Otherwise True if every
			        engine delete call returned 0, False if any returned something
			        else.
			    """
			    if ids is None or len(ids) == 0:
			        return None

			    # Lists (not generators) so that every delete is issued before the
			    # return codes are inspected.
			    if self.flag:
			        codes = [
			            self.vearch.delete(self.using_db_name, self.using_table_name, doc_id)
			            for doc_id in ids
			        ]
			    else:
			        codes = [self.vearch.del_doc(doc_id) for doc_id in ids]
			    return all(code == 0 for code in codes)

			def get(
			    self,
			    ids: Optional[List[str]] = None,
			    **kwargs: Any,
			) -> Dict[str, Document]:
			    """Return the documents that have the given ids.

			    Args:
			        ids: Ids of the documents to fetch.
			        **kwargs: Accepted for interface compatibility; not used.

			    Returns:
			        A dict mapping each found record's "_id" to a ``Document``. Its
			        ``page_content`` is the record's "text" field (empty string if
			        absent); the record's "metadata" field, if present, is stored
			        under ``metadata["source"]``. Empty when ``ids`` is None or
			        empty.
			    """

			    def _as_document(fields: Any) -> Document:
			        # Build a Document from the "text" / "metadata" fields of a record.
			        content = fields["text"] if "text" in fields else ""
			        meta_info = {}
			        if "metadata" in fields:
			            meta_info["source"] = fields["metadata"]
			        return Document(page_content=content, metadata=meta_info)

			    found: Dict[str, Document] = {}
			    if ids is None or len(ids) == 0:
			        return found

			    if self.flag:
			        # Truthy flag: one batched lookup for all ids.
			        records = self.vearch.mget_by_ids(
			            self.using_db_name,
			            self.using_table_name,
			            {"query": {"ids": ids}},
			        )
			        for record in records:
			            if record["found"] is False:
			                continue
			            found[record["_id"]] = _as_document(record["_source"])
			        return found

			    # Falsy flag: one lookup per id; an empty dict means "not found".
			    for doc_id in ids:
			        detail = self.vearch.get_doc_by_id(doc_id)
			        if detail == {}:
			            continue
			        found[detail["_id"]] = _as_document(detail)
			    return found