forked from Archives/langchain
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
70 lines
2.4 KiB
Python
70 lines
2.4 KiB
Python
"""Wrapper around in-memory storage."""
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, Dict, List, Literal, Optional
|
|
|
|
from langchain.embeddings.base import Embeddings
|
|
from langchain.vectorstores.docarray.base import (
|
|
DocArrayIndex,
|
|
_check_docarray_import,
|
|
)
|
|
|
|
|
|
class DocArrayInMemorySearch(DocArrayIndex):
|
|
"""Wrapper around in-memory storage for exact search.
|
|
|
|
To use it, you should have the ``docarray`` package with version >=0.31.0 installed.
|
|
You can install it with `pip install "langchain[in_memory_store]"`.
|
|
"""
|
|
|
|
@classmethod
|
|
def from_params(
|
|
cls,
|
|
embedding: Embeddings,
|
|
metric: Literal[
|
|
"cosine_sim", "euclidian_dist", "sgeuclidean_dist"
|
|
] = "cosine_sim",
|
|
**kwargs: Any,
|
|
) -> DocArrayInMemorySearch:
|
|
"""Initialize DocArrayInMemorySearch store.
|
|
|
|
Args:
|
|
embedding (Embeddings): Embedding function.
|
|
metric (str): metric for exact nearest-neighbor search.
|
|
Can be one of: "cosine_sim", "euclidean_dist" and "sqeuclidean_dist".
|
|
Defaults to "cosine_sim".
|
|
**kwargs: Other keyword arguments to be passed to the get_doc_cls method.
|
|
"""
|
|
_check_docarray_import()
|
|
from docarray.index import InMemoryExactNNIndex
|
|
|
|
doc_cls = cls._get_doc_cls(space=metric, **kwargs)
|
|
doc_index = InMemoryExactNNIndex[doc_cls]() # type: ignore
|
|
return cls(doc_index, embedding)
|
|
|
|
@classmethod
|
|
def from_texts(
|
|
cls,
|
|
texts: List[str],
|
|
embedding: Embeddings,
|
|
metadatas: Optional[List[Dict[Any, Any]]] = None,
|
|
**kwargs: Any,
|
|
) -> DocArrayInMemorySearch:
|
|
"""Create an DocArrayInMemorySearch store and insert data.
|
|
|
|
Args:
|
|
texts (List[str]): Text data.
|
|
embedding (Embeddings): Embedding function.
|
|
metadatas (Optional[List[Dict[Any, Any]]]): Metadata for each text
|
|
if it exists. Defaults to None.
|
|
metric (str): metric for exact nearest-neighbor search.
|
|
Can be one of: "cosine_sim", "euclidean_dist" and "sqeuclidean_dist".
|
|
Defaults to "cosine_sim".
|
|
|
|
Returns:
|
|
DocArrayInMemorySearch Vector Store
|
|
"""
|
|
store = cls.from_params(embedding, **kwargs)
|
|
store.add_texts(texts=texts, metadatas=metadatas)
|
|
return store
|