from __future__ import annotations

import logging
import warnings
from dataclasses import asdict, dataclass
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple

from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

if TYPE_CHECKING:
    from zep_python.document import Document as ZepDocument
    from zep_python.document import DocumentCollection


logger = logging.getLogger()


@dataclass
class CollectionConfig:
    """Configuration for a `Zep Collection`.

    If the collection does not exist, it will be created.

    Attributes:
        name (str): The name of the collection.
        description (Optional[str]): An optional description of the collection.
        metadata (Optional[Dict[str, Any]]): Optional metadata for the collection.
        embedding_dimensions (int): The number of dimensions for the embeddings in
            the collection. This should match the Zep server configuration
            if auto-embed is true.
        is_auto_embedded (bool): A flag indicating whether the collection is
            automatically embedded by Zep.
    """

    name: str
    description: Optional[str]
    metadata: Optional[Dict[str, Any]]
    embedding_dimensions: int
    is_auto_embedded: bool
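
# A minimal sketch of building a CollectionConfig, assuming an embedding model
# with 1536 dimensions and a collection that Zep does not auto-embed. The name,
# description, metadata, and dimension values are illustrative, not required.
#
#     config = CollectionConfig(
#         name="my_docs",
#         description="Documentation chunks",
#         metadata={"source": "docs"},
#         embedding_dimensions=1536,
#         is_auto_embedded=False,
#     )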


class ZepVectorStore(VectorStore):
    """`Zep` vector store.

    It provides methods for adding texts or documents to the store,
    searching for similar documents, and deleting documents.

    Search scores are calculated using cosine similarity normalized to [0, 1].

    Args:
        api_url (str): The URL of the Zep API.
        collection_name (str): The name of the collection in the Zep store.
        api_key (Optional[str]): The API key for the Zep API.
        config (Optional[CollectionConfig]): The configuration for the collection.
            Required if the collection does not already exist.
        embedding (Optional[Embeddings]): Optional embedding function to use to
            embed the texts. Required if the collection is not auto-embedded.
    """

    def __init__(
        self,
        collection_name: str,
        api_url: str,
        *,
        api_key: Optional[str] = None,
        config: Optional[CollectionConfig] = None,
        embedding: Optional[Embeddings] = None,
    ) -> None:
        super().__init__()
        if not collection_name:
            raise ValueError(
                "collection_name must be specified when using ZepVectorStore."
            )
        try:
            from zep_python import ZepClient
        except ImportError:
            raise ImportError(
                "Could not import zep-python package. "
                "Please install it with `pip install zep-python`."
            )
        self._client = ZepClient(api_url, api_key=api_key)

        self.collection_name = collection_name
        # If the collection name in the config differs from the one passed in,
        # the passed-in name takes precedence.
        if config and config.name != self.collection_name:
            config.name = self.collection_name

        self._collection_config = config
        self._collection = self._load_collection()
        self._embedding = embedding
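
    # A minimal usage sketch, assuming a Zep server at http://localhost:8000 and
    # an Embeddings implementation such as OpenAIEmbeddings (both are assumptions
    # for illustration); adjust the URL, API key, config, and embedding function
    # for your deployment.
    #
    #     vectorstore = ZepVectorStore(
    #         collection_name="my_docs",
    #         api_url="http://localhost:8000",
    #         api_key="<zep-api-key>",
    #         config=config,  # e.g. the CollectionConfig sketched above
    #         embedding=OpenAIEmbeddings(),
    #     )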

    @property
    def embeddings(self) -> Optional[Embeddings]:
        """Access the query embedding object if available."""
        return self._embedding

    def _load_collection(self) -> DocumentCollection:
        """Load the collection from the Zep backend."""
        from zep_python import NotFoundError

        try:
            collection = self._client.document.get_collection(self.collection_name)
        except NotFoundError:
            logger.info(
                f"Collection {self.collection_name} not found. Creating new collection."
            )
            collection = self._create_collection()

        return collection

    def _create_collection(self) -> DocumentCollection:
        """Create a new collection in the Zep backend."""
        if not self._collection_config:
            raise ValueError(
                "Collection config must be specified when creating a new collection."
            )
        collection = self._client.document.add_collection(
            **asdict(self._collection_config)
        )
        return collection

    def _generate_documents_to_add(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[Any, Any]]] = None,
        document_ids: Optional[List[str]] = None,
    ) -> List[ZepDocument]:
        from zep_python.document import Document as ZepDocument

        # Materialize the iterable so it can be traversed twice: once for
        # embedding and once for building the Zep documents.
        texts = list(texts)

        embeddings = None
        if self._collection and self._collection.is_auto_embedded:
            if self._embedding is not None:
                warnings.warn(
                    "The collection is set to auto-embed and an embedding "
                    "function is present. Ignoring the embedding function.",
                    stacklevel=2,
                )
        elif self._embedding is not None:
            embeddings = self._embedding.embed_documents(texts)
            if self._collection and self._collection.embedding_dimensions != len(
                embeddings[0]
            ):
                raise ValueError(
                    "The embedding dimensions of the collection and the embedding"
                    " function do not match. Collection dimensions:"
                    f" {self._collection.embedding_dimensions}, Embedding dimensions:"
                    f" {len(embeddings[0])}"
                )

        documents: List[ZepDocument] = []
        for i, d in enumerate(texts):
            documents.append(
                ZepDocument(
                    content=d,
                    metadata=metadatas[i] if metadatas else None,
                    document_id=document_ids[i] if document_ids else None,
                    embedding=embeddings[i] if embeddings else None,
                )
            )
        return documents

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        document_ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            document_ids: Optional list of document ids associated with the texts.
            kwargs: vectorstore specific parameters

        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        documents = self._generate_documents_to_add(texts, metadatas, document_ids)
        uuids = self._collection.add_documents(documents)

        return uuids
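
    # A sketch of adding texts with per-document metadata; the texts and
    # metadata values below are illustrative.
    #
    #     uuids = vectorstore.add_texts(
    #         ["Zep is a long-term memory store.", "It also stores documents."],
    #         metadatas=[{"topic": "memory"}, {"topic": "documents"}],
    #     )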

    async def aadd_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        document_ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore."""
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        documents = self._generate_documents_to_add(texts, metadatas, document_ids)
        uuids = await self._collection.aadd_documents(documents)

        return uuids

    def search(
        self,
        query: str,
        search_type: str,
        metadata: Optional[Dict[str, Any]] = None,
        k: int = 3,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query using the specified search type."""
        if search_type == "similarity":
            return self.similarity_search(query, k=k, metadata=metadata, **kwargs)
        elif search_type == "mmr":
            return self.max_marginal_relevance_search(
                query, k=k, metadata=metadata, **kwargs
            )
        else:
            raise ValueError(
                f"search_type of {search_type} not allowed. Expected "
                "search_type to be 'similarity' or 'mmr'."
            )
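
    # A sketch of the search_type dispatch: "similarity" routes to
    # similarity_search and "mmr" to max_marginal_relevance_search. The query
    # below is illustrative.
    #
    #     docs = vectorstore.search("How does Zep store documents?", search_type="mmr", k=3)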

    async def asearch(
        self,
        query: str,
        search_type: str,
        metadata: Optional[Dict[str, Any]] = None,
        k: int = 3,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query using the specified search type."""
        if search_type == "similarity":
            return await self.asimilarity_search(
                query, k=k, metadata=metadata, **kwargs
            )
        elif search_type == "mmr":
            return await self.amax_marginal_relevance_search(
                query, k=k, metadata=metadata, **kwargs
            )
        else:
            raise ValueError(
                f"search_type of {search_type} not allowed. Expected "
                "search_type to be 'similarity' or 'mmr'."
            )

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query."""
        results = self._similarity_search_with_relevance_scores(
            query, k=k, metadata=metadata, **kwargs
        )
        return [doc for doc, _ in results]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Run similarity search and return docs with their relevance scores."""
        return self._similarity_search_with_relevance_scores(
            query, k=k, metadata=metadata, **kwargs
        )
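
    # A sketch of filtering results by relevance score; scores are cosine
    # similarity normalized to [0, 1], and the 0.8 threshold is illustrative.
    #
    #     results = vectorstore.similarity_search_with_score("zep memory", k=4)
    #     relevant = [(doc, score) for doc, score in results if score >= 0.8]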

    def _similarity_search_with_relevance_scores(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """
        Default similarity search with relevance scores. Modify if necessary
        in subclass.
        Return docs and relevance scores in the range [0, 1].

        0 is dissimilar, 1 is most similar.

        Args:
            query: input text
            k: Number of Documents to return. Defaults to 4.
            metadata: Optional, metadata filter
            **kwargs: kwargs to be passed to similarity search. Should include:
                score_threshold: Optional, a floating point value between 0 and 1
                    used to filter the resulting set of retrieved docs.

        Returns:
            List of Tuples of (doc, similarity_score)
        """
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        if not self._collection.is_auto_embedded and self._embedding:
            query_vector = self._embedding.embed_query(query)
            results = self._collection.search(
                embedding=query_vector, limit=k, metadata=metadata, **kwargs
            )
        else:
            results = self._collection.search(
                query, limit=k, metadata=metadata, **kwargs
            )

        return [
            (
                Document(
                    page_content=doc.content,
                    metadata=doc.metadata,
                ),
                doc.score or 0.0,
            )
            for doc in results
        ]

    async def asimilarity_search_with_relevance_scores(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query, with their relevance scores."""
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        if not self._collection.is_auto_embedded and self._embedding:
            query_vector = self._embedding.embed_query(query)
            results = await self._collection.asearch(
                embedding=query_vector, limit=k, metadata=metadata, **kwargs
            )
        else:
            results = await self._collection.asearch(
                query, limit=k, metadata=metadata, **kwargs
            )

        return [
            (
                Document(
                    page_content=doc.content,
                    metadata=doc.metadata,
                ),
                doc.score or 0.0,
            )
            for doc in results
        ]

    async def asimilarity_search(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query."""
        results = await self.asimilarity_search_with_relevance_scores(
            query, k, metadata=metadata, **kwargs
        )

        return [doc for doc, _ in results]

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata: Optional, metadata filter

        Returns:
            List of Documents most similar to the query vector.
        """
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        results = self._collection.search(
            embedding=embedding, limit=k, metadata=metadata, **kwargs
        )

        return [
            Document(
                page_content=doc.content,
                metadata=doc.metadata,
            )
            for doc in results
        ]

    async def asimilarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to embedding vector."""
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        # Use the async search API so the event loop is not blocked.
        results = await self._collection.asearch(
            embedding=embedding, limit=k, metadata=metadata, **kwargs
        )

        return [
            Document(
                page_content=doc.content,
                metadata=doc.metadata,
            )
            for doc in results
        ]

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
                Zep determines this automatically and this parameter is
                ignored.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
            metadata: Optional, metadata to filter the resulting set of retrieved docs

        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        if not self._collection.is_auto_embedded and self._embedding:
            query_vector = self._embedding.embed_query(query)
            results = self._collection.search(
                embedding=query_vector,
                limit=k,
                metadata=metadata,
                search_type="mmr",
                mmr_lambda=lambda_mult,
                **kwargs,
            )
        else:
            results, _ = self._collection.search_return_query_vector(
                query,
                limit=k,
                metadata=metadata,
                search_type="mmr",
                mmr_lambda=lambda_mult,
                **kwargs,
            )

        return [Document(page_content=d.content, metadata=d.metadata) for d in results]
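
    # A sketch of an MMR query; lambda_mult=0.3 leans toward diversity while
    # 1.0 would rank purely by similarity. The query and values are illustrative.
    #
    #     docs = vectorstore.max_marginal_relevance_search(
    #         "zep document collections", k=4, lambda_mult=0.3
    #     )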

    async def amax_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance."""
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        if not self._collection.is_auto_embedded and self._embedding:
            query_vector = self._embedding.embed_query(query)
            results = await self._collection.asearch(
                embedding=query_vector,
                limit=k,
                metadata=metadata,
                search_type="mmr",
                mmr_lambda=lambda_mult,
                **kwargs,
            )
        else:
            results, _ = await self._collection.asearch_return_query_vector(
                query,
                limit=k,
                metadata=metadata,
                search_type="mmr",
                mmr_lambda=lambda_mult,
                **kwargs,
            )

        return [Document(page_content=d.content, metadata=d.metadata) for d in results]

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
                Zep determines this automatically and this parameter is
                ignored.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
            metadata: Optional, metadata to filter the resulting set of retrieved docs

        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        if not self._collection:
            raise ValueError(
                "collection should be an instance of a Zep DocumentCollection"
            )

        results = self._collection.search(
            embedding=embedding,
            limit=k,
            metadata=metadata,
            search_type="mmr",
            mmr_lambda=lambda_mult,
            **kwargs,
        )

        return [Document(page_content=d.content, metadata=d.metadata) for d in results]
|
async def amax_marginal_relevance_search_by_vector(
|
||
|
self,
|
||
|
embedding: List[float],
|
||
|
k: int = 4,
|
||
|
fetch_k: int = 20,
|
||
|
lambda_mult: float = 0.5,
|
||
|
metadata: Optional[Dict[str, Any]] = None,
|
||
|
**kwargs: Any,
|
||
|
) -> List[Document]:
|
||
|
"""Return docs selected using the maximal marginal relevance."""
|
||
|
if not self._collection:
|
||
|
raise ValueError(
|
||
|
"collection should be an instance of a Zep DocumentCollection"
|
||
|
)
|
||
|
|
||
|
results = await self._collection.asearch(
|
||
|
embedding=embedding,
|
||
|
limit=k,
|
||
|
metadata=metadata,
|
||
|
search_type="mmr",
|
||
|
mmr_lambda=lambda_mult,
|
||
|
**kwargs,
|
||
|
)
|
||
|
|
||
|
return [Document(page_content=d.content, metadata=d.metadata) for d in results]
|
||
|
|
||
|

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Optional[Embeddings] = None,
        metadatas: Optional[List[dict]] = None,
        collection_name: str = "",
        api_url: str = "",
        api_key: Optional[str] = None,
        config: Optional[CollectionConfig] = None,
        **kwargs: Any,
    ) -> ZepVectorStore:
        """
        Class method that returns a ZepVectorStore instance initialized from texts.

        If the collection does not exist, it will be created.

        Args:
            texts (List[str]): The list of texts to add to the vectorstore.
            embedding (Optional[Embeddings]): Optional embedding function to use to
                embed the texts.
            metadatas (Optional[List[Dict[str, Any]]]): Optional list of metadata
                associated with the texts.
            collection_name (str): The name of the collection in the Zep store.
            api_url (str): The URL of the Zep API.
            api_key (Optional[str]): The API key for the Zep API.
            config (Optional[CollectionConfig]): The configuration for the collection.
            **kwargs: Additional parameters specific to the vectorstore.

        Returns:
            ZepVectorStore: An instance of ZepVectorStore.
        """
        vecstore = cls(
            collection_name,
            api_url,
            api_key=api_key,
            config=config,
            embedding=embedding,
        )
        vecstore.add_texts(texts, metadatas)
        return vecstore
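
    # A sketch of bootstrapping a store from raw texts; the collection name,
    # URL, config, and embedding function are illustrative assumptions.
    #
    #     store = ZepVectorStore.from_texts(
    #         ["first chunk", "second chunk"],
    #         embedding=OpenAIEmbeddings(),
    #         collection_name="my_docs",
    #         api_url="http://localhost:8000",
    #         config=config,
    #     )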

    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Delete by Zep vector UUIDs.

        Parameters
        ----------
        ids : Optional[List[str]]
            The UUIDs of the vectors to delete.

        Raises
        ------
        ValueError
            If no UUIDs are provided.
        """
        if ids is None or len(ids) == 0:
            raise ValueError("No uuids provided to delete.")

        if self._collection is None:
            raise ValueError("No collection name provided.")

        for u in ids:
            self._collection.delete_document(u)
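
    # A sketch of deleting documents by UUID; the UUIDs returned by add_texts
    # are the values expected here.
    #
    #     uuids = vectorstore.add_texts(["temporary note"])
    #     vectorstore.delete(ids=uuids)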