mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
Added filter and delete-all options to the delete function in the Pinecone integration; updated the base VectorStore's delete function (#6876)
### Description
Updated the `delete` function in the Pinecone integration to allow deleting vectors by specifying a filter condition, and to allow deleting all vectors in a namespace. Made the `ids` parameter optional in the `delete` function of the base `VectorStore` class and allowed it to accept additional keyword arguments. Updated the `delete` function in several other classes (Redis, Chroma, Supabase, DeepLake, ElasticVectorSearch, Weaviate, and Cassandra) to match the new base-class signature: in each, `ids` is now optional and additional keyword arguments are accepted.
This commit is contained in:
parent
5a45363954
commit
e41b382e1c
@ -53,20 +53,19 @@ class VectorStore(ABC):
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
|
||||
def delete(self, ids: List[str]) -> Optional[bool]:
|
||||
"""Delete by vector ID.
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
|
||||
"""Delete by vector ID or other criteria.
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
**kwargs: Other keyword arguments that subclasses might use.
|
||||
|
||||
Returns:
|
||||
Optional[bool]: True if deletion is successful,
|
||||
False otherwise, None if not implemented.
|
||||
"""
|
||||
|
||||
raise NotImplementedError(
|
||||
"delete_by_id method must be implemented by subclass."
|
||||
)
|
||||
raise NotImplementedError("delete method must be implemented by subclass.")
|
||||
|
||||
async def aadd_texts(
|
||||
self,
|
||||
|
@ -91,8 +91,9 @@ class Cassandra(VectorStore):
|
||||
def delete_by_document_id(self, document_id: str) -> None:
|
||||
return self.table.delete(document_id)
|
||||
|
||||
def delete(self, ids: List[str]) -> Optional[bool]:
|
||||
"""Delete by vector ID.
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
|
||||
"""Delete by vector IDs.
|
||||
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
@ -101,6 +102,10 @@ class Cassandra(VectorStore):
|
||||
Optional[bool]: True if deletion is successful,
|
||||
False otherwise, None if not implemented.
|
||||
"""
|
||||
|
||||
if ids is None:
|
||||
raise ValueError("No ids provided to delete.")
|
||||
|
||||
for document_id in ids:
|
||||
self.delete_by_document_id(document_id)
|
||||
return True
|
||||
|
@ -470,7 +470,7 @@ class Chroma(VectorStore):
|
||||
client=client,
|
||||
)
|
||||
|
||||
def delete(self, ids: List[str]) -> None:
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
|
||||
"""Delete by vector IDs.
|
||||
|
||||
Args:
|
||||
|
@ -744,30 +744,23 @@ class DeepLake(VectorStore):
|
||||
)
|
||||
return deeplake_dataset
|
||||
|
||||
def delete(
|
||||
self,
|
||||
ids: Any[List[str], None] = None,
|
||||
filter: Any[Dict[str, str], None] = None,
|
||||
delete_all: Any[bool, None] = None,
|
||||
) -> bool:
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> bool:
|
||||
"""Delete the entities in the dataset.
|
||||
|
||||
Args:
|
||||
ids (Optional[List[str]], optional): The document_ids to delete.
|
||||
Defaults to None.
|
||||
filter (Optional[Dict[str, str]], optional): The filter to delete by.
|
||||
Defaults to None.
|
||||
delete_all (Optional[bool], optional): Whether to drop the dataset.
|
||||
Defaults to None.
|
||||
**kwargs: Other keyword arguments that subclasses might use.
|
||||
- filter (Optional[Dict[str, str]], optional): The filter to delete by.
|
||||
- delete_all (Optional[bool], optional): Whether to drop the dataset.
|
||||
|
||||
Returns:
|
||||
bool: Whether the delete operation was successful.
|
||||
"""
|
||||
self.vectorstore.delete(
|
||||
ids=ids,
|
||||
filter=filter,
|
||||
delete_all=delete_all,
|
||||
)
|
||||
filter = kwargs.get("filter")
|
||||
delete_all = kwargs.get("delete_all")
|
||||
|
||||
self.vectorstore.delete(ids=ids, filter=filter, delete_all=delete_all)
|
||||
|
||||
return True
|
||||
|
||||
|
@ -317,13 +317,16 @@ class ElasticVectorSearch(VectorStore, ABC):
|
||||
)
|
||||
return response
|
||||
|
||||
def delete(self, ids: List[str]) -> None:
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
|
||||
"""Delete by vector IDs.
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
"""
|
||||
|
||||
if ids is None:
|
||||
raise ValueError("No ids provided to delete.")
|
||||
|
||||
# TODO: Check if this can be done in bulk
|
||||
for id in ids:
|
||||
self.client.delete(index=self.index_name, id=id)
|
||||
|
@ -354,16 +354,33 @@ class Pinecone(VectorStore):
|
||||
pinecone.Index(index_name), embedding.embed_query, text_key, namespace
|
||||
)
|
||||
|
||||
def delete(self, ids: List[str], namespace: Optional[str] = None) -> None:
|
||||
"""Delete by vector IDs.
|
||||
def delete(
|
||||
self,
|
||||
ids: Optional[List[str]] = None,
|
||||
delete_all: Optional[bool] = None,
|
||||
namespace: Optional[str] = None,
|
||||
filter: Optional[dict] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Delete by vector IDs or filter.
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
filter: Dictionary of conditions to filter vectors to delete.
|
||||
"""
|
||||
|
||||
# This is the maximum number of IDs that can be deleted
|
||||
if namespace is None:
|
||||
namespace = self._namespace
|
||||
|
||||
if delete_all:
|
||||
self._index.delete(delete_all=True, namespace=namespace, **kwargs)
|
||||
elif ids is not None:
|
||||
chunk_size = 1000
|
||||
for i in range(0, len(ids), chunk_size):
|
||||
chunk = ids[i : i + chunk_size]
|
||||
self._index.delete(ids=chunk, namespace=namespace)
|
||||
self._index.delete(ids=chunk, namespace=namespace, **kwargs)
|
||||
elif filter is not None:
|
||||
self._index.delete(filter=filter, namespace=namespace, **kwargs)
|
||||
else:
|
||||
raise ValueError("Either ids, delete_all, or filter must be provided.")
|
||||
|
||||
return None
|
||||
|
@ -469,7 +469,7 @@ class Redis(VectorStore):
|
||||
|
||||
@staticmethod
|
||||
def delete(
|
||||
ids: List[str],
|
||||
ids: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> bool:
|
||||
"""
|
||||
|
@ -346,12 +346,16 @@ class SupabaseVectorStore(VectorStore):
|
||||
)
|
||||
return docs
|
||||
|
||||
def delete(self, ids: List[str]) -> None:
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
|
||||
"""Delete by vector IDs.
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
"""
|
||||
|
||||
if ids is None:
|
||||
raise ValueError("No ids provided to delete.")
|
||||
|
||||
rows: List[dict[str, Any]] = [
|
||||
{
|
||||
"id": id,
|
||||
|
@ -470,13 +470,16 @@ class Weaviate(VectorStore):
|
||||
by_text=by_text,
|
||||
)
|
||||
|
||||
def delete(self, ids: List[str]) -> None:
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
|
||||
"""Delete by vector IDs.
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
"""
|
||||
|
||||
if ids is None:
|
||||
raise ValueError("No ids provided to delete.")
|
||||
|
||||
# TODO: Check if this can be done in bulk
|
||||
for id in ids:
|
||||
self._client.data_object.delete(uuid=id)
|
||||
|
Loading…
Reference in New Issue
Block a user