Added filter and delete-all options to the delete function in the Pinecone integration; updated the base VectorStore's delete function (#6876)

### Description:
Updated the delete function in the Pinecone integration to allow for
deletion of vectors by specifying a filter condition, and to delete all
vectors in a namespace.

Made the ids parameter optional in the delete function in the base
VectorStore class and allowed for additional keyword arguments.

Updated the delete function in several classes (Redis, Chroma, Supabase,
DeepLake, Elastic, Weaviate, and Cassandra) to match the changes made in
the base VectorStore class. This involved making the ids parameter
optional and allowing for additional keyword arguments. Stores that can
only delete by ID (Cassandra, Elastic, Supabase, and Weaviate) now raise
a ValueError when ids is omitted.
This commit is contained in:
0xcha05 2023-07-03 00:16:19 +05:30 committed by GitHub
parent 5a45363954
commit e41b382e1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 58 additions and 34 deletions

View File

@ -53,20 +53,19 @@ class VectorStore(ABC):
List of ids from adding the texts into the vectorstore.
"""
def delete(self, ids: List[str]) -> Optional[bool]:
"""Delete by vector ID.
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
"""Delete by vector ID or other criteria.
Args:
ids: List of ids to delete.
**kwargs: Other keyword arguments that subclasses might use.
Returns:
Optional[bool]: True if deletion is successful,
False otherwise, None if not implemented.
"""
raise NotImplementedError(
"delete_by_id method must be implemented by subclass."
)
raise NotImplementedError("delete method must be implemented by subclass.")
async def aadd_texts(
self,

View File

@ -91,8 +91,9 @@ class Cassandra(VectorStore):
def delete_by_document_id(self, document_id: str) -> None:
return self.table.delete(document_id)
def delete(self, ids: List[str]) -> Optional[bool]:
"""Delete by vector ID.
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
"""Delete by vector IDs.
Args:
ids: List of ids to delete.
@ -101,6 +102,10 @@ class Cassandra(VectorStore):
Optional[bool]: True if deletion is successful,
False otherwise, None if not implemented.
"""
if ids is None:
raise ValueError("No ids provided to delete.")
for document_id in ids:
self.delete_by_document_id(document_id)
return True

View File

@ -470,7 +470,7 @@ class Chroma(VectorStore):
client=client,
)
def delete(self, ids: List[str]) -> None:
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
"""Delete by vector IDs.
Args:

View File

@ -744,30 +744,23 @@ class DeepLake(VectorStore):
)
return deeplake_dataset
def delete(
self,
ids: Any[List[str], None] = None,
filter: Any[Dict[str, str], None] = None,
delete_all: Any[bool, None] = None,
) -> bool:
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> bool:
"""Delete the entities in the dataset.
Args:
ids (Optional[List[str]], optional): The document_ids to delete.
Defaults to None.
filter (Optional[Dict[str, str]], optional): The filter to delete by.
Defaults to None.
delete_all (Optional[bool], optional): Whether to drop the dataset.
Defaults to None.
**kwargs: Other keyword arguments that subclasses might use.
- filter (Optional[Dict[str, str]], optional): The filter to delete by.
- delete_all (Optional[bool], optional): Whether to drop the dataset.
Returns:
bool: Whether the delete operation was successful.
"""
self.vectorstore.delete(
ids=ids,
filter=filter,
delete_all=delete_all,
)
filter = kwargs.get("filter")
delete_all = kwargs.get("delete_all")
self.vectorstore.delete(ids=ids, filter=filter, delete_all=delete_all)
return True

View File

@ -317,13 +317,16 @@ class ElasticVectorSearch(VectorStore, ABC):
)
return response
def delete(self, ids: List[str]) -> None:
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
"""Delete by vector IDs.
Args:
ids: List of ids to delete.
"""
if ids is None:
raise ValueError("No ids provided to delete.")
# TODO: Check if this can be done in bulk
for id in ids:
self.client.delete(index=self.index_name, id=id)

View File

@ -354,16 +354,33 @@ class Pinecone(VectorStore):
pinecone.Index(index_name), embedding.embed_query, text_key, namespace
)
def delete(self, ids: List[str], namespace: Optional[str] = None) -> None:
"""Delete by vector IDs.
def delete(
self,
ids: Optional[List[str]] = None,
delete_all: Optional[bool] = None,
namespace: Optional[str] = None,
filter: Optional[dict] = None,
**kwargs: Any,
) -> None:
"""Delete by vector IDs or filter.
Args:
ids: List of ids to delete.
filter: Dictionary of conditions to filter vectors to delete.
"""
# This is the maximum number of IDs that can be deleted
if namespace is None:
namespace = self._namespace
if delete_all:
self._index.delete(delete_all=True, namespace=namespace, **kwargs)
elif ids is not None:
chunk_size = 1000
for i in range(0, len(ids), chunk_size):
chunk = ids[i : i + chunk_size]
self._index.delete(ids=chunk, namespace=namespace)
self._index.delete(ids=chunk, namespace=namespace, **kwargs)
elif filter is not None:
self._index.delete(filter=filter, namespace=namespace, **kwargs)
else:
raise ValueError("Either ids, delete_all, or filter must be provided.")
return None

View File

@ -469,7 +469,7 @@ class Redis(VectorStore):
@staticmethod
def delete(
ids: List[str],
ids: Optional[List[str]] = None,
**kwargs: Any,
) -> bool:
"""

View File

@ -346,12 +346,16 @@ class SupabaseVectorStore(VectorStore):
)
return docs
def delete(self, ids: List[str]) -> None:
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
"""Delete by vector IDs.
Args:
ids: List of ids to delete.
"""
if ids is None:
raise ValueError("No ids provided to delete.")
rows: List[dict[str, Any]] = [
{
"id": id,

View File

@ -470,13 +470,16 @@ class Weaviate(VectorStore):
by_text=by_text,
)
def delete(self, ids: List[str]) -> None:
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
"""Delete by vector IDs.
Args:
ids: List of ids to delete.
"""
if ids is None:
raise ValueError("No ids provided to delete.")
# TODO: Check if this can be done in bulk
for id in ids:
self._client.data_object.delete(uuid=id)