community: Allow deleting by ID and collection in pgvector (#15627)

- **Description:** The `delete_collection` method deletes an entire
collection regardless of custom ID. The `delete` method deletes
everything with the provided custom IDs regardless of collection. It can
be useful to restrict deletion to both the collection and a set of
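custom IDs. This change adds support for that through a new optional
`collection_only` flag on `delete` (default `False`). When it is set,
deletion is restricted to rows that match the given custom IDs and belong
to the collection defined on the `PGVector` instance; if that collection
cannot be found, a warning is logged and nothing is deleted.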
This commit is contained in:
Chad Norvell 2024-01-07 08:33:21 -08:00 committed by GitHub
parent f6226d464e
commit d1bfb70bc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -281,12 +281,14 @@ class PGVector(VectorStore):
def delete( def delete(
self, self,
ids: Optional[List[str]] = None, ids: Optional[List[str]] = None,
collection_only: bool = False,
**kwargs: Any, **kwargs: Any,
) -> None: ) -> None:
"""Delete vectors by ids or uuids. """Delete vectors by ids or uuids.
Args: Args:
ids: List of ids to delete. ids: List of ids to delete.
collection_only: Only delete ids in the collection.
""" """
with Session(self._bind) as session: with Session(self._bind) as session:
if ids is not None: if ids is not None:
@ -294,9 +296,20 @@ class PGVector(VectorStore):
"Trying to delete vectors by ids (represented by the model " "Trying to delete vectors by ids (represented by the model "
"using the custom ids field)" "using the custom ids field)"
) )
stmt = delete(self.EmbeddingStore).where(
self.EmbeddingStore.custom_id.in_(ids) stmt = delete(self.EmbeddingStore)
if collection_only:
collection = self.get_collection(session)
if not collection:
self.logger.warning("Collection not found")
return
stmt = stmt.where(
self.EmbeddingStore.collection_id == collection.uuid
) )
stmt = stmt.where(self.EmbeddingStore.custom_id.in_(ids))
session.execute(stmt) session.execute(stmt)
session.commit() session.commit()