From d1bfb70bc42cdfb66f48238c51aee8c2beb09f8c Mon Sep 17 00:00:00 2001 From: Chad Norvell Date: Sun, 7 Jan 2024 08:33:21 -0800 Subject: [PATCH] community: Allow deleting by ID and collection in `pgvector` (#15627) - **Description:** The `delete_collection` method deletes an entire collection regardless of custom ID. The `delete` method deletes everything with the provided custom IDs regardless of collection. It can be useful to restrict deletion to both the collection and a set of custom IDs. This change adds support for that by allowing you to optionally specify that `delete` should be restricted to the collection defined on the `PGVector` instance. --- .../vectorstores/pgvector.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/pgvector.py b/libs/community/langchain_community/vectorstores/pgvector.py index 8e3186b6b8..e57fb16358 100644 --- a/libs/community/langchain_community/vectorstores/pgvector.py +++ b/libs/community/langchain_community/vectorstores/pgvector.py @@ -281,12 +281,14 @@ class PGVector(VectorStore): def delete( self, ids: Optional[List[str]] = None, + collection_only: bool = False, **kwargs: Any, ) -> None: """Delete vectors by ids or uuids. Args: ids: List of ids to delete. + collection_only: Only delete ids in the collection. """ with Session(self._bind) as session: if ids is not None: @@ -294,9 +296,20 @@ class PGVector(VectorStore): "Trying to delete vectors by ids (represented by the model " "using the custom ids field)" ) - stmt = delete(self.EmbeddingStore).where( - self.EmbeddingStore.custom_id.in_(ids) - ) + + stmt = delete(self.EmbeddingStore) + + if collection_only: + collection = self.get_collection(session) + if not collection: + self.logger.warning("Collection not found") + return + + stmt = stmt.where( + self.EmbeddingStore.collection_id == collection.uuid + ) + + stmt = stmt.where(self.EmbeddingStore.custom_id.in_(ids)) session.execute(stmt) session.commit()