langchain: Add aadd_documents to ParentDocumentRetriever (#23969)

- **Description:** Add an async version of `add_documents` to
`ParentDocumentRetriever`
-  **Twitter handle:** @johnkdev

---------

Co-authored-by: John Kelly <j.kelly@mwam.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
John Kelly 2024-07-19 18:12:39 +01:00 committed by GitHub
parent f9d64d22e5
commit 5affbada61
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,5 +1,5 @@
import uuid
from typing import Any, List, Optional, Sequence
from typing import Any, List, Optional, Sequence, Tuple
from langchain_core.documents import Document
from langchain_text_splitters import TextSplitter
@ -69,27 +69,12 @@ class ParentDocumentRetriever(MultiVectorRetriever):
metadata.
"""
def add_documents(
def _split_docs_for_adding(
self,
documents: List[Document],
ids: Optional[List[str]] = None,
add_to_docstore: bool = True,
**kwargs: Any,
) -> None:
"""Adds documents to the docstore and vectorstores.
Args:
documents: List of documents to add
ids: Optional list of ids for documents. If provided should be the same
length as the list of documents. Can be provided if parent documents
are already in the document store and you don't want to re-add
to the docstore. If not provided, random UUIDs will be used as
ids.
add_to_docstore: Boolean of whether to add documents to docstore.
This can be false if and only if `ids` are provided. You may want
to set this to False if the documents are already in the docstore
and you don't want to re-add them.
"""
) -> Tuple[List[Document], List[Tuple[str, Document]]]:
if self.parent_splitter is not None:
documents = self.parent_splitter.split_documents(documents)
if ids is None:
@ -120,6 +105,43 @@ class ParentDocumentRetriever(MultiVectorRetriever):
_doc.metadata[self.id_key] = _id
docs.extend(sub_docs)
full_docs.append((_id, doc))
return docs, full_docs
def add_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    add_to_docstore: bool = True,
    **kwargs: Any,
) -> None:
    """Add documents to the vectorstore and, optionally, to the docstore.

    Splitting is delegated to ``_split_docs_for_adding``, which returns the
    sub-documents destined for the vectorstore together with the
    ``(id, document)`` pairs destined for the docstore.

    Args:
        documents: List of documents to add.
        ids: Optional list of ids for the documents. If provided, it should
            have the same length as ``documents``. Supplying ids is useful
            when the parent documents are already in the docstore and
            should not be re-added. If omitted, random UUIDs are used.
        add_to_docstore: Whether to also write the documents to the
            docstore. This can be ``False`` if and only if ``ids`` are
            provided, e.g. because the documents are already stored and
            should not be written again.
        **kwargs: Forwarded unchanged to ``self.vectorstore.add_documents``.
    """
    sub_docs, id_doc_pairs = self._split_docs_for_adding(
        documents, ids, add_to_docstore
    )
    # The vectorstore is always updated; the docstore write is optional.
    self.vectorstore.add_documents(sub_docs, **kwargs)
    if not add_to_docstore:
        return
    self.docstore.mset(id_doc_pairs)
async def aadd_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    add_to_docstore: bool = True,
    **kwargs: Any,
) -> None:
    """Asynchronously add documents to the vectorstore and docstore.

    The splitting step runs synchronously via ``_split_docs_for_adding``;
    only the writes to the vectorstore and the docstore are awaited.

    Args:
        documents: List of documents to add.
        ids: Optional list of ids for the documents, passed through to
            ``_split_docs_for_adding``.
        add_to_docstore: Whether to also write the ``(id, document)`` pairs
            to the docstore once the vectorstore has been updated.
        **kwargs: Forwarded unchanged to ``self.vectorstore.aadd_documents``.
    """
    sub_docs, id_doc_pairs = self._split_docs_for_adding(
        documents, ids, add_to_docstore
    )
    # The vectorstore is always updated; the docstore write is optional.
    await self.vectorstore.aadd_documents(sub_docs, **kwargs)
    if not add_to_docstore:
        return
    await self.docstore.amset(id_doc_pairs)