mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
langchain: Add aadd_documents
to ParentDocumentRetriever
(#23969)
- **Description:** Add an async version of `add_documents` to `ParentDocumentRetriever`.
- **Twitter handle:** @johnkdev

---------

Co-authored-by: John Kelly <j.kelly@mwam.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
parent
f9d64d22e5
commit
5affbada61
@ -1,5 +1,5 @@
|
||||
import uuid
|
||||
from typing import Any, List, Optional, Sequence
|
||||
from typing import Any, List, Optional, Sequence, Tuple
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from langchain_text_splitters import TextSplitter
|
||||
@ -69,27 +69,12 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
||||
metadata.
|
||||
"""
|
||||
|
||||
def add_documents(
|
||||
def _split_docs_for_adding(
|
||||
self,
|
||||
documents: List[Document],
|
||||
ids: Optional[List[str]] = None,
|
||||
add_to_docstore: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Adds documents to the docstore and vectorstores.
|
||||
|
||||
Args:
|
||||
documents: List of documents to add
|
||||
ids: Optional list of ids for documents. If provided should be the same
|
||||
length as the list of documents. Can be provided if parent documents
|
||||
are already in the document store and you don't want to re-add
|
||||
to the docstore. If not provided, random UUIDs will be used as
|
||||
ids.
|
||||
add_to_docstore: Boolean of whether to add documents to docstore.
|
||||
This can be false if and only if `ids` are provided. You may want
|
||||
to set this to False if the documents are already in the docstore
|
||||
and you don't want to re-add them.
|
||||
"""
|
||||
) -> Tuple[List[Document], List[Tuple[str, Document]]]:
|
||||
if self.parent_splitter is not None:
|
||||
documents = self.parent_splitter.split_documents(documents)
|
||||
if ids is None:
|
||||
@ -120,6 +105,43 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
||||
_doc.metadata[self.id_key] = _id
|
||||
docs.extend(sub_docs)
|
||||
full_docs.append((_id, doc))
|
||||
|
||||
return docs, full_docs
|
||||
|
||||
def add_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    add_to_docstore: bool = True,
    **kwargs: Any,
) -> None:
    """Index documents in the vectorstore and, optionally, the docstore.

    The inputs are passed to ``_split_docs_for_adding``, which returns the
    sub-documents to index together with the ``(id, document)`` pairs that
    belong in the docstore.

    Args:
        documents: List of documents to add.
        ids: Optional list of ids for the documents. If provided, it should
            have the same length as ``documents``. Supplying ids is useful
            when the parent documents are already in the docstore and should
            not be added again. If omitted, random UUIDs are used as ids.
        add_to_docstore: Whether to write the documents to the docstore.
            This can be False if and only if ``ids`` are provided; set it to
            False when the documents are already in the docstore and you do
            not want to re-add them.
        **kwargs: Forwarded unchanged to ``self.vectorstore.add_documents``.
    """
    child_docs, parent_pairs = self._split_docs_for_adding(
        documents, ids, add_to_docstore
    )
    self.vectorstore.add_documents(child_docs, **kwargs)
    if not add_to_docstore:
        # Caller asked to skip the docstore write; only the vectorstore is updated.
        return
    self.docstore.mset(parent_pairs)
|
||||
|
||||
async def aadd_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    add_to_docstore: bool = True,
    **kwargs: Any,
) -> None:
    """Asynchronously index documents in the vectorstore and docstore.

    Async counterpart of ``add_documents``: the documents are prepared by
    the synchronous ``_split_docs_for_adding`` helper, then written with
    the awaitable ``aadd_documents`` / ``amset`` store methods.

    Args:
        documents: List of documents to add.
        ids: Optional list of ids for the documents, passed through to
            ``_split_docs_for_adding``.
        add_to_docstore: Whether to also write the ``(id, document)`` pairs
            to the docstore. When False, only the vectorstore is updated.
        **kwargs: Forwarded unchanged to ``self.vectorstore.aadd_documents``.
    """
    child_docs, parent_pairs = self._split_docs_for_adding(
        documents, ids, add_to_docstore
    )
    await self.vectorstore.aadd_documents(child_docs, **kwargs)
    if not add_to_docstore:
        # Caller asked to skip the docstore write; only the vectorstore is updated.
        return
    await self.docstore.amset(parent_pairs)
|
||||
|
Loading…
Reference in New Issue
Block a user