Parent Doc Retriever (#9214)

Two changes: (1) Implement the private method `_get_relevant_documents` rather than the public `get_relevant_documents`, so that callbacks are handled properly. (2) Add `search_kwargs`. (I'm open to not adding this if we are trying to deprecate this UX, but as a user I'd assume arguments similar to those of the vector store retriever. In fact, some may assume this implements the same interface, but I'm not dealing with that here.)
1 year ago · 7124f2ebfa
parent 17ae2998e7
commit 7124f2ebfa
1 changed file with 16 additions and 11 deletions
--- a/libs/langchain/langchain/retrievers/parent_document_retriever.py
+++ b/libs/langchain/langchain/retrievers/parent_document_retriever.py
@@ -1,7 +1,9 @@
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import List, Optional

-from langchain.callbacks.base import Callbacks
+from pydantic import Field
+
+from langchain.callbacks.manager import CallbackManagerForRetrieverRun
 from langchain.schema.document import Document
 from langchain.schema.retriever import BaseRetriever
 from langchain.schema.storage import BaseStore
@@ -71,17 +73,20 @@ class ParentDocumentRetriever(BaseRetriever):
    parent_splitter: Optional[TextSplitter] = None
    """The text splitter to use to create parent documents.
    If none, then the parent documents will be the raw documents passed in."""
+    search_kwargs: dict = Field(default_factory=dict)
+    """Keyword arguments to pass to the search function."""

-    def get_relevant_documents(
-        self,
-        query: str,
-        *,
-        callbacks: Callbacks = None,
-        tags: Optional[List[str]] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-        **kwargs: Any,
+    def _get_relevant_documents(
+        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
-        sub_docs = self.vectorstore.similarity_search(query)
+        """Get documents relevant to a query.
+        Args:
+            query: String to find relevant documents for
+            run_manager: The callbacks handler to use
+        Returns:
+            List of relevant documents
+        """
+        sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
        # We do this to maintain the order of the ids that are returned
        ids = []
        for d in sub_docs: