Parent Doc Retriever (#9214)

2 things:
- Implement the private `_get_relevant_documents` method rather than
overriding the public `get_relevant_documents`, so that callbacks are
handled properly
- Add `search_kwargs` (open to not adding this if we are trying to
deprecate this UX, but as a user I'd expect arguments similar to those of
the vector store retriever. In fact, some may assume this implements the
same interface, but I'm not dealing with that here)
-
pull/9230/head
William FH 1 year ago committed by GitHub
parent 17ae2998e7
commit 7124f2ebfa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,7 +1,9 @@
import uuid import uuid
from typing import Any, Dict, List, Optional from typing import List, Optional
from langchain.callbacks.base import Callbacks from pydantic import Field
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema.document import Document from langchain.schema.document import Document
from langchain.schema.retriever import BaseRetriever from langchain.schema.retriever import BaseRetriever
from langchain.schema.storage import BaseStore from langchain.schema.storage import BaseStore
@ -71,17 +73,20 @@ class ParentDocumentRetriever(BaseRetriever):
parent_splitter: Optional[TextSplitter] = None parent_splitter: Optional[TextSplitter] = None
"""The text splitter to use to create parent documents. """The text splitter to use to create parent documents.
If none, then the parent documents will be the raw documents passed in.""" If none, then the parent documents will be the raw documents passed in."""
search_kwargs: dict = Field(default_factory=dict)
"""Keyword arguments to pass to the search function."""
def get_relevant_documents( def _get_relevant_documents(
self, self, query: str, *, run_manager: CallbackManagerForRetrieverRun
query: str,
*,
callbacks: Callbacks = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[Document]: ) -> List[Document]:
sub_docs = self.vectorstore.similarity_search(query) """Get documents relevant to a query.
Args:
query: String to find relevant documents for
run_manager: The callbacks handler to use
Returns:
List of relevant documents
"""
sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
# We do this to maintain the order of the ids that are returned # We do this to maintain the order of the ids that are returned
ids = [] ids = []
for d in sub_docs: for d in sub_docs:

Loading…
Cancel
Save