Parent Doc Retriever (#9214)

2 things:
- Implement the private method rather than the public one so callbacks
are handled properly
- Add search_kwargs (open to not adding this if we are trying to
deprecate this UX, but it seems that, as a user, I'd assume similar args
to the vector store retriever. In fact, some may assume this implements
the same interface, but I'm not dealing with that here)
-
pull/9230/head
William FH 1 year ago committed by GitHub
parent 17ae2998e7
commit 7124f2ebfa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,7 +1,9 @@
import uuid
from typing import Any, Dict, List, Optional
from typing import List, Optional
from langchain.callbacks.base import Callbacks
from pydantic import Field
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema.document import Document
from langchain.schema.retriever import BaseRetriever
from langchain.schema.storage import BaseStore
@ -71,17 +73,20 @@ class ParentDocumentRetriever(BaseRetriever):
parent_splitter: Optional[TextSplitter] = None
"""The text splitter to use to create parent documents.
If none, then the parent documents will be the raw documents passed in."""
search_kwargs: dict = Field(default_factory=dict)
"""Keyword arguments to pass to the search function."""
def get_relevant_documents(
self,
query: str,
*,
callbacks: Callbacks = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
sub_docs = self.vectorstore.similarity_search(query)
"""Get documents relevant to a query.
Args:
query: String to find relevant documents for
run_manager: The callbacks handler to use
Returns:
List of relevant documents
"""
sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
# We do this to maintain the order of the ids that are returned
ids = []
for d in sub_docs:

Loading…
Cancel
Save