mirror of
https://github.com/hwchase17/langchain
synced 2024-11-16 06:13:16 +00:00
core[patch]: Update documentation for base retriever (#20345)
Updating in code documentation for base retriever to direct folks toward the .invoke and .ainvoke methods + explain how to implement
This commit is contained in:
parent
d2f4153fe6
commit
2900720cd3
@ -51,12 +51,48 @@ RetrieverOutputLike = Runnable[Any, RetrieverOutput]
|
||||
class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
"""Abstract base class for a Document retrieval system.
|
||||
|
||||
A retrieval system is defined as something that can take string queries and return
|
||||
the most 'relevant' Documents from some source.
|
||||
|
||||
Example:
|
||||
A retrieval system is defined as something that can take string queries and return
|
||||
the most 'relevant' Documents from some source.
|
||||
|
||||
Usage:
|
||||
|
||||
A retriever follows the standard Runnable interface, and should be used
|
||||
via the standard runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.
|
||||
|
||||
Implementation:
|
||||
|
||||
When implementing a custom retriever, the class should implement
|
||||
the `_get_relevant_documents` method to define the logic for retrieving documents.
|
||||
|
||||
Optionally, an async native implementations can be provided by overriding the
|
||||
`_aget_relevant_documents` method.
|
||||
|
||||
Example: A retriever that returns the first 5 documents from a list of documents
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_core import Document, BaseRetriever
|
||||
from typing import List
|
||||
|
||||
class SimpleRetriever(BaseRetriever):
|
||||
docs: List[Document]
|
||||
k: int = 5
|
||||
|
||||
def _get_relevant_documents(self, query: str) -> List[Document]:
|
||||
\"\"\"Return the first k documents from the list of documents\"\"\"
|
||||
return self.docs[:self.k]
|
||||
|
||||
async def _aget_relevant_documents(self, query: str) -> List[Document]:
|
||||
\"\"\"(Optional) async native implementation.\"\"\"
|
||||
return self.docs[:self.k]
|
||||
|
||||
Example: A simple retriever based on a scitkit learn vectorizer
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
class TFIDFRetriever(BaseRetriever, BaseModel):
|
||||
vectorizer: Any
|
||||
docs: List[Document]
|
||||
@ -66,9 +102,7 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
def get_relevant_documents(self, query: str) -> List[Document]:
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
def _get_relevant_documents(self, query: str) -> List[Document]:
|
||||
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
|
||||
query_vec = self.vectorizer.transform([query])
|
||||
# Op -- (n_docs,1) -- Cosine Sim with each doc
|
||||
@ -137,6 +171,24 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
def invoke(
|
||||
self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Invoke the retriever to get relevant documents.
|
||||
|
||||
Main entry point for synchronous retriever invocations.
|
||||
|
||||
Args:
|
||||
input: The query string
|
||||
config: Configuration for the retriever
|
||||
**kwargs: Additional arguments to pass to the retriever
|
||||
|
||||
Returns:
|
||||
List of relevant documents
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
retriever.invoke("query")
|
||||
"""
|
||||
config = ensure_config(config)
|
||||
return self.get_relevant_documents(
|
||||
input,
|
||||
@ -153,6 +205,24 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
config: Optional[RunnableConfig] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Asynchronously invoke the retriever to get relevant documents.
|
||||
|
||||
Main entry point for asynchronous retriever invocations.
|
||||
|
||||
Args:
|
||||
input: The query string
|
||||
config: Configuration for the retriever
|
||||
**kwargs: Additional arguments to pass to the retriever
|
||||
|
||||
Returns:
|
||||
List of relevant documents
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
await retriever.ainvoke("query")
|
||||
"""
|
||||
config = ensure_config(config)
|
||||
return await self.aget_relevant_documents(
|
||||
input,
|
||||
@ -203,6 +273,10 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Retrieve documents relevant to a query.
|
||||
|
||||
Users should favor using `.invoke` or `.batch` rather than
|
||||
`get_relevant_documents directly`.
|
||||
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
callbacks: Callback manager or list of callbacks
|
||||
@ -212,6 +286,8 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
metadata: Optional metadata associated with the retriever. Defaults to None
|
||||
This metadata will be associated with each call to this retriever,
|
||||
and passed as arguments to the handlers defined in `callbacks`.
|
||||
run_name: Optional name for the run.
|
||||
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
@ -260,6 +336,10 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Asynchronously get documents relevant to a query.
|
||||
|
||||
Users should favor using `.ainvoke` or `.abatch` rather than
|
||||
`aget_relevant_documents directly`.
|
||||
|
||||
Args:
|
||||
query: string to find relevant documents for
|
||||
callbacks: Callback manager or list of callbacks
|
||||
@ -269,6 +349,8 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
metadata: Optional metadata associated with the retriever. Defaults to None
|
||||
This metadata will be associated with each call to this retriever,
|
||||
and passed as arguments to the handlers defined in `callbacks`.
|
||||
run_name: Optional name for the run.
|
||||
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
|
Loading…
Reference in New Issue
Block a user