core[patch]: Update documentation for base retriever (#20345)

Updating in code documentation for base retriever to direct folks toward the .invoke and .ainvoke methods + explain how to implement
2024-11-16 06:13:16 +00:00 · 2024-04-11 16:20:14 -04:00 · 2024-04-11 16:20:14 -04:00 · 2900720cd3
commit 2900720cd3
parent d2f4153fe6
1 changed files with 88 additions and 6 deletions
--- a/libs/core/langchain_core/retrievers.py
+++ b/libs/core/langchain_core/retrievers.py
@ -51,12 +51,48 @@ RetrieverOutputLike = Runnable[Any, RetrieverOutput]
 class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
    """Abstract base class for a Document retrieval system.

-    A retrieval system is defined as something that can take string queries and return
-        the most 'relevant' Documents from some source.

-    Example:
+    A retrieval system is defined as something that can take string queries and return
+    the most 'relevant' Documents from some source.
+
+    Usage:
+
+    A retriever follows the standard Runnable interface, and should be used
+    via the standard runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.
+
+    Implementation:
+
+    When implementing a custom retriever, the class should implement
+    the `_get_relevant_documents` method to define the logic for retrieving documents.
+
+    Optionally, an async native implementations can be provided by overriding the
+    `_aget_relevant_documents` method.
+
+    Example: A retriever that returns the first 5 documents from a list of documents
+
        .. code-block:: python

+            from langchain_core import Document, BaseRetriever
+            from typing import List
+
+            class SimpleRetriever(BaseRetriever):
+                docs: List[Document]
+                k: int = 5
+
+                def _get_relevant_documents(self, query: str) -> List[Document]:
+                    \"\"\"Return the first k documents from the list of documents\"\"\"
+                    return self.docs[:self.k]
+
+                async def _aget_relevant_documents(self, query: str) -> List[Document]:
+                    \"\"\"(Optional) async native implementation.\"\"\"
+                    return self.docs[:self.k]
+
+    Example: A simple retriever based on a scitkit learn vectorizer
+
+        .. code-block:: python
+
+            from sklearn.metrics.pairwise import cosine_similarity
+
            class TFIDFRetriever(BaseRetriever, BaseModel):
                vectorizer: Any
                docs: List[Document]
@ -66,9 +102,7 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
                class Config:
                    arbitrary_types_allowed = True

-                def get_relevant_documents(self, query: str) -> List[Document]:
-                    from sklearn.metrics.pairwise import cosine_similarity
-
+                def _get_relevant_documents(self, query: str) -> List[Document]:
                    # Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
                    query_vec = self.vectorizer.transform([query])
                    # Op -- (n_docs,1) -- Cosine Sim with each doc
@ -137,6 +171,24 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
    def invoke(
        self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> List[Document]:
+        """Invoke the retriever to get relevant documents.
+
+        Main entry point for synchronous retriever invocations.
+
+        Args:
+            input: The query string
+            config: Configuration for the retriever
+            **kwargs: Additional arguments to pass to the retriever
+
+        Returns:
+            List of relevant documents
+
+        Examples:
+
+        .. code-block:: python
+
+            retriever.invoke("query")
+        """
        config = ensure_config(config)
        return self.get_relevant_documents(
            input,
@ -153,6 +205,24 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
        config: Optional[RunnableConfig] = None,
        **kwargs: Any,
    ) -> List[Document]:
+        """Asynchronously invoke the retriever to get relevant documents.
+
+        Main entry point for asynchronous retriever invocations.
+
+        Args:
+            input: The query string
+            config: Configuration for the retriever
+            **kwargs: Additional arguments to pass to the retriever
+
+        Returns:
+            List of relevant documents
+
+        Examples:
+
+        .. code-block:: python
+
+            await retriever.ainvoke("query")
+        """
        config = ensure_config(config)
        return await self.aget_relevant_documents(
            input,
@ -203,6 +273,10 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
        **kwargs: Any,
    ) -> List[Document]:
        """Retrieve documents relevant to a query.
+
+        Users should favor using `.invoke` or `.batch` rather than
+        `get_relevant_documents directly`.
+
        Args:
            query: string to find relevant documents for
            callbacks: Callback manager or list of callbacks
@ -212,6 +286,8 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
            metadata: Optional metadata associated with the retriever. Defaults to None
                This metadata will be associated with each call to this retriever,
                and passed as arguments to the handlers defined in `callbacks`.
+            run_name: Optional name for the run.
+
        Returns:
            List of relevant documents
        """
@ -260,6 +336,10 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
        **kwargs: Any,
    ) -> List[Document]:
        """Asynchronously get documents relevant to a query.
+
+        Users should favor using `.ainvoke` or `.abatch` rather than
+        `aget_relevant_documents directly`.
+
        Args:
            query: string to find relevant documents for
            callbacks: Callback manager or list of callbacks
@ -269,6 +349,8 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
            metadata: Optional metadata associated with the retriever. Defaults to None
                This metadata will be associated with each call to this retriever,
                and passed as arguments to the handlers defined in `callbacks`.
+            run_name: Optional name for the run.
+
        Returns:
            List of relevant documents
        """