📖 docstrings `retrievers` consistency (#9422)

📜 
- updated the top-level descriptions to a consistent format;
- renamed several purely internal functions from "name" to "_name"; as a result, these functions are no longer shown on the top-level API Reference page (the lists of classes/functions).
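
For readers unfamiliar with the convention: a leading underscore is Python's standard signal that a module-level name is private, and documentation generators (as well as `from module import *`) skip such names. A minimal sketch of the effect, with hypothetical names not taken from this commit:

```python
# hypothetical_module.py

def public_helper(text: str) -> str:
    """Appears in the generated API reference."""
    return _private_helper(text).strip()


def _private_helper(text: str) -> str:
    """Hidden: the leading underscore keeps this helper out of the
    API reference and out of `from hypothetical_module import *`."""
    return text.lower()
```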

@@ -6,8 +6,7 @@ from langchain.utilities.arxiv import ArxivAPIWrapper
class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
-"""
-Retriever for Arxiv.
+"""`Arxiv` retriever.
It wraps load() to get_relevant_documents().
It uses all ArxivAPIWrapper arguments without any change.
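
A short usage sketch of the wrapped interface described above (assumes the `arxiv` package is installed; the query string is illustrative):

```python
from langchain.retrievers import ArxivRetriever

# load_max_docs comes straight from ArxivAPIWrapper, reused unchanged
retriever = ArxivRetriever(load_max_docs=2)
docs = retriever.get_relevant_documents("quantum entanglement")
for doc in docs:
    print(doc.metadata.get("Title"), len(doc.page_content))
```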

@@ -1,5 +1,3 @@
-"""Retriever for the Azure Cognitive Search service."""
from __future__ import annotations
import json
@@ -18,7 +16,7 @@ from langchain.utils import get_from_dict_or_env
class AzureCognitiveSearchRetriever(BaseRetriever):
-"""Retriever for the Azure Cognitive Search service."""
+"""`Azure Cognitive Search` service retriever."""
service_name: str = ""
"""Name of Azure Cognitive Search service"""

@@ -1,8 +1,3 @@
-"""
-BM25 Retriever without elastic search
-"""
from __future__ import annotations
from typing import Any, Callable, Dict, Iterable, List, Optional
@@ -16,7 +11,7 @@ def default_preprocessing_func(text: str) -> List[str]:
class BM25Retriever(BaseRetriever):
-"""BM25 Retriever without elastic search."""
+"""`BM25` retriever without Elasticsearch."""
vectorizer: Any
""" BM25 vectorizer."""

@@ -11,7 +11,7 @@ from langchain.schema import BaseRetriever, Document
class ChaindeskRetriever(BaseRetriever):
-"""Retriever for the Chaindesk API."""
+"""`Chaindesk API` retriever."""
datastore_url: str
top_k: Optional[int]

@@ -13,7 +13,7 @@ from langchain.schema import BaseRetriever, Document
class ChatGPTPluginRetriever(BaseRetriever):
-"""Retrieves documents from a ChatGPT plugin."""
+"""`ChatGPT plugin` retriever."""
url: str
"""URL of the ChatGPT plugin."""

@@ -11,7 +11,7 @@ from langchain.schema import BaseRetriever, Document
class DataberryRetriever(BaseRetriever):
-"""Retriever for the Databerry API."""
+"""`Databerry API` retriever."""
datastore_url: str
top_k: Optional[int]

@@ -17,10 +17,9 @@ class SearchType(str, Enum):
class DocArrayRetriever(BaseRetriever):
-"""
-Retriever for DocArray Document Indices.
+"""`DocArray Document Indices` retriever.
-Currently, supports 5 backends:
+Currently, it supports 5 backends:
InMemoryExactNNIndex, HnswDocumentIndex, QdrantDocumentIndex,
ElasticDocIndex, and WeaviateDocumentIndex.

@@ -1,4 +1,3 @@
-"""Interface for retrieved document compressors."""
from abc import ABC, abstractmethod
from inspect import signature
from typing import List, Optional, Sequence, Union
@@ -9,7 +8,7 @@ from langchain.schema import BaseDocumentTransformer, Document
class BaseDocumentCompressor(BaseModel, ABC):
-"""Base abstraction interface for document compression."""
+"""Base class for document compressors."""
@abstractmethod
def compress_documents(
@@ -31,7 +30,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
class DocumentCompressorPipeline(BaseDocumentCompressor):
-"""Document compressor that uses a pipeline of transformers."""
+"""Document compressor that uses a pipeline of Transformers."""
transformers: List[Union[BaseDocumentTransformer, BaseDocumentCompressor]]
"""List of document filters that are chained together and run in sequence."""

@@ -42,7 +42,7 @@ def _get_default_chain_prompt() -> PromptTemplate:
class LLMChainExtractor(BaseDocumentCompressor):
-"""DocumentCompressor that uses an LLM chain to extract
+"""Document compressor that uses an LLM chain to extract
the relevant parts of documents."""
llm_chain: LLMChain

@@ -20,7 +20,7 @@ else:
class CohereRerank(BaseDocumentCompressor):
-"""DocumentCompressor that uses Cohere's rerank API to compress documents."""
+"""Document compressor that uses `Cohere Rerank API`."""
client: Client
"""Cohere client to use for compressing documents."""

@@ -11,7 +11,7 @@ from langchain.schema import BaseRetriever
class ElasticSearchBM25Retriever(BaseRetriever):
-"""Retriever for the Elasticsearch using BM25 as a retrieval method.
+"""`Elasticsearch` retriever that uses `BM25`.
To connect to an Elasticsearch instance that requires login credentials,
including Elastic Cloud, use the Elasticsearch URL format

@@ -13,8 +13,9 @@ from langchain.schema import BaseRetriever, Document
class EnsembleRetriever(BaseRetriever):
-"""
-This class ensemble the results of multiple retrievers by using rank fusion.
+"""Retriever that ensembles the multiple retrievers.
+It uses a rank fusion.
Args:
retrievers: A list of retrievers to ensemble.
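
A short sketch of how such an ensemble might be assembled (runnable if the `rank_bm25` package is installed; two keyword retrievers stand in for, say, a sparse + dense pair):

```python
from langchain.retrievers import BM25Retriever, EnsembleRetriever

r1 = BM25Retriever.from_texts(["apples and pears", "hello world"])
r2 = BM25Retriever.from_texts(["hello from the other side", "goodbye"])
# rank fusion merges the two ranked lists; weights set each retriever's vote
ensemble = EnsembleRetriever(retrievers=[r1, r2], weights=[0.5, 0.5])
docs = ensemble.get_relevant_documents("hello")
```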

@@ -17,11 +17,10 @@ if TYPE_CHECKING:
class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
-"""Retriever for the Google Cloud Enterprise Search Service API.
-For the detailed explanation of the Enterprise Search concepts
-and configuration parameters refer to the product documentation.
+"""`Google Cloud Enterprise Search API` retriever.
+For a detailed explanation of the Enterprise Search concepts
+and configuration parameters, refer to the product documentation.
https://cloud.google.com/generative-ai-app-builder/docs/enterprise-search-introduction
"""

@@ -9,7 +9,7 @@ from langchain.schema import BaseRetriever
def clean_excerpt(excerpt: str) -> str:
-"""Cleans an excerpt from Kendra.
+"""Clean an excerpt from Kendra.
Args:
excerpt: The excerpt to clean.
@@ -25,7 +25,7 @@ def clean_excerpt(excerpt: str) -> str:
def combined_text(item: "ResultItem") -> str:
-"""Combines a ResultItem title and excerpt into a single string.
+"""Combine a ResultItem title and excerpt into a single string.
Args:
item: the ResultItem of a Kendra search.
@@ -45,15 +45,15 @@ def combined_text(item: "ResultItem") -> str:
DocumentAttributeValueType = Union[str, int, List[str], None]
-"""Possible types of a DocumentAttributeValue. Dates are also represented as str."""
+"""Possible types of a DocumentAttributeValue.
+Dates are also represented as str.
+"""
# Unexpected keyword argument "extra" for "__init_subclass__" of "object"
class Highlight(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
-"""
-Represents the information that can be
-used to highlight key words in the excerpt.
-"""
+"""Information that highlights the key words in the excerpt."""
BeginOffset: int
"""The zero-based location in the excerpt where the highlight starts."""
@@ -79,7 +79,7 @@ class TextWithHighLights(BaseModel, extra=Extra.allow): # type: ignore[call-arg
class AdditionalResultAttributeValue( # type: ignore[call-arg]
BaseModel, extra=Extra.allow
):
-"""The value of an additional result attribute."""
+"""Value of an additional result attribute."""
TextWithHighlightsValue: TextWithHighLights
"""The text with highlights value."""
@@ -87,7 +87,7 @@ class AdditionalResultAttributeValue( # type: ignore[call-arg]
# Unexpected keyword argument "extra" for "__init_subclass__" of "object"
class AdditionalResultAttribute(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
-"""An additional result attribute."""
+"""Additional result attribute."""
Key: str
"""The key of the attribute."""
@@ -102,7 +102,7 @@ class AdditionalResultAttribute(BaseModel, extra=Extra.allow): # type: ignore[c
# Unexpected keyword argument "extra" for "__init_subclass__" of "object"
class DocumentAttributeValue(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
-"""The value of a document attribute."""
+"""Value of a document attribute."""
DateValue: Optional[str]
"""The date expressed as an ISO 8601 string."""
@@ -133,7 +133,7 @@ class DocumentAttributeValue(BaseModel, extra=Extra.allow): # type: ignore[call
# Unexpected keyword argument "extra" for "__init_subclass__" of "object"
class DocumentAttribute(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
-"""A document attribute."""
+"""Document attribute."""
Key: str
"""The key of the attribute."""
@@ -143,7 +143,7 @@ class DocumentAttribute(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
# Unexpected keyword argument "extra" for "__init_subclass__" of "object"
class ResultItem(BaseModel, ABC, extra=Extra.allow): # type: ignore[call-arg]
-"""Abstract class that represents a result item."""
+"""Base class of a result item."""
Id: Optional[str]
"""The ID of the relevant result item."""
@@ -199,7 +199,7 @@ class ResultItem(BaseModel, ABC, extra=Extra.allow): # type: ignore[call-arg]
class QueryResultItem(ResultItem):
-"""A Query API result item."""
+"""Query API result item."""
DocumentTitle: TextWithHighLights
"""The document title."""
@@ -248,7 +248,7 @@ class QueryResultItem(ResultItem):
class RetrieveResultItem(ResultItem):
-"""A Retrieve API result item."""
+"""Retrieve API result item."""
DocumentTitle: Optional[str]
"""The document title."""
@@ -264,11 +264,12 @@ class RetrieveResultItem(ResultItem):
# Unexpected keyword argument "extra" for "__init_subclass__" of "object"
class QueryResult(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
-"""
-Represents an Amazon Kendra Query API search result, which is composed of:
+"""`Amazon Kendra Query API` search result.
+It is composed of:
* Relevant suggested answers: either a text excerpt or table excerpt.
* Matching FAQs or questions-answer from your FAQ file.
-* Documents including an excerpt of each document with the its title.
+* Documents including an excerpt of each document with its title.
"""
ResultItems: List[QueryResultItem]
@@ -277,8 +278,9 @@ class QueryResult(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
# Unexpected keyword argument "extra" for "__init_subclass__" of "object"
class RetrieveResult(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
-"""
-Represents an Amazon Kendra Retrieve API search result, which is composed of:
+"""`Amazon Kendra Retrieve API` search result.
+It is composed of:
* relevant passages or text excerpts given an input query.
"""
@@ -289,7 +291,7 @@ class RetrieveResult(BaseModel, extra=Extra.allow): # type: ignore[call-arg]
class AmazonKendraRetriever(BaseRetriever):
-"""Retriever for the Amazon Kendra Index.
+"""`Amazon Kendra Index` retriever.
Args:
index_id: Kendra index id

@@ -30,7 +30,7 @@ def create_index(contexts: List[str], embeddings: Embeddings) -> np.ndarray:
class KNNRetriever(BaseRetriever):
-"""KNN Retriever."""
+"""`KNN` retriever."""
embeddings: Embeddings
"""Embeddings model to use."""

@@ -6,7 +6,9 @@ from langchain.schema import BaseRetriever, Document
class LlamaIndexRetriever(BaseRetriever):
-"""Retriever for the question-answering with sources over
+"""`LlamaIndex` retriever.
+It is used for the question-answering with sources over
an LlamaIndex data structure."""
index: Any
@@ -40,7 +42,9 @@ class LlamaIndexRetriever(BaseRetriever):
class LlamaIndexGraphRetriever(BaseRetriever):
-"""Retriever for question-answering with sources over an LlamaIndex
+"""`LlamaIndex` graph data structure retriever.
+It is used for question-answering with sources over an LlamaIndex
graph data structure."""
graph: Any

@@ -6,7 +6,7 @@ from langchain.schema import BaseRetriever, Document
class MetalRetriever(BaseRetriever):
-"""Retriever that uses the Metal API."""
+"""`Metal API` retriever."""
client: Any
"""The Metal client to use."""

@@ -12,7 +12,7 @@ from langchain.vectorstores.milvus import Milvus
class MilvusRetriever(BaseRetriever):
-"""Retriever that uses the Milvus API."""
+"""`Milvus API` retriever."""
embedding_function: Embeddings
collection_name: str = "LangChainCollection"

@@ -44,9 +44,10 @@ DEFAULT_QUERY_PROMPT = PromptTemplate(
class MultiQueryRetriever(BaseRetriever):
-"""Given a query, use an LLM to write a set of queries.
+"""Given a user query, use an LLM to write a set of queries.
-Retrieve docs for each query. Rake the unique union of all retrieved docs."""
+Retrieve docs for each query. Rake the unique union of all retrieved docs.
+"""
retriever: BaseRetriever
llm_chain: LLMChain
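
A hedged usage sketch of the flow described in that docstring; `base_retriever` is an assumed, pre-built vector store retriever, and an OpenAI key is assumed to be configured:

```python
from langchain.chat_models import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever

llm = ChatOpenAI(temperature=0)
# the LLM drafts several query variants; docs are fetched per variant
# and the unique union is returned
mq = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=llm)
docs = mq.get_relevant_documents("How do agents decompose tasks?")
```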

@@ -11,7 +11,7 @@ from langchain.vectorstores.base import VectorStore
class ParentDocumentRetriever(BaseRetriever):
-"""Fetches small chunks, then fetches their parent documents.
+"""Retrieve small chunks then retrieve their parent documents.
When splitting documents for retrieval, there are often conflicting desires:

@@ -29,13 +29,13 @@ def create_index(
ids: Optional[List[str]] = None,
metadatas: Optional[List[dict]] = None,
) -> None:
-"""
-Create a Pinecone index from a list of contexts.
-Modifies the index argument in-place.
+"""Create an index from a list of contexts.
+It modifies the index argument in-place!
Args:
contexts: List of contexts to embed.
-index: Pinecone index to use.
+index: Index to use.
embeddings: Embeddings model to use.
sparse_encoder: Sparse encoder to use.
ids: List of ids to use for the documents.
@@ -95,7 +95,7 @@ def create_index(
class PineconeHybridSearchRetriever(BaseRetriever):
-"""Pinecone Hybrid Search Retriever."""
+"""`Pinecone Hybrid Search` retriever."""
embeddings: Embeddings
"""Embeddings model to use."""

@@ -6,7 +6,7 @@ from langchain.utilities.pubmed import PubMedAPIWrapper
class PubMedRetriever(BaseRetriever, PubMedAPIWrapper):
-"""Retriever for PubMed API.
+"""`PubMed API` retriever.
It wraps load() to get_relevant_documents().
It uses all PubMedAPIWrapper arguments without any change.

@@ -23,9 +23,8 @@ DEFAULT_QUERY_PROMPT = PromptTemplate.from_template(DEFAULT_TEMPLATE)
class RePhraseQueryRetriever(BaseRetriever):
-"""Given a user query, use an LLM to re-phrase it.
-Then, retrieve docs for re-phrased query."""
+"""Given a query, use an LLM to re-phrase it.
+Then, retrieve docs for the re-phrased query."""
retriever: BaseRetriever
llm_chain: LLMChain

@@ -11,7 +11,7 @@ from langchain.schema import BaseRetriever, Document
class RemoteLangChainRetriever(BaseRetriever):
-"""Retriever for remote LangChain API."""
+"""`LangChain API` retriever."""
url: str
"""URL of the remote LangChain API."""

@@ -11,7 +11,7 @@ from langchain.chains.query_constructor.ir import (
class ChromaTranslator(Visitor):
-"""Translate internal query language elements to valid filters."""
+"""Translate `Chroma` internal query language elements to valid filters."""
allowed_operators = [Operator.AND, Operator.OR]
"""Subset of allowed logical operators."""

@@ -35,7 +35,7 @@ def can_cast_to_float(string: str) -> bool:
class DeepLakeTranslator(Visitor):
-"""Logic for converting internal query language elements to valid filters."""
+"""Translate `DeepLake` internal query language elements to valid filters."""
allowed_operators = [Operator.AND, Operator.OR]
"""Subset of allowed logical operators."""

@@ -11,7 +11,7 @@ from langchain.chains.query_constructor.ir import (
class ElasticsearchTranslator(Visitor):
-"""Translate the internal query language elements to valid filters."""
+"""Translate `Elasticsearch` internal query language elements to valid filters."""
allowed_comparators = [
Comparator.EQ,

@@ -12,7 +12,7 @@ from langchain.chains.query_constructor.ir import (
)
-def DEFAULT_COMPOSER(op_name: str) -> Callable:
+def _DEFAULT_COMPOSER(op_name: str) -> Callable:
"""
Default composer for logical operators.
@@ -30,9 +30,10 @@ def DEFAULT_COMPOSER(op_name: str) -> Callable:
return f
-def FUNCTION_COMPOSER(op_name: str) -> Callable:
+def _FUNCTION_COMPOSER(op_name: str) -> Callable:
"""
Composer for functions.
Args:
op_name: Name of the function.
@@ -48,7 +49,7 @@ def FUNCTION_COMPOSER(op_name: str) -> Callable:
class MyScaleTranslator(Visitor):
-"""Translate internal query language elements to valid filters."""
+"""Translate `MyScale` internal query language elements to valid filters."""
allowed_operators = [Operator.AND, Operator.OR, Operator.NOT]
"""Subset of allowed logical operators."""
@@ -64,16 +65,16 @@ class MyScaleTranslator(Visitor):
]
map_dict = {
-Operator.AND: DEFAULT_COMPOSER("AND"),
-Operator.OR: DEFAULT_COMPOSER("OR"),
-Operator.NOT: DEFAULT_COMPOSER("NOT"),
-Comparator.EQ: DEFAULT_COMPOSER("="),
-Comparator.GT: DEFAULT_COMPOSER(">"),
-Comparator.GTE: DEFAULT_COMPOSER(">="),
-Comparator.LT: DEFAULT_COMPOSER("<"),
-Comparator.LTE: DEFAULT_COMPOSER("<="),
-Comparator.CONTAIN: FUNCTION_COMPOSER("has"),
-Comparator.LIKE: DEFAULT_COMPOSER("ILIKE"),
+Operator.AND: _DEFAULT_COMPOSER("AND"),
+Operator.OR: _DEFAULT_COMPOSER("OR"),
+Operator.NOT: _DEFAULT_COMPOSER("NOT"),
+Comparator.EQ: _DEFAULT_COMPOSER("="),
+Comparator.GT: _DEFAULT_COMPOSER(">"),
+Comparator.GTE: _DEFAULT_COMPOSER(">="),
+Comparator.LT: _DEFAULT_COMPOSER("<"),
+Comparator.LTE: _DEFAULT_COMPOSER("<="),
+Comparator.CONTAIN: _FUNCTION_COMPOSER("has"),
+Comparator.LIKE: _DEFAULT_COMPOSER("ILIKE"),
}
def __init__(self, metadata_key: str = "metadata") -> None:
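
For context on what the renamed helpers do: each composer returns a small formatting function used to assemble MyScale WHERE-clause fragments. A behavioral sketch consistent with the names above (an illustration, not a verbatim copy of the module):

```python
from typing import Any, Callable

def _DEFAULT_COMPOSER(op_name: str) -> Callable:
    # infix style: _DEFAULT_COMPOSER("AND")("a", "b") -> "a AND b"
    def f(*args: Any) -> str:
        return f" {op_name} ".join(map(str, args))
    return f

def _FUNCTION_COMPOSER(op_name: str) -> Callable:
    # call style: _FUNCTION_COMPOSER("has")("tags", "'x'") -> "has(tags,'x')"
    def f(*args: Any) -> str:
        return f"{op_name}({','.join(map(str, args))})"
    return f
```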

@@ -11,7 +11,7 @@ from langchain.chains.query_constructor.ir import (
class PineconeTranslator(Visitor):
-"""Translate the internal query language elements to valid filters."""
+"""Translate `Pinecone` internal query language elements to valid filters."""
allowed_comparators = (
Comparator.EQ,

@@ -16,7 +16,7 @@ if TYPE_CHECKING:
class QdrantTranslator(Visitor):
-"""Translate the internal query language elements to valid filters."""
+"""Translate `Qdrant` internal query language elements to valid filters."""
allowed_comparators = (
Comparator.EQ,

@@ -11,7 +11,7 @@ from langchain.chains.query_constructor.ir import (
class WeaviateTranslator(Visitor):
-"""Translate the internal query language elements to valid filters."""
+"""Translate `Weaviate` internal query language elements to valid filters."""
allowed_operators = [Operator.AND, Operator.OR]
"""Subset of allowed logical operators."""

@@ -26,7 +26,7 @@ def create_index(contexts: List[str], embeddings: Embeddings) -> np.ndarray:
class SVMRetriever(BaseRetriever):
-"""SVM Retriever.
+"""`SVM` retriever.
Largely based on
https://github.com/karpathy/randomfun/blob/master/knn_vs_svm.ipynb

@@ -9,7 +9,7 @@ from langchain.schema import BaseRetriever, Document
class TFIDFRetriever(BaseRetriever):
-"""TF-IDF Retriever.
+"""`TF-IDF` retriever.
Largely based on
https://github.com/asvskartheek/Text-Retrieval/blob/master/TF-IDF%20Search%20Engine%20(SKLEARN).ipynb

@@ -9,7 +9,7 @@ from langchain.vectorstores.base import VectorStore
def _get_hours_passed(time: datetime.datetime, ref_time: datetime.datetime) -> float:
-"""Get the hours passed between two datetime objects."""
+"""Get the hours passed between two datetimes."""
return (time - ref_time).total_seconds() / 3600

@@ -11,7 +11,7 @@ if TYPE_CHECKING:
class VespaRetriever(BaseRetriever):
-"""Retriever that uses Vespa."""
+"""`Vespa` retriever."""
app: Vespa
"""Vespa application to query."""

@@ -10,7 +10,11 @@ from langchain.schema import BaseRetriever
class WeaviateHybridSearchRetriever(BaseRetriever):
-"""Retriever for the Weaviate's hybrid search."""
+"""`Weaviate hybrid search` retriever.
+See the documentation:
+https://weaviate.io/blog/hybrid-search-explained
+"""
client: Any
"""keyword arguments to pass to the Weaviate client."""

@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
class SearchQueries(BaseModel):
-"""Search queries to run to research for the user's goal."""
+"""Search queries to research for the user's goal."""
queries: List[str] = Field(
..., description="List of search queries to look up on Google"
@@ -66,7 +66,7 @@ class QuestionListOutputParser(PydanticOutputParser):
class WebResearchRetriever(BaseRetriever):
-"""Retriever for web research based on the Google Search API."""
+"""`Google Search API` retriever."""
# Inputs
vectorstore: VectorStore = Field(

@@ -6,7 +6,7 @@ from langchain.utilities.wikipedia import WikipediaAPIWrapper
class WikipediaRetriever(BaseRetriever, WikipediaAPIWrapper):
-"""Retriever for Wikipedia API.
+"""`Wikipedia API` retriever.
It wraps load() to get_relevant_documents().
It uses all WikipediaAPIWrapper arguments without any change.

@@ -14,7 +14,7 @@ if TYPE_CHECKING:
class ZepRetriever(BaseRetriever):
-"""Retriever for the Zep long-term memory store.
+"""`Zep` long-term memory store retriever.
Search your user's long-term chat history with Zep.

@@ -11,7 +11,7 @@ from langchain.vectorstores.zilliz import Zilliz
class ZillizRetriever(BaseRetriever):
-"""Retriever for the Zilliz API."""
+"""`Zilliz API` retriever."""
embedding_function: Embeddings
"""The underlying embedding function from which documents will be retrieved."""
