docstrings for embeddings (#7973)

Added/updated docstrings for the `embeddings` module. @baskaryan
2024-11-06 03:20:49 +00:00 · 2023-07-20 06:26:44 -07:00 · 2023-07-20 06:26:44 -07:00 · 24b26a922a
commit 24b26a922a
parent 0613ed5b95
30 changed files with 54 additions and 66 deletions
--- a/langchain/embeddings/aleph_alpha.py
+++ b/langchain/embeddings/aleph_alpha.py
@ -7,8 +7,8 @@ from langchain.utils import get_from_dict_or_env


 class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
-    """
-    Wrapper for Aleph Alpha's Asymmetric Embeddings
+    """Aleph Alpha's asymmetric semantic embedding.
+
    AA provides you with an endpoint to embed a document and a query.
    The models were optimized to make the embeddings of documents and
    the query for a document as similar as possible.
@ -30,7 +30,7 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
    """

    client: Any  #: :meta private:
-
+    """Aleph Alpha client."""
    model: Optional[str] = "luminous-base"
    """Model name to use."""
    hosting: Optional[str] = "https://api.aleph-alpha.com"
--- a/langchain/embeddings/base.py
+++ b/langchain/embeddings/base.py
@ -1,4 +1,3 @@
-"""Interface for embedding models."""
 from abc import ABC, abstractmethod
 from typing import List

@ -15,9 +14,9 @@ class Embeddings(ABC):
        """Embed query text."""

    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
-        """Embed search docs."""
+        """Asynchronously embed search docs."""
        raise NotImplementedError

    async def aembed_query(self, text: str) -> List[float]:
-        """Embed query text."""
+        """Asynchronously embed query text."""
        raise NotImplementedError
--- a/langchain/embeddings/bedrock.py
+++ b/langchain/embeddings/bedrock.py
@ -8,7 +8,7 @@ from langchain.embeddings.base import Embeddings


 class BedrockEmbeddings(BaseModel, Embeddings):
-    """Embeddings provider to invoke Bedrock embedding models.
+    """Bedrock embedding models.

    To authenticate, the AWS client uses the following methods to
    automatically load credentials:
@ -39,7 +39,7 @@ class BedrockEmbeddings(BaseModel, Embeddings):
    """

    client: Any  #: :meta private:
-
+    """Bedrock client."""
    region_name: Optional[str] = None
    """The aws region e.g., `us-west-2`. Fallsback to AWS_DEFAULT_REGION env variable
    or region specified in ~/.aws/config in case it is not provided here.
--- a/langchain/embeddings/clarifai.py
+++ b/langchain/embeddings/clarifai.py
@ -1,4 +1,3 @@
-"""Wrapper around Clarifai embedding models."""
 import logging
 from typing import Any, Dict, List, Optional

@ -11,7 +10,7 @@ logger = logging.getLogger(__name__)


 class ClarifaiEmbeddings(BaseModel, Embeddings):
-    """Wrapper around Clarifai embedding models.
+    """Clarifai embedding models.

    To use, you should have the ``clarifai`` python package installed, and the
    environment variable ``CLARIFAI_PAT`` set with your personal access token or pass it
@ -27,22 +26,19 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
    """

    stub: Any  #: :meta private:
+    """Clarifai stub."""
    userDataObject: Any
-
+    """Clarifai user data object."""
    model_id: Optional[str] = None
    """Model id to use."""
-
    model_version_id: Optional[str] = None
    """Model version id to use."""
-
    app_id: Optional[str] = None
    """Clarifai application id to use."""
-
    user_id: Optional[str] = None
    """Clarifai user id to use."""
-
    pat: Optional[str] = None
-
+    """Clarifai personal access token to use."""
    api_base: str = "https://api.clarifai.com"

    class Config:
--- a/langchain/embeddings/cohere.py
+++ b/langchain/embeddings/cohere.py
@ -1,4 +1,3 @@
-"""Wrapper around Cohere embedding models."""
 from typing import Any, Dict, List, Optional

 from pydantic import BaseModel, Extra, root_validator
@ -8,7 +7,7 @@ from langchain.utils import get_from_dict_or_env


 class CohereEmbeddings(BaseModel, Embeddings):
-    """Wrapper around Cohere embedding models.
+    """Cohere embedding models.

    To use, you should have the ``cohere`` python package installed, and the
    environment variable ``COHERE_API_KEY`` set with your API key or pass it
@ -24,6 +23,7 @@ class CohereEmbeddings(BaseModel, Embeddings):
    """

    client: Any  #: :meta private:
+    """Cohere client."""
    model: str = "embed-english-v2.0"
    """Model name to use."""

--- a/langchain/embeddings/dashscope.py
+++ b/langchain/embeddings/dashscope.py
@ -1,4 +1,3 @@
-"""Wrapper around DashScope embedding models."""
 from __future__ import annotations

 import logging
@ -65,7 +64,7 @@ def embed_with_retry(embeddings: DashScopeEmbeddings, **kwargs: Any) -> Any:


 class DashScopeEmbeddings(BaseModel, Embeddings):
-    """Wrapper around DashScope embedding models.
+    """DashScope embedding models.

    To use, you should have the ``dashscope`` python package installed, and the
    environment variable ``DASHSCOPE_API_KEY`` set with your API key or pass it
@ -93,10 +92,11 @@ class DashScopeEmbeddings(BaseModel, Embeddings):
    """

    client: Any  #: :meta private:
+    """The DashScope client."""
    model: str = "text-embedding-v1"
    dashscope_api_key: Optional[str] = None
-    """Maximum number of retries to make when generating."""
    max_retries: int = 5
+    """Maximum number of retries to make when generating."""

    class Config:
        """Configuration for this pydantic object."""
--- a/langchain/embeddings/deepinfra.py
+++ b/langchain/embeddings/deepinfra.py
@ -10,7 +10,7 @@ DEFAULT_MODEL_ID = "sentence-transformers/clip-ViT-B-32"


 class DeepInfraEmbeddings(BaseModel, Embeddings):
-    """Wrapper around Deep Infra's embedding inference service.
+    """Deep Infra's embedding inference service.

    To use, you should have the
    environment variable ``DEEPINFRA_API_TOKEN`` set with your API token, or pass
--- a/langchain/embeddings/elasticsearch.py
+++ b/langchain/embeddings/elasticsearch.py
@ -12,8 +12,7 @@ from langchain.embeddings.base import Embeddings


 class ElasticsearchEmbeddings(Embeddings):
-    """
-    Wrapper around Elasticsearch embedding models.
+    """Elasticsearch embedding models.

    This class provides an interface to generate embeddings using a model deployed
    in an Elasticsearch cluster. It requires an Elasticsearch connection object
--- a/langchain/embeddings/embaas.py
+++ b/langchain/embeddings/embaas.py
@ -1,4 +1,3 @@
-"""Wrapper around embaas embeddings API."""
 from typing import Any, Dict, List, Mapping, Optional

 import requests
@ -22,7 +21,7 @@ class EmbaasEmbeddingsPayload(TypedDict):


 class EmbaasEmbeddings(BaseModel, Embeddings):
-    """Wrapper around embaas's embedding service.
+    """Embaas's embedding service.

    To use, you should have the
    environment variable ``EMBAAS_API_KEY`` set with your API key, or pass
--- a/langchain/embeddings/fake.py
+++ b/langchain/embeddings/fake.py
@ -7,7 +7,10 @@ from langchain.embeddings.base import Embeddings


 class FakeEmbeddings(Embeddings, BaseModel):
+    """Fake embedding model."""
+
    size: int
+    """The size of the embedding vector."""

    def _get_embedding(self) -> List[float]:
        return list(np.random.normal(size=self.size))
--- a/langchain/embeddings/google_palm.py
+++ b/langchain/embeddings/google_palm.py
@ -1,4 +1,3 @@
-"""Wrapper around Google's PaLM Embeddings APIs."""
 from __future__ import annotations

 import logging
@ -55,6 +54,8 @@ def embed_with_retry(


 class GooglePalmEmbeddings(BaseModel, Embeddings):
+    """Google's PaLM Embeddings APIs."""
+
    client: Any
    google_api_key: Optional[str]
    model_name: str = "models/embedding-gecko-001"
--- a/langchain/embeddings/gpt4all.py
+++ b/langchain/embeddings/gpt4all.py
@ -1,4 +1,3 @@
-"""Wrapper around GPT4All embedding models."""
 from typing import Any, Dict, List

 from pydantic import BaseModel, root_validator
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings


 class GPT4AllEmbeddings(BaseModel, Embeddings):
-    """Wrapper around GPT4All embedding models.
+    """GPT4All embedding models.

    To use, you should have the gpt4all python package installed

@ -30,7 +29,7 @@ class GPT4AllEmbeddings(BaseModel, Embeddings):

            values["client"] = Embed4All()
        except ImportError:
-            raise ModuleNotFoundError(
+            raise ImportError(
                "Could not import gpt4all library. "
                "Please install the gpt4all library to "
                "use this embedding model: pip install gpt4all"
--- a/langchain/embeddings/huggingface.py
+++ b/langchain/embeddings/huggingface.py
@ -1,4 +1,3 @@
-"""Wrapper around HuggingFace embedding models."""
 from typing import Any, Dict, List, Optional

 from pydantic import BaseModel, Extra, Field
@ -14,7 +13,7 @@ DEFAULT_QUERY_INSTRUCTION = (


 class HuggingFaceEmbeddings(BaseModel, Embeddings):
-    """Wrapper around sentence_transformers embedding models.
+    """HuggingFace sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers`` python package installed.

--- a/langchain/embeddings/huggingface_hub.py
+++ b/langchain/embeddings/huggingface_hub.py
@ -1,4 +1,3 @@
-"""Wrapper around HuggingFace Hub embedding models."""
 from typing import Any, Dict, List, Optional

 from pydantic import BaseModel, Extra, root_validator
@ -11,7 +10,7 @@ VALID_TASKS = ("feature-extraction",)


 class HuggingFaceHubEmbeddings(BaseModel, Embeddings):
-    """Wrapper around HuggingFaceHub embedding models.
+    """HuggingFaceHub embedding models.

    To use, you should have the ``huggingface_hub`` python package installed, and the
    environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass
@ -71,7 +70,7 @@ class HuggingFaceHubEmbeddings(BaseModel, Embeddings):
                )
            values["client"] = client
        except ImportError:
-            raise ValueError(
+            raise ImportError(
                "Could not import huggingface_hub python package. "
                "Please install it with `pip install huggingface_hub`."
            )
--- a/langchain/embeddings/jina.py
+++ b/langchain/embeddings/jina.py
@ -1,5 +1,3 @@
-"""Wrapper around Jina embedding models."""
-
 import os
 from typing import Any, Dict, List, Optional

@ -11,6 +9,8 @@ from langchain.utils import get_from_dict_or_env


 class JinaEmbeddings(BaseModel, Embeddings):
+    """Jina embedding models."""
+
    client: Any  #: :meta private:

    model_name: str = "ViT-B-32::openai"
--- a/langchain/embeddings/llamacpp.py
+++ b/langchain/embeddings/llamacpp.py
@ -1,4 +1,3 @@
-"""Wrapper around llama.cpp embedding models."""
 from typing import Any, Dict, List, Optional

 from pydantic import BaseModel, Extra, Field, root_validator
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings


 class LlamaCppEmbeddings(BaseModel, Embeddings):
-    """Wrapper around llama.cpp embedding models.
+    """llama.cpp embedding models.

    To use, you should have the llama-cpp-python library installed, and provide the
    path to the Llama model as a named parameter to the constructor.
--- a/langchain/embeddings/minimax.py
+++ b/langchain/embeddings/minimax.py
@ -1,4 +1,3 @@
-"""Wrapper around MiniMax APIs."""
 from __future__ import annotations

 import logging
@ -47,7 +46,7 @@ def embed_with_retry(embeddings: MiniMaxEmbeddings, *args: Any, **kwargs: Any) -


 class MiniMaxEmbeddings(BaseModel, Embeddings):
-    """Wrapper around MiniMax's embedding inference service.
+    """MiniMax's embedding service.

    To use, you should have the environment variable ``MINIMAX_GROUP_ID`` and
    ``MINIMAX_API_KEY`` set with your API token, or pass it as a named parameter to
--- a/langchain/embeddings/mlflow_gateway.py
+++ b/langchain/embeddings/mlflow_gateway.py
@ -13,8 +13,12 @@ def _chunk(texts: List[str], size: int) -> Iterator[List[str]]:


 class MlflowAIGatewayEmbeddings(Embeddings, BaseModel):
+    """MLflow AI Gateway Embeddings APIs."""
+
    route: str
+    """The route to use for the MLflow AI Gateway API."""
    gateway_uri: Optional[str] = None
+    """The URI for the MLflow AI Gateway API."""

    def __init__(self, **kwargs: Any):
        try:
--- a/langchain/embeddings/modelscope_hub.py
+++ b/langchain/embeddings/modelscope_hub.py
@ -1,4 +1,3 @@
-"""Wrapper around ModelScopeHub embedding models."""
 from typing import Any, List

 from pydantic import BaseModel, Extra
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings


 class ModelScopeEmbeddings(BaseModel, Embeddings):
-    """Wrapper around modelscope_hub embedding models.
+    """ModelScopeHub embedding models.

    To use, you should have the ``modelscope`` python package installed.

--- a/langchain/embeddings/mosaicml.py
+++ b/langchain/embeddings/mosaicml.py
@ -1,4 +1,3 @@
-"""Wrapper around MosaicML APIs."""
 from typing import Any, Dict, List, Mapping, Optional, Tuple

 import requests
@ -9,7 +8,7 @@ from langchain.utils import get_from_dict_or_env


 class MosaicMLInstructorEmbeddings(BaseModel, Embeddings):
-    """Wrapper around MosaicML's embedding inference service.
+    """MosaicML embedding service.

    To use, you should have the
    environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass
--- a/langchain/embeddings/nlpcloud.py
+++ b/langchain/embeddings/nlpcloud.py
@ -1,4 +1,3 @@
-"""Wrapper around NLP Cloud embedding models."""
 from typing import Any, Dict, List

 from pydantic import BaseModel, root_validator
@ -8,7 +7,7 @@ from langchain.utils import get_from_dict_or_env


 class NLPCloudEmbeddings(BaseModel, Embeddings):
-    """Wrapper around NLP Cloud embedding models.
+    """NLP Cloud embedding models.

    To use, you should have the nlpcloud python package installed

--- a/langchain/embeddings/octoai_embeddings.py
+++ b/langchain/embeddings/octoai_embeddings.py
@ -1,5 +1,3 @@
-"""Module providing a wrapper around OctoAI Compute Service embedding models."""
-
 from typing import Any, Dict, List, Mapping, Optional

 from pydantic import BaseModel, Extra, Field, root_validator
@ -12,7 +10,7 @@ DEFAULT_QUERY_INSTRUCTION = "Represent the question for retrieving similar docum


 class OctoAIEmbeddings(BaseModel, Embeddings):
-    """Wrapper around OctoAI Compute Service embedding models.
+    """OctoAI Compute Service embedding models.

    The environment variable ``OCTOAI_API_TOKEN`` should be set
    with your API token, or it can be passed
--- a/langchain/embeddings/openai.py
+++ b/langchain/embeddings/openai.py
@ -1,4 +1,3 @@
-"""Wrapper around OpenAI embedding models."""
 from __future__ import annotations

 import logging
@ -120,7 +119,7 @@ async def async_embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) ->


 class OpenAIEmbeddings(BaseModel, Embeddings):
-    """Wrapper around OpenAI embedding models.
+    """OpenAI embedding models.

    To use, you should have the ``openai`` python package installed, and the
    environment variable ``OPENAI_API_KEY`` set with your API key or pass it
@ -171,6 +170,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
    # to support explicit proxy for OpenAI
    openai_proxy: Optional[str] = None
    embedding_ctx_length: int = 8191
+    """The maximum number of tokens to embed at once."""
    openai_api_key: Optional[str] = None
    openai_organization: Optional[str] = None
    allowed_special: Union[Literal["all"], Set[str]] = set()
--- a/langchain/embeddings/sagemaker_endpoint.py
+++ b/langchain/embeddings/sagemaker_endpoint.py
@ -1,4 +1,3 @@
-"""Wrapper around Sagemaker InvokeEndpoint API."""
 from typing import Any, Dict, List, Optional

 from pydantic import BaseModel, Extra, root_validator
@ -12,7 +11,7 @@ class EmbeddingsContentHandler(ContentHandlerBase[List[str], List[List[float]]])


 class SagemakerEndpointEmbeddings(BaseModel, Embeddings):
-    """Wrapper around custom Sagemaker Inference Endpoints.
+    """Custom Sagemaker Inference Endpoints.

    To use, you must supply the endpoint name from your deployed
    Sagemaker model & the region where it is deployed.
@ -133,7 +132,7 @@ class SagemakerEndpointEmbeddings(BaseModel, Embeddings):
                ) from e

        except ImportError:
-            raise ValueError(
+            raise ImportError(
                "Could not import boto3 python package. "
                "Please install it with `pip install boto3`."
            )
--- a/langchain/embeddings/self_hosted.py
+++ b/langchain/embeddings/self_hosted.py
@ -1,4 +1,3 @@
-"""Running custom embedding models on self-hosted remote hardware."""
 from typing import Any, Callable, List

 from pydantic import Extra
@ -17,7 +16,7 @@ def _embed_documents(pipeline: Any, *args: Any, **kwargs: Any) -> List[List[floa


 class SelfHostedEmbeddings(SelfHostedPipeline, Embeddings):
-    """Runs custom embedding models on self-hosted remote hardware.
+    """Custom embedding models on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified
--- a/langchain/embeddings/self_hosted_hugging_face.py
+++ b/langchain/embeddings/self_hosted_hugging_face.py
@ -1,4 +1,3 @@
-"""Wrapper around HuggingFace embedding models for self-hosted remote hardware."""
 import importlib
 import logging
 from typing import Any, Callable, List, Optional
@ -58,7 +57,7 @@ def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0)


 class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings):
-    """Runs sentence_transformers embedding models on self-hosted remote hardware.
+    """HuggingFace embedding models on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified
@ -101,7 +100,7 @@ class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings):


 class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings):
-    """Runs InstructorEmbedding embedding models on self-hosted remote hardware.
+    """HuggingFace InstructEmbedding models on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified
--- a/langchain/embeddings/sentence_transformer.py
+++ b/langchain/embeddings/sentence_transformer.py
@ -1,4 +1,4 @@
-"""Wrapper around sentence transformer embedding models."""
+"""HuggingFace sentence_transformer embedding models."""
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings

 SentenceTransformerEmbeddings = HuggingFaceEmbeddings
--- a/langchain/embeddings/spacy_embeddings.py
+++ b/langchain/embeddings/spacy_embeddings.py
@ -7,8 +7,8 @@ from langchain.embeddings.base import Embeddings


 class SpacyEmbeddings(BaseModel, Embeddings):
-    """
-    SpacyEmbeddings is a class for generating embeddings using the Spacy library.
+    """Embeddings by SpaCy models.
+
    It only supports the 'en_core_web_sm' model.

    Attributes:
--- a/langchain/embeddings/tensorflow_hub.py
+++ b/langchain/embeddings/tensorflow_hub.py
@ -1,4 +1,3 @@
-"""Wrapper around TensorflowHub embedding models."""
 from typing import Any, List

 from pydantic import BaseModel, Extra
@ -9,7 +8,7 @@ DEFAULT_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multili


 class TensorflowHubEmbeddings(BaseModel, Embeddings):
-    """Wrapper around tensorflow_hub embedding models.
+    """TensorflowHub embedding models.

    To use, you should have the ``tensorflow_text`` python package installed.

--- a/langchain/embeddings/vertexai.py
+++ b/langchain/embeddings/vertexai.py
@ -1,4 +1,3 @@
-"""Wrapper around Google VertexAI embedding models."""
 from typing import Dict, List

 from pydantic import root_validator
@ -9,6 +8,8 @@ from langchain.utilities.vertexai import raise_vertex_import_error


 class VertexAIEmbeddings(_VertexAICommon, Embeddings):
+    """Google Cloud VertexAI embedding models."""
+
    model_name: str = "textembedding-gecko"

    @root_validator()