docstrings for embeddings (#7973)

Added/updated docstrings for the `embeddings` module.

@baskaryan
This commit is contained in:
Leonid Ganeline 2023-07-20 06:26:44 -07:00 committed by GitHub
parent 0613ed5b95
commit 24b26a922a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 54 additions and 66 deletions

View File

@ -7,8 +7,8 @@ from langchain.utils import get_from_dict_or_env
class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
"""
Wrapper for Aleph Alpha's Asymmetric Embeddings
"""Aleph Alpha's asymmetric semantic embedding.
AA provides you with an endpoint to embed a document and a query.
The models were optimized to make the embeddings of documents and
the query for a document as similar as possible.
@ -30,7 +30,7 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
"""
client: Any #: :meta private:
"""Aleph Alpha client."""
model: Optional[str] = "luminous-base"
"""Model name to use."""
hosting: Optional[str] = "https://api.aleph-alpha.com"

View File

@ -1,4 +1,3 @@
"""Interface for embedding models."""
from abc import ABC, abstractmethod
from typing import List
@ -15,9 +14,9 @@ class Embeddings(ABC):
"""Embed query text."""
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
"""Embed search docs."""
"""Asynchronously embed search docs."""
raise NotImplementedError
async def aembed_query(self, text: str) -> List[float]:
"""Embed query text."""
"""Asynchronously embed query text."""
raise NotImplementedError

View File

@ -8,7 +8,7 @@ from langchain.embeddings.base import Embeddings
class BedrockEmbeddings(BaseModel, Embeddings):
"""Embeddings provider to invoke Bedrock embedding models.
"""Bedrock embedding models.
To authenticate, the AWS client uses the following methods to
automatically load credentials:
@ -39,7 +39,7 @@ class BedrockEmbeddings(BaseModel, Embeddings):
"""
client: Any #: :meta private:
"""Bedrock client."""
region_name: Optional[str] = None
"""The aws region e.g., `us-west-2`. Falls back to AWS_DEFAULT_REGION env variable
or region specified in ~/.aws/config in case it is not provided here.

View File

@ -1,4 +1,3 @@
"""Wrapper around Clarifai embedding models."""
import logging
from typing import Any, Dict, List, Optional
@ -11,7 +10,7 @@ logger = logging.getLogger(__name__)
class ClarifaiEmbeddings(BaseModel, Embeddings):
"""Wrapper around Clarifai embedding models.
"""Clarifai embedding models.
To use, you should have the ``clarifai`` python package installed, and the
environment variable ``CLARIFAI_PAT`` set with your personal access token or pass it
@ -27,22 +26,19 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
"""
stub: Any #: :meta private:
"""Clarifai stub."""
userDataObject: Any
"""Clarifai user data object."""
model_id: Optional[str] = None
"""Model id to use."""
model_version_id: Optional[str] = None
"""Model version id to use."""
app_id: Optional[str] = None
"""Clarifai application id to use."""
user_id: Optional[str] = None
"""Clarifai user id to use."""
pat: Optional[str] = None
"""Clarifai personal access token to use."""
api_base: str = "https://api.clarifai.com"
class Config:

View File

@ -1,4 +1,3 @@
"""Wrapper around Cohere embedding models."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
@ -8,7 +7,7 @@ from langchain.utils import get_from_dict_or_env
class CohereEmbeddings(BaseModel, Embeddings):
"""Wrapper around Cohere embedding models.
"""Cohere embedding models.
To use, you should have the ``cohere`` python package installed, and the
environment variable ``COHERE_API_KEY`` set with your API key or pass it
@ -24,6 +23,7 @@ class CohereEmbeddings(BaseModel, Embeddings):
"""
client: Any #: :meta private:
"""Cohere client."""
model: str = "embed-english-v2.0"
"""Model name to use."""

View File

@ -1,4 +1,3 @@
"""Wrapper around DashScope embedding models."""
from __future__ import annotations
import logging
@ -65,7 +64,7 @@ def embed_with_retry(embeddings: DashScopeEmbeddings, **kwargs: Any) -> Any:
class DashScopeEmbeddings(BaseModel, Embeddings):
"""Wrapper around DashScope embedding models.
"""DashScope embedding models.
To use, you should have the ``dashscope`` python package installed, and the
environment variable ``DASHSCOPE_API_KEY`` set with your API key or pass it
@ -93,10 +92,11 @@ class DashScopeEmbeddings(BaseModel, Embeddings):
"""
client: Any #: :meta private:
"""The DashScope client."""
model: str = "text-embedding-v1"
dashscope_api_key: Optional[str] = None
"""Maximum number of retries to make when generating."""
max_retries: int = 5
"""Maximum number of retries to make when generating."""
class Config:
"""Configuration for this pydantic object."""

View File

@ -10,7 +10,7 @@ DEFAULT_MODEL_ID = "sentence-transformers/clip-ViT-B-32"
class DeepInfraEmbeddings(BaseModel, Embeddings):
"""Wrapper around Deep Infra's embedding inference service.
"""Deep Infra's embedding inference service.
To use, you should have the
environment variable ``DEEPINFRA_API_TOKEN`` set with your API token, or pass

View File

@ -12,8 +12,7 @@ from langchain.embeddings.base import Embeddings
class ElasticsearchEmbeddings(Embeddings):
"""
Wrapper around Elasticsearch embedding models.
"""Elasticsearch embedding models.
This class provides an interface to generate embeddings using a model deployed
in an Elasticsearch cluster. It requires an Elasticsearch connection object

View File

@ -1,4 +1,3 @@
"""Wrapper around embaas embeddings API."""
from typing import Any, Dict, List, Mapping, Optional
import requests
@ -22,7 +21,7 @@ class EmbaasEmbeddingsPayload(TypedDict):
class EmbaasEmbeddings(BaseModel, Embeddings):
"""Wrapper around embaas's embedding service.
"""Embaas's embedding service.
To use, you should have the
environment variable ``EMBAAS_API_KEY`` set with your API key, or pass

View File

@ -7,7 +7,10 @@ from langchain.embeddings.base import Embeddings
class FakeEmbeddings(Embeddings, BaseModel):
"""Fake embedding model."""
size: int
"""The size of the embedding vector."""
def _get_embedding(self) -> List[float]:
return list(np.random.normal(size=self.size))

View File

@ -1,4 +1,3 @@
"""Wrapper around Google's PaLM Embeddings APIs."""
from __future__ import annotations
import logging
@ -55,6 +54,8 @@ def embed_with_retry(
class GooglePalmEmbeddings(BaseModel, Embeddings):
"""Google's PaLM Embeddings APIs."""
client: Any
google_api_key: Optional[str]
model_name: str = "models/embedding-gecko-001"

View File

@ -1,4 +1,3 @@
"""Wrapper around GPT4All embedding models."""
from typing import Any, Dict, List
from pydantic import BaseModel, root_validator
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings
class GPT4AllEmbeddings(BaseModel, Embeddings):
"""Wrapper around GPT4All embedding models.
"""GPT4All embedding models.
To use, you should have the gpt4all python package installed
@ -30,7 +29,7 @@ class GPT4AllEmbeddings(BaseModel, Embeddings):
values["client"] = Embed4All()
except ImportError:
raise ModuleNotFoundError(
raise ImportError(
"Could not import gpt4all library. "
"Please install the gpt4all library to "
"use this embedding model: pip install gpt4all"

View File

@ -1,4 +1,3 @@
"""Wrapper around HuggingFace embedding models."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, Field
@ -14,7 +13,7 @@ DEFAULT_QUERY_INSTRUCTION = (
class HuggingFaceEmbeddings(BaseModel, Embeddings):
"""Wrapper around sentence_transformers embedding models.
"""HuggingFace sentence_transformers embedding models.
To use, you should have the ``sentence_transformers`` python package installed.

View File

@ -1,4 +1,3 @@
"""Wrapper around HuggingFace Hub embedding models."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
@ -11,7 +10,7 @@ VALID_TASKS = ("feature-extraction",)
class HuggingFaceHubEmbeddings(BaseModel, Embeddings):
"""Wrapper around HuggingFaceHub embedding models.
"""HuggingFaceHub embedding models.
To use, you should have the ``huggingface_hub`` python package installed, and the
environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass
@ -71,7 +70,7 @@ class HuggingFaceHubEmbeddings(BaseModel, Embeddings):
)
values["client"] = client
except ImportError:
raise ValueError(
raise ImportError(
"Could not import huggingface_hub python package. "
"Please install it with `pip install huggingface_hub`."
)

View File

@ -1,5 +1,3 @@
"""Wrapper around Jina embedding models."""
import os
from typing import Any, Dict, List, Optional
@ -11,6 +9,8 @@ from langchain.utils import get_from_dict_or_env
class JinaEmbeddings(BaseModel, Embeddings):
"""Jina embedding models."""
client: Any #: :meta private:
model_name: str = "ViT-B-32::openai"

View File

@ -1,4 +1,3 @@
"""Wrapper around llama.cpp embedding models."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, Field, root_validator
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings
class LlamaCppEmbeddings(BaseModel, Embeddings):
"""Wrapper around llama.cpp embedding models.
"""llama.cpp embedding models.
To use, you should have the llama-cpp-python library installed, and provide the
path to the Llama model as a named parameter to the constructor.

View File

@ -1,4 +1,3 @@
"""Wrapper around MiniMax APIs."""
from __future__ import annotations
import logging
@ -47,7 +46,7 @@ def embed_with_retry(embeddings: MiniMaxEmbeddings, *args: Any, **kwargs: Any) -
class MiniMaxEmbeddings(BaseModel, Embeddings):
"""Wrapper around MiniMax's embedding inference service.
"""MiniMax's embedding service.
To use, you should have the environment variable ``MINIMAX_GROUP_ID`` and
``MINIMAX_API_KEY`` set with your API token, or pass it as a named parameter to

View File

@ -13,8 +13,12 @@ def _chunk(texts: List[str], size: int) -> Iterator[List[str]]:
class MlflowAIGatewayEmbeddings(Embeddings, BaseModel):
"""MLflow AI Gateway Embeddings APIs."""
route: str
"""The route to use for the MLflow AI Gateway API."""
gateway_uri: Optional[str] = None
"""The URI for the MLflow AI Gateway API."""
def __init__(self, **kwargs: Any):
try:

View File

@ -1,4 +1,3 @@
"""Wrapper around ModelScopeHub embedding models."""
from typing import Any, List
from pydantic import BaseModel, Extra
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings
class ModelScopeEmbeddings(BaseModel, Embeddings):
"""Wrapper around modelscope_hub embedding models.
"""ModelScopeHub embedding models.
To use, you should have the ``modelscope`` python package installed.

View File

@ -1,4 +1,3 @@
"""Wrapper around MosaicML APIs."""
from typing import Any, Dict, List, Mapping, Optional, Tuple
import requests
@ -9,7 +8,7 @@ from langchain.utils import get_from_dict_or_env
class MosaicMLInstructorEmbeddings(BaseModel, Embeddings):
"""Wrapper around MosaicML's embedding inference service.
"""MosaicML embedding service.
To use, you should have the
environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass

View File

@ -1,4 +1,3 @@
"""Wrapper around NLP Cloud embedding models."""
from typing import Any, Dict, List
from pydantic import BaseModel, root_validator
@ -8,7 +7,7 @@ from langchain.utils import get_from_dict_or_env
class NLPCloudEmbeddings(BaseModel, Embeddings):
"""Wrapper around NLP Cloud embedding models.
"""NLP Cloud embedding models.
To use, you should have the nlpcloud python package installed

View File

@ -1,5 +1,3 @@
"""Module providing a wrapper around OctoAI Compute Service embedding models."""
from typing import Any, Dict, List, Mapping, Optional
from pydantic import BaseModel, Extra, Field, root_validator
@ -12,7 +10,7 @@ DEFAULT_QUERY_INSTRUCTION = "Represent the question for retrieving similar docum
class OctoAIEmbeddings(BaseModel, Embeddings):
"""Wrapper around OctoAI Compute Service embedding models.
"""OctoAI Compute Service embedding models.
The environment variable ``OCTOAI_API_TOKEN`` should be set
with your API token, or it can be passed

View File

@ -1,4 +1,3 @@
"""Wrapper around OpenAI embedding models."""
from __future__ import annotations
import logging
@ -120,7 +119,7 @@ async def async_embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) ->
class OpenAIEmbeddings(BaseModel, Embeddings):
"""Wrapper around OpenAI embedding models.
"""OpenAI embedding models.
To use, you should have the ``openai`` python package installed, and the
environment variable ``OPENAI_API_KEY`` set with your API key or pass it
@ -171,6 +170,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
# to support explicit proxy for OpenAI
openai_proxy: Optional[str] = None
embedding_ctx_length: int = 8191
"""The maximum number of tokens to embed at once."""
openai_api_key: Optional[str] = None
openai_organization: Optional[str] = None
allowed_special: Union[Literal["all"], Set[str]] = set()

View File

@ -1,4 +1,3 @@
"""Wrapper around Sagemaker InvokeEndpoint API."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
@ -12,7 +11,7 @@ class EmbeddingsContentHandler(ContentHandlerBase[List[str], List[List[float]]])
class SagemakerEndpointEmbeddings(BaseModel, Embeddings):
"""Wrapper around custom Sagemaker Inference Endpoints.
"""Custom Sagemaker Inference Endpoints.
To use, you must supply the endpoint name from your deployed
Sagemaker model & the region where it is deployed.
@ -133,7 +132,7 @@ class SagemakerEndpointEmbeddings(BaseModel, Embeddings):
) from e
except ImportError:
raise ValueError(
raise ImportError(
"Could not import boto3 python package. "
"Please install it with `pip install boto3`."
)

View File

@ -1,4 +1,3 @@
"""Running custom embedding models on self-hosted remote hardware."""
from typing import Any, Callable, List
from pydantic import Extra
@ -17,7 +16,7 @@ def _embed_documents(pipeline: Any, *args: Any, **kwargs: Any) -> List[List[floa
class SelfHostedEmbeddings(SelfHostedPipeline, Embeddings):
"""Runs custom embedding models on self-hosted remote hardware.
"""Custom embedding models on self-hosted remote hardware.
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
and Lambda, as well as servers specified

View File

@ -1,4 +1,3 @@
"""Wrapper around HuggingFace embedding models for self-hosted remote hardware."""
import importlib
import logging
from typing import Any, Callable, List, Optional
@ -58,7 +57,7 @@ def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0)
class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings):
"""Runs sentence_transformers embedding models on self-hosted remote hardware.
"""HuggingFace embedding models on self-hosted remote hardware.
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
and Lambda, as well as servers specified
@ -101,7 +100,7 @@ class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings):
class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings):
"""Runs InstructorEmbedding embedding models on self-hosted remote hardware.
"""HuggingFace InstructEmbedding models on self-hosted remote hardware.
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
and Lambda, as well as servers specified

View File

@ -1,4 +1,4 @@
"""Wrapper around sentence transformer embedding models."""
"""HuggingFace sentence_transformer embedding models."""
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
SentenceTransformerEmbeddings = HuggingFaceEmbeddings

View File

@ -7,8 +7,8 @@ from langchain.embeddings.base import Embeddings
class SpacyEmbeddings(BaseModel, Embeddings):
"""
SpacyEmbeddings is a class for generating embeddings using the Spacy library.
"""Embeddings by SpaCy models.
It only supports the 'en_core_web_sm' model.
Attributes:

View File

@ -1,4 +1,3 @@
"""Wrapper around TensorflowHub embedding models."""
from typing import Any, List
from pydantic import BaseModel, Extra
@ -9,7 +8,7 @@ DEFAULT_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multili
class TensorflowHubEmbeddings(BaseModel, Embeddings):
"""Wrapper around tensorflow_hub embedding models.
"""TensorflowHub embedding models.
To use, you should have the ``tensorflow_text`` python package installed.

View File

@ -1,4 +1,3 @@
"""Wrapper around Google VertexAI embedding models."""
from typing import Dict, List
from pydantic import root_validator
@ -9,6 +8,8 @@ from langchain.utilities.vertexai import raise_vertex_import_error
class VertexAIEmbeddings(_VertexAICommon, Embeddings):
"""Google Cloud VertexAI embedding models."""
model_name: str = "textembedding-gecko"
@root_validator()