mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
docstrings for embeddings
(#7973)
Added/updated docstrings for the `embeddings` module. @baskaryan
This commit is contained in:
parent
0613ed5b95
commit
24b26a922a
@ -7,8 +7,8 @@ from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
|
||||
"""
|
||||
Wrapper for Aleph Alpha's Asymmetric Embeddings
|
||||
"""Aleph Alpha's asymmetric semantic embedding.
|
||||
|
||||
AA provides you with an endpoint to embed a document and a query.
|
||||
The models were optimized to make the embeddings of documents and
|
||||
the query for a document as similar as possible.
|
||||
@ -30,7 +30,7 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
|
||||
"""
|
||||
|
||||
client: Any #: :meta private:
|
||||
|
||||
"""Aleph Alpha client."""
|
||||
model: Optional[str] = "luminous-base"
|
||||
"""Model name to use."""
|
||||
hosting: Optional[str] = "https://api.aleph-alpha.com"
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Interface for embedding models."""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
|
||||
@ -15,9 +14,9 @@ class Embeddings(ABC):
|
||||
"""Embed query text."""
|
||||
|
||||
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
|
||||
"""Embed search docs."""
|
||||
"""Asynchronously embed search docs."""
|
||||
raise NotImplementedError
|
||||
|
||||
async def aembed_query(self, text: str) -> List[float]:
|
||||
"""Embed query text."""
|
||||
"""Asynchronously embed query text."""
|
||||
raise NotImplementedError
|
||||
|
@ -8,7 +8,7 @@ from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class BedrockEmbeddings(BaseModel, Embeddings):
|
||||
"""Embeddings provider to invoke Bedrock embedding models.
|
||||
"""Bedrock embedding models.
|
||||
|
||||
To authenticate, the AWS client uses the following methods to
|
||||
automatically load credentials:
|
||||
@ -39,7 +39,7 @@ class BedrockEmbeddings(BaseModel, Embeddings):
|
||||
"""
|
||||
|
||||
client: Any #: :meta private:
|
||||
|
||||
"""Bedrock client."""
|
||||
region_name: Optional[str] = None
|
||||
"""The AWS region, e.g., `us-west-2`. Falls back to the AWS_DEFAULT_REGION env variable
|
||||
or region specified in ~/.aws/config in case it is not provided here.
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around Clarifai embedding models."""
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@ -11,7 +10,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ClarifaiEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around Clarifai embedding models.
|
||||
"""Clarifai embedding models.
|
||||
|
||||
To use, you should have the ``clarifai`` python package installed, and the
|
||||
environment variable ``CLARIFAI_PAT`` set with your personal access token or pass it
|
||||
@ -27,22 +26,19 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
|
||||
"""
|
||||
|
||||
stub: Any #: :meta private:
|
||||
"""Clarifai stub."""
|
||||
userDataObject: Any
|
||||
|
||||
"""Clarifai user data object."""
|
||||
model_id: Optional[str] = None
|
||||
"""Model id to use."""
|
||||
|
||||
model_version_id: Optional[str] = None
|
||||
"""Model version id to use."""
|
||||
|
||||
app_id: Optional[str] = None
|
||||
"""Clarifai application id to use."""
|
||||
|
||||
user_id: Optional[str] = None
|
||||
"""Clarifai user id to use."""
|
||||
|
||||
pat: Optional[str] = None
|
||||
|
||||
"""Clarifai personal access token to use."""
|
||||
api_base: str = "https://api.clarifai.com"
|
||||
|
||||
class Config:
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around Cohere embedding models."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
@ -8,7 +7,7 @@ from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class CohereEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around Cohere embedding models.
|
||||
"""Cohere embedding models.
|
||||
|
||||
To use, you should have the ``cohere`` python package installed, and the
|
||||
environment variable ``COHERE_API_KEY`` set with your API key or pass it
|
||||
@ -24,6 +23,7 @@ class CohereEmbeddings(BaseModel, Embeddings):
|
||||
"""
|
||||
|
||||
client: Any #: :meta private:
|
||||
"""Cohere client."""
|
||||
model: str = "embed-english-v2.0"
|
||||
"""Model name to use."""
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around DashScope embedding models."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
@ -65,7 +64,7 @@ def embed_with_retry(embeddings: DashScopeEmbeddings, **kwargs: Any) -> Any:
|
||||
|
||||
|
||||
class DashScopeEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around DashScope embedding models.
|
||||
"""DashScope embedding models.
|
||||
|
||||
To use, you should have the ``dashscope`` python package installed, and the
|
||||
environment variable ``DASHSCOPE_API_KEY`` set with your API key or pass it
|
||||
@ -93,10 +92,11 @@ class DashScopeEmbeddings(BaseModel, Embeddings):
|
||||
"""
|
||||
|
||||
client: Any #: :meta private:
|
||||
"""The DashScope client."""
|
||||
model: str = "text-embedding-v1"
|
||||
dashscope_api_key: Optional[str] = None
|
||||
"""Maximum number of retries to make when generating."""
|
||||
max_retries: int = 5
|
||||
"""Maximum number of retries to make when generating."""
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
@ -10,7 +10,7 @@ DEFAULT_MODEL_ID = "sentence-transformers/clip-ViT-B-32"
|
||||
|
||||
|
||||
class DeepInfraEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around Deep Infra's embedding inference service.
|
||||
"""Deep Infra's embedding inference service.
|
||||
|
||||
To use, you should have the
|
||||
environment variable ``DEEPINFRA_API_TOKEN`` set with your API token, or pass
|
||||
|
@ -12,8 +12,7 @@ from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class ElasticsearchEmbeddings(Embeddings):
|
||||
"""
|
||||
Wrapper around Elasticsearch embedding models.
|
||||
"""Elasticsearch embedding models.
|
||||
|
||||
This class provides an interface to generate embeddings using a model deployed
|
||||
in an Elasticsearch cluster. It requires an Elasticsearch connection object
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around embaas embeddings API."""
|
||||
from typing import Any, Dict, List, Mapping, Optional
|
||||
|
||||
import requests
|
||||
@ -22,7 +21,7 @@ class EmbaasEmbeddingsPayload(TypedDict):
|
||||
|
||||
|
||||
class EmbaasEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around embaas's embedding service.
|
||||
"""Embaas's embedding service.
|
||||
|
||||
To use, you should have the
|
||||
environment variable ``EMBAAS_API_KEY`` set with your API key, or pass
|
||||
|
@ -7,7 +7,10 @@ from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class FakeEmbeddings(Embeddings, BaseModel):
|
||||
"""Fake embedding model."""
|
||||
|
||||
size: int
|
||||
"""The size of the embedding vector."""
|
||||
|
||||
def _get_embedding(self) -> List[float]:
|
||||
return list(np.random.normal(size=self.size))
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around Google's PaLM Embeddings APIs."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
@ -55,6 +54,8 @@ def embed_with_retry(
|
||||
|
||||
|
||||
class GooglePalmEmbeddings(BaseModel, Embeddings):
|
||||
"""Google's PaLM Embeddings APIs."""
|
||||
|
||||
client: Any
|
||||
google_api_key: Optional[str]
|
||||
model_name: str = "models/embedding-gecko-001"
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around GPT4All embedding models."""
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from pydantic import BaseModel, root_validator
|
||||
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class GPT4AllEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around GPT4All embedding models.
|
||||
"""GPT4All embedding models.
|
||||
|
||||
To use, you should have the gpt4all python package installed
|
||||
|
||||
@ -30,7 +29,7 @@ class GPT4AllEmbeddings(BaseModel, Embeddings):
|
||||
|
||||
values["client"] = Embed4All()
|
||||
except ImportError:
|
||||
raise ModuleNotFoundError(
|
||||
raise ImportError(
|
||||
"Could not import gpt4all library. "
|
||||
"Please install the gpt4all library to "
|
||||
"use this embedding model: pip install gpt4all"
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around HuggingFace embedding models."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, Field
|
||||
@ -14,7 +13,7 @@ DEFAULT_QUERY_INSTRUCTION = (
|
||||
|
||||
|
||||
class HuggingFaceEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around sentence_transformers embedding models.
|
||||
"""HuggingFace sentence_transformers embedding models.
|
||||
|
||||
To use, you should have the ``sentence_transformers`` python package installed.
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around HuggingFace Hub embedding models."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
@ -11,7 +10,7 @@ VALID_TASKS = ("feature-extraction",)
|
||||
|
||||
|
||||
class HuggingFaceHubEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around HuggingFaceHub embedding models.
|
||||
"""HuggingFaceHub embedding models.
|
||||
|
||||
To use, you should have the ``huggingface_hub`` python package installed, and the
|
||||
environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass
|
||||
@ -71,7 +70,7 @@ class HuggingFaceHubEmbeddings(BaseModel, Embeddings):
|
||||
)
|
||||
values["client"] = client
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import huggingface_hub python package. "
|
||||
"Please install it with `pip install huggingface_hub`."
|
||||
)
|
||||
|
@ -1,5 +1,3 @@
|
||||
"""Wrapper around Jina embedding models."""
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@ -11,6 +9,8 @@ from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class JinaEmbeddings(BaseModel, Embeddings):
|
||||
"""Jina embedding models."""
|
||||
|
||||
client: Any #: :meta private:
|
||||
|
||||
model_name: str = "ViT-B-32::openai"
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around llama.cpp embedding models."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, Field, root_validator
|
||||
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class LlamaCppEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around llama.cpp embedding models.
|
||||
"""llama.cpp embedding models.
|
||||
|
||||
To use, you should have the llama-cpp-python library installed, and provide the
|
||||
path to the Llama model as a named parameter to the constructor.
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around MiniMax APIs."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
@ -47,7 +46,7 @@ def embed_with_retry(embeddings: MiniMaxEmbeddings, *args: Any, **kwargs: Any) -
|
||||
|
||||
|
||||
class MiniMaxEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around MiniMax's embedding inference service.
|
||||
"""MiniMax's embedding service.
|
||||
|
||||
To use, you should have the environment variable ``MINIMAX_GROUP_ID`` and
|
||||
``MINIMAX_API_KEY`` set with your API token, or pass it as a named parameter to
|
||||
|
@ -13,8 +13,12 @@ def _chunk(texts: List[str], size: int) -> Iterator[List[str]]:
|
||||
|
||||
|
||||
class MlflowAIGatewayEmbeddings(Embeddings, BaseModel):
|
||||
"""MLflow AI Gateway Embeddings APIs."""
|
||||
|
||||
route: str
|
||||
"""The route to use for the MLflow AI Gateway API."""
|
||||
gateway_uri: Optional[str] = None
|
||||
"""The URI for the MLflow AI Gateway API."""
|
||||
|
||||
def __init__(self, **kwargs: Any):
|
||||
try:
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around ModelScopeHub embedding models."""
|
||||
from typing import Any, List
|
||||
|
||||
from pydantic import BaseModel, Extra
|
||||
@ -7,7 +6,7 @@ from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class ModelScopeEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around modelscope_hub embedding models.
|
||||
"""ModelScopeHub embedding models.
|
||||
|
||||
To use, you should have the ``modelscope`` python package installed.
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around MosaicML APIs."""
|
||||
from typing import Any, Dict, List, Mapping, Optional, Tuple
|
||||
|
||||
import requests
|
||||
@ -9,7 +8,7 @@ from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class MosaicMLInstructorEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around MosaicML's embedding inference service.
|
||||
"""MosaicML embedding service.
|
||||
|
||||
To use, you should have the
|
||||
environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around NLP Cloud embedding models."""
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from pydantic import BaseModel, root_validator
|
||||
@ -8,7 +7,7 @@ from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class NLPCloudEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around NLP Cloud embedding models.
|
||||
"""NLP Cloud embedding models.
|
||||
|
||||
To use, you should have the nlpcloud python package installed
|
||||
|
||||
|
@ -1,5 +1,3 @@
|
||||
"""Module providing a wrapper around OctoAI Compute Service embedding models."""
|
||||
|
||||
from typing import Any, Dict, List, Mapping, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, Field, root_validator
|
||||
@ -12,7 +10,7 @@ DEFAULT_QUERY_INSTRUCTION = "Represent the question for retrieving similar docum
|
||||
|
||||
|
||||
class OctoAIEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around OctoAI Compute Service embedding models.
|
||||
"""OctoAI Compute Service embedding models.
|
||||
|
||||
The environment variable ``OCTOAI_API_TOKEN`` should be set
|
||||
with your API token, or it can be passed
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around OpenAI embedding models."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
@ -120,7 +119,7 @@ async def async_embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) ->
|
||||
|
||||
|
||||
class OpenAIEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around OpenAI embedding models.
|
||||
"""OpenAI embedding models.
|
||||
|
||||
To use, you should have the ``openai`` python package installed, and the
|
||||
environment variable ``OPENAI_API_KEY`` set with your API key or pass it
|
||||
@ -171,6 +170,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
||||
# to support explicit proxy for OpenAI
|
||||
openai_proxy: Optional[str] = None
|
||||
embedding_ctx_length: int = 8191
|
||||
"""The maximum number of tokens to embed at once."""
|
||||
openai_api_key: Optional[str] = None
|
||||
openai_organization: Optional[str] = None
|
||||
allowed_special: Union[Literal["all"], Set[str]] = set()
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around Sagemaker InvokeEndpoint API."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
@ -12,7 +11,7 @@ class EmbeddingsContentHandler(ContentHandlerBase[List[str], List[List[float]]])
|
||||
|
||||
|
||||
class SagemakerEndpointEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around custom Sagemaker Inference Endpoints.
|
||||
"""Custom Sagemaker Inference Endpoints.
|
||||
|
||||
To use, you must supply the endpoint name from your deployed
|
||||
Sagemaker model & the region where it is deployed.
|
||||
@ -133,7 +132,7 @@ class SagemakerEndpointEmbeddings(BaseModel, Embeddings):
|
||||
) from e
|
||||
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import boto3 python package. "
|
||||
"Please install it with `pip install boto3`."
|
||||
)
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Running custom embedding models on self-hosted remote hardware."""
|
||||
from typing import Any, Callable, List
|
||||
|
||||
from pydantic import Extra
|
||||
@ -17,7 +16,7 @@ def _embed_documents(pipeline: Any, *args: Any, **kwargs: Any) -> List[List[floa
|
||||
|
||||
|
||||
class SelfHostedEmbeddings(SelfHostedPipeline, Embeddings):
|
||||
"""Runs custom embedding models on self-hosted remote hardware.
|
||||
"""Custom embedding models on self-hosted remote hardware.
|
||||
|
||||
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
|
||||
and Lambda, as well as servers specified
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around HuggingFace embedding models for self-hosted remote hardware."""
|
||||
import importlib
|
||||
import logging
|
||||
from typing import Any, Callable, List, Optional
|
||||
@ -58,7 +57,7 @@ def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0)
|
||||
|
||||
|
||||
class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings):
|
||||
"""Runs sentence_transformers embedding models on self-hosted remote hardware.
|
||||
"""HuggingFace embedding models on self-hosted remote hardware.
|
||||
|
||||
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
|
||||
and Lambda, as well as servers specified
|
||||
@ -101,7 +100,7 @@ class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings):
|
||||
|
||||
|
||||
class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings):
|
||||
"""Runs InstructorEmbedding embedding models on self-hosted remote hardware.
|
||||
"""HuggingFace InstructEmbedding models on self-hosted remote hardware.
|
||||
|
||||
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
|
||||
and Lambda, as well as servers specified
|
||||
|
@ -1,4 +1,4 @@
|
||||
"""Wrapper around sentence transformer embedding models."""
|
||||
"""HuggingFace sentence_transformer embedding models."""
|
||||
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
||||
|
||||
SentenceTransformerEmbeddings = HuggingFaceEmbeddings
|
||||
|
@ -7,8 +7,8 @@ from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class SpacyEmbeddings(BaseModel, Embeddings):
|
||||
"""
|
||||
SpacyEmbeddings is a class for generating embeddings using the Spacy library.
|
||||
"""Embeddings by SpaCy models.
|
||||
|
||||
It only supports the 'en_core_web_sm' model.
|
||||
|
||||
Attributes:
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around TensorflowHub embedding models."""
|
||||
from typing import Any, List
|
||||
|
||||
from pydantic import BaseModel, Extra
|
||||
@ -9,7 +8,7 @@ DEFAULT_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multili
|
||||
|
||||
|
||||
class TensorflowHubEmbeddings(BaseModel, Embeddings):
|
||||
"""Wrapper around tensorflow_hub embedding models.
|
||||
"""TensorflowHub embedding models.
|
||||
|
||||
To use, you should have the ``tensorflow_text`` python package installed.
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
"""Wrapper around Google VertexAI embedding models."""
|
||||
from typing import Dict, List
|
||||
|
||||
from pydantic import root_validator
|
||||
@ -9,6 +8,8 @@ from langchain.utilities.vertexai import raise_vertex_import_error
|
||||
|
||||
|
||||
class VertexAIEmbeddings(_VertexAICommon, Embeddings):
|
||||
"""Google Cloud VertexAI embedding models."""
|
||||
|
||||
model_name: str = "textembedding-gecko"
|
||||
|
||||
@root_validator()
|
||||
|
Loading…
Reference in New Issue
Block a user