diff --git a/docs/modules/models/text_embedding/examples/sentence_transformers.ipynb b/docs/modules/models/text_embedding/examples/sentence_transformers.ipynb index eda1c7dd..bf5466b9 100644 --- a/docs/modules/models/text_embedding/examples/sentence_transformers.ipynb +++ b/docs/modules/models/text_embedding/examples/sentence_transformers.ipynb @@ -8,12 +8,14 @@ "source": [ "# Sentence Transformers Embeddings\n", "\n", - "Let's generate embeddings using the [SentenceTransformers](https://www.sbert.net/) integration. SentenceTransformers is a python package that can generate text and image embeddings, originating from [Sentence-BERT](https://arxiv.org/abs/1908.10084)" + "[SentenceTransformers](https://www.sbert.net/) embeddings are called using the `HuggingFaceEmbeddings` integration. `SentenceTransformerEmbeddings` is also provided as an alias of `HuggingFaceEmbeddings` for users who are more familiar with using that package directly.\n", + "\n", + "SentenceTransformers is a Python package that can generate text and image embeddings, originating from [Sentence-BERT](https://arxiv.org/abs/1908.10084)." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "id": "06c9f47d", "metadata": {}, "outputs": [ @@ -21,10 +23,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], @@ -34,27 +35,28 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "id": "861521a9", "metadata": {}, "outputs": [], "source": [ - "from langchain.embeddings import SentenceTransformerEmbeddings " + "from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings " ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "ff9be586", "metadata": {}, "outputs": [], "source": [ - "embeddings = SentenceTransformerEmbeddings(model=\"all-MiniLM-L6-v2\")" + "embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", + "# Equivalent to SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "id": "d0a98ae9", "metadata": {}, "outputs": [], @@ -64,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "id": "5d6c682b", "metadata": {}, "outputs": [], @@ -74,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "id": "bb5e74c0", "metadata": {}, "outputs": [], @@ -107,7 +109,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.2" + "version": "3.8.16" }, "vscode": { "interpreter": { diff --git a/langchain/embeddings/sentence_transformer.py 
b/langchain/embeddings/sentence_transformer.py index b3bba97e..0a69f2c2 100644 --- a/langchain/embeddings/sentence_transformer.py +++ b/langchain/embeddings/sentence_transformer.py @@ -1,63 +1,4 @@ """Wrapper around sentence transformer embedding models.""" -from typing import Any, Dict, List, Optional +from langchain.embeddings.huggingface import HuggingFaceEmbeddings -from pydantic import BaseModel, Extra, Field, root_validator - -from langchain.embeddings.base import Embeddings - - -class SentenceTransformerEmbeddings(BaseModel, Embeddings): - embedding_function: Any #: :meta private: - - model: Optional[str] = Field("all-MiniLM-L6-v2", alias="model") - """Transformer model to use.""" - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that sentence_transformers library is installed.""" - model = values["model"] - - try: - from sentence_transformers import SentenceTransformer - - values["embedding_function"] = SentenceTransformer(model) - except ImportError: - raise ModuleNotFoundError( - "Could not import sentence_transformers library. " - "Please install the sentence_transformers library to " - "use this embedding model: pip install sentence_transformers" - ) - except Exception: - raise NameError(f"Could not load SentenceTransformer model {model}.") - - return values - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Embed a list of documents using the SentenceTransformer model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - embeddings = self.embedding_function.encode( - texts, convert_to_numpy=True - ).tolist() - return [list(map(float, e)) for e in embeddings] - - def embed_query(self, text: str) -> List[float]: - """Embed a query using the SentenceTransformer model. - - Args: - text: The text to embed. - - Returns: - Embedding for the text. 
- """ - return self.embed_documents([text])[0] +SentenceTransformerEmbeddings = HuggingFaceEmbeddings