community[minor]: Add Openvino embedding support (#19632)

This PR adds support for both Hugging Face and BGE embeddings with OpenVINO.

---------

Co-authored-by: Alexander Kozlov <alexander.kozlov@intel.com>
Ethan Yang committed via GitHub
parent cd55d587c2
commit 7164015135

@@ -0,0 +1,268 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ed47bb62",
"metadata": {},
"source": [
"# OpenVINO Local Pipelines\n",
"[OpenVINO™](https://github.com/openvinotoolkit/openvino) is an open-source toolkit for optimizing and deploying AI inference. The OpenVINO™ Runtime supports various hardware [devices](https://github.com/openvinotoolkit/openvino?tab=readme-ov-file#supported-hardware-matrix) including x86 and ARM CPUs, and Intel GPUs. It can help to boost deep learning performance in Computer Vision, Automatic Speech Recognition, Natural Language Processing and other common tasks.\n",
"\n",
"Hugging Face embedding model can be supported by OpenVINO through ``OpenVINOEmbeddings`` class. If you have an Intel GPU, you can specify `model_kwargs={\"device\": \"GPU\"}` to run inference on it."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "16b20335-da1d-46ba-aa23-fbf3e2c6fe60",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "861521a9",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.embeddings import OpenVINOEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ff9be586",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
" warnings.warn(\n",
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
}
],
"source": [
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"model_kwargs = {\"device\": \"CPU\"}\n",
"encode_kwargs = {\"mean_pooling\": True, \"normalize_embeddings\": True}\n",
"\n",
"ov_embeddings = OpenVINOEmbeddings(\n",
" model_name_or_path=model_name,\n",
" model_kwargs=model_kwargs,\n",
" encode_kwargs=encode_kwargs,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d0a98ae9",
"metadata": {},
"outputs": [],
"source": [
"text = \"This is a test document.\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5d6c682b",
"metadata": {},
"outputs": [],
"source": [
"query_result = ov_embeddings.embed_query(text)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b57b8ce9-ef7d-4e63-979e-aa8763d1f9a8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[-0.048951778560876846, -0.03986183926463127, -0.02156277745962143]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_result[:3]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "bb5e74c0",
"metadata": {},
"outputs": [],
"source": [
"doc_result = ov_embeddings.embed_documents([text])"
]
},
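  {
   "cell_type": "markdown",
   "id": "b1f5c2a0",
   "metadata": {},
   "source": [
    "Since we requested `normalize_embeddings=True`, each vector has unit length, so the dot product of two embeddings equals their cosine similarity. As a quick sanity check (a sketch, assuming `numpy` is available in the environment), embedding the same text as a query and as a document should score close to 1.0:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9e3f1d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# With L2-normalized vectors, the dot product is the cosine similarity;\n",
    "# the same text embedded as query and document should score ~1.0.\n",
    "np.dot(query_result, doc_result[0])"
   ]
  },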
{
"cell_type": "markdown",
"id": "92019ef1-5d30-4985-b4e6-c0d98bdfe265",
"metadata": {},
"source": [
"## BGE with OpenVINO\n",
"We can also access BGE embedding models via the ``OpenVINOBgeEmbeddings`` class with OpenVINO. "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "66f5c6ba-1446-43e1-b012-800d17cef300",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
" warnings.warn(\n",
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
}
],
"source": [
"from langchain_community.embeddings import OpenVINOBgeEmbeddings\n",
"\n",
"model_name = \"BAAI/bge-small-en\"\n",
"model_kwargs = {\"device\": \"CPU\"}\n",
"encode_kwargs = {\"normalize_embeddings\": True}\n",
"ov_embeddings = OpenVINOBgeEmbeddings(\n",
" model_name_or_path=model_name,\n",
" model_kwargs=model_kwargs,\n",
" encode_kwargs=encode_kwargs,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "72001afb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"384"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"embedding = ov_embeddings.embed_query(\"hi this is harrison\")\n",
"len(embedding)"
]
},
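  {
   "cell_type": "markdown",
   "id": "d4a7e8b1",
   "metadata": {},
   "source": [
    "`embed_documents` works the same way for BGE models. Note that `embed_query` prepends the BGE query instruction (`\"Represent this question for searching relevant passages: \"` for English models), while documents are embedded as-is. A small illustrative example (the texts are placeholders):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5b8f9c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "docs = [\n",
    "    \"OpenVINO optimizes deep learning inference on Intel hardware.\",\n",
    "    \"BGE is a family of embedding models from BAAI.\",\n",
    "]\n",
    "doc_embeddings = ov_embeddings.embed_documents(docs)\n",
    "len(doc_embeddings), len(doc_embeddings[0])"
   ]
  },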
{
"cell_type": "markdown",
"id": "7e86c9ae-ec63-48e9-97ba-f23f7a042ed1",
"metadata": {},
"source": [
"For more information refer to:\n",
"\n",
"* [OpenVINO LLM guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html).\n",
"\n",
"* [OpenVINO Documentation](https://docs.openvino.ai/2024/home.html).\n",
"\n",
"* [OpenVINO Get Started Guide](https://www.intel.com/content/www/us/en/content-details/819067/openvino-get-started-guide.html).\n",
"\n",
"* [RAG Notebook with LangChain](https://github.com/openvinotoolkit/openvino_notebooks/blob/master/notebooks/llm-chatbot/rag-chatbot.ipynb)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"vscode": {
"interpreter": {
"hash": "7377c2ccc78bc62c2683122d48c8cd1fb85a53850a1b1fc29736ed39852c9885"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@@ -67,6 +67,8 @@ _module_lookup = {
"OctoAIEmbeddings": "langchain_community.embeddings.octoai_embeddings",
"OllamaEmbeddings": "langchain_community.embeddings.ollama",
"OpenAIEmbeddings": "langchain_community.embeddings.openai",
"OpenVINOEmbeddings": "langchain_community.embeddings.openvino",
"OpenVINOBgeEmbeddings": "langchain_community.embeddings.openvino",
"QianfanEmbeddingsEndpoint": "langchain_community.embeddings.baidu_qianfan_endpoint", # noqa: E501
"QuantizedBgeEmbeddings": "langchain_community.embeddings.itrex",
"QuantizedBiEncoderEmbeddings": "langchain_community.embeddings.optimum_intel",

@@ -0,0 +1,344 @@
from pathlib import Path
from typing import Any, Dict, List
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra, Field

DEFAULT_QUERY_INSTRUCTION = (
"Represent the question for retrieving supporting documents: "
)
DEFAULT_QUERY_BGE_INSTRUCTION_EN = (
"Represent this question for searching relevant passages: "
)
DEFAULT_QUERY_BGE_INSTRUCTION_ZH = "为这个句子生成表示以用于检索相关文章:"


class OpenVINOEmbeddings(BaseModel, Embeddings):
    """OpenVINO embedding models.

    To use, you should have the ``optimum[openvino,nncf]`` python packages
    installed.

Example:
.. code-block:: python
from langchain_community.embeddings import OpenVINOEmbeddings
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'CPU'}
encode_kwargs = {'normalize_embeddings': True}
ov = OpenVINOEmbeddings(
model_name_or_path=model_name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs
)
"""
ov_model: Any
"""OpenVINO model object."""
tokenizer: Any
"""Tokenizer for embedding model."""
model_name_or_path: str
"""HuggingFace model id."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass when calling the `encode` method of the model."""
show_progress: bool = False
"""Whether to show a progress bar."""
def __init__(self, **kwargs: Any):
"""Initialize the sentence_transformer."""
super().__init__(**kwargs)
try:
from optimum.intel.openvino import OVModelForFeatureExtraction
except ImportError as e:
            raise ImportError(
"Could not import optimum-intel python package. "
"Please install it with: "
"pip install -U 'optimum[openvino,nncf]'"
) from e
try:
from huggingface_hub import HfApi
except ImportError as e:
            raise ImportError(
"Could not import huggingface_hub python package. "
"Please install it with: "
"`pip install -U huggingface_hub`."
) from e
def require_model_export(
model_id: str, revision: Any = None, subfolder: Any = None
) -> bool:
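            """Return True when the model still needs conversion to OpenVINO IR,
            i.e. no openvino_model.xml/.bin pair is found locally or on the
            Hugging Face Hub."""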
model_dir = Path(model_id)
if subfolder is not None:
model_dir = model_dir / subfolder
if model_dir.is_dir():
return (
not (model_dir / "openvino_model.xml").exists()
or not (model_dir / "openvino_model.bin").exists()
)
hf_api = HfApi()
try:
model_info = hf_api.model_info(model_id, revision=revision or "main")
normalized_subfolder = (
None if subfolder is None else Path(subfolder).as_posix()
)
model_files = [
file.rfilename
for file in model_info.siblings
if normalized_subfolder is None
or file.rfilename.startswith(normalized_subfolder)
]
ov_model_path = (
"openvino_model.xml"
if subfolder is None
else f"{normalized_subfolder}/openvino_model.xml"
)
return (
ov_model_path not in model_files
or ov_model_path.replace(".xml", ".bin") not in model_files
)
except Exception:
return True
if require_model_export(self.model_name_or_path):
# use remote model
self.ov_model = OVModelForFeatureExtraction.from_pretrained(
self.model_name_or_path, export=True, **self.model_kwargs
)
else:
# use local model
self.ov_model = OVModelForFeatureExtraction.from_pretrained(
self.model_name_or_path, **self.model_kwargs
)
try:
from transformers import AutoTokenizer
except ImportError as e:
raise ImportError(
"Unable to import transformers, please install with "
"`pip install -U transformers`."
) from e
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
def _text_length(self, text: Any) -> int:
"""
Help function to get the length for the input text. Text can be either
a list of ints (which means a single text as input), or a tuple of list of ints
(representing several text inputs to the model).
"""
if isinstance(text, dict): # {key: value} case
return len(next(iter(text.values())))
elif not hasattr(text, "__len__"): # Object has no len() method
return 1
# Empty string or list of ints
elif len(text) == 0 or isinstance(text[0], int):
return len(text)
else:
# Sum of length of individual strings
return sum([len(t) for t in text])
def encode(
self,
sentences: Any,
batch_size: int = 4,
show_progress_bar: bool = False,
convert_to_numpy: bool = True,
convert_to_tensor: bool = False,
mean_pooling: bool = False,
normalize_embeddings: bool = True,
) -> Any:
"""
Computes sentence embeddings.
:param sentences: the sentences to embed.
:param batch_size: the batch size used for the computation.
:param show_progress_bar: Whether to output a progress bar.
:param convert_to_numpy: Whether the output should be a list of numpy vectors.
:param convert_to_tensor: Whether the output should be one large tensor.
:param mean_pooling: Whether to pool returned vectors.
:param normalize_embeddings: Whether to normalize returned vectors.
:return: By default, a 2d numpy array with shape [num_inputs, output_dimension].
"""
try:
import numpy as np
except ImportError as e:
raise ImportError(
"Unable to import numpy, please install with " "`pip install -U numpy`."
) from e
try:
from tqdm import trange
except ImportError as e:
raise ImportError(
"Unable to import tqdm, please install with " "`pip install -U tqdm`."
) from e
try:
import torch
except ImportError as e:
raise ImportError(
"Unable to import torch, please install with " "`pip install -U torch`."
) from e
def run_mean_pooling(model_output: Any, attention_mask: Any) -> Any:
token_embeddings = model_output[
0
] # First element of model_output contains all token embeddings
input_mask_expanded = (
attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
)
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
input_mask_expanded.sum(1), min=1e-9
)
if convert_to_tensor:
convert_to_numpy = False
input_was_string = False
if isinstance(sentences, str) or not hasattr(
sentences, "__len__"
): # Cast an individual sentence to a list with length 1
sentences = [sentences]
input_was_string = True
all_embeddings: Any = []
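        # Sort inputs by decreasing length so each batch pads to a similar
        # size, reducing wasted computation on padding tokens.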
length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences])
sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
for start_index in trange(
0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar
):
sentences_batch = sentences_sorted[start_index : start_index + batch_size]
features = self.tokenizer(
sentences_batch, padding=True, truncation=True, return_tensors="pt"
)
out_features = self.ov_model(**features)
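            # Pool token embeddings into one sentence vector: a mask-weighted
            # mean over all tokens, or just the first ([CLS]) token.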
if mean_pooling:
embeddings = run_mean_pooling(out_features, features["attention_mask"])
else:
embeddings = out_features[0][:, 0]
if normalize_embeddings:
embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
# fixes for #522 and #487 to avoid oom problems on gpu with large datasets
if convert_to_numpy:
embeddings = embeddings.cpu()
all_embeddings.extend(embeddings)
all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]
if convert_to_tensor:
if len(all_embeddings):
all_embeddings = torch.stack(all_embeddings)
else:
all_embeddings = torch.Tensor()
elif convert_to_numpy:
all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
if input_was_string:
all_embeddings = all_embeddings[0]
return all_embeddings
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a HuggingFace transformer model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
texts = list(map(lambda x: x.replace("\n", " "), texts))
embeddings = self.encode(
texts, show_progress_bar=self.show_progress, **self.encode_kwargs
)
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a HuggingFace transformer model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
return self.embed_documents([text])[0]


class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
    """OpenVINO BGE embedding models.

    Example:
.. code-block:: python
from langchain_community.embeddings import OpenVINOBgeEmbeddings
model_name_or_path = "BAAI/bge-large-en"
model_kwargs = {'device': 'CPU'}
encode_kwargs = {'normalize_embeddings': True}
ov = OpenVINOBgeEmbeddings(
model_name_or_path=model_name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs
)
"""
model_name_or_path: str
"""HuggingFace model id."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass when calling the `encode` method of the model."""
show_progress: bool = False
"""Whether to show a progress bar."""
query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
"""Instruction to use for embedding query."""
embed_instruction: str = ""
"""Instruction to use for embedding document."""
def __init__(self, **kwargs: Any):
"""Initialize the sentence_transformer."""
super().__init__(**kwargs)
if "-zh" in self.model_name_or_path:
self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a HuggingFace transformer model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
texts = [self.embed_instruction + t.replace("\n", " ") for t in texts]
embeddings = self.encode(texts, **self.encode_kwargs)
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a HuggingFace transformer model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
text = text.replace("\n", " ")
embedding = self.encode(self.query_instruction + text, **self.encode_kwargs)
return embedding.tolist()
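
Either class can be passed anywhere LangChain expects an `Embeddings` implementation. A minimal sketch (assumes `faiss-cpu` is installed in addition to the packages above; the model and texts are illustrative):

from langchain_community.embeddings import OpenVINOBgeEmbeddings
from langchain_community.vectorstores import FAISS

embedding = OpenVINOBgeEmbeddings(
    model_name_or_path="BAAI/bge-small-en",
    model_kwargs={"device": "CPU"},
    encode_kwargs={"normalize_embeddings": True},
)

# Build an in-memory FAISS index and run a similarity search.
db = FAISS.from_texts(
    ["OpenVINO runs on Intel CPUs and GPUs.", "LangChain composes LLM apps."],
    embedding,
)
print(db.similarity_search("What hardware does OpenVINO support?", k=1))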

@@ -69,6 +69,8 @@ EXPECTED_ALL = [
"QuantizedBgeEmbeddings",
"PremAIEmbeddings",
"YandexGPTEmbeddings",
"OpenVINOEmbeddings",
"OpenVINOBgeEmbeddings",
]
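
The import test typically just asserts the module's public surface matches this list; a sketch of the check (an assumption about the surrounding test file, which is only partially shown):

from langchain_community import embeddings

def test_all_imports() -> None:
    # Every public embedding class must be registered deliberately.
    assert set(embeddings.__all__) == set(EXPECTED_ALL)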
