From 716401513512ae7732a2d738100bb684c7bb5251 Mon Sep 17 00:00:00 2001
From: Ethan Yang
Date: Fri, 29 Mar 2024 16:34:51 +0800
Subject: [PATCH] community[minor]: Add Openvino embedding support (#19632)

This PR adds support for both HF and BGE embeddings with OpenVINO.

---------

Co-authored-by: Alexander Kozlov
---
 .../text_embedding/openvino.ipynb             | 268 ++++++++++++++
 .../embeddings/__init__.py                    |   2 +
 .../embeddings/openvino.py                    | 344 ++++++++++++++++++
 .../unit_tests/embeddings/test_imports.py     |   2 +
 4 files changed, 616 insertions(+)
 create mode 100644 docs/docs/integrations/text_embedding/openvino.ipynb
 create mode 100644 libs/community/langchain_community/embeddings/openvino.py

diff --git a/docs/docs/integrations/text_embedding/openvino.ipynb b/docs/docs/integrations/text_embedding/openvino.ipynb
new file mode 100644
index 0000000000..3f400d8f1e
--- /dev/null
+++ b/docs/docs/integrations/text_embedding/openvino.ipynb
@@ -0,0 +1,268 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ed47bb62",
+   "metadata": {},
+   "source": [
+    "# OpenVINO Local Pipelines\n",
+    "[OpenVINO™](https://github.com/openvinotoolkit/openvino) is an open-source toolkit for optimizing and deploying AI inference. The OpenVINO™ Runtime supports various hardware [devices](https://github.com/openvinotoolkit/openvino?tab=readme-ov-file#supported-hardware-matrix) including x86 and ARM CPUs, and Intel GPUs. It can help to boost deep learning performance in Computer Vision, Automatic Speech Recognition, Natural Language Processing and other common tasks.\n",
+    "\n",
+    "Hugging Face embedding models can be run with OpenVINO through the ``OpenVINOEmbeddings`` class. If you have an Intel GPU, you can specify `model_kwargs={\"device\": \"GPU\"}` to run inference on it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "16b20335-da1d-46ba-aa23-fbf3e2c6fe60",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "861521a9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.embeddings import OpenVINOEmbeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ff9be586",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
+      "  warnings.warn(\n",
+      "Framework not specified. Using pt to export the model.\n",
+      "Using the export variant default. 
Available variants are:\n", + " - default: The default ONNX variant.\n", + "Using framework PyTorch: 2.2.1+cu121\n", + "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n", + " warnings.warn(\n", + "Compiling the model to CPU ...\n" + ] + } + ], + "source": [ + "model_name = \"sentence-transformers/all-mpnet-base-v2\"\n", + "model_kwargs = {\"device\": \"CPU\"}\n", + "encode_kwargs = {\"mean_pooling\": True, \"normalize_embeddings\": True}\n", + "\n", + "ov_embeddings = OpenVINOEmbeddings(\n", + " model_name_or_path=model_name,\n", + " model_kwargs=model_kwargs,\n", + " encode_kwargs=encode_kwargs,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d0a98ae9", + "metadata": {}, + "outputs": [], + "source": [ + "text = \"This is a test document.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5d6c682b", + "metadata": {}, + "outputs": [], + "source": [ + "query_result = ov_embeddings.embed_query(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b57b8ce9-ef7d-4e63-979e-aa8763d1f9a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[-0.048951778560876846, -0.03986183926463127, -0.02156277745962143]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query_result[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bb5e74c0", + "metadata": {}, + "outputs": [], + "source": [ + "doc_result = ov_embeddings.embed_documents([text])" + ] + }, + { + "cell_type": "markdown", + "id": "92019ef1-5d30-4985-b4e6-c0d98bdfe265", + "metadata": {}, + "source": [ + "## BGE with OpenVINO\n", + "We can also access BGE embedding models via the ``OpenVINOBgeEmbeddings`` class with OpenVINO. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "66f5c6ba-1446-43e1-b012-800d17cef300", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n", + " warnings.warn(\n", + "Framework not specified. Using pt to export the model.\n", + "Using the export variant default. Available variants are:\n", + " - default: The default ONNX variant.\n", + "Using framework PyTorch: 2.2.1+cu121\n", + "Overriding 1 configuration item(s)\n", + "\t- use_cache -> False\n", + "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead\n", + " warnings.warn(\n", + "Compiling the model to CPU ...\n" + ] + } + ], + "source": [ + "from langchain_community.embeddings import OpenVINOBgeEmbeddings\n", + "\n", + "model_name = \"BAAI/bge-small-en\"\n", + "model_kwargs = {\"device\": \"CPU\"}\n", + "encode_kwargs = {\"normalize_embeddings\": True}\n", + "ov_embeddings = OpenVINOBgeEmbeddings(\n", + " model_name_or_path=model_name,\n", + " model_kwargs=model_kwargs,\n", + " encode_kwargs=encode_kwargs,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "72001afb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "384" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "embedding = ov_embeddings.embed_query(\"hi this is harrison\")\n", + "len(embedding)" + ] + }, + { + "cell_type": "markdown", + "id": "7e86c9ae-ec63-48e9-97ba-f23f7a042ed1", + "metadata": {}, + "source": [ + "For more information refer to:\n", + "\n", + "* [OpenVINO LLM guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html).\n", + "\n", + "* [OpenVINO Documentation](https://docs.openvino.ai/2024/home.html).\n", + "\n", + "* [OpenVINO Get Started Guide](https://www.intel.com/content/www/us/en/content-details/819067/openvino-get-started-guide.html).\n", + "\n", + "* [RAG Notebook with LangChain](https://github.com/openvinotoolkit/openvino_notebooks/blob/master/notebooks/llm-chatbot/rag-chatbot.ipynb)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "vscode": { + "interpreter": { + "hash": "7377c2ccc78bc62c2683122d48c8cd1fb85a53850a1b1fc29736ed39852c9885" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py index dc43569803..929301a9ce 100644 --- a/libs/community/langchain_community/embeddings/__init__.py +++ b/libs/community/langchain_community/embeddings/__init__.py @@ -67,6 +67,8 @@ _module_lookup = { "OctoAIEmbeddings": "langchain_community.embeddings.octoai_embeddings", "OllamaEmbeddings": "langchain_community.embeddings.ollama", "OpenAIEmbeddings": "langchain_community.embeddings.openai", + "OpenVINOEmbeddings": "langchain_community.embeddings.openvino", + "OpenVINOBgeEmbeddings": "langchain_community.embeddings.openvino", "QianfanEmbeddingsEndpoint": "langchain_community.embeddings.baidu_qianfan_endpoint", # noqa: E501 "QuantizedBgeEmbeddings": "langchain_community.embeddings.itrex", "QuantizedBiEncoderEmbeddings": "langchain_community.embeddings.optimum_intel", diff --git a/libs/community/langchain_community/embeddings/openvino.py b/libs/community/langchain_community/embeddings/openvino.py new file mode 100644 index 0000000000..379d2a271c --- /dev/null +++ b/libs/community/langchain_community/embeddings/openvino.py @@ -0,0 +1,344 @@ +from pathlib import Path +from typing import Any, Dict, List + +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Extra, Field + +DEFAULT_QUERY_INSTRUCTION = ( + "Represent the question for retrieving supporting documents: " +) 
+DEFAULT_QUERY_BGE_INSTRUCTION_EN = (
+    "Represent this question for searching relevant passages: "
+)
+DEFAULT_QUERY_BGE_INSTRUCTION_ZH = "为这个句子生成表示以用于检索相关文章："
+
+
+class OpenVINOEmbeddings(BaseModel, Embeddings):
+    """OpenVINO embedding models.
+
+    To use, you should have the ``optimum[openvino,nncf]`` python package installed.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.embeddings import OpenVINOEmbeddings
+
+            model_name = "sentence-transformers/all-mpnet-base-v2"
+            model_kwargs = {'device': 'CPU'}
+            encode_kwargs = {'normalize_embeddings': True}
+            ov = OpenVINOEmbeddings(
+                model_name_or_path=model_name,
+                model_kwargs=model_kwargs,
+                encode_kwargs=encode_kwargs
+            )
+    """
+
+    ov_model: Any
+    """OpenVINO model object."""
+    tokenizer: Any
+    """Tokenizer for embedding model."""
+    model_name_or_path: str
+    """HuggingFace model id."""
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Keyword arguments to pass to the model."""
+    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Keyword arguments to pass when calling the `encode` method of the model."""
+    show_progress: bool = False
+    """Whether to show a progress bar."""
+
+    def __init__(self, **kwargs: Any):
+        """Initialize the embedding model."""
+        super().__init__(**kwargs)
+
+        try:
+            from optimum.intel.openvino import OVModelForFeatureExtraction
+        except ImportError as e:
+            raise ValueError(
+                "Could not import optimum-intel python package. "
+                "Please install it with: "
+                "pip install -U 'optimum[openvino,nncf]'"
+            ) from e
+
+        try:
+            from huggingface_hub import HfApi
+        except ImportError as e:
+            raise ValueError(
+                "Could not import huggingface_hub python package. "
+                "Please install it with: "
+                "`pip install -U huggingface_hub`."
+            ) from e
+
+        def require_model_export(
+            model_id: str, revision: Any = None, subfolder: Any = None
+        ) -> bool:
+            model_dir = Path(model_id)
+            if subfolder is not None:
+                model_dir = model_dir / subfolder
+            if model_dir.is_dir():
+                return (
+                    not (model_dir / "openvino_model.xml").exists()
+                    or not (model_dir / "openvino_model.bin").exists()
+                )
+            hf_api = HfApi()
+            try:
+                model_info = hf_api.model_info(model_id, revision=revision or "main")
+                normalized_subfolder = (
+                    None if subfolder is None else Path(subfolder).as_posix()
+                )
+                model_files = [
+                    file.rfilename
+                    for file in model_info.siblings
+                    if normalized_subfolder is None
+                    or file.rfilename.startswith(normalized_subfolder)
+                ]
+                ov_model_path = (
+                    "openvino_model.xml"
+                    if subfolder is None
+                    else f"{normalized_subfolder}/openvino_model.xml"
+                )
+                return (
+                    ov_model_path not in model_files
+                    or ov_model_path.replace(".xml", ".bin") not in model_files
+                )
+            except Exception:
+                return True
+
+        if require_model_export(self.model_name_or_path):
+            # use remote model
+            self.ov_model = OVModelForFeatureExtraction.from_pretrained(
+                self.model_name_or_path, export=True, **self.model_kwargs
+            )
+        else:
+            # use local model
+            self.ov_model = OVModelForFeatureExtraction.from_pretrained(
+                self.model_name_or_path, **self.model_kwargs
+            )
+
+        try:
+            from transformers import AutoTokenizer
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import transformers, please install with "
+                "`pip install -U transformers`."
+            ) from e
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
+
+    def _text_length(self, text: Any) -> int:
+        """
+        Helper function to get the length of the input text. 
Text can be either + a list of ints (which means a single text as input), or a tuple of list of ints + (representing several text inputs to the model). + """ + + if isinstance(text, dict): # {key: value} case + return len(next(iter(text.values()))) + elif not hasattr(text, "__len__"): # Object has no len() method + return 1 + # Empty string or list of ints + elif len(text) == 0 or isinstance(text[0], int): + return len(text) + else: + # Sum of length of individual strings + return sum([len(t) for t in text]) + + def encode( + self, + sentences: Any, + batch_size: int = 4, + show_progress_bar: bool = False, + convert_to_numpy: bool = True, + convert_to_tensor: bool = False, + mean_pooling: bool = False, + normalize_embeddings: bool = True, + ) -> Any: + """ + Computes sentence embeddings. + + :param sentences: the sentences to embed. + :param batch_size: the batch size used for the computation. + :param show_progress_bar: Whether to output a progress bar. + :param convert_to_numpy: Whether the output should be a list of numpy vectors. + :param convert_to_tensor: Whether the output should be one large tensor. + :param mean_pooling: Whether to pool returned vectors. + :param normalize_embeddings: Whether to normalize returned vectors. + + :return: By default, a 2d numpy array with shape [num_inputs, output_dimension]. + """ + try: + import numpy as np + except ImportError as e: + raise ImportError( + "Unable to import numpy, please install with " "`pip install -U numpy`." + ) from e + try: + from tqdm import trange + except ImportError as e: + raise ImportError( + "Unable to import tqdm, please install with " "`pip install -U tqdm`." + ) from e + try: + import torch + except ImportError as e: + raise ImportError( + "Unable to import torch, please install with " "`pip install -U torch`." 
+            ) from e
+
+        def run_mean_pooling(model_output: Any, attention_mask: Any) -> Any:
+            token_embeddings = model_output[
+                0
+            ]  # First element of model_output contains all token embeddings
+            input_mask_expanded = (
+                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+            )
+            return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
+                input_mask_expanded.sum(1), min=1e-9
+            )
+
+        if convert_to_tensor:
+            convert_to_numpy = False
+
+        input_was_string = False
+        if isinstance(sentences, str) or not hasattr(
+            sentences, "__len__"
+        ):  # Cast an individual sentence to a list with length 1
+            sentences = [sentences]
+            input_was_string = True
+
+        all_embeddings: Any = []
+        length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences])
+        sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
+
+        for start_index in trange(
+            0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar
+        ):
+            sentences_batch = sentences_sorted[start_index : start_index + batch_size]
+            features = self.tokenizer(
+                sentences_batch, padding=True, truncation=True, return_tensors="pt"
+            )
+
+            out_features = self.ov_model(**features)
+            if mean_pooling:
+                embeddings = run_mean_pooling(out_features, features["attention_mask"])
+            else:
+                embeddings = out_features[0][:, 0]
+            if normalize_embeddings:
+                embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
+
+            # fixes for #522 and #487 to avoid oom problems on gpu with large datasets
+            if convert_to_numpy:
+                embeddings = embeddings.cpu()
+
+            all_embeddings.extend(embeddings)
+
+        all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]
+
+        if convert_to_tensor:
+            if len(all_embeddings):
+                all_embeddings = torch.stack(all_embeddings)
+            else:
+                all_embeddings = torch.Tensor()
+        elif convert_to_numpy:
+            all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
+
+        if input_was_string:
+            all_embeddings = all_embeddings[0]
+
+        return all_embeddings
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Compute doc embeddings using a HuggingFace transformer model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+
+        texts = list(map(lambda x: x.replace("\n", " "), texts))
+        embeddings = self.encode(
+            texts, show_progress_bar=self.show_progress, **self.encode_kwargs
+        )
+
+        return embeddings.tolist()
+
+    def embed_query(self, text: str) -> List[float]:
+        """Compute query embeddings using a HuggingFace transformer model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        return self.embed_documents([text])[0]
+
+
+class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
+    """OpenVINO BGE embedding models.
+
+    Example:
+        .. 
code-block:: python

+            from langchain_community.embeddings import OpenVINOBgeEmbeddings
+
+            model_name_or_path = "BAAI/bge-large-en"
+            model_kwargs = {'device': 'CPU'}
+            encode_kwargs = {'normalize_embeddings': True}
+            ov = OpenVINOBgeEmbeddings(
+                model_name_or_path=model_name_or_path,
+                model_kwargs=model_kwargs,
+                encode_kwargs=encode_kwargs
+            )
+    """
+
+    model_name_or_path: str
+    """HuggingFace model id."""
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Keyword arguments to pass to the model."""
+    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Keyword arguments to pass when calling the `encode` method of the model."""
+    show_progress: bool = False
+    """Whether to show a progress bar."""
+    query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
+    """Instruction to use for embedding query."""
+    embed_instruction: str = ""
+    """Instruction to use for embedding document."""
+
+    def __init__(self, **kwargs: Any):
+        """Initialize the embedding model."""
+        super().__init__(**kwargs)
+
+        if "-zh" in self.model_name_or_path:
+            self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Compute doc embeddings using a HuggingFace transformer model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        texts = [self.embed_instruction + t.replace("\n", " ") for t in texts]
+        embeddings = self.encode(texts, **self.encode_kwargs)
+        return embeddings.tolist()
+
+    def embed_query(self, text: str) -> List[float]:
+        """Compute query embeddings using a HuggingFace transformer model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        text = text.replace("\n", " ")
+        embedding = self.encode(self.query_instruction + text, **self.encode_kwargs)
+        return embedding.tolist()
diff --git a/libs/community/tests/unit_tests/embeddings/test_imports.py b/libs/community/tests/unit_tests/embeddings/test_imports.py
index c48e98c6d3..56b7f247d6 100644
--- a/libs/community/tests/unit_tests/embeddings/test_imports.py
+++ b/libs/community/tests/unit_tests/embeddings/test_imports.py
@@ -69,6 +69,8 @@ EXPECTED_ALL = [
     "QuantizedBgeEmbeddings",
    "PremAIEmbeddings",
     "YandexGPTEmbeddings",
+    "OpenVINOEmbeddings",
+    "OpenVINOBgeEmbeddings",
 ]
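
A minimal usage sketch of the two classes introduced by this patch, mirroring the notebook above; the model ids, device, and encode kwargs are taken from that notebook, and it assumes `optimum[openvino,nncf]` and `langchain-community` are installed.

.. code-block:: python

    from langchain_community.embeddings import (
        OpenVINOBgeEmbeddings,
        OpenVINOEmbeddings,
    )

    # Hugging Face model exported to OpenVINO (if needed) and compiled for CPU.
    ov_embeddings = OpenVINOEmbeddings(
        model_name_or_path="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"device": "CPU"},
        encode_kwargs={"mean_pooling": True, "normalize_embeddings": True},
    )
    query_vector = ov_embeddings.embed_query("This is a test document.")
    doc_vectors = ov_embeddings.embed_documents(["This is a test document."])

    # BGE variant: embed_query prepends the retrieval instruction to the query text.
    bge_embeddings = OpenVINOBgeEmbeddings(
        model_name_or_path="BAAI/bge-small-en",
        model_kwargs={"device": "CPU"},
        encode_kwargs={"normalize_embeddings": True},
    )
    print(len(bge_embeddings.embed_query("hi this is harrison")))  # 384 for bge-small-en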