community[minor]: Add Ascend NPU optimized Embeddings (#20260)

- **Description:** Add NPU support for embeddings --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
3 months ago · 398b2b9c51
parent 7b1066341b
commit 398b2b9c51
7 changed files with 335 additions and 2 deletions
--- a/docs/docs/integrations/providers/ascend.mdx
+++ b/docs/docs/integrations/providers/ascend.mdx
@ -0,0 +1,24 @@
 # Ascend
 >[Ascend](https://www.hiascend.com/) is a Neural Processing Unit (NPU) provided by Huawei.
 This page covers how to use the Ascend NPU with LangChain.
 ### Installation
 Install torch-npu using:
 ```bash
 pip install torch-npu
 ```
 Please follow the installation instructions as specified below:
 * Install CANN as shown [here](https://www.hiascend.com/document/detail/zh/canncommercial/700/quickstart/quickstart/quickstart_18_0002.html).
 ### Embedding Models
 See a [usage example](/docs/integrations/text_embedding/ascend).
 ```python
 from langchain_community.embeddings import AscendEmbeddings
 ```
--- a/docs/docs/integrations/text_embedding/ascend.ipynb
+++ b/docs/docs/integrations/text_embedding/ascend.ipynb
@ -0,0 +1,183 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a636f6f3-00d7-4248-8c36-3da51190e882",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-0.04053403 -0.05560051 -0.04385472 ...  0.09371872  0.02846981\n",
      " -0.00576814]\n"
     ]
    }
   ],
   "source": [
    "from langchain_community.embeddings import AscendEmbeddings\n",
    "\n",
    "model = AscendEmbeddings(\n",
    "    model_path=\"/root/.cache/modelscope/hub/yangjhchs/acge_text_embedding\",\n",
    "    device_id=0,\n",
    "    query_instruction=\"Represent this sentence for searching relevant passages: \",\n",
    ")\n",
    "emb = model.embed_query(\"hellow\")\n",
    "print(emb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8d29ddaa-eef3-4a4e-93d8-0f1c13525fb4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.00348254  0.03098977 -0.00203087 ...  0.08492374  0.03970494\n",
      "  -0.03372753]\n",
      " [-0.02198593 -0.01601127  0.00215684 ...  0.06065163  0.00126425\n",
      "  -0.03634358]]\n"
     ]
    }
   ],
   "source": [
    "doc_embs = model.embed_documents(\n",
    "    [\"This is a content of the document\", \"This is another document\"]\n",
    ")\n",
    "print(doc_embs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "797a720d-c478-4254-be2c-975bc4529f57",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<coroutine object Embeddings.aembed_query at 0x7f9fac699cb0>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.aembed_query(\"hellow\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "57e62e53-4d2c-4532-9b77-a46bc3da1130",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.04053403, -0.05560051, -0.04385472, ...,  0.09371872,\n",
       "        0.02846981, -0.00576814], dtype=float32)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "await model.aembed_query(\"hellow\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7e260457-8b50-4ca3-8f76-8a76d8bba8c8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<coroutine object Embeddings.aembed_documents at 0x7fa093ff1a80>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.aembed_documents(\n",
    "    [\"This is a content of the document\", \"This is another document\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ce954b94-aaac-4d2c-80be-b2988c16af6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.00348254,  0.03098977, -0.00203087, ...,  0.08492374,\n",
       "         0.03970494, -0.03372753],\n",
       "       [-0.02198593, -0.01601127,  0.00215684, ...,  0.06065163,\n",
       "         0.00126425, -0.03634358]], dtype=float32)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "await model.aembed_documents(\n",
    "    [\"This is a content of the document\", \"This is another document\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7823d69d-de79-4f95-90dd-38f4bdeb9bcc",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/libs/community/langchain_community/embeddings/init.py
+++ b/libs/community/langchain_community/embeddings/init.py
@ -22,6 +22,9 @@ if TYPE_CHECKING:
    from langchain_community.embeddings.anyscale import (
        AnyscaleEmbeddings,
    )
    from langchain_community.embeddings.ascend import (
        AscendEmbeddings,
    )
    from langchain_community.embeddings.awa import (
        AwaEmbeddings,
    )
@ -236,6 +239,7 @@ __all__ = [
    "AlephAlphaAsymmetricSemanticEmbedding",
    "AlephAlphaSymmetricSemanticEmbedding",
    "AnyscaleEmbeddings",
    "AscendEmbeddings",
    "AwaEmbeddings",
    "AzureOpenAIEmbeddings",
    "BaichuanTextEmbeddings",
@ -391,6 +395,7 @@ _module_lookup = {
    "TitanTakeoffEmbed": "langchain_community.embeddings.titan_takeoff",
    "PremAIEmbeddings": "langchain_community.embeddings.premai",
    "YandexGPTEmbeddings": "langchain_community.embeddings.yandex",
    "AscendEmbeddings": "langchain_community.embeddings.ascend",
    "ZhipuAIEmbeddings": "langchain_community.embeddings.zhipuai",
 }
--- a/libs/community/langchain_community/embeddings/ascend.py
+++ b/libs/community/langchain_community/embeddings/ascend.py
@ -0,0 +1,120 @@
 import os
 from typing import Any, Dict, List, Optional
 from langchain_core.embeddings import Embeddings
 from langchain_core.pydantic_v1 import BaseModel, root_validator
 class AscendEmbeddings(Embeddings, BaseModel):
    """Embedding model accelerated on a Huawei Ascend NPU.

    CANN and ``torch_npu`` must be installed before this class is used.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import AscendEmbeddings

            model = AscendEmbeddings(
                model_path=<path_to_model>,
                device_id=0,
                query_instruction=(
                    "Represent this sentence for searching relevant passages: "
                ),
            )
    """

    model_path: str
    """Path of the model to load; validation fails if the path does not exist."""
    device_id: int = 0
    """Ascend NPU device id."""
    query_instruction: str = ""
    """Instruction prepended to the text when embedding a query."""
    document_instruction: str = ""
    """Instruction prepended to each text when embedding documents."""
    use_fp16: bool = True
    """Whether to convert the loaded model to half precision."""
    pooling_method: Optional[str] = "cls"
    """Pooling applied to the last hidden state: ``"cls"`` or ``"mean"``."""
    model: Any
    """Loaded model; assigned in ``__init__``."""
    tokenizer: Any
    """Loaded tokenizer; assigned in ``__init__``."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Validate the fields, load model and tokenizer, then run a warm-up pass.

        Raises:
            ImportError: If ``transformers`` cannot be imported.
            Exception: If the model or the tokenizer cannot be loaded.
        """
        super().__init__(*args, **kwargs)
        try:
            from transformers import AutoModel, AutoTokenizer
        except ImportError as e:
            raise ImportError(
                "Unable to import transformers, please install with "
                "`pip install -U transformers`."
            ) from e
        try:
            self.model = AutoModel.from_pretrained(self.model_path).npu().eval()
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
        except Exception as e:
            # The path is interpolated (the original printed the literal
            # text "self.model_path") and the cause is chained.
            raise Exception(
                f"Failed to load model [{self.model_path}], "
                f"due to following error:{e}"
            ) from e
        if self.use_fp16:
            self.model.half()
        # Run one batch through the model up front so the first real call
        # does not pay the start-up cost.
        self.encode([f"warmup {i} times" for i in range(10)])

    # skip_on_failure: do not run when field validation already failed,
    # otherwise values["model_path"] could be missing and raise KeyError.
    @root_validator(pre=False, skip_on_failure=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Check the model path, import ``torch_npu`` and select the NPU device.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
            ModuleNotFoundError: If ``torch_npu`` cannot be imported.
            Exception: If selecting the device fails.
        """
        if not os.access(values["model_path"], os.F_OK):
            raise FileNotFoundError(
                f"Unabled to find valid model path in [{values['model_path']}]"
            )
        try:
            import torch_npu
        except ImportError as e:
            raise ModuleNotFoundError(
                "torch_npu not found, please install torch_npu"
            ) from e
        try:
            torch_npu.npu.set_device(values["device_id"])
        except Exception as e:
            raise Exception(f"set device failed due to {e}") from e
        return values

    def encode(self, sentences: Any) -> Any:
        """Tokenize, run the model, pool and L2-normalize the sentences.

        Args:
            sentences: Input passed straight to the tokenizer.

        Returns:
            The normalized embeddings as a NumPy array on the CPU.

        Raises:
            ImportError: If ``torch`` cannot be imported.
        """
        try:
            import torch
        except ImportError as e:
            raise ImportError(
                "Unable to import torch, please install with " "`pip install -U torch`."
            ) from e
        inputs = self.tokenizer(
            sentences,
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=512,
        )
        # Move each tensor to the device once and reuse it for pooling.
        input_ids = inputs.input_ids.npu()
        attention_mask = inputs.attention_mask.npu()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            last_hidden_state = self.model(
                input_ids, attention_mask, return_dict=True
            ).last_hidden_state
            pooled = self.pooling(last_hidden_state, attention_mask)
            embeddings = torch.nn.functional.normalize(pooled, dim=-1)
        return embeddings.cpu().detach().numpy()

    def pooling(self, last_hidden_state: Any, attention_mask: Any = None) -> Any:
        """Reduce per-token hidden states to one vector per sentence.

        Args:
            last_hidden_state: Hidden states indexed as (batch, token, hidden).
            attention_mask: Mask indexed as (batch, token); required for
                ``"mean"`` pooling.

        Returns:
            One pooled vector per sentence.

        Raises:
            ImportError: If ``torch`` cannot be imported.
            ValueError: If ``"mean"`` pooling is requested without a mask.
            NotImplementedError: If ``pooling_method`` is not supported.
        """
        try:
            import torch
        except ImportError as e:
            raise ImportError(
                "Unable to import torch, please install with " "`pip install -U torch`."
            ) from e
        if self.pooling_method == "cls":
            return last_hidden_state[:, 0]
        elif self.pooling_method == "mean":
            if attention_mask is None:
                raise ValueError("attention_mask is required for mean pooling")
            # Sum over the token axis (dim=1), not the hidden axis, so that
            # padded positions are zeroed out and one vector per sentence
            # remains.
            s = torch.sum(
                last_hidden_state * attention_mask.unsqueeze(-1).float(), dim=1
            )
            d = attention_mask.sum(dim=1, keepdim=True).float()
            return s / d
        else:
            raise NotImplementedError(
                f"Pooling method [{self.pooling_method}] not implemented"
            )

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents, prefixing each with ``document_instruction``.

        NOTE: the value comes straight from ``encode``, which returns a
        NumPy array rather than nested Python lists.
        """
        return self.encode([self.document_instruction + text for text in texts])

    def embed_query(self, text: str) -> List[float]:
        """Embed one query, prefixed with ``query_instruction``.

        NOTE: the value is the first row of the NumPy array returned by
        ``encode``.
        """
        return self.encode([self.query_instruction + text])[0]
--- a/libs/community/pyproject.toml
+++ b/libs/community/pyproject.toml
@ -156,4 +156,4 @@ ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
 # whats is a typo but used frequently in queries so kept as is
 # aapply - async apply
 # unsecure - typo but part of API, decided to not bother for now
-ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin'
+ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin,cann'
--- a/libs/community/tests/unit_tests/embeddings/test_imports.py
+++ b/libs/community/tests/unit_tests/embeddings/test_imports.py
@ -78,6 +78,7 @@ EXPECTED_ALL = [
    "OpenVINOEmbeddings",
    "OpenVINOBgeEmbeddings",
    "SolarEmbeddings",
    "AscendEmbeddings",
    "ZhipuAIEmbeddings",
 ]
--- a/pyproject.toml
+++ b/pyproject.toml
@ -71,7 +71,7 @@ ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
 # whats is a typo but used frequently in queries so kept as is
 # aapply - async apply
 # unsecure - typo but part of API, decided to not bother for now
-ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin'
+ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin,cann'
 [tool.ruff]
 extend-include = ["*.ipynb"]