community[minor]: Add Ascend NPU optimized Embeddings (#20260)

- **Description:** Add NPU support for embeddings --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
1 week ago · 398b2b9c51
parent 7b1066341b
commit 398b2b9c51
7 changed files with 335 additions and 2 deletions
--- a/docs/docs/integrations/providers/ascend.mdx
+++ b/docs/docs/integrations/providers/ascend.mdx
@ -0,0 +1,24 @@
+# Ascend
+
+>[Ascend](https://www.hiascend.com/) is a Neural Processing Unit (NPU) provided by Huawei.
+
+This page covers how to use Ascend NPUs with LangChain.
+
+### Installation
+
+Install torch-npu using:
+
+```bash
+pip install torch-npu
+```
+
+Please follow the installation instructions as specified below:
+* Install CANN as shown [here](https://www.hiascend.com/document/detail/zh/canncommercial/700/quickstart/quickstart/quickstart_18_0002.html).
+
+### Embedding Models
+
+See a [usage example](/docs/integrations/text_embedding/ascend).
+
+```python
+from langchain_community.embeddings import AscendEmbeddings
+```
--- a/docs/docs/integrations/text_embedding/ascend.ipynb
+++ b/docs/docs/integrations/text_embedding/ascend.ipynb
@ -0,0 +1,183 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "a636f6f3-00d7-4248-8c36-3da51190e882",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[-0.04053403 -0.05560051 -0.04385472 ...  0.09371872  0.02846981\n",
+      " -0.00576814]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain_community.embeddings import AscendEmbeddings\n",
+    "\n",
+    "model = AscendEmbeddings(\n",
+    "    model_path=\"/root/.cache/modelscope/hub/yangjhchs/acge_text_embedding\",\n",
+    "    device_id=0,\n",
+    "    query_instruction=\"Represend this sentence for searching relevant passages: \",\n",
+    ")\n",
+    "emb = model.embed_query(\"hellow\")\n",
+    "print(emb)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8d29ddaa-eef3-4a4e-93d8-0f1c13525fb4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[-0.00348254  0.03098977 -0.00203087 ...  0.08492374  0.03970494\n",
+      "  -0.03372753]\n",
+      " [-0.02198593 -0.01601127  0.00215684 ...  0.06065163  0.00126425\n",
+      "  -0.03634358]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "doc_embs = model.embed_documents(\n",
+    "    [\"This is a content of the document\", \"This is another document\"]\n",
+    ")\n",
+    "print(doc_embs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "797a720d-c478-4254-be2c-975bc4529f57",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<coroutine object Embeddings.aembed_query at 0x7f9fac699cb0>"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.aembed_query(\"hellow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "57e62e53-4d2c-4532-9b77-a46bc3da1130",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([-0.04053403, -0.05560051, -0.04385472, ...,  0.09371872,\n",
+       "        0.02846981, -0.00576814], dtype=float32)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "await model.aembed_query(\"hellow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "7e260457-8b50-4ca3-8f76-8a76d8bba8c8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<coroutine object Embeddings.aembed_documents at 0x7fa093ff1a80>"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.aembed_documents(\n",
+    "    [\"This is a content of the document\", \"This is another document\"]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ce954b94-aaac-4d2c-80be-b2988c16af6d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[-0.00348254,  0.03098977, -0.00203087, ...,  0.08492374,\n",
+       "         0.03970494, -0.03372753],\n",
+       "       [-0.02198593, -0.01601127,  0.00215684, ...,  0.06065163,\n",
+       "         0.00126425, -0.03634358]], dtype=float32)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "await model.aembed_documents(\n",
+    "    [\"This is a content of the document\", \"This is another document\"]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7823d69d-de79-4f95-90dd-38f4bdeb9bcc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/libs/community/langchain_community/embeddings/init.py
+++ b/libs/community/langchain_community/embeddings/init.py
@ -22,6 +22,9 @@ if TYPE_CHECKING:
    from langchain_community.embeddings.anyscale import (
        AnyscaleEmbeddings,
    )
+    from langchain_community.embeddings.ascend import (
+        AscendEmbeddings,
+    )
    from langchain_community.embeddings.awa import (
        AwaEmbeddings,
    )
@ -236,6 +239,7 @@ __all__ = [
    "AlephAlphaAsymmetricSemanticEmbedding",
    "AlephAlphaSymmetricSemanticEmbedding",
    "AnyscaleEmbeddings",
+    "AscendEmbeddings",
    "AwaEmbeddings",
    "AzureOpenAIEmbeddings",
    "BaichuanTextEmbeddings",
@ -391,6 +395,7 @@ _module_lookup = {
    "TitanTakeoffEmbed": "langchain_community.embeddings.titan_takeoff",
    "PremAIEmbeddings": "langchain_community.embeddings.premai",
    "YandexGPTEmbeddings": "langchain_community.embeddings.yandex",
+    "AscendEmbeddings": "langchain_community.embeddings.ascend",
    "ZhipuAIEmbeddings": "langchain_community.embeddings.zhipuai",
 }

--- a/libs/community/langchain_community/embeddings/ascend.py
+++ b/libs/community/langchain_community/embeddings/ascend.py
@ -0,0 +1,120 @@
+import os
+from typing import Any, Dict, List, Optional
+
+from langchain_core.embeddings import Embeddings
+from langchain_core.pydantic_v1 import BaseModel, root_validator
+
+
+class AscendEmbeddings(Embeddings, BaseModel):
+    """
+    Ascend NPU accelerate Embedding model
+
+    Please ensure that you have installed CANN and torch_npu.
+
+    Example:
+
+    from langchain_community.embeddings import AscendEmbeddings
+    model = AscendEmbeddings(model_path=<path_to_model>,
+        device_id=0,
+        query_instruction="Represent this sentence for searching relevant passages: "
+    )
+    """
+
+    """model path"""
+    model_path: str
+    """Ascend NPU device id."""
+    device_id: int = 0
+    """Unstruntion to used for embedding query."""
+    query_instruction: str = ""
+    """Unstruntion to used for embedding document."""
+    document_instruction: str = ""
+    use_fp16: bool = True
+    pooling_method: Optional[str] = "cls"
+    model: Any
+    tokenizer: Any
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        try:
+            from transformers import AutoModel, AutoTokenizer
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import transformers, please install with "
+                "`pip install -U transformers`."
+            ) from e
+        try:
+            self.model = AutoModel.from_pretrained(self.model_path).npu().eval()
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
+        except Exception as e:
+            raise Exception(
+                f"Failed to load model [self.model_path], due to following error:{e}"
+            )
+
+        if self.use_fp16:
+            self.model.half()
+        self.encode([f"warmup {i} times" for i in range(10)])
+
+    @root_validator
+    def validate_environment(cls, values: Dict) -> Dict:
+        if not os.access(values["model_path"], os.F_OK):
+            raise FileNotFoundError(
+                f"Unabled to find valid model path in [{values['model_path']}]"
+            )
+        try:
+            import torch_npu
+        except ImportError:
+            raise ModuleNotFoundError("torch_npu not found, please install torch_npu")
+        except Exception as e:
+            raise e
+        try:
+            torch_npu.npu.set_device(values["device_id"])
+        except Exception as e:
+            raise Exception(f"set device failed due to {e}")
+        return values
+
+    def encode(self, sentences: Any) -> Any:
+        inputs = self.tokenizer(
+            sentences,
+            padding=True,
+            truncation=True,
+            return_tensors="pt",
+            max_length=512,
+        )
+        try:
+            import torch
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import torch, please install with " "`pip install -U torch`."
+            ) from e
+        last_hidden_state = self.model(
+            inputs.input_ids.npu(), inputs.attention_mask.npu(), return_dict=True
+        ).last_hidden_state
+        tmp = self.pooling(last_hidden_state, inputs["attention_mask"].npu())
+        embeddings = torch.nn.functional.normalize(tmp, dim=-1)
+        return embeddings.cpu().detach().numpy()
+
+    def pooling(self, last_hidden_state: Any, attention_mask: Any = None) -> Any:
+        try:
+            import torch
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import torch, please install with " "`pip install -U torch`."
+            ) from e
+        if self.pooling_method == "cls":
+            return last_hidden_state[:, 0]
+        elif self.pooling_method == "mean":
+            s = torch.sum(
+                last_hidden_state * attention_mask.unsqueeze(-1).float(), dim=-1
+            )
+            d = attention_mask.sum(dim=1, keepdim=True).float()
+            return s / d
+        else:
+            raise NotImplementedError(
+                f"Pooling method [{self.pooling_method}] not implemented"
+            )
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return self.encode([self.document_instruction + text for text in texts])
+
+    def embed_query(self, text: str) -> List[float]:
+        return self.encode([self.query_instruction + text])[0]
--- a/libs/community/pyproject.toml
+++ b/libs/community/pyproject.toml
@ -156,4 +156,4 @@ ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
 # whats is a typo but used frequently in queries so kept as is
 # aapply - async apply
 # unsecure - typo but part of API, decided to not bother for now
-ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin'
+ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin,cann'
--- a/libs/community/tests/unit_tests/embeddings/test_imports.py
+++ b/libs/community/tests/unit_tests/embeddings/test_imports.py
@ -78,6 +78,7 @@ EXPECTED_ALL = [
    "OpenVINOEmbeddings",
    "OpenVINOBgeEmbeddings",
    "SolarEmbeddings",
+    "AscendEmbeddings",
    "ZhipuAIEmbeddings",
 ]

--- a/pyproject.toml
+++ b/pyproject.toml
@ -71,7 +71,7 @@ ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
 # whats is a typo but used frequently in queries so kept as is
 # aapply - async apply
 # unsecure - typo but part of API, decided to not bother for now
-ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin'
+ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin,cann'

 [tool.ruff]
 extend-include = ["*.ipynb"]