mirror of https://github.com/hwchase17/langchain
community[minor]: Add Ascend NPU optimized Embeddings (#20260)
- **Description:** Add NPU support for embeddings --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>pull/22095/head
parent
7b1066341b
commit
398b2b9c51
@ -0,0 +1,24 @@
|
|||||||
|
# Ascend
|
||||||
|
|
||||||
|
>[Ascend](https://www.hiascend.com/) is a Neural Processing Unit (NPU) provided by Huawei.
|
||||||
|
|
||||||
|
This page covers how to use Ascend NPUs with LangChain.
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
Install torch-npu using:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install torch-npu
|
||||||
|
```
|
||||||
|
|
||||||
|
Please follow the installation instructions as specified below:
|
||||||
|
* Install CANN as shown [here](https://www.hiascend.com/document/detail/zh/canncommercial/700/quickstart/quickstart/quickstart_18_0002.html).
|
||||||
|
|
||||||
|
### Embedding Models
|
||||||
|
|
||||||
|
See a [usage example](/docs/integrations/text_embedding/ascend).
|
||||||
|
|
||||||
|
```python
|
||||||
|
from langchain_community.embeddings import AscendEmbeddings
|
||||||
|
```
|
@ -0,0 +1,183 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "a636f6f3-00d7-4248-8c36-3da51190e882",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[-0.04053403 -0.05560051 -0.04385472 ... 0.09371872 0.02846981\n",
|
||||||
|
" -0.00576814]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from langchain_community.embeddings import AscendEmbeddings\n",
|
||||||
|
"\n",
|
||||||
|
"model = AscendEmbeddings(\n",
|
||||||
|
" model_path=\"/root/.cache/modelscope/hub/yangjhchs/acge_text_embedding\",\n",
|
||||||
|
" device_id=0,\n",
|
||||||
|
" query_instruction=\"Represent this sentence for searching relevant passages: \",\n",
|
||||||
|
")\n",
|
||||||
|
"emb = model.embed_query(\"hellow\")\n",
|
||||||
|
"print(emb)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "8d29ddaa-eef3-4a4e-93d8-0f1c13525fb4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[[-0.00348254 0.03098977 -0.00203087 ... 0.08492374 0.03970494\n",
|
||||||
|
" -0.03372753]\n",
|
||||||
|
" [-0.02198593 -0.01601127 0.00215684 ... 0.06065163 0.00126425\n",
|
||||||
|
" -0.03634358]]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"doc_embs = model.embed_documents(\n",
|
||||||
|
" [\"This is a content of the document\", \"This is another document\"]\n",
|
||||||
|
")\n",
|
||||||
|
"print(doc_embs)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "797a720d-c478-4254-be2c-975bc4529f57",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<coroutine object Embeddings.aembed_query at 0x7f9fac699cb0>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"model.aembed_query(\"hellow\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "57e62e53-4d2c-4532-9b77-a46bc3da1130",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([-0.04053403, -0.05560051, -0.04385472, ..., 0.09371872,\n",
|
||||||
|
" 0.02846981, -0.00576814], dtype=float32)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"await model.aembed_query(\"hellow\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "7e260457-8b50-4ca3-8f76-8a76d8bba8c8",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<coroutine object Embeddings.aembed_documents at 0x7fa093ff1a80>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"model.aembed_documents(\n",
|
||||||
|
" [\"This is a content of the document\", \"This is another document\"]\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "ce954b94-aaac-4d2c-80be-b2988c16af6d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([[-0.00348254, 0.03098977, -0.00203087, ..., 0.08492374,\n",
|
||||||
|
" 0.03970494, -0.03372753],\n",
|
||||||
|
" [-0.02198593, -0.01601127, 0.00215684, ..., 0.06065163,\n",
|
||||||
|
" 0.00126425, -0.03634358]], dtype=float32)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"await model.aembed_documents(\n",
|
||||||
|
" [\"This is a content of the document\", \"This is another document\"]\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7823d69d-de79-4f95-90dd-38f4bdeb9bcc",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.14"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -0,0 +1,120 @@
|
|||||||
|
import os
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from langchain_core.embeddings import Embeddings
|
||||||
|
from langchain_core.pydantic_v1 import BaseModel, root_validator
|
||||||
|
|
||||||
|
|
||||||
|
class AscendEmbeddings(Embeddings, BaseModel):
    """Embedding model accelerated on an Ascend NPU.

    Please ensure that you have installed CANN and torch_npu.

    The model and tokenizer are loaded with ``transformers`` from
    ``model_path`` when the object is constructed, the model is moved to the
    NPU, and a short warm-up batch is encoded.

    Example:

        from langchain_community.embeddings import AscendEmbeddings
        model = AscendEmbeddings(
            model_path=<path_to_model>,
            device_id=0,
            query_instruction="Represent this sentence for searching relevant passages: ",
        )
    """

    model_path: str
    """Path of the model; must exist and is passed to ``from_pretrained``."""
    device_id: int = 0
    """Ascend NPU device id."""
    query_instruction: str = ""
    """Instruction prepended to the text when embedding a query."""
    document_instruction: str = ""
    """Instruction prepended to each text when embedding documents."""
    use_fp16: bool = True
    """Whether to convert the model to half precision after loading."""
    pooling_method: Optional[str] = "cls"
    """Pooling applied to the last hidden state: ``"cls"`` or ``"mean"``."""
    model: Any
    """Loaded model; set by ``__init__``."""
    tokenizer: Any
    """Loaded tokenizer; set by ``__init__``."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Validate the fields, load model and tokenizer, and warm up.

        Raises:
            ImportError: If ``transformers`` is not installed.
            Exception: If the model or tokenizer cannot be loaded; the
                underlying error is chained as ``__cause__``.
        """
        super().__init__(*args, **kwargs)
        try:
            from transformers import AutoModel, AutoTokenizer
        except ImportError as e:
            raise ImportError(
                "Unable to import transformers, please install with "
                "`pip install -U transformers`."
            ) from e
        try:
            self.model = AutoModel.from_pretrained(self.model_path).npu().eval()
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
        except Exception as e:
            # The original message printed the literal text "self.model_path";
            # interpolate the actual path and keep the cause attached.
            raise Exception(
                f"Failed to load model [{self.model_path}], "
                f"due to following error:{e}"
            ) from e

        if self.use_fp16:
            self.model.half()
        # Run one throw-away batch so the first real call is not the slow one.
        self.encode([f"warmup {i} times" for i in range(10)])

    @root_validator
    def validate_environment(cls, values: Dict) -> Dict:
        """Check the model path and select the NPU device.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
            ModuleNotFoundError: If ``torch_npu`` cannot be imported.
            Exception: If the NPU device cannot be selected.
        """
        if "model_path" not in values or "device_id" not in values:
            # A field already failed validation; return untouched so pydantic
            # reports that error instead of a bare KeyError from here.
            return values

        model_path = values["model_path"]
        if not os.access(model_path, os.F_OK):
            raise FileNotFoundError(
                f"Unabled to find valid model path in [{model_path}]"
            )
        try:
            import torch_npu
        except ImportError as e:
            raise ModuleNotFoundError(
                "torch_npu not found, please install torch_npu"
            ) from e
        try:
            torch_npu.npu.set_device(values["device_id"])
        except Exception as e:
            raise Exception(f"set device failed due to {e}") from e
        return values

    def encode(self, sentences: Any) -> Any:
        """Embed ``sentences`` and return L2-normalized vectors.

        Args:
            sentences: Text input handed to the tokenizer (the callers in
                this class pass a list of strings).

        Returns:
            A NumPy array produced from the pooled, normalized model output.

        Raises:
            ImportError: If ``torch`` is not installed.
        """
        try:
            import torch
        except ImportError as e:
            raise ImportError(
                "Unable to import torch, please install with "
                "`pip install -U torch`."
            ) from e
        inputs = self.tokenizer(
            sentences,
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=512,
        )
        # Move each tensor to the NPU once and reuse it for model and pooling.
        input_ids = inputs.input_ids.npu()
        attention_mask = inputs.attention_mask.npu()
        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            last_hidden_state = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                return_dict=True,
            ).last_hidden_state
            pooled = self.pooling(last_hidden_state, attention_mask)
            embeddings = torch.nn.functional.normalize(pooled, dim=-1)
        return embeddings.cpu().detach().numpy()

    def pooling(self, last_hidden_state: Any, attention_mask: Any = None) -> Any:
        """Reduce per-token hidden states to one vector per input.

        Args:
            last_hidden_state: Model output, assumed to be shaped
                (batch, seq_len, hidden).
            attention_mask: Padding mask, assumed to be shaped
                (batch, seq_len); required for ``"mean"`` pooling.

        Returns:
            The first token's hidden state for ``"cls"``, or the
            mask-weighted average over tokens for ``"mean"``.

        Raises:
            ImportError: If ``torch`` is not installed.
            ValueError: If ``"mean"`` pooling is requested without a mask.
            NotImplementedError: For any other ``pooling_method``.
        """
        try:
            import torch
        except ImportError as e:
            raise ImportError(
                "Unable to import torch, please install with "
                "`pip install -U torch`."
            ) from e
        if self.pooling_method == "cls":
            return last_hidden_state[:, 0]
        if self.pooling_method == "mean":
            if attention_mask is None:
                raise ValueError("attention_mask is required for mean pooling")
            mask = attention_mask.unsqueeze(-1).float()
            # Sum over the token axis (dim=1), not the hidden axis, so the
            # result keeps the hidden dimension.
            summed = torch.sum(last_hidden_state * mask, dim=1)
            token_count = attention_mask.sum(dim=1, keepdim=True).float()
            return summed / token_count
        raise NotImplementedError(
            f"Pooling method [{self.pooling_method}] not implemented"
        )

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents, prefixing each with ``document_instruction``.

        Note: the value is whatever ``encode`` returns (a NumPy array with
        one row per text), not a nested Python list.
        """
        return self.encode([self.document_instruction + text for text in texts])

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query, prefixed with ``query_instruction``.

        Note: the value is the first row of the array returned by ``encode``.
        """
        return self.encode([self.query_instruction + text])[0]
|
Loading…
Reference in New Issue