community: Add save_model function for openvino reranker and embedding (#19896)

pull/20613/head
Ethan Yang 3 months ago committed by GitHub
parent 9c1d7f2405
commit 2d6d796040
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
@ -28,42 +28,7 @@
"is_executing": true
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet\n",
"%pip install --upgrade --quiet faiss-cpu"
@ -404,46 +369,23 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 16, 18, 6]\n"
]
}
],
"outputs": [],
"source": [
"from langchain.retrievers import ContextualCompressionRetriever\n",
"from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker\n",
"\n",
"model_name = \"BAAI/bge-reranker-large\"\n",
"\n",
"compressor = OpenVINOReranker(model_name_or_path=model_name)\n",
"ov_compressor = OpenVINOReranker(model_name_or_path=model_name, top_n=4)\n",
"compression_retriever = ContextualCompressionRetriever(\n",
" base_compressor=compressor, base_retriever=retriever\n",
" base_compressor=ov_compressor, base_retriever=retriever\n",
")\n",
"\n",
"compressed_docs = compression_retriever.get_relevant_documents(\n",
@ -461,7 +403,7 @@
}
},
"source": [
"After reranking, the top 3 documents are different from the top 3 documents retrieved by the base retriever."
"After reranking, the top 4 documents are different from the top 4 documents retrieved by the base retriever."
]
},
{
@ -532,37 +474,13 @@
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"ov_model_dir = \"bge-reranker-large-ov\"\n",
"if not Path(ov_model_dir).exists():\n",
" from optimum.intel.openvino import OVModelForSequenceClassification\n",
" from transformers import AutoTokenizer\n",
"\n",
" ov_model = OVModelForSequenceClassification.from_pretrained(\n",
" model_name, compile=False, export=True\n",
" )\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
" ov_model.half()\n",
" ov_model.save_pretrained(ov_model_dir)\n",
" tokenizer.save_pretrained(ov_model_dir)"
" ov_compressor.save_model(ov_model_dir)"
]
},
{
@ -579,7 +497,7 @@
}
],
"source": [
"compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
"ov_compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
]
},
{

@ -41,41 +41,10 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "ff9be586",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
" warnings.warn(\n",
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
}
],
"outputs": [],
"source": [
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"model_kwargs = {\"device\": \"CPU\"}\n",
@ -131,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "bb5e74c0",
"metadata": {},
"outputs": [],
@ -150,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "a6544a65",
"metadata": {},
"outputs": [],
@ -159,24 +128,23 @@
"\n",
"ov_model_dir = \"all-mpnet-base-v2-ov\"\n",
"if not Path(ov_model_dir).exists():\n",
" from optimum.intel.openvino import OVModelForFeatureExtraction\n",
" from transformers import AutoTokenizer\n",
"\n",
" ov_model = OVModelForFeatureExtraction.from_pretrained(\n",
" model_name, compile=False, export=True\n",
" )\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
" ov_model.half()\n",
" ov_model.save_pretrained(ov_model_dir)\n",
" tokenizer.save_pretrained(ov_model_dir)"
" ov_embeddings.save_model(ov_model_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "162004c4",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Compiling the model to CPU ...\n"
]
}
],
"source": [
"ov_embeddings = OpenVINOEmbeddings(\n",
" model_name_or_path=ov_model_dir,\n",
@ -196,43 +164,10 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "66f5c6ba-1446-43e1-b012-800d17cef300",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
" warnings.warn(\n",
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
}
],
"outputs": [],
"source": [
"from langchain_community.embeddings import OpenVINOBgeEmbeddings\n",
"\n",

@ -155,3 +155,12 @@ class OpenVINOReranker(BaseDocumentCompressor):
)
final_results.append(doc)
return final_results
def save_model(
    self,
    model_path: str,
) -> bool:
    """Write the reranker's OpenVINO model and tokenizer to ``model_path``.

    The model's ``half()`` method is called before saving, then both the
    model and the tokenizer are written to the same location through their
    ``save_pretrained`` methods.

    NOTE(review): ``half()`` is called on ``self.ov_model`` itself, so it
    presumably affects the in-memory model as well as the saved files
    (expected to be an FP16 weight conversion -- confirm against the
    optimum-intel documentation).

    Args:
        model_path: Destination passed unchanged to both
            ``save_pretrained`` calls.

    Returns:
        Always ``True``; failures surface as exceptions raised by the
        underlying calls.
    """
    model = self.ov_model
    # Order matters: convert first, then persist the model, then the tokenizer.
    model.half()
    model.save_pretrained(model_path)
    self.tokenizer.save_pretrained(model_path)
    return True

@ -276,6 +276,15 @@ class OpenVINOEmbeddings(BaseModel, Embeddings):
"""
return self.embed_documents([text])[0]
def save_model(
    self,
    model_path: str,
) -> bool:
    """Write the embedding model and its tokenizer to ``model_path``.

    Calls ``half()`` on the OpenVINO model, then saves the model followed
    by the tokenizer, both via ``save_pretrained`` with the same path.

    NOTE(review): ``half()`` operates on ``self.ov_model`` directly, so the
    loaded model is presumably modified too, not just the exported copy
    (expected to be an FP16 weight conversion -- confirm against the
    optimum-intel documentation).

    Args:
        model_path: Destination handed unchanged to both
            ``save_pretrained`` calls.

    Returns:
        Always ``True``; any failure is reported through an exception from
        the underlying calls rather than through the return value.
    """
    ov_model = self.ov_model
    # Keep the original sequence: convert, save model, save tokenizer.
    ov_model.half()
    ov_model.save_pretrained(model_path)
    tokenizer = self.tokenizer
    tokenizer.save_pretrained(model_path)
    return True
class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
"""OpenVINO BGE embedding models.
@ -285,7 +294,7 @@ class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
from langchain_community.embeddings import OpenVINOBgeEmbeddings
model_name_or_path = "BAAI/bge-large-en"
model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'CPU'}
encode_kwargs = {'normalize_embeddings': True}
ov = OpenVINOBgeEmbeddings(
@ -295,14 +304,6 @@ class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
)
"""
model_name_or_path: str
"""HuggingFace model id."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass when calling the `encode` method of the model."""
show_progress: bool = False
"""Whether to show a progress bar."""
query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
"""Instruction to use for embedding query."""
embed_instruction: str = ""

Loading…
Cancel
Save