|
|
@ -31,7 +31,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet"
|
|
|
|
"%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" langchain-huggingface --quiet"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -130,6 +130,28 @@
|
|
|
|
"print(chain.invoke({\"question\": question}))"
|
|
|
|
"print(chain.invoke({\"question\": question}))"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
|
|
|
"id": "446a01e0",
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"To get a response without the prompt, you can bind `skip_prompt=True` to the LLM."
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
|
|
"id": "e3baeab2",
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"chain = prompt | ov_llm.bind(skip_prompt=True)\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"question = \"What is electroencephalography?\"\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"print(chain.invoke({\"question\": question}))"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"id": "12524837-e9ab-455a-86be-66b95f4f893a",
|
|
|
|
"id": "12524837-e9ab-455a-86be-66b95f4f893a",
|
|
|
@ -243,7 +265,8 @@
|
|
|
|
" skip_prompt=True,\n",
|
|
|
|
" skip_prompt=True,\n",
|
|
|
|
" skip_special_tokens=True,\n",
|
|
|
|
" skip_special_tokens=True,\n",
|
|
|
|
")\n",
|
|
|
|
")\n",
|
|
|
|
"ov_llm.pipeline._forward_params = {\"streamer\": streamer, \"max_new_tokens\": 100}\n",
|
|
|
|
"pipeline_kwargs = {\"pipeline_kwargs\": {\"streamer\": streamer, \"max_new_tokens\": 100}}\n",
|
|
|
|
|
|
|
|
"chain = prompt | ov_llm.bind(**pipeline_kwargs)\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"t1 = Thread(target=chain.invoke, args=({\"question\": question},))\n",
|
|
|
|
"t1 = Thread(target=chain.invoke, args=({\"question\": question},))\n",
|
|
|
|
"t1.start()\n",
|
|
|
|
"t1.start()\n",
|
|
|
|