|
|
@ -216,6 +216,42 @@
|
|
|
|
"}"
|
|
|
|
"}"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
|
|
|
"id": "7574c6f0",
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"### Streaming\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"To stream the LLM output token by token, create a Hugging Face `TextIteratorStreamer` and pass it to the pipeline through `_forward_params`."
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
|
|
"id": "548c9489",
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"from threading import Thread\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"from transformers import TextIteratorStreamer\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"streamer = TextIteratorStreamer(\n",
|
|
|
|
|
|
|
|
" ov_llm.pipeline.tokenizer,\n",
|
|
|
|
|
|
|
|
" timeout=30.0,\n",
|
|
|
|
|
|
|
|
" skip_prompt=True,\n",
|
|
|
|
|
|
|
|
" skip_special_tokens=True,\n",
|
|
|
|
|
|
|
|
")\n",
|
|
|
|
|
|
|
|
"ov_llm.pipeline._forward_params = {\"streamer\": streamer, \"max_new_tokens\": 100}\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"t1 = Thread(target=chain.invoke, args=({\"question\": question},))\n",
|
|
|
|
|
|
|
|
"t1.start()\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"for new_text in streamer:\n",
|
|
|
|
|
|
|
|
" print(new_text, end=\"\", flush=True)"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"id": "da9a9239",
|
|
|
|
"id": "da9a9239",
|
|
|
|