|
|
|
@ -82,7 +82,7 @@
|
|
|
|
|
" temperature=0.8,\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(llm(\"What is the capital of France ?\"))"
|
|
|
|
|
"print(llm.invoke(\"What is the capital of France ?\"))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -117,8 +117,7 @@
|
|
|
|
|
"1. The first Pokemon game was released in 1996.\n",
|
|
|
|
|
"2. The president was Bill Clinton.\n",
|
|
|
|
|
"3. Clinton was president from 1993 to 2001.\n",
|
|
|
|
|
"4. The answer is Clinton.\n",
|
|
|
|
|
"\n"
|
|
|
|
|
"4. The answer is Clinton.\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -142,7 +141,7 @@
|
|
|
|
|
"\n",
|
|
|
|
|
"question = \"Who was the US president in the year the first Pokemon game was released?\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(llm_chain.run(question))"
|
|
|
|
|
"print(llm_chain.invoke(question))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -172,7 +171,36 @@
|
|
|
|
|
" trust_remote_code=True, # mandatory for hf models\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"llm(\"What is the future of AI?\")"
|
|
|
|
|
"llm.invoke(\"What is the future of AI?\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "d6ca8fd911d25faa",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": false
|
|
|
|
|
},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Quantization\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"vLLM supports `awq` quantization. To enable it, pass `quantization` to `vllm_kwargs`."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "2cada3174c46a0ea",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": false
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"llm_q = VLLM(\n",
|
|
|
|
|
" model=\"TheBloke/Llama-2-7b-Chat-AWQ\",\n",
|
|
|
|
|
" trust_remote_code=True,\n",
|
|
|
|
|
" max_new_tokens=512,\n",
|
|
|
|
|
" vllm_kwargs={\"quantization\": \"awq\"},\n",
|
|
|
|
|
")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -216,7 +244,7 @@
|
|
|
|
|
" model_name=\"tiiuae/falcon-7b\",\n",
|
|
|
|
|
" model_kwargs={\"stop\": [\".\"]},\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"print(llm(\"Rome is\"))"
|
|
|
|
|
"print(llm.invoke(\"Rome is\"))"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|