From 9484cc0962f2bd5922a124e82d96ce4bb1042eba Mon Sep 17 00:00:00 2001
From: Changyong Um
Date: Tue, 12 Nov 2024 05:41:56 +0900
Subject: [PATCH] community[docs]: modify parameter for the LoRA adapter on the vllm page (#27930)

**Description:**
This PR modifies the documentation regarding the configuration of VLLM with a LoRA adapter. The update aims to provide clear instructions on how to set up a LoRA adapter when using VLLM.

- before

```python
VLLM(..., enable_lora=True)
```

- after

```python
VLLM(
    ...,
    vllm_kwargs={
        "enable_lora": True,
    },
)
```

This change clarifies that users should use `vllm_kwargs` to enable the LoRA adapter.

Co-authored-by: Um Changyong
---
 docs/docs/integrations/llms/vllm.ipynb | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/docs/docs/integrations/llms/vllm.ipynb b/docs/docs/integrations/llms/vllm.ipynb
index 1e1baff963..e68ca9525a 100644
--- a/docs/docs/integrations/llms/vllm.ipynb
+++ b/docs/docs/integrations/llms/vllm.ipynb
@@ -266,8 +266,18 @@
     "from langchain_community.llms import VLLM\n",
     "from vllm.lora.request import LoRARequest\n",
     "\n",
-    "llm = VLLM(model=\"meta-llama/Llama-2-7b-hf\", enable_lora=True)\n",
-    "\n",
+    "llm = VLLM(\n",
+    "    model=\"meta-llama/Llama-3.2-3B-Instruct\",\n",
+    "    max_new_tokens=300,\n",
+    "    top_k=1,\n",
+    "    top_p=0.90,\n",
+    "    temperature=0.1,\n",
+    "    vllm_kwargs={\n",
+    "        \"gpu_memory_utilization\": 0.5,\n",
+    "        \"enable_lora\": True,\n",
+    "        \"max_model_len\": 350,\n",
+    "    },\n",
+    ")\n",
     "LoRA_ADAPTER_PATH = \"path/to/adapter\"\n",
     "lora_adapter = LoRARequest(\"lora_adapter\", 1, LoRA_ADAPTER_PATH)\n",
     "\n",
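
For reference, a minimal sketch of how the updated notebook cell could be exercised end to end. The constructor arguments mirror the diff above; the prompt text and the `llm.invoke(..., lora_request=lora_adapter)` call are illustrative assumptions, since the hunk ends before the adapter is actually applied, and `path/to/adapter` remains a placeholder.

```python
from langchain_community.llms import VLLM
from vllm.lora.request import LoRARequest

# Mirrors the updated notebook cell: engine-level options such as enable_lora
# are forwarded to the underlying vllm engine via vllm_kwargs rather than
# being passed directly to the VLLM constructor.
llm = VLLM(
    model="meta-llama/Llama-3.2-3B-Instruct",
    max_new_tokens=300,
    top_k=1,
    top_p=0.90,
    temperature=0.1,
    vllm_kwargs={
        "gpu_memory_utilization": 0.5,
        "enable_lora": True,
        "max_model_len": 350,
    },
)

# Placeholder path from the notebook; point this at a real LoRA adapter.
LoRA_ADAPTER_PATH = "path/to/adapter"
lora_adapter = LoRARequest("lora_adapter", 1, LoRA_ADAPTER_PATH)

# Assumption for illustration: the adapter is supplied per call via
# lora_request; this invocation is not part of the diff shown above.
print(llm.invoke("Tell me about Seoul.", lora_request=lora_adapter))
```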