From 06d5971be91b1bc96e4974c34dc2bfa34a009309 Mon Sep 17 00:00:00 2001
From: eryk-dsai <142571618+eryk-dsai@users.noreply.github.com>
Date: Wed, 11 Oct 2023 18:28:27 +0200
Subject: [PATCH] Fix issue #10985 - Skip model.to(device) if it is
 instantiated with bitsandbytes config (#11009)

Preventing error caused by attempting to move the model that was already
loaded on the GPU using the Accelerate module to the same or another device.
It is not possible to load model with Accelerate/PEFT to CPU for now

Addresses: [#10985](https://github.com/langchain-ai/langchain/issues/10985)
---
 .../langchain/llms/huggingface_pipeline.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/libs/langchain/langchain/llms/huggingface_pipeline.py b/libs/langchain/langchain/llms/huggingface_pipeline.py
index 0658185902..2b0d792bec 100644
--- a/libs/langchain/langchain/llms/huggingface_pipeline.py
+++ b/libs/langchain/langchain/llms/huggingface_pipeline.py
@@ -69,7 +69,7 @@ class HuggingFacePipeline(BaseLLM):
         cls,
         model_id: str,
         task: str,
-        device: int = -1,
+        device: Optional[int] = -1,
         model_kwargs: Optional[dict] = None,
         pipeline_kwargs: Optional[dict] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
@@ -108,7 +108,20 @@ class HuggingFacePipeline(BaseLLM):
                 f"Could not load the {task} model due to missing dependencies."
             ) from e
 
-        if importlib.util.find_spec("torch") is not None:
+        if (
+            model.is_quantized
+            or model.model.is_loaded_in_4bit
+            or model.model.is_loaded_in_8bit
+        ) and device is not None:
+            logger.warning(
+                f"Setting the `device` argument to None from {device} to avoid "
+                "the error caused by attempting to move the model that was already "
+                "loaded on the GPU using the Accelerate module to the same or "
+                "another device."
+            )
+            device = None
+
+        if device is not None and importlib.util.find_spec("torch") is not None:
             import torch
 
             cuda_device_count = torch.cuda.device_count()