From 06d5971be91b1bc96e4974c34dc2bfa34a009309 Mon Sep 17 00:00:00 2001
From: eryk-dsai <142571618+eryk-dsai@users.noreply.github.com>
Date: Wed, 11 Oct 2023 18:28:27 +0200
Subject: [PATCH] Fix issue #10985 - Skip model.to(device) if it is
 instantiated with bitsandbytes config (#11009)

Preventing error caused by attempting to move the model that was already
loaded on the GPU using the Accelerate module to the same or another device.
It is not possible to load model with Accelerate/PEFT to CPU for now

Addresses: [#10985](https://github.com/langchain-ai/langchain/issues/10985)
---
 .../langchain/llms/huggingface_pipeline.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/libs/langchain/langchain/llms/huggingface_pipeline.py b/libs/langchain/langchain/llms/huggingface_pipeline.py
index 0658185902..2b0d792bec 100644
--- a/libs/langchain/langchain/llms/huggingface_pipeline.py
+++ b/libs/langchain/langchain/llms/huggingface_pipeline.py
@@ -69,7 +69,7 @@ class HuggingFacePipeline(BaseLLM):
         cls,
         model_id: str,
         task: str,
-        device: int = -1,
+        device: Optional[int] = -1,
         model_kwargs: Optional[dict] = None,
         pipeline_kwargs: Optional[dict] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
@@ -108,7 +108,20 @@ class HuggingFacePipeline(BaseLLM):
                 f"Could not load the {task} model due to missing dependencies."
             ) from e
 
-        if importlib.util.find_spec("torch") is not None:
+        if (
+            model.is_quantized
+            or model.model.is_loaded_in_4bit
+            or model.model.is_loaded_in_8bit
+        ) and device is not None:
+            logger.warning(
+                f"Setting the `device` argument to None from {device} to avoid "
+                "the error caused by attempting to move the model that was already "
+                "loaded on the GPU using the Accelerate module to the same or "
+                "another device."
+            )
+            device = None
+
+        if device is not None and importlib.util.find_spec("torch") is not None:
             import torch
 
             cuda_device_count = torch.cuda.device_count()