Fix issue #10985 - Skip model.to(device) if it is instantiated with bitsandbytes config (#11009)

Prevents the error caused by attempting to move a model that was already
loaded on the GPU via the Accelerate module to the same or another
device. It is not currently possible to load a model with Accelerate/PEFT
onto the CPU.

Addresses:
[#10985](https://github.com/langchain-ai/langchain/issues/10985)
pull/10675/head^2
eryk-dsai 9 months ago committed by GitHub
parent 64969bc8ae
commit 06d5971be9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -69,7 +69,7 @@ class HuggingFacePipeline(BaseLLM):
cls,
model_id: str,
task: str,
device: int = -1,
device: Optional[int] = -1,
model_kwargs: Optional[dict] = None,
pipeline_kwargs: Optional[dict] = None,
batch_size: int = DEFAULT_BATCH_SIZE,
@ -108,7 +108,20 @@ class HuggingFacePipeline(BaseLLM):
f"Could not load the {task} model due to missing dependencies."
) from e
if importlib.util.find_spec("torch") is not None:
if (
model.is_quantized
or model.model.is_loaded_in_4bit
or model.model.is_loaded_in_8bit
) and device is not None:
logger.warning(
f"Setting the `device` argument to None from {device} to avoid "
"the error caused by attempting to move the model that was already "
"loaded on the GPU using the Accelerate module to the same or "
"another device."
)
device = None
if device is not None and importlib.util.find_spec("torch") is not None:
import torch
cuda_device_count = torch.cuda.device_count()

Loading…
Cancel
Save