Fix issue #10985 - Skip model.to(device) if it is instantiated with bitsandbytes config (#11009)

Prevents the error caused by attempting to move a model that was already loaded on the GPU by the Accelerate module to the same or another device. For now, it is not possible to load a model with Accelerate/PEFT onto the CPU. Addresses: [#10985](https://github.com/langchain-ai/langchain/issues/10985)
11 months ago · 06d5971be9
parent 64969bc8ae
commit 06d5971be9
1 changed file with 15 additions and 2 deletions
--- a/libs/langchain/langchain/llms/huggingface_pipeline.py
+++ b/libs/langchain/langchain/llms/huggingface_pipeline.py
@@ -69,7 +69,7 @@ class HuggingFacePipeline(BaseLLM):
        cls,
        model_id: str,
        task: str,
-        device: int = -1,
+        device: Optional[int] = -1,
        model_kwargs: Optional[dict] = None,
        pipeline_kwargs: Optional[dict] = None,
        batch_size: int = DEFAULT_BATCH_SIZE,
@@ -108,7 +108,20 @@ class HuggingFacePipeline(BaseLLM):
                f"Could not load the {task} model due to missing dependencies."
            ) from e
-        if importlib.util.find_spec("torch") is not None:
+        if (
            model.is_quantized
            or model.model.is_loaded_in_4bit
            or model.model.is_loaded_in_8bit
        ) and device is not None:
            logger.warning(
                f"Setting the `device` argument to None from {device} to avoid "
                "the error caused by attempting to move the model that was already "
                "loaded on the GPU using the Accelerate module to the same or "
                "another device."
            )
            device = None
        if device is not None and importlib.util.find_spec("torch") is not None:
            import torch
            cuda_device_count = torch.cuda.device_count()