diff --git a/libs/langchain/langchain/llms/huggingface_pipeline.py b/libs/langchain/langchain/llms/huggingface_pipeline.py
index 0658185902..2b0d792bec 100644
--- a/libs/langchain/langchain/llms/huggingface_pipeline.py
+++ b/libs/langchain/langchain/llms/huggingface_pipeline.py
@@ -69,7 +69,7 @@ class HuggingFacePipeline(BaseLLM):
         cls,
         model_id: str,
         task: str,
-        device: int = -1,
+        device: Optional[int] = -1,
         model_kwargs: Optional[dict] = None,
         pipeline_kwargs: Optional[dict] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
@@ -108,7 +108,27 @@ class HuggingFacePipeline(BaseLLM):
                 f"Could not load the {task} model due to missing dependencies."
             ) from e
 
-        if importlib.util.find_spec("torch") is not None:
+        # A quantized model has already been placed on its device(s) at load
+        # time, so `device` must not be forwarded for it (see the warning
+        # below).  The flags are read with `getattr` because they, and the
+        # wrapped `.model` attribute, may be missing depending on the model
+        # class or the installed transformers version.
+        quant_flags = ("is_quantized", "is_loaded_in_4bit", "is_loaded_in_8bit")
+        is_quantized = any(
+            getattr(candidate, flag, False)
+            for candidate in (model, getattr(model, "model", None))
+            for flag in quant_flags
+        )
+        if is_quantized and device is not None:
+            logger.warning(
+                f"Setting the `device` argument to None from {device} to avoid "
+                "the error caused by attempting to move the model that was already "
+                "loaded on the GPU using the Accelerate module to the same or "
+                "another device."
+            )
+            device = None
+
+        if device is not None and importlib.util.find_spec("torch") is not None:
             import torch
 
             cuda_device_count = torch.cuda.device_count()