diff --git a/libs/langchain/langchain/llms/huggingface_pipeline.py b/libs/langchain/langchain/llms/huggingface_pipeline.py
index 0658185902..2b0d792bec 100644
--- a/libs/langchain/langchain/llms/huggingface_pipeline.py
+++ b/libs/langchain/langchain/llms/huggingface_pipeline.py
@@ -69,7 +69,7 @@ class HuggingFacePipeline(BaseLLM):
         cls,
         model_id: str,
         task: str,
-        device: int = -1,
+        device: Optional[int] = -1,
         model_kwargs: Optional[dict] = None,
         pipeline_kwargs: Optional[dict] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
@@ -108,7 +108,20 @@ class HuggingFacePipeline(BaseLLM):
                 f"Could not load the {task} model due to missing dependencies."
             ) from e
 
-        if importlib.util.find_spec("torch") is not None:
+        if (  # flags can be absent on some models/versions, so default to False
+            getattr(model, "is_quantized", False)
+            or getattr(model, "is_loaded_in_4bit", False)
+            or getattr(model, "is_loaded_in_8bit", False)
+        ) and device is not None:
+            logger.warning(
+                f"Setting the `device` argument to None from {device} to avoid "
+                "the error caused by attempting to move the model that was already "
+                "loaded on the GPU using the Accelerate module to the same or "
+                "another device."
+            )
+            device = None  # the `device is not None` branch below is then skipped
+
+        if device is not None and importlib.util.find_spec("torch") is not None:
             import torch
 
             cuda_device_count = torch.cuda.device_count()