@ -69,7 +69,7 @@ class HuggingFacePipeline(BaseLLM):
cls ,
cls ,
model_id : str ,
model_id : str ,
task : str ,
task : str ,
device : int = - 1 ,
device : Optional [ int ] = - 1 ,
model_kwargs : Optional [ dict ] = None ,
model_kwargs : Optional [ dict ] = None ,
pipeline_kwargs : Optional [ dict ] = None ,
pipeline_kwargs : Optional [ dict ] = None ,
batch_size : int = DEFAULT_BATCH_SIZE ,
batch_size : int = DEFAULT_BATCH_SIZE ,
@ -108,7 +108,20 @@ class HuggingFacePipeline(BaseLLM):
f " Could not load the { task } model due to missing dependencies. "
f " Could not load the { task } model due to missing dependencies. "
) from e
) from e
if importlib . util . find_spec ( " torch " ) is not None :
if (
model . is_quantized
or model . model . is_loaded_in_4bit
or model . model . is_loaded_in_8bit
) and device is not None :
logger . warning (
f " Setting the `device` argument to None from { device } to avoid "
" the error caused by attempting to move the model that was already "
" loaded on the GPU using the Accelerate module to the same or "
" another device. "
)
device = None
if device is not None and importlib . util . find_spec ( " torch " ) is not None :
import torch
import torch
cuda_device_count = torch . cuda . device_count ( )
cuda_device_count = torch . cuda . device_count ( )