Fix issue #10985 - Skip model.to(device) if it is instantiated with bitsandbytes config (#11009)

Prevents the error caused by attempting to move a model that was already
loaded on the GPU via the Accelerate module to the same or another
device. It is not currently possible to load a model with Accelerate/PEFT
onto the CPU.

Addresses:
[#10985](https://github.com/langchain-ai/langchain/issues/10985)
pull/10675/head^2
eryk-dsai 9 months ago committed by GitHub
parent 64969bc8ae
commit 06d5971be9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -69,7 +69,7 @@ class HuggingFacePipeline(BaseLLM):
cls,
model_id: str,
task: str,
device: int = -1,
device: Optional[int] = -1,
model_kwargs: Optional[dict] = None,
pipeline_kwargs: Optional[dict] = None,
batch_size: int = DEFAULT_BATCH_SIZE,
@ -108,7 +108,20 @@ class HuggingFacePipeline(BaseLLM):
f"Could not load the {task} model due to missing dependencies."
) from e
if importlib.util.find_spec("torch") is not None:
if (
model.is_quantized
or model.model.is_loaded_in_4bit
or model.model.is_loaded_in_8bit
) and device is not None:
logger.warning(
f"Setting the `device` argument to None from {device} to avoid "
"the error caused by attempting to move the model that was already "
"loaded on the GPU using the Accelerate module to the same or "
"another device."
)
device = None
if device is not None and importlib.util.find_spec("torch") is not None:
import torch
cuda_device_count = torch.cuda.device_count()

Loading…
Cancel
Save