From 5019f59724b2b6adf840b78019f2581546cb390d Mon Sep 17 00:00:00 2001
From: eryk-dsai <142571618+eryk-dsai@users.noreply.github.com>
Date: Tue, 17 Oct 2023 01:54:20 +0200
Subject: [PATCH] fix: more robust check whether the HF model is quantized
 (#11891)

Removes the `model.is_quantized` check and adds a more robust way of
checking for 4-bit and 8-bit quantization in the
`huggingface_pipeline.py` script.

I made the original change against an outdated version of
`transformers`, when models still had the `is_quantized` property; that
check is redundant now.

Fixes: https://github.com/langchain-ai/langchain/issues/11809 and
https://github.com/langchain-ai/langchain/issues/11759
---
 libs/langchain/langchain/llms/huggingface_pipeline.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libs/langchain/langchain/llms/huggingface_pipeline.py b/libs/langchain/langchain/llms/huggingface_pipeline.py
index 291fb5a69d..4c77a71c01 100644
--- a/libs/langchain/langchain/llms/huggingface_pipeline.py
+++ b/libs/langchain/langchain/llms/huggingface_pipeline.py
@@ -109,9 +109,8 @@ class HuggingFacePipeline(BaseLLM):
             ) from e
 
         if (
-            model.is_quantized
-            or model.model.is_loaded_in_4bit
-            or model.model.is_loaded_in_8bit
+            getattr(model, "is_loaded_in_4bit", False)
+            or getattr(model, "is_loaded_in_8bit", False)
         ) and device is not None:
             logger.warning(
                 f"Setting the `device` argument to None from {device} to avoid "
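
A minimal standalone sketch of why the patched check is more robust: `getattr`
with a `False` default degrades gracefully when an attribute is absent, whereas
direct attribute access raises `AttributeError` on `transformers` versions that
do not set it. `FakeModel` and its attribute values below are hypothetical
stand-ins for a loaded HF model, for illustration only.

    class FakeModel:
        # Different `transformers` releases expose different attributes;
        # a model loaded without bitsandbytes may define neither flag.
        is_loaded_in_8bit = True

    model = FakeModel()
    device = 0

    # Direct access to model.is_loaded_in_4bit would raise AttributeError
    # here; `getattr(..., False)` simply evaluates to False instead.
    if (
        getattr(model, "is_loaded_in_4bit", False)
        or getattr(model, "is_loaded_in_8bit", False)
    ) and device is not None:
        print(f"Setting the `device` argument to None from {device}")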