From 5019f59724b2b6adf840b78019f2581546cb390d Mon Sep 17 00:00:00 2001
From: eryk-dsai <142571618+eryk-dsai@users.noreply.github.com>
Date: Tue, 17 Oct 2023 01:54:20 +0200
Subject: [PATCH] fix: more robust check whether the HF model is quantized
 (#11891)

Removes the `model.is_quantized` check and adds a more robust way of
checking for 4-bit and 8-bit quantization in the
`huggingface_pipeline.py` script.

I made the original change against an outdated version of
`transformers`, when models still had the `is_quantized` property; that
check is redundant now.

Fixes: https://github.com/langchain-ai/langchain/issues/11809 and
https://github.com/langchain-ai/langchain/issues/11759
---
 libs/langchain/langchain/llms/huggingface_pipeline.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libs/langchain/langchain/llms/huggingface_pipeline.py b/libs/langchain/langchain/llms/huggingface_pipeline.py
index 291fb5a69d..4c77a71c01 100644
--- a/libs/langchain/langchain/llms/huggingface_pipeline.py
+++ b/libs/langchain/langchain/llms/huggingface_pipeline.py
@@ -109,9 +109,8 @@ class HuggingFacePipeline(BaseLLM):
             ) from e
 
         if (
-            model.is_quantized
-            or model.model.is_loaded_in_4bit
-            or model.model.is_loaded_in_8bit
+            getattr(model, "is_loaded_in_4bit", False)
+            or getattr(model, "is_loaded_in_8bit", False)
         ) and device is not None:
             logger.warning(
                 f"Setting the `device` argument to None from {device} to avoid "
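
A minimal standalone sketch of why the patched check is more robust: `getattr`
with a `False` default degrades gracefully when an attribute is absent, whereas
direct attribute access raises `AttributeError` on `transformers` versions that
do not set it. `FakeModel` and its attribute values below are hypothetical
stand-ins for a loaded HF model, for illustration only.

    class FakeModel:
        # Different `transformers` releases expose different attributes;
        # a model loaded without bitsandbytes may define neither flag.
        is_loaded_in_8bit = True

    model = FakeModel()
    device = 0

    # Direct access to model.is_loaded_in_4bit would raise AttributeError
    # here; `getattr(..., False)` simply evaluates to False instead.
    if (
        getattr(model, "is_loaded_in_4bit", False)
        or getattr(model, "is_loaded_in_8bit", False)
    ) and device is not None:
        print(f"Setting the `device` argument to None from {device}")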