import logging
from typing import Any, Optional

from langchain_core.language_models.llms import LLM

from langchain_community.llms.ipex_llm import IpexLLM

logger = logging.getLogger(__name__)


class BigdlLLM(IpexLLM):
    """Wrapper around the BigdlLLM model.

    Example:
        .. code-block:: python

            from langchain_community.llms import BigdlLLM

            llm = BigdlLLM.from_model_id(model_id="THUDM/chatglm-6b")
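
            # A follow-up usage sketch (the prompt is illustrative);
            # ``invoke`` is the standard LangChain Runnable entry point.
            print(llm.invoke("What is AI?"))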
    """

    @classmethod
    def from_model_id(
        cls,
        model_id: str,
        model_kwargs: Optional[dict] = None,
        *,
        tokenizer_id: Optional[str] = None,
        load_in_4bit: bool = True,
        load_in_low_bit: Optional[str] = None,
        **kwargs: Any,
    ) -> LLM:
        """
        Construct a BigdlLLM object from a model_id.

        Args:
            model_id: Path for the huggingface repo id to be downloaded or
                the huggingface checkpoint folder.
            tokenizer_id: Path for the huggingface repo id to be downloaded or
                the huggingface checkpoint folder which contains the tokenizer.
            model_kwargs: Keyword arguments to pass to the model and tokenizer.
            kwargs: Extra arguments to pass to the model and tokenizer.

        Returns:
            An object of BigdlLLM.
        """
        logger.warning("BigdlLLM is deprecated. Please use IpexLLM instead.")

        try:
            from bigdl.llm.transformers import (
                AutoModel,
                AutoModelForCausalLM,
            )
            from transformers import AutoTokenizer, LlamaTokenizer
        except ImportError:
            raise ImportError(
                "Could not import bigdl-llm or transformers. "
                "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
            )

        if load_in_low_bit is not None:
            logger.warning(
                "`load_in_low_bit` option is not supported in BigdlLLM and "
                "is ignored. For more data types support with `load_in_low_bit`, "
                "use IpexLLM instead."
            )

        if not load_in_4bit:
            raise ValueError(
                "BigdlLLM only supports loading in 4-bit mode, "
                "i.e. load_in_4bit = True. "
                "For other low-bit data types, use IpexLLM instead."
            )

        _model_kwargs = model_kwargs or {}
        _tokenizer_id = tokenizer_id or model_id
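
        # Fall back to LlamaTokenizer for checkpoints that AutoTokenizer
        # cannot resolve (e.g. some older Llama-family models).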
        try:
            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
        except Exception:
            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
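
        # Prefer the causal-LM class; fall back to the generic AutoModel for
        # architectures (e.g. ChatGLM) that expose no causal-LM head.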
        try:
            model = AutoModelForCausalLM.from_pretrained(
                model_id, load_in_4bit=True, **_model_kwargs
            )
        except Exception:
            model = AutoModel.from_pretrained(
                model_id, load_in_4bit=True, **_model_kwargs
            )
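
        # Strip ``trust_remote_code`` before storing the kwargs on the
        # wrapper; it is only meaningful when loading the model.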
        if "trust_remote_code" in _model_kwargs:
            _model_kwargs = {
                k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
            }

        return cls(
            model_id=model_id,
            model=model,
            tokenizer=tokenizer,
            model_kwargs=_model_kwargs,
            **kwargs,
        )

    @classmethod
    def from_model_id_low_bit(
        cls,
        model_id: str,
        model_kwargs: Optional[dict] = None,
        *,
        tokenizer_id: Optional[str] = None,
        **kwargs: Any,
    ) -> LLM:
        """
        Construct a low-bit BigdlLLM object from a saved low-bit model.

        Args:
            model_id: Path for the bigdl-llm transformers low-bit model folder.
            tokenizer_id: Path for the huggingface repo id or local model folder
                which contains the tokenizer.
            model_kwargs: Keyword arguments to pass to the model and tokenizer.
            kwargs: Extra arguments to pass to the model and tokenizer.

        Returns:
            An object of BigdlLLM.
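
        Example:
            .. code-block:: python

                # A usage sketch; the paths are illustrative. Assumes a
                # low-bit model was saved beforehand, e.g. with bigdl-llm's
                # ``model.save_low_bit("/path/to/low-bit-model")``.
                from langchain_community.llms import BigdlLLM

                llm = BigdlLLM.from_model_id_low_bit(
                    model_id="/path/to/low-bit-model",
                    tokenizer_id="THUDM/chatglm-6b",
                )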
        """
        logger.warning("BigdlLLM is deprecated. Please use IpexLLM instead.")

        try:
            from bigdl.llm.transformers import (
                AutoModel,
                AutoModelForCausalLM,
            )
            from transformers import AutoTokenizer, LlamaTokenizer
        except ImportError:
            raise ImportError(
                "Could not import bigdl-llm or transformers. "
                "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
            )

        _model_kwargs = model_kwargs or {}
        _tokenizer_id = tokenizer_id or model_id
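
        # As in ``from_model_id``, fall back to LlamaTokenizer when
        # AutoTokenizer cannot resolve the checkpoint.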
        try:
            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
        except Exception:
            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
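
        # ``load_low_bit`` restores a model previously saved with bigdl-llm's
        # ``save_low_bit``; try the causal-LM class first, then AutoModel.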
        try:
            model = AutoModelForCausalLM.load_low_bit(model_id, **_model_kwargs)
        except Exception:
            model = AutoModel.load_low_bit(model_id, **_model_kwargs)
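
        # As in ``from_model_id``, strip ``trust_remote_code`` before storing
        # the kwargs on the wrapper; it is only meaningful at load time.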
        if "trust_remote_code" in _model_kwargs:
            _model_kwargs = {
                k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
            }

        return cls(
            model_id=model_id,
            model=model,
            tokenizer=tokenizer,
            model_kwargs=_model_kwargs,
            **kwargs,
        )

    @property
    def _llm_type(self) -> str:
        return "bigdl-llm"