You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
42 lines
1.9 KiB
Python
42 lines
1.9 KiB
Python
import os
|
|
|
|
import bitsandbytes as bnb
|
|
import torch
|
|
|
|
PETALS_8BIT_BACKWARD = bool(int(os.environ.get("PETALS_8BIT_BACKWARD", 1)))
|
|
|
|
|
|
def replace_8bit_linear(model, threshold=6.0):
|
|
"""
|
|
A helper function to convert all `torch.nn.Linear` modules to `bnb.nn.Linear8bit` modules from the `bitsandbytes`
|
|
library. This will enable running your models using mixed int8 precision as described by the paper `GPT3.int8():
|
|
8-bit Matrix Multiplication for Transformers at Scale`. Make sure `bitsandbytes` compiled with the correct CUDA
|
|
version of your hardware is installed before running this function. `pip install -i https://test.pypi.org/simple/
|
|
bitsandbytes-cudaXXX` with `XXX` is your CUDA version (e.g., 11.6 = 116)
|
|
The function will be run recursively and replace all `torch.nn.Linear` modules except for the `lm_head` and 'score' that should
|
|
be kept as a `torch.nn.Linear` module.
|
|
Parameters:
|
|
model (`torch.nn.Module`):
|
|
Input model or `torch.nn.Module` as the function is run recursively.
|
|
threshold (`float`, *optional*):
|
|
`int8_threshold` for outlier detection as described in the formentioned paper. This parameters is set to
|
|
`6.0` as described by the paper.
|
|
"""
|
|
for n, module in model.named_children():
|
|
if len(list(module.children())) > 0:
|
|
replace_8bit_linear(module, threshold)
|
|
|
|
if isinstance(module, torch.nn.Linear) and n not in ["lm_head", "score"]:
|
|
model._modules[n] = bnb.nn.Linear8bitLt(
|
|
module.in_features,
|
|
module.out_features,
|
|
module.bias is not None,
|
|
has_fp16_weights=False,
|
|
threshold=threshold,
|
|
memory_efficient_backward=PETALS_8BIT_BACKWARD,
|
|
)
|
|
model._modules[n].weight = bnb.nn.Int8Params(
|
|
module.weight.data, requires_grad=False, has_fp16_weights=False
|
|
).to(module.weight.dtype)
|
|
return model
|