import subprocess
import sys

import pytest
import torch

from petals import AutoDistributedConfig
from petals.server.throughput import measure_compute_rps
from petals.utils.convert_block import QuantType
from test_utils import MODEL_NAME


def test_bnb_not_imported_when_unnecessary():
    """
    We avoid importing bitsandbytes when it's not used,
    since bitsandbytes doesn't always find the correct CUDA libs and may raise exceptions because of that.

    If this test fails, please change your code to import bitsandbytes and/or petals.utils.peft
    inside the function or method that actually needs them, instead of importing them at the top of the file.
    This won't slow down the code: importing a module a second time doesn't rerun its module-level code.
    """

    subprocess.check_call([sys.executable, "-c", "import petals, sys; assert 'bitsandbytes' not in sys.modules"])
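

# A minimal sketch of the deferred-import pattern the docstring above asks for;
# `_hypothetical_quantized_path` is a made-up helper, not part of petals. The point
# is that bitsandbytes is imported inside the function, so `import petals` (or this
# test module) never loads CUDA-dependent libraries unless the quantized path runs.
def _hypothetical_quantized_path(use_8bit: bool):
    if not use_8bit:
        return None  # the common path never touches bitsandbytes
    import bitsandbytes as bnb  # deferred import: runs only when 8-bit quantization is requested

    return bnb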


@pytest.mark.forked
@pytest.mark.parametrize("tensor_parallel", [False, True])
def test_compute_throughput(tensor_parallel: bool):
    # Measure CPU compute throughput for MODEL_NAME, with and without tensor parallelism.
    config = AutoDistributedConfig.from_pretrained(MODEL_NAME)
    tensor_parallel_devices = ("cpu", "cpu") if tensor_parallel else ()
    compute_rps = measure_compute_rps(
        config,
        device=torch.device("cpu"),
        dtype=torch.bfloat16,
        quant_type=QuantType.NONE,
        tensor_parallel_devices=tensor_parallel_devices,
        n_steps=10,
    )
    # A successful measurement returns a positive float.
    assert isinstance(compute_rps, float) and compute_rps > 0