From 84bcc8090ce8fedb5641db4c3d13671c9e27ed2e Mon Sep 17 00:00:00 2001 From: Aleksandr Borzunov Date: Thu, 6 Apr 2023 15:10:02 +0000 Subject: [PATCH] Remove cpufeature from setup.cfg --- setup.cfg | 1 - src/petals/bloom/modeling_utils.py | 10 ---------- src/petals/client/remote_model.py | 3 +-- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/setup.cfg b/setup.cfg index c485cd5..055f1c0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,7 +41,6 @@ install_requires = tensor_parallel==1.0.23 humanfriendly async-timeout>=4.0.2 - cpufeature>=0.2.0 packaging>=20.9 [options.extras_require] diff --git a/src/petals/bloom/modeling_utils.py b/src/petals/bloom/modeling_utils.py index eddbb9d..f68b25b 100644 --- a/src/petals/bloom/modeling_utils.py +++ b/src/petals/bloom/modeling_utils.py @@ -29,16 +29,6 @@ class LMHead(nn.Module): self.word_embeddings = word_embeddings self.use_chunked_forward = config.use_chunked_forward - if self.use_chunked_forward == "auto": - if platform.machine() == "x86_64": - # Import of cpufeature may crash on non-x86_64 machines - from cpufeature import CPUFeature - - # If the CPU supports AVX512, plain bfloat16 is ~10x faster than chunked_forward(). - # Otherwise, it's ~8x slower. - self.use_chunked_forward = not (CPUFeature["AVX512f"] and CPUFeature["OS_AVX512"]) - else: - self.use_chunked_forward = True self.chunked_forward_step = config.chunked_forward_step self._bf16_warning_shown = False diff --git a/src/petals/client/remote_model.py b/src/petals/client/remote_model.py index 937cd9c..42fee3e 100644 --- a/src/petals/client/remote_model.py +++ b/src/petals/client/remote_model.py @@ -44,8 +44,7 @@ class DistributedBloomConfig(BloomConfig): tuning_mode: Optional[str] = None # One of the finetune options: [None, 'shallow_ptune', 'deep_ptune', 'adapters'] # This settings matter for running the client with dtype bfloat16 on CPU. - # If the CPU doesn't support AVX512, chunked_forward() significantly speeds up computations. - use_chunked_forward: Union[str, bool] = "auto" + use_chunked_forward: bool = True chunked_forward_step: int = 16384