Remove cpufeature from setup.cfg

1 year ago · 84bcc8090c
parent 6c6150f684
commit 84bcc8090c
3 changed files with 1 addition and 13 deletions
--- a/setup.cfg
+++ b/setup.cfg
@@ -41,7 +41,6 @@ install_requires =
    tensor_parallel==1.0.23
    humanfriendly
    async-timeout>=4.0.2
-    cpufeature>=0.2.0
    packaging>=20.9

 [options.extras_require]
--- a/src/petals/bloom/modeling_utils.py
+++ b/src/petals/bloom/modeling_utils.py
@@ -29,16 +29,6 @@ class LMHead(nn.Module):
        self.word_embeddings = word_embeddings

        self.use_chunked_forward = config.use_chunked_forward
-        if self.use_chunked_forward == "auto":
-            if platform.machine() == "x86_64":
-                # Import of cpufeature may crash on non-x86_64 machines
-                from cpufeature import CPUFeature
-
-                # If the CPU supports AVX512, plain bfloat16 is ~10x faster than chunked_forward().
-                # Otherwise, it's ~8x slower.
-                self.use_chunked_forward = not (CPUFeature["AVX512f"] and CPUFeature["OS_AVX512"])
-            else:
-                self.use_chunked_forward = True
        self.chunked_forward_step = config.chunked_forward_step
        self._bf16_warning_shown = False

--- a/src/petals/client/remote_model.py
+++ b/src/petals/client/remote_model.py
@@ -44,8 +44,7 @@ class DistributedBloomConfig(BloomConfig):
    tuning_mode: Optional[str] = None  # One of the finetune options: [None, 'shallow_ptune', 'deep_ptune', 'adapters']

    # This settings matter for running the client with dtype bfloat16 on CPU.
-    # If the CPU doesn't support AVX512, chunked_forward() significantly speeds up computations.
-    use_chunked_forward: Union[str, bool] = "auto"
+    use_chunked_forward: bool = True
    chunked_forward_step: int = 16384