From b6b3ae964ff425ad94c2fc62572451d1e673c64f Mon Sep 17 00:00:00 2001
From: Alexander Borzunov
Date: Thu, 20 Jul 2023 23:20:15 +0400
Subject: [PATCH] Fix --attn_cache_tokens default (#392)

---
 src/petals/server/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/petals/server/server.py b/src/petals/server/server.py
index 947dbd8..5cdca46 100644
--- a/src/petals/server/server.py
+++ b/src/petals/server/server.py
@@ -186,7 +186,7 @@ class Server:
 
         # For attention cache in GPU or RAM
         if attn_cache_tokens is None:
-            attn_cache_tokens = 32768 if is_multiquery_attn else 2048
+            attn_cache_tokens = 32768 if is_multiquery_attn else 8192
         cache_values_per_block = 2 * self.block_config.hidden_size * attn_cache_tokens
         cache_values_per_block //= self.block_config.num_key_value_groups
         self._cache_bytes_per_block = cache_values_per_block * torch.finfo(self.torch_dtype).bits // 8