Fix dummy cache allocation

pull/574/head
Artem Chumachenko 2 months ago
parent d6f4f80f3f
commit f02bd578b7

@@ -206,7 +206,7 @@ def measure_compute_rps(
     block = block.to(dtype)
     block = convert_block(block, 0, config, tensor_parallel_devices, device, quant_type=quant_type, freeze=True)
-    cache = (DUMMY_KEY_PAST.to(dtype), DUMMY_KEY_PAST.to(dtype))
+    cache = (DUMMY_KEY_PAST.to(dtype).to(device), DUMMY_KEY_PAST.to(dtype).to(device))
     elapsed = 0
     dummy_input = torch.randn(1, n_tokens, config.hidden_size, device=device, dtype=dtype)
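The change adds a .to(device) call so the dummy key/value cache is allocated on the same device as the converted block and the dummy input; with only the dtype conversion, the cache would stay on CPU and a GPU forward pass would fail with a device-mismatch error. Below is a minimal sketch of the idea, not the project's actual benchmark code: the run_dummy_forward helper, the dummy tensor shape, and the layer_past call signature are assumptions for illustration.

# Minimal sketch (assumed helper, not the real measure_compute_rps loop):
# keeping the dummy cache, the block, and the input on the same device.
import torch

def run_dummy_forward(block, config, device, dtype, n_tokens=16):
    # Hypothetical dummy past-key tensor; the real shape may differ.
    dummy_key_past = torch.empty(1, 0, config.hidden_size)
    # Both dtype AND device conversion are needed; otherwise a GPU block
    # would receive a CPU-resident cache and raise a device-mismatch error.
    cache = (dummy_key_past.to(dtype).to(device), dummy_key_past.to(dtype).to(device))
    dummy_input = torch.randn(1, n_tokens, config.hidden_size, device=device, dtype=dtype)
    with torch.inference_mode():
        # Assumed call signature; the actual block may take different kwargs.
        return block(dummy_input, layer_past=cache)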
