Rechain reloc

1 month ago · 0ca54a5e76
parent e5dddfe0b6
commit 0ca54a5e76
2 changed files with 3 additions and 3 deletions
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -14,8 +14,8 @@ jobs:
          - { model: 'bigscience/bloom-560m', os: 'ubuntu', python-version: '3.11' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'ubuntu', python-version: '3.8' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'ubuntu', python-version: '3.11' }
-          - { model: 'Maykeye/TinyLLama-v0', os: 'macos-14', python-version: '3.10' }
-          - { model: 'Maykeye/TinyLLama-v0', os: 'macos-14', python-version: '3.11' }
+          - { model: 'Maykeye/TinyLLama-v0', os: 'macos', python-version: '3.10' }
+          - { model: 'Maykeye/TinyLLama-v0', os: 'macos', python-version: '3.11' }
          - { model: 'artek0chumak/TestMixtral', os: 'ubuntu', python-version: '3.8' }
          - { model: 'artek0chumak/TestMixtral', os: 'ubuntu', python-version: '3.11' }
      fail-fast: false
--- a/src/petals/server/throughput.py
+++ b/src/petals/server/throughput.py
@@ -206,7 +206,7 @@ def measure_compute_rps(
        block = block.to(dtype)
        block = convert_block(block, 0, config, tensor_parallel_devices, device, quant_type=quant_type, freeze=True)

-        cache = (DUMMY_KEY_PAST.to(dtype).to(device), DUMMY_KEY_PAST.to(dtype).to(device))
+        cache = (DUMMY_KEY_PAST.to(dtype=dtype, device=device), DUMMY_KEY_PAST.to(dtype=dtype, device=device))
        elapsed = 0
        dummy_input = torch.randn(1, n_tokens, config.hidden_size, device=device, dtype=dtype)