diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
index 71f6c9c..f4173f1 100644
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -28,12 +28,12 @@ jobs:
           pip install -r requirements.txt
       - name: Delete previous model, if exists
         run: |
-          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_BASE_REF') or os.environ.get('GITHUB_REF_NAME'))")
+          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
           python -c "from huggingface_hub import delete_repo; delete_repo(token='$BLOOM_TESTING_WRITE_TOKEN', \
           name='test-bloomd-350m-$HF_TAG', organization='bloom-testing')" || true
       - name: Convert model and push to hub
         run: |
-          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_BASE_REF') or os.environ.get('GITHUB_REF_NAME'))")
+          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
           python -m cli.convert_model --model bigscience/bloom-350m --output_path ./converted_model \
           --output_repo bloom-testing/test-bloomd-350m-$HF_TAG --use_auth_token $BLOOM_TESTING_WRITE_TOKEN
 
@@ -64,7 +64,7 @@ jobs:
           pip install -r requirements-dev.txt
       - name: Test
         run: |
-          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_BASE_REF') or os.environ.get('GITHUB_REF_NAME'))")
+          export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
           export MODEL_NAME=bloom-testing/test-bloomd-350m-$HF_TAG
           export REF_NAME=bigscience/bloom-350m
 
@@ -72,6 +72,8 @@ jobs:
             --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 --throughput 1 &
           SERVER1_PID=$!
 
+          sleep 5 # wait for the first server to initialize DHT
+
           export INITIAL_PEERS=/ip4/127.0.0.1/tcp/31337/p2p/QmS9KwZptnVdB9FFV7uGgaTq4sEKBwcYeKZDfSpyKDUd1g
           # ^-- server 1 multiaddr is determined by --identity and --host_maddrs
 
@@ -79,7 +81,7 @@ jobs:
             --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server2.log &
           SERVER2_PID=$!
 
-          sleep 30 # wait for server to download layers
+          sleep 60 # wait for server to download layers
 
           PYTHONPATH=. pytest tests
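Aside (not part of the patch): the `HF_TAG` change above matters because, on `pull_request` events, `GITHUB_BASE_REF` names the branch the PR targets (typically `main`), so PR builds would all tag their converted test model the same way; `GITHUB_HEAD_REF` names the PR's source branch, and `GITHUB_REF_NAME` covers plain pushes. Below is a minimal standalone sketch of the same resolution logic that the workflow inlines via `python -c`; the helper name `resolve_hf_tag` is illustrative only.

```python
# Sketch of the HF_TAG resolution logic inlined in the workflow via `python -c`.
# On pull_request events, GITHUB_HEAD_REF holds the PR's source branch and
# GITHUB_BASE_REF holds the *target* branch; on push events both are unset
# and GITHUB_REF_NAME holds the pushed branch name.
import os


def resolve_hf_tag():
    # Prefer the PR source branch; fall back to the branch of a plain push.
    return os.environ.get("GITHUB_HEAD_REF") or os.environ.get("GITHUB_REF_NAME")


if __name__ == "__main__":
    # e.g. prints "my-feature-branch" inside a PR build, so the converted model
    # is pushed to bloom-testing/test-bloomd-350m-my-feature-branch
    print(resolve_hf_tag())
```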
diff --git a/src/bloom/ops.py b/src/bloom/ops.py
index 0ef9b5e..b84c7c1 100644
--- a/src/bloom/ops.py
+++ b/src/bloom/ops.py
@@ -101,7 +101,7 @@ def pre_process_alibi_for_pad(alibi: torch.Tensor, attention_mask: torch.Tensor)
         attention_mask: ([`torch.tensor`], *required*):
             attention mask to pre-process
     """
-    assert attention_mask.shape.ndim == 2, "mask should be [batch_size, seq_length]"
+    assert attention_mask.ndim == 2, "mask should be [batch_size, seq_length]"
     unpadded_indices = torch.relu(attention_mask.cumsum(dim=1) - 1)
     # ^-- [batch, max_len], values correspond to element indices after removing padding
     # We shift the alibi tensor + replace all the values where attention_mask==0.0 by 0
diff --git a/tests/test_full_model.py b/tests/test_full_model.py
index 98140b4..f39128c 100644
--- a/tests/test_full_model.py
+++ b/tests/test_full_model.py
@@ -13,13 +13,15 @@ logger = get_logger(__file__)
 @pytest.mark.forked
 def test_full_model_exact_match(atol_forward=1e-3, atol_inference=1e-3):
     tokenizer = transformers.BloomTokenizerFast.from_pretrained(MODEL_NAME)
-    model = DistributedBloomForCausalLM.from_pretrained(MODEL_NAME, initial_peers=INITIAL_PEERS)
+    model = DistributedBloomForCausalLM.from_pretrained(
+        MODEL_NAME, initial_peers=INITIAL_PEERS, low_cpu_mem_usage=True, torch_dtype=torch.float32
+    )
     assert isinstance(model, DistributedBloomForCausalLM)
     assert len(model.transformer.h) == model.config.n_layer
 
     test_inputs = tokenizer("A cat sat on a mat", return_tensors="pt")["input_ids"]
 
-    with torch.no_grad():
+    with torch.inference_mode():
         parallel_outputs = model.forward(test_inputs).logits
         assert torch.all(torch.isfinite(parallel_outputs))
         logger.info("Forward outputs are finite")
@@ -32,21 +34,20 @@ def test_full_model_exact_match(atol_forward=1e-3, atol_inference=1e-3):
                 recurrent_outputs.append(sess.step(embs[:, t : t + 1, :]))
         recurrent_outputs = torch.cat(recurrent_outputs, dim=1)
         recurrent_outputs = model.transformer.ln_f(recurrent_outputs)
-
-        dictionary = model.transformer.word_embeddings.weight.t()
-        recurrent_outputs = recurrent_outputs.to(dictionary.dtype)
-        recurrent_outputs = (recurrent_outputs @ dictionary).float()
+        recurrent_outputs = model.lm_head(recurrent_outputs)
         assert torch.allclose(recurrent_outputs, parallel_outputs, rtol=0, atol=atol_inference)
         logger.info("Inference is consistent with forward")
-        del model, recurrent_outputs
+        del model, embs, recurrent_outputs
 
         if REF_NAME:
-            ref_model = transformers.AutoModelForCausalLM.from_pretrained(REF_NAME)
+            ref_model = transformers.BloomForCausalLM.from_pretrained(
+                REF_NAME, low_cpu_mem_usage=True, torch_dtype=torch.float32
+            )
             dummy_mask = torch.ones_like(test_inputs, dtype=torch.bool)
             # note: this creates a dummy mask to make the test compatible with older transformer versions
             # prior to https://github.com/huggingface/transformers/pull/17837
-            ref_outputs = ref_model.forward(test_inputs, attention_mask=dummy_mask).logits
+            ref_outputs = ref_model.forward(test_inputs, attention_mask=dummy_mask).logits.float()
             assert torch.allclose(ref_outputs, parallel_outputs, rtol=0, atol=atol_forward)
             logger.warning(f"Distributed forward is consistent with {type(ref_model)}.forward")
             del ref_model, ref_outputs, dummy_mask
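Aside (not part of the patch): in `test_full_model.py`, the manual projection through `model.transformer.word_embeddings.weight.t()` is replaced by `model.lm_head(...)`. Below is a minimal self-contained sketch of why the two are equivalent, assuming the head is a bias-free linear layer whose weights are tied to the input embeddings; the toy sizes are made up and this is not code from the repository.

```python
# Toy demonstration that a bias-free lm_head with weights tied to the input
# embeddings computes the same projection as the old manual matmul.
import torch

vocab_size, hidden_size = 16, 8
word_embeddings = torch.nn.Embedding(vocab_size, hidden_size)
lm_head = torch.nn.Linear(hidden_size, vocab_size, bias=False)
lm_head.weight = word_embeddings.weight  # weight tying, as in BLOOM-style CausalLM heads

hidden_states = torch.randn(1, 5, hidden_size)  # stand-in for the ln_f outputs
manual_logits = hidden_states @ word_embeddings.weight.t()  # what the old test computed
assert torch.allclose(lm_head(hidden_states), manual_logits)
print("lm_head(h) matches h @ word_embeddings.weight.t()")
```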