From 63282afb4e0211c5b0b787ae13c238fc8738b32f Mon Sep 17 00:00:00 2001
From: Aleksandr Borzunov
Date: Thu, 28 Sep 2023 18:38:56 +0000
Subject: [PATCH] Try 5% fails

---
 src/petals/client/config.py            | 2 +-
 src/petals/client/inference_session.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/petals/client/config.py b/src/petals/client/config.py
index a2f8f42..2538b31 100644
--- a/src/petals/client/config.py
+++ b/src/petals/client/config.py
@@ -27,7 +27,7 @@ class ClientConfig:
 
     max_retries: Optional[int] = DEFAULT_MAX_RETRIES  # max number of retries before an exception (default: inf)
     min_backoff: float = 1  # after a repeated failure, sleep for this many seconds times 2 ** (num_failures - 1)
-    max_backoff: float = 60  # limit maximal sleep time between retries to this value
+    max_backoff: float = 5  # limit maximal sleep time between retries to this value
     ban_timeout: float = 15  # when a remote peer fails to respond, prevent routing to that peer for this many seconds
     active_adapter: Optional[str] = None  # name of active LoRA adapter (usually, Hugging Face repo)
 
diff --git a/src/petals/client/inference_session.py b/src/petals/client/inference_session.py
index 34d24c7..b2df1f6 100644
--- a/src/petals/client/inference_session.py
+++ b/src/petals/client/inference_session.py
@@ -144,6 +144,12 @@ class _ServerInferenceSession:
                 )
             )
         )
+
+        import random
+
+        if random.random() < 0.05:
+            raise Exception("fail")
+
         outputs = list(map(deserialize_torch_tensor, outputs_serialized.tensors))
         assert (
             outputs[0].shape == inputs.shape
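
Note (not part of the patch): the change caps ClientConfig.max_backoff at 5 s and makes _ServerInferenceSession.step raise an exception on roughly 5% of calls, so the client's retry path gets exercised frequently during testing. The standalone Python sketch below is a minimal illustration of how the backoff rule documented in the config comments, min_backoff * 2 ** (num_failures - 1) clipped to max_backoff, behaves under such a failure rate; the constants and the simulate_steps helper are illustrative and not part of Petals.

import random

MIN_BACKOFF = 1.0  # ClientConfig.min_backoff
MAX_BACKOFF = 5.0  # ClientConfig.max_backoff after this patch (was 60)
FAIL_RATE = 0.05   # failure probability injected into _ServerInferenceSession.step

def backoff(num_failures: int) -> float:
    # Sleep time after a repeated failure, capped at MAX_BACKOFF
    # (mirrors the rule described in the ClientConfig comments).
    return min(MIN_BACKOFF * 2 ** (num_failures - 1), MAX_BACKOFF)

def simulate_steps(n_steps: int = 1000, seed: int = 0) -> float:
    # Total time a client would spend sleeping in retries over n_steps,
    # assuming each attempt fails independently with probability FAIL_RATE.
    rng = random.Random(seed)
    total_sleep = 0.0
    for _ in range(n_steps):
        num_failures = 0
        while rng.random() < FAIL_RATE:  # injected failure -> retry after backoff
            num_failures += 1
            total_sleep += backoff(num_failures)
        # attempt succeeded; the failure counter resets for the next step
    return total_sleep

if __name__ == "__main__":
    print(f"retry sleep over 1000 steps: ~{simulate_steps():.1f} s")

With the previous 60 s cap, a streak of repeated failures could stall a single step for minutes, whereas with the 5 s cap each retry costs at most a few seconds, which is presumably why the cap is lowered alongside the injected failures.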