From 63282afb4e0211c5b0b787ae13c238fc8738b32f Mon Sep 17 00:00:00 2001
From: Aleksandr Borzunov
Date: Thu, 28 Sep 2023 18:38:56 +0000
Subject: [PATCH] Try 5% fails

---
 src/petals/client/config.py            | 2 +-
 src/petals/client/inference_session.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/petals/client/config.py b/src/petals/client/config.py
index a2f8f42..2538b31 100644
--- a/src/petals/client/config.py
+++ b/src/petals/client/config.py
@@ -27,7 +27,7 @@ class ClientConfig:
 
     max_retries: Optional[int] = DEFAULT_MAX_RETRIES  # max number of retries before an exception (default: inf)
     min_backoff: float = 1  # after a repeated failure, sleep for this many seconds times 2 ** (num_failures - 1)
-    max_backoff: float = 60  # limit maximal sleep time between retries to this value
+    max_backoff: float = 5  # limit maximal sleep time between retries to this value
     ban_timeout: float = 15  # when a remote peer fails to respond, prevent routing to that peer for this many seconds
     active_adapter: Optional[str] = None  # name of active LoRA adapter (usually, Hugging Face repo)
 
diff --git a/src/petals/client/inference_session.py b/src/petals/client/inference_session.py
index 34d24c7..b2df1f6 100644
--- a/src/petals/client/inference_session.py
+++ b/src/petals/client/inference_session.py
@@ -144,6 +144,12 @@ class _ServerInferenceSession:
                 )
             )
         )
+
+        import random
+
+        if random.random() < 0.05:
+            raise Exception("fail")
+
         outputs = list(map(deserialize_torch_tensor, outputs_serialized.tensors))
         assert (
             outputs[0].shape == inputs.shape
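
Note (not part of the patch): the change caps ClientConfig.max_backoff at 5 s and makes _ServerInferenceSession.step raise an exception on roughly 5% of calls, so the client's retry path gets exercised frequently during testing. The standalone Python sketch below is a minimal illustration of how the backoff rule documented in the config comments, min_backoff * 2 ** (num_failures - 1) clipped to max_backoff, behaves under such a failure rate; the constants and the simulate_steps helper are illustrative and not part of Petals.

import random

MIN_BACKOFF = 1.0  # ClientConfig.min_backoff
MAX_BACKOFF = 5.0  # ClientConfig.max_backoff after this patch (was 60)
FAIL_RATE = 0.05   # failure probability injected into _ServerInferenceSession.step

def backoff(num_failures: int) -> float:
    # Sleep time after a repeated failure, capped at MAX_BACKOFF
    # (mirrors the rule described in the ClientConfig comments).
    return min(MIN_BACKOFF * 2 ** (num_failures - 1), MAX_BACKOFF)

def simulate_steps(n_steps: int = 1000, seed: int = 0) -> float:
    # Total time a client would spend sleeping in retries over n_steps,
    # assuming each attempt fails independently with probability FAIL_RATE.
    rng = random.Random(seed)
    total_sleep = 0.0
    for _ in range(n_steps):
        num_failures = 0
        while rng.random() < FAIL_RATE:  # injected failure -> retry after backoff
            num_failures += 1
            total_sleep += backoff(num_failures)
        # attempt succeeded; the failure counter resets for the next step
    return total_sleep

if __name__ == "__main__":
    print(f"retry sleep over 1000 steps: ~{simulate_steps():.1f} s")

With the previous 60 s cap, a streak of repeated failures could stall a single step for minutes, whereas with the 5 s cap each retry costs at most a few seconds, which is presumably why the cap is lowered alongside the injected failures.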