diff --git a/src/petals/client/config.py b/src/petals/client/config.py
index a2f8f42..2538b31 100644
--- a/src/petals/client/config.py
+++ b/src/petals/client/config.py
@@ -27,7 +27,7 @@ class ClientConfig:
 
     max_retries: Optional[int] = DEFAULT_MAX_RETRIES  # max number of retries before an exception (default: inf)
     min_backoff: float = 1  # after a repeated failure, sleep for this many seconds times 2 ** (num_failures - 1)
-    max_backoff: float = 60  # limit maximal sleep time between retries to this value
+    max_backoff: float = 5  # limit maximal sleep time between retries to this value
     ban_timeout: float = 15  # when a remote peer fails to respond, prevent routing to that peer for this many seconds
     active_adapter: Optional[str] = None  # name of active LoRA adapter (usually, Hugging Face repo)
 
diff --git a/src/petals/client/inference_session.py b/src/petals/client/inference_session.py
index 34d24c7..b2df1f6 100644
--- a/src/petals/client/inference_session.py
+++ b/src/petals/client/inference_session.py
@@ -144,6 +144,12 @@ class _ServerInferenceSession:
                 )
             )
         )
+
+        import random
+
+        if random.random() < 0.05:
+            raise Exception("fail")
+
         outputs = list(map(deserialize_torch_tensor, outputs_serialized.tensors))
         assert (
             outputs[0].shape == inputs.shape
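
Taken together, the two hunks read as a fault-injection patch: the second hunk makes roughly 5% of inference steps raise an exception, and the first hunk caps the retry sleep at 5 s so the injected failures are retried quickly instead of stalling for up to a minute. The sketch below is illustrative only, not Petals code; the helper name backoff_delay is hypothetical. It implements the schedule described by the ClientConfig comments: sleep min_backoff * 2 ** (num_failures - 1) seconds, limited to max_backoff.

def backoff_delay(num_failures: int, min_backoff: float = 1.0, max_backoff: float = 5.0) -> float:
    """Seconds to sleep after the num_failures-th consecutive failure."""
    # Exponential backoff: 1, 2, 4, ... doubling per failure, capped at max_backoff.
    return min(max_backoff, min_backoff * 2 ** (num_failures - 1))

# With the patched max_backoff=5 the schedule saturates after four failures:
#   failures 1, 2, 3, 4, 5, ...  ->  1.0, 2.0, 4.0, 5.0, 5.0, ... seconds
# With the previous max_backoff=60 it kept doubling: 1, 2, 4, ..., 32, 60, 60.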