From cc5e5d32c007ebfa21d8a1ceb6776851b16bb1bf Mon Sep 17 00:00:00 2001 From: Alexander Borzunov Date: Fri, 13 Jan 2023 08:45:53 +0400 Subject: [PATCH] Don't switch blocks if it makes swarm disjoint (#210) Even if the swarm seems to have at least 2 servers for each block, turning off one of the servers could break it. That's because once a server is turned off, others may move to a better position, creating a significant downtime on their way. This PR prohibits switching blocks if it would make the swarm disjoint along the way. --- src/petals/server/block_selection.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/petals/server/block_selection.py b/src/petals/server/block_selection.py index 203b781..33161b2 100644 --- a/src/petals/server/block_selection.py +++ b/src/petals/server/block_selection.py @@ -79,6 +79,9 @@ def should_choose_other_blocks( # Also, subtracting local_span.throughput * (1 + eps) makes _choose_best_start() prefer # the previous server position in case of other things being almost equal. + if initial_throughput > eps and throughputs.min() <= 0: + return False # Switching blocks would make the swarm disjoint + new_start = _choose_best_start(throughputs, local_span.length) if local_span.start == new_start: return False # This server is on its best place already