Don't prioritize short inference steps (it's too much given that they use a merged pool)

pull/464/head
Aleksandr Borzunov, 10 months ago
parent 5ed96a44b1
commit fe88505e59

@@ -196,7 +196,7 @@ async def iterate_rpc_inference(
             hidden_states,
             hypo_ids,
             points=point_per_piece,
             requested_uids=requested_uids,
-            type="short_inference" if can_merge_pools else "inference",
+            type="inference",
         )
         # A client may pass a tensor with 0 tokens. This is a special case that occurs, e.g.
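For illustration, the call in this hunk can be exercised in isolation. The snippet below is a sketch with placeholder tensors standing in for the handler's real state; the import path and the placeholder values are assumptions, only the keyword arguments and the prioritize() signature come from this diff.

import torch

# Assumed import path; the diff does not show file locations.
from petals.server.task_prioritizer import DummyTaskPrioritizer

prioritizer = DummyTaskPrioritizer()
priority = prioritizer.prioritize(
    torch.zeros(1, 4, 1024),      # placeholder hidden states
    torch.arange(1),              # placeholder hypo_ids
    points=0.0,                   # placeholder for point_per_piece
    requested_uids=("block.0",),  # placeholder for requested_uids
    type="inference",             # after this commit, the only inference type
)
print(priority)  # 1.0 -> sorted ahead of forward/backward tasks (2.0)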

@@ -14,9 +14,7 @@ class TaskPrioritizerBase(ABC):
 class DummyTaskPrioritizer(TaskPrioritizerBase):
     def prioritize(self, *input: torch.Tensor, points: float = 0.0, **kwargs) -> float:
-        # Inference steps (especially short ones) go first since they are more latency-sensitive
-        if kwargs.get("type") == "short_inference":
-            return 1.0
+        # Inference steps go first since they are more latency-sensitive
         if kwargs.get("type") == "inference":
-            return 2.0
-        return 3.0  # Forward, backward
+            return 1.0
+        return 2.0  # Forward, backward
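To see the effect of the new priority values end to end, here is a self-contained sketch that pairs the post-change rule with a toy min-heap queue. The real server pool is not shown in this diff, so the queue below is only an assumption about how the returned value is consumed (lower value = dequeued earlier); the priority rule itself is copied from the hunk above, with the tensor arguments omitted for brevity.

import heapq
import itertools

def prioritize(**kwargs) -> float:
    # Post-change rule: inference steps go first since they are more latency-sensitive.
    if kwargs.get("type") == "inference":
        return 1.0
    return 2.0  # Forward, backward

counter = itertools.count()  # tie-breaker keeps insertion order within a priority class
queue = []
for task_type in ["backward", "inference", "forward", "inference"]:
    heapq.heappush(queue, (prioritize(type=task_type), next(counter), task_type))

while queue:
    _, _, task_type = heapq.heappop(queue)
    print(task_type)  # inference, inference, backward, forward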
