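# NOTE: the following lines are repository-viewer residue captured with this file; they are not part of the module.
# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# petals/src/petals/data_structures.py
#
# 55 lines
# 1.1 KiB
# Python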

from __future__ import annotations
import dataclasses
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, Tuple
from hivemind import PeerID
from hivemind.moe.expert_uid import ExpertUID
from petals.server.memory_cache import Handle
# Type alias: a module uid is represented as a plain string.
ModuleUID = str

# Delimits parts of one module uid, e.g. "bloom.transformer.h.4.self_attention".
UID_DELIMITER = "."

# Delimits multiple uids in a sequence, e.g. "bloom.layer3 bloom.layer4".
CHAIN_DELIMITER = " "
class ServerState(Enum):
    """Enumeration of the states a server can be in.

    The integer values increase from OFFLINE to ONLINE.
    NOTE(review): the exact meaning of each state is defined by the code that
    sets it, which is not visible in this file -- confirm before relying on it.
    """

    OFFLINE = 0
    JOINING = 1
    ONLINE = 2
@dataclass
class ServerInfo:
    """State and throughput recorded for a single server."""

    # Current state of the server (a ServerState member).
    state: ServerState
    # Throughput figure for the server; units are not visible in this file -- TODO confirm with the producer.
    throughput: float
@dataclass
class RemoteModuleInfo:
    """A remote module that is served by one or more servers"""

    # Identifier of the module (ModuleUID is an alias of str).
    uid: ModuleUID
    # Maps each serving peer's id to the ServerInfo recorded for it.
    servers: Dict[PeerID, ServerInfo]
@dataclass
class RemoteSpanInfo:
    """A chain of remote blocks served by one specific remote peer"""

    # Index of the first block in the span.
    start: int
    # Index bounding the end of the span.
    # NOTE(review): presumably exclusive, i.e. the span is [start, end) -- confirm against callers.
    end: int
    # Id of the peer that serves this span.
    peer_id: PeerID
# Type alias for a string-keyed dict of arbitrary values.
# NOTE(review): the name suggests it carries RPC metadata -- confirm against callers.
RPCInfo = Dict[str, Any]
@dataclasses.dataclass(frozen=True)
class InferenceMetadata:
    """Read-only record bundling a uid, a prefix length and a tuple of cache handles.

    ``frozen=True`` makes attribute assignment raise after construction and,
    together with the default field-based equality, gives instances a
    field-based hash.
    """

    # Uid this metadata refers to.
    uid: ExpertUID
    # NOTE(review): presumably the number of already-processed positions -- confirm against callers.
    prefix_length: int
    # Handles as defined by petals.server.memory_cache; kept as a tuple so the record stays immutable.
    cache_handles: Tuple[Handle, ...]