You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
55 lines
1.1 KiB
Python
55 lines
1.1 KiB
Python
from __future__ import annotations
|
|
|
|
import dataclasses
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
from typing import Any, Dict, Tuple
|
|
|
|
from hivemind import PeerID
|
|
from hivemind.moe.expert_uid import ExpertUID
|
|
|
|
from petals.server.memory_cache import Handle
|
|
|
|
ModuleUID = str
|
|
UID_DELIMITER = "." # delimits parts of one module uid, e.g. "bloom.transformer.h.4.self_attention"
|
|
CHAIN_DELIMITER = " " # delimits multiple uids in a sequence, e.g. "bloom.layer3 bloom.layer4"
|
|
|
|
|
|
class ServerState(Enum):
|
|
OFFLINE = 0
|
|
JOINING = 1
|
|
ONLINE = 2
|
|
|
|
|
|
@dataclass
|
|
class ServerInfo:
|
|
state: ServerState
|
|
throughput: float
|
|
|
|
|
|
@dataclass
|
|
class RemoteModuleInfo:
|
|
"""A remote module that is served by one or more servers"""
|
|
|
|
uid: ModuleUID
|
|
servers: Dict[PeerID, ServerInfo]
|
|
|
|
|
|
@dataclass
|
|
class RemoteSpanInfo:
|
|
"""A chain of remote blocks served by one specific remote peer"""
|
|
|
|
start: int
|
|
end: int
|
|
peer_id: PeerID
|
|
|
|
|
|
RPCInfo = Dict[str, Any]
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class InferenceMetadata:
|
|
uid: ExpertUID
|
|
prefix_length: int
|
|
cache_handles: Tuple[Handle, ...]
|