Hot fix: Increase hivemind.P2P's startup_timeout for Colab, remove absent initial peer (#162)

pull/165/head
Alexander Borzunov 1 year ago committed by GitHub
parent 0a6b5f31aa
commit d6992fca63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -32,6 +32,7 @@ class DistributedBloomConfig(BloomConfig):
initial_peers: List[str] = PUBLIC_INITIAL_PEERS # a list of initial peers for hivemind DHT
dht_prefix: str # a prefix for all dht keys that correspond to this model (usually equal to model name)
+ daemon_startup_timeout: int = 30
dht: Optional[hivemind.DHT] = None # a running DHT instance, e.g. when using the same DHT for multiple models
chunk_size_for_efficient_fp16_on_cpu: int = 10000 # a chunk size for a LM head for efficient half-precision on CPU
pre_seq_len: int = 0 # a number of tokens for prompt tuning.
@@ -95,7 +96,13 @@ class DistributedBloomModel(_LowCPUMemoryMixin, BloomModel):
dht = (
config.dht
if config.dht is not None
- else hivemind.DHT(initial_peers=config.initial_peers, client_mode=True, num_workers=n_layer, start=True)
+ else hivemind.DHT(
+ initial_peers=config.initial_peers,
+ client_mode=True,
+ num_workers=n_layer,
+ startup_timeout=config.daemon_startup_timeout,
+ start=True,
+ )
)
assert isinstance(dht, hivemind.DHT) and dht.is_alive(), "dht must be a running hivemind.DHT instance"
self.h = RemoteSequential(config, dht, config.dht_prefix, request_timeout=config.request_timeout)

@@ -3,6 +3,4 @@ PUBLIC_INITIAL_PEERS = [
"/dns6/bootstrap1.petals.ml/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
"/dns/bootstrap2.petals.ml/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
"/dns6/bootstrap2.petals.ml/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
- "/dns/bootstrap3.petals.ml/tcp/31339/p2p/QmX82nfE57CSkNgyEC7pPMPBzjcFLLJXdHhvp1AXKVPvJD",
- "/dns6/bootstrap3.petals.ml/tcp/31339/p2p/QmX82nfE57CSkNgyEC7pPMPBzjcFLLJXdHhvp1AXKVPvJD",
]

Loading…
Cancel
Save