Update to petals.dev (#390)

Since the `petals.ml` DNS record is still unavailable, we're switching everything to https://petals.dev

Co-authored-by: Aleksandr Borzunov <hxrussia@gmail.com>
pull/391/head
justheuristic 10 months ago committed by GitHub
parent ddcda02b06
commit e51e84631d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -65,8 +65,8 @@ Basic tutorials:
Useful tools and advanced guides: Useful tools and advanced guides:
- [Chatbot web app](http://chat.petals.ml) (connects to Petals via an HTTP/WebSocket endpoint): [source code](https://github.com/borzunov/chat.petals.ml) - [Chatbot web app](https://chat.petals.dev) (connects to Petals via an HTTP/WebSocket endpoint): [source code](https://github.com/borzunov/chat.petals.dev)
- [Monitor](http://health.petals.ml) for the public swarm: [source code](https://github.com/borzunov/health.petals.ml) - [Monitor](https://health.petals.dev) for the public swarm: [source code](https://github.com/borzunov/health.petals.dev)
- Launch your own swarm: [guide](https://github.com/bigscience-workshop/petals/wiki/Launch-your-own-swarm) - Launch your own swarm: [guide](https://github.com/bigscience-workshop/petals/wiki/Launch-your-own-swarm)
- Run a custom foundation model: [guide](https://github.com/bigscience-workshop/petals/wiki/Run-a-custom-model-with-Petals) - Run a custom foundation model: [guide](https://github.com/bigscience-workshop/petals/wiki/Run-a-custom-model-with-Petals)
@ -78,7 +78,7 @@ Learning more:
## How does it work? ## How does it work?
- Petals runs large language models like [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) and [BLOOM](https://huggingface.co/bigscience/bloom) **collaboratively** — you load a small part of the model, then team up with people serving the other parts to run inference or fine-tuning. - Petals runs large language models like [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) and [BLOOM](https://huggingface.co/bigscience/bloom) **collaboratively** — you load a small part of the model, then team up with people serving the other parts to run inference or fine-tuning.
- Single-batch inference runs at up to 6 steps/sec for LLaMA 2 (70B) and &approx; 1 step/sec for BLOOM-176B. This is [up to 10x faster](https://github.com/bigscience-workshop/petals#benchmarks) than offloading, enough for [chatbots](http://chat.petals.ml) and other interactive apps. Parallel inference reaches hundreds of tokens/sec. - Single-batch inference runs at up to 6 steps/sec for LLaMA 2 (70B) and &approx; 1 step/sec for BLOOM-176B. This is [up to 10x faster](https://github.com/bigscience-workshop/petals#benchmarks) than offloading, enough for [chatbots](https://chat.petals.dev) and other interactive apps. Parallel inference reaches hundreds of tokens/sec.
- Beyond classic language model APIs — you can employ any fine-tuning and sampling methods, execute custom paths through the model, or see its hidden states. You get the comforts of an API with the flexibility of PyTorch. - Beyond classic language model APIs — you can employ any fine-tuning and sampling methods, execute custom paths through the model, or see its hidden states. You get the comforts of an API with the flexibility of PyTorch.
<p align="center"> <p align="center">
@ -218,5 +218,5 @@ _arXiv preprint arXiv:2209.01188,_ 2022.
This project is a part of the <a href="https://bigscience.huggingface.co/">BigScience</a> research workshop. This project is a part of the <a href="https://bigscience.huggingface.co/">BigScience</a> research workshop.
</p> </p>
<p align="center"> <p align="center">
<img src="https://petals.ml/bigscience.png" width="150"> <img src="https://petals.dev/bigscience.png" width="150">
</p> </p>

@ -330,7 +330,7 @@
"id": "51770911" "id": "51770911"
}, },
"source": [ "source": [
"Our model has been trained! You can now upload it to the Hub for later use, try out different models [served in the public swarm](http://health.petals.ml/), or [join Petals with your own GPU](https://github.com/bigscience-workshop/petals#connect-your-gpu-and-increase-petals-capacity)!" "Our model has been trained! You can now upload it to the Hub for later use, try out different models [served in the public swarm](https://health.petals.dev/), or [join Petals with your own GPU](https://github.com/bigscience-workshop/petals#connect-your-gpu-and-increase-petals-capacity)!"
] ]
}, },
{ {

@ -152,7 +152,7 @@ def main():
"weight matrix. See https://huggingface.co/transformers/v4.9.0/parallelism.html#tensor-parallelism") "weight matrix. See https://huggingface.co/transformers/v4.9.0/parallelism.html#tensor-parallelism")
parser.add_argument("--skip_reachability_check", action='store_true', parser.add_argument("--skip_reachability_check", action='store_true',
help="Skip checking this server's reachability via health.petals.ml " help="Skip checking this server's reachability via health.petals.dev "
"when connecting to the public swarm. If you connect to a private swarm, " "when connecting to the public swarm. If you connect to a private swarm, "
"the check is skipped by default. Use this option only if you know what you are doing") "the check is skipped by default. Use this option only if you know what you are doing")

@ -513,7 +513,7 @@ class MissingBlocksError(RuntimeError):
def __init__(self, block_indices: Union[int, Sequence[int]]): def __init__(self, block_indices: Union[int, Sequence[int]]):
super().__init__( super().__init__(
f"No servers holding blocks {block_indices} are online. " f"No servers holding blocks {block_indices} are online. "
f"You can check the public swarm's state at http://health.petals.ml " f"You can check the public swarm's state at https://health.petals.dev "
f"If there are not enough servers, please connect your GPU: " f"If there are not enough servers, please connect your GPU: "
f"https://github.com/bigscience-workshop/petals#connect-your-gpu-and-increase-petals-capacity " f"https://github.com/bigscience-workshop/petals#connect-your-gpu-and-increase-petals-capacity "
) )

@ -1,17 +1,18 @@
import torch import torch
PUBLIC_INITIAL_PEERS = [ PUBLIC_INITIAL_PEERS = [
# Temporary IPs until DNS issues get resolved # IPv4 DNS addresses
"/ip4/159.223.29.252/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY", "/dns/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
"/ip4/24.144.96.147/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5", "/dns/bootstrap2.petals.dev/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
# Default DNS addresses # IPv6 DNS addresses
"/dns/bootstrap1.petals.ml/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY", "/dns6/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
"/dns6/bootstrap1.petals.ml/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY", "/dns6/bootstrap2.petals.dev/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
"/dns/bootstrap2.petals.ml/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5", # Reserved IPs
"/dns6/bootstrap2.petals.ml/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5", "/ip4/159.89.214.152/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
"/ip4/159.203.156.48/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
] ]
# The reachability API is currently used only when connecting to the public swarm # The reachability API is currently used only when connecting to the public swarm
REACHABILITY_API_URL = "http://health.petals.ml" REACHABILITY_API_URL = "https://health.petals.dev"
DTYPE_MAP = dict(bfloat16=torch.bfloat16, float16=torch.float16, float32=torch.float32, auto="auto") DTYPE_MAP = dict(bfloat16=torch.bfloat16, float16=torch.float16, float32=torch.float32, auto="auto")

@ -28,7 +28,7 @@ def validate_reachability(peer_id, wait_time: float = 7 * 60, retry_delay: float
response = r.json() response = r.json()
if response["success"]: if response["success"]:
logger.info("Server is reachable from the Internet. It will appear at http://health.petals.ml soon") logger.info("Server is reachable from the Internet. It will appear at https://health.petals.dev soon")
return return
if attempt_no == 0: if attempt_no == 0:
@ -37,7 +37,7 @@ def validate_reachability(peer_id, wait_time: float = 7 * 60, retry_delay: float
logger.info("Detected a NAT or a firewall, connecting to libp2p relays. This takes a few minutes") logger.info("Detected a NAT or a firewall, connecting to libp2p relays. This takes a few minutes")
time.sleep(retry_delay) time.sleep(retry_delay)
except Exception as e: except Exception as e:
logger.warning(f"Skipping reachability check because health.petals.ml is down: {repr(e)}") logger.warning(f"Skipping reachability check because health.petals.dev is down: {repr(e)}")
return return
raise RuntimeError( raise RuntimeError(

Loading…
Cancel
Save