GitHub Actions and Docker Compose

update

update

run

update

adding steps

working as root

run

update

update

Add ARM Dockerfile

Temporarily require peft<0.5.0, transformers<4.32.0 (#470)

Peft 0.5 was recently released and broke some compatibilities. This PR temporarily requires Petals to use the previous stable version of peft while we work on 0.5.0 support.
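For reference, the pin described above can be reproduced locally with a plain pip constraint (a sketch based only on the versions named in this commit, not the exact setup.cfg change):

# stay on the last peft release before 0.5 and a transformers release before 4.32
pip install "peft<0.5.0" "transformers<4.32.0"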

run

update

adding steps

working as root

petals inference

moving to my org

first docker compose

move to our org

format

adding hivemind back into the setup

reformat ipynb

now to test main version

using cpu

now working.

The health server now needs to wait for the other services to come up, but
otherwise it is working.
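A minimal sketch of one way to do that wait, assuming the health container only needs the backbone DHT port from health.env (172.17.0.1:8099) to be reachable and that nc exists in the image; the actual image may handle startup ordering differently:

#!/bin/sh
# wait for the backbone DHT to accept connections, then start the health dashboard
until nc -z 172.17.0.1 8099; do
  echo "waiting for backbone..."
  sleep 2
done
exec flask run --host=0.0.0.0 --port=5000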

adding chat

health

adding new test

run

update

Update run-tests-docker.yaml

tpu starting

running locally in cpu mode.

now we have the basic directory structure for an env, still need to
tag the items properly.

versions
pull/567/head, committed by mike dupont (Ubuntu), parent 0fda7da816, commit 1222e172ef

@@ -9,18 +9,19 @@ jobs:
   black:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: psf/black@stable
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/black@main
         with:
           options: "--check --diff"
           version: "22.3.0"
   isort:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v3
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/setup-python@main
         with:
           python-version: 3.8
-      - uses: isort/isort-action@master
+      - uses: meta-introspector/isort-action@main
         with:
           isortVersion: "5.10.1"

@@ -14,15 +14,15 @@ jobs:
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main
       - name: Docker meta
         id: meta
-        uses: crazy-max/ghaction-docker-meta@v2
+        uses: meta-introspector/metadata-action@main
         with:
           # list of Docker images to use as base name for tags
           images: |
-            learningathome/petals
+            h4ckermike/petals
           # generate Docker tags based on the following events/attributes
           tags: |
             type=ref,event=branch
@@ -33,18 +33,29 @@ jobs:
       - name: Set up Docker Buildx
         id: buildx
-        uses: docker/setup-buildx-action@v1
+        uses: meta-introspector/setup-buildx-action@main
       - name: Login to Docker Hub
         if: github.event_name != 'pull_request'
-        uses: docker/login-action@v1
+        uses: meta-introspector/login-action@main
         with:
           username: ${{ secrets.DOCKER_HUB_USERNAME }}
           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
+      - name: Free disk space on Ubuntu runner
+        uses: meta-introspector/free-disk-space@main
+        with:
+          # found in: https://github.com/docker/build-push-action/issues/968
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
       - name: Build and push
         id: docker_build
-        uses: docker/build-push-action@v2
+        uses: meta-introspector/build-push-action@main
         with:
           context: .
           push: ${{ github.event_name != 'pull_request' }}

@@ -0,0 +1,36 @@
name: Tests in docker compose

on:
  push:
    branches: [ main ]
  pull_request:

jobs:
  run-tests-in-compose:
    # runs-on: ubuntu-latest
    runs-on: self-hosted
    timeout-minutes: 20
    steps:
      - name: Increase swap space
        if: ${{ matrix.os == 'ubuntu' }}
        uses: meta-introspector/set-swap-space@main
        with:
          swap-size-gb: 10
      - name: Checkout
        uses: meta-introspector/checkout@main
      - name: Build the docker-compose stack
        run: docker-compose -f docker-compose.yml up -d
      - name: Check running containers
        run: docker ps -a
      - name: Check logs
        run: docker logs health
      - name: Tear down the docker-compose stack
        run: docker-compose down

@@ -24,17 +24,17 @@ jobs:
     steps:
       - name: Increase swap space
         if: ${{ matrix.os == 'ubuntu' }}
-        uses: pierotofy/set-swap-space@master
+        uses: meta-introspector/set-swap-space@main
         with:
           swap-size-gb: 10
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main
       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: meta-introspector/setup-python@main
         with:
           python-version: ${{ matrix.python-version }}
       - name: Cache dependencies
-        uses: actions/cache@v3
+        uses: meta-introspector/cache@main
         with:
           path: ~/.cache/pip
           key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}

@@ -229,3 +229,25 @@ _arXiv preprint arXiv:2209.01188,_ 2022.
<p align="center">
    <img src="https://petals.dev/bigscience.png" width="150">
</p>

# setup

 1623  sudo cp petals-inference.service /etc/systemd/system/
 1634  sudo systemctl daemon-reload
 1635  sudo systemctl status petals-inference.service -l
 1636  sudo systemctl restart petals-inference.service -l
 1639  sudo useradd petals
 1640  sudo mkdir /home/petals
 1641  sudo chown petals: /home/petals/
 1643  sudo cp -r ~/.venv/ /home/petals/venv
 1644  sudo rm -rf /home/petals/venv
 1658  sudo mv ~/.venv/ /home/petals/venv
 1659  sudo chown petals: /home/petals/
 1670  sudo systemctl status petals-inference.service -l
 1674  sudo systemctl restart petals-inference.service -l
 1675  sudo systemctl status petals-inference.service -l
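Condensed, the history above amounts to roughly the following sequence (a sketch assembled from those commands; the -R on chown is the only addition, the venv path and unit name come straight from the history):

# create the service account and give it the virtualenv
sudo useradd petals
sudo mkdir /home/petals
sudo mv ~/.venv/ /home/petals/venv
sudo chown -R petals: /home/petals/

# install and (re)start the systemd unit
sudo cp petals-inference.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl restart petals-inference.service
sudo systemctl status petals-inference.service -l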

@@ -0,0 +1,111 @@
#version: "3"
services:
  health:
    restart: always
    depends_on:
      - backbone
    image: h4ckermike/health.petals:main
    ports:
      - "8100:5000"
    env_file: health.env
    command: flask run --host=0.0.0.0 --port=5000

  inference:
    restart: always
    depends_on:
      - backbone
    image: h4ckermike/inference.petals:main
    ports:
      - "8000:5000"
    env_file: health.env
    command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000

  tinyllamacpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
    ports:
      - "31331:31331"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: always

  tinyllamagpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    ports:
      - "31332:31332"
    command: python -m petals.cli.run_server --port 31332 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: always

  tinyllamatpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    ports:
      - "31333:31333"
    command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]
    restart: always

  # beluga:
  #   image: h4ckermike/petals:main
  #   depends_on:
  #     - backbone
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   ports:
  #     - "31330:31330"
  #   restart: always

  backbone:
    image: h4ckermike/petals:main
    command: python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path /cache/bootstrap1.id
    volumes:
      - petals-cache-backbone:/cache
    network_mode: host
    ipc: host
    restart: unless-stopped
    env_file: health.env

  # # DEbug target
  # debug_health:
  #   #environment:
  #   env_file: health.env
  #   image: h4ckermike/health.petals:main
  #   command: bash
  #   stdin_open: true
  #   tty: true

volumes:
  petals-cache-backbone:
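A typical local run of this stack looks like the following (a sketch: docker-compose expands $INITIAL_PEERS and $DEVICE from the calling shell when it parses the command: lines, so they are exported here with the values from health.env):

export INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
export DEVICE=cpu
docker-compose up -d     # backbone, tinyllama servers, inference and health dashboard
docker ps -a             # same checks the CI workflow runs
docker logs health       # dashboard is published on host port 8100
docker-compose down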

@@ -0,0 +1,2 @@
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
DEVICE=cpu

@@ -0,0 +1,5 @@
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
#PJRT_DEVICE=TPU
DEVICE=cuda
#DEVICE=cpu
#DEVICE=tpux

@@ -0,0 +1,4 @@
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
#PJRT_DEVICE=TPU
#DEVICE=xla
DEVICE=cpu

@@ -0,0 +1,11 @@
[Unit]
Description=Petals Inference

[Service]
User=petals
Group=petals
Environment=PJRT_DEVICE=TPU
ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4

[Install]
WantedBy=multi-user.target
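To keep the unit running across reboots it can also be enabled, not just restarted; this is a small addition beyond the history in the setup section, not part of the commit:

sudo systemctl enable --now petals-inference.service
journalctl -u petals-inference.service -f    # follow the server logs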

@@ -85,10 +85,10 @@
 "# The latter fine-tunes separate prefixes for each transformer block,\n",
 "# so prompt-tuning will take more time but yield better results.\n",
 "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
-"TUNING_MODE = 'ptune'\n",
+"TUNING_MODE = \"ptune\"\n",
 "\n",
 "NUM_PREFIX_TOKENS = 16\n",
-"DEVICE = 'cuda'\n",
+"DEVICE = \"cuda\"\n",
 "BATCH_SIZE = 8\n",
 "LR = 1e-2\n",
 "WEIGHT_DECAY = 0.0\n",
@@ -113,12 +113,10 @@
 "outputs": [],
 "source": [
 "tokenizer = BloomTokenizerFast.from_pretrained(MODEL_NAME)\n",
-"tokenizer.padding_side = 'right'\n",
+"tokenizer.padding_side = \"right\"\n",
 "tokenizer.model_max_length = MODEL_MAX_LENGTH\n",
 "model = DistributedBloomForCausalLM.from_pretrained(\n",
-"    MODEL_NAME,\n",
-"    pre_seq_len=NUM_PREFIX_TOKENS, \n",
-"    tuning_mode=TUNING_MODE\n",
+"    MODEL_NAME, pre_seq_len=NUM_PREFIX_TOKENS, tuning_mode=TUNING_MODE\n",
 ").to(DEVICE)"
 ]
 },
@@ -150,17 +148,13 @@
 "\n",
 "\n",
 "def tokenize(examples):\n",
-"    outputs = {\n",
-"        \"input_ids\": tokenizer(examples[\"chunks\"], padding='max_length', truncation=True)[\"input_ids\"]\n",
-"    }\n",
+"    outputs = {\"input_ids\": tokenizer(examples[\"chunks\"], padding=\"max_length\", truncation=True)[\"input_ids\"]}\n",
 "    outputs[\"labels\"] = outputs[\"input_ids\"]\n",
 "    return outputs\n",
 "\n",
 "\n",
-"tokenized_datasets = (\n",
-"    dataset\n",
-"    .map(chunking, batched=True, remove_columns=dataset[\"train\"].column_names)\n",
-"    .map(tokenize, batched=True, remove_columns=[\"chunks\"])\n",
+"tokenized_datasets = dataset.map(chunking, batched=True, remove_columns=dataset[\"train\"].column_names).map(\n",
+"    tokenize, batched=True, remove_columns=[\"chunks\"]\n",
 ")\n",
 "\n",
 "\n",
@@ -241,7 +235,7 @@
 "        \"num_prefix_tokens\": NUM_PREFIX_TOKENS,\n",
 "        \"model_name\": MODEL_NAME,\n",
 "        \"seed\": SEED,\n",
-"    }\n",
+"    },\n",
 ")\n",
 "\n",
 "for batch in tqdm(train_dataloader):\n",
@@ -285,7 +279,7 @@
 "    user_phrase = input()\n",
 "    if len(user_phrase) == 0:\n",
 "        break\n",
-"    inputs = tokenizer([f\"{user_phrase}\\n-----\\n\"], return_tensors='pt')['input_ids'].to(DEVICE)\n",
+"    inputs = tokenizer([f\"{user_phrase}\\n-----\\n\"], return_tensors=\"pt\")[\"input_ids\"].to(DEVICE)\n",
 "    while True:\n",
 "        outputs = model.generate(\n",
 "            inputs,\n",

@@ -98,10 +98,10 @@
 "# The latter fine-tunes separate prefixes for each transformer block,\n",
 "# so prompt-tuning will take more time but yield better results.\n",
 "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
-"TUNING_MODE = 'ptune'\n",
+"TUNING_MODE = \"ptune\"\n",
 "\n",
 "NUM_PREFIX_TOKENS = 8\n",
-"DEVICE = 'cuda'\n",
+"DEVICE = \"cuda\"\n",
 "BATCH_SIZE = 32\n",
 "LR = 1e-2\n",
 "WEIGHT_DECAY = 0.0\n",
@@ -130,14 +130,16 @@
 "outputs": [],
 "source": [
 "tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)\n",
-"tokenizer.padding_side = 'right'\n",
+"tokenizer.padding_side = \"right\"\n",
 "tokenizer.model_max_length = MODEL_MAX_LENGTH\n",
 "tokenizer.pad_token = tokenizer.unk_token\n",
-"model = DistributedLlamaForSequenceClassification.from_pretrained(\n",
-"    MODEL_NAME,\n",
-"    pre_seq_len=NUM_PREFIX_TOKENS,\n",
-"    tuning_mode=TUNING_MODE\n",
-").float().to(DEVICE)\n",
+"model = (\n",
+"    DistributedLlamaForSequenceClassification.from_pretrained(\n",
+"        MODEL_NAME, pre_seq_len=NUM_PREFIX_TOKENS, tuning_mode=TUNING_MODE\n",
+"    )\n",
+"    .float()\n",
+"    .to(DEVICE)\n",
+")\n",
 "model.config.pad_token_id = tokenizer.pad_token_id"
 ]
 },
@@ -160,12 +162,14 @@
 },
 "outputs": [],
 "source": [
-"task = 'sst2'\n",
+"task = \"sst2\"\n",
 "\n",
 "dataset = load_dataset(\"glue\", task)\n",
 "\n",
+"\n",
 "def preprocess_function(examples):\n",
-"    return tokenizer(examples[\"sentence\"], padding='max_length', truncation=True, return_token_type_ids=False)\n",
+"    return tokenizer(examples[\"sentence\"], padding=\"max_length\", truncation=True, return_token_type_ids=False)\n",
+"\n",
 "\n",
 "tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
 "tokenized_datasets = tokenized_datasets.remove_columns([\"sentence\", \"idx\", \"attention_mask\"])\n",
@@ -198,9 +202,10 @@
 },
 "outputs": [],
 "source": [
-"metric = load_metric('glue', task)\n",
+"metric = load_metric(\"glue\", task)\n",
 "\n",
-"def eval_metrics(model, dataloader, device='cpu'):\n",
+"\n",
+"def eval_metrics(model, dataloader, device=\"cpu\"):\n",
 "    model.eval()\n",
 "    for batch in dataloader:\n",
 "        batch = {k: v.to(device) for k, v in batch.items()}\n",
@@ -294,7 +299,7 @@
 "        \"num_prefix_tokens\": NUM_PREFIX_TOKENS,\n",
 "        \"model_name\": MODEL_NAME,\n",
 "        \"seed\": SEED,\n",
-"    }\n",
+"    },\n",
 ")\n",
 "\n",
 "scaler = torch.cuda.amp.GradScaler()\n",
@@ -305,7 +310,7 @@
 "    batch = {k: v.to(DEVICE) for k, v in batch.items()}\n",
 "\n",
 "    with torch.autocast(device_type=DEVICE, dtype=torch.float16):\n",
-"         outputs = model(**batch)\n",
+"        outputs = model(**batch)\n",
 "    loss = outputs.loss\n",
 "    scaler.scale(loss).backward()\n",
 "\n",

@@ -0,0 +1 @@
docker logs petals-backbone-1 2>&1 |grep initial_peers |cut "-d " -f18- | sort -u > peers.txt
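A sketch of how the captured address can be fed back into the env file used by the compose stack (assumes peers.txt holds one multiaddr per line, as produced by the command above):

# take the first advertised multiaddr and rewrite health.env with it
PEER=$(head -n1 peers.txt)
printf 'INITIAL_PEERS=%s\nDEVICE=cpu\n' "$PEER" > health.env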

@@ -0,0 +1,3 @@
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
PJRT_DEVICE=TPU
DEVICE=xla

@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 [tool.black]
 line-length = 120
-required-version = "22.3.0"
+required-version = "24.3.0"

 [tool.isort]
 profile = "black"

@@ -0,0 +1 @@
PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10

@@ -0,0 +1 @@
docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id

@@ -17,13 +17,13 @@ from petals.models import *
 from petals.utils import *
 from petals.utils.logging import initialize_logs as _initialize_logs

-__version__ = "2.2.0"
+__version__ = "2.3.0.dev2"

-if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
-    assert (
-        version.parse("4.32.0") <= version.parse(transformers.__version__) < version.parse("5.0.0")
-    ), "Please install a proper transformers version: pip install transformers>=4.32.0,<5.0.0"
+#if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
+#    assert (
+#        version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
+#    ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"

 def _override_bfloat16_mode_default():
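Instead of commenting the check out, the original guard can also be bypassed per run through the environment variable it already reads (a sketch reusing the TPU command from above):

PETALS_IGNORE_DEPENDENCY_VERSION=1 PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10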
