From 1222e172ef7e66d11a48e113e3ec1e5742cca63c Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 28 Mar 2024 19:14:24 +0000
Subject: [PATCH] GitHub Actions and docker compose update

Squashed commit messages:

* update
* run update
* adding steps
* working as root
* run update
* update
* Add ARM Dockerfile
* Temporarily require peft<0.5.0, transformers<4.32.0 (#470)

  Peft 0.5 recently released and broke some compatibilities. This PR
  temporarily requires petals to use the previous stable version of peft
  while we work on 0.5.0 support.

* run update
* adding steps
* working as root
* petals inference: moving to my org first
* docker compose: move to our org
* format: adding hivemind back into the setup
* reformat ipynb
* now to test main version
* using cpu now working. The health server now needs to wait for the others
  to come up, but otherwise it is working.
* adding chat health
* adding new test
* run update
* Update run-tests-docker.yaml
* tpu starting
* running locally in cpu mode; now we have the basic directory structure for
  an env, still need to tag the items properly.
* versions
---
 .github/workflows/check-style.yaml       |  11 ++-
 .github/workflows/push-docker-image.yaml |  23 +++--
 .github/workflows/run-tests-docker.yaml  |  36 ++++++++
 .github/workflows/run-tests.yaml         |   8 +-
 README.md                                |  22 +++++
 docker-compose.yml                       | 111 +++++++++++++++++++++++
 envs/cpu/is1/env.txt                     |   2 +
 envs/gpu/is1/env.txt                     |   5 +
 envs/tpu/v3-0/env.txt                    |   4 +
 etc/petals-inference.service             |  11 +++
 examples/prompt-tuning-personachat.ipynb |  24 ++---
 examples/prompt-tuning-sst2.ipynb        |  33 ++++---
 get_peersl.sh                            |   1 +
 health.env                               |   3 +
 pyproject.toml                           |   2 +-
 run.sh                                   |   1 +
 run2.sh                                  |   1 +
 src/petals/__init__.py                   |  10 +-
 18 files changed, 258 insertions(+), 50 deletions(-)
 create mode 100644 .github/workflows/run-tests-docker.yaml
 create mode 100644 docker-compose.yml
 create mode 100644 envs/cpu/is1/env.txt
 create mode 100644 envs/gpu/is1/env.txt
 create mode 100644 envs/tpu/v3-0/env.txt
 create mode 100644 etc/petals-inference.service
 create mode 100644 get_peersl.sh
 create mode 100644 health.env
 create mode 100755 run.sh
 create mode 100644 run2.sh

diff --git a/.github/workflows/check-style.yaml b/.github/workflows/check-style.yaml
index 60ea42b..d878766 100644
--- a/.github/workflows/check-style.yaml
+++ b/.github/workflows/check-style.yaml
@@ -9,18 +9,19 @@ jobs:
   black:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: psf/black@stable
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/black@main
         with:
           options: "--check --diff"
           version: "22.3.0"
   isort:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v3
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/setup-python@main
         with:
          python-version: 3.8
-      - uses: isort/isort-action@master
+      - uses: meta-introspector/isort-action@main
         with:
           isortVersion: "5.10.1"
+
diff --git a/.github/workflows/push-docker-image.yaml b/.github/workflows/push-docker-image.yaml
index 345b8f2..88ae129 100644
--- a/.github/workflows/push-docker-image.yaml
+++ b/.github/workflows/push-docker-image.yaml
@@ -14,15 +14,15 @@
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main

       - name: Docker meta
         id: meta
-        uses: crazy-max/ghaction-docker-meta@v2
+        uses: meta-introspector/metadata-action@main
         with:
           # list of Docker images to use as base name for tags
           images: |
-            learningathome/petals
+            h4ckermike/petals
           # generate Docker tags based on the following events/attributes
           tags: |
             type=ref,event=branch
@@ -33,18 +33,29 @@
       - name: Set up Docker Buildx
         id: buildx
-        uses: docker/setup-buildx-action@v1
+        uses: meta-introspector/setup-buildx-action@main

       - name: Login to Docker Hub
         if: github.event_name != 'pull_request'
-        uses: docker/login-action@v1
+        uses: meta-introspector/login-action@main
         with:
           username: ${{ secrets.DOCKER_HUB_USERNAME }}
           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

+      - name: Free disk space on Ubuntu runner
+        uses: meta-introspector/free-disk-space@main
+        with:
+          # found in: https://github.com/docker/build-push-action/issues/968
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
       - name: Build and push
         id: docker_build
-        uses: docker/build-push-action@v2
+        uses: meta-introspector/build-push-action@main
         with:
           context: .
           push: ${{ github.event_name != 'pull_request' }}
diff --git a/.github/workflows/run-tests-docker.yaml b/.github/workflows/run-tests-docker.yaml
new file mode 100644
index 0000000..6f6f00f
--- /dev/null
+++ b/.github/workflows/run-tests-docker.yaml
@@ -0,0 +1,36 @@
+name: Tests in docker compose
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  run-tests-in-compose:
+    # runs-on: ubuntu-latest
+    runs-on: self-hosted
+    timeout-minutes: 20
+    steps:
+      - name: Increase swap space
+        if: ${{ matrix.os == 'ubuntu' }}
+        uses: meta-introspector/set-swap-space@main
+        with:
+          swap-size-gb: 10
+      - name: Checkout
+        uses: meta-introspector/checkout@main
+
+      - name: Build the docker-compose stack
+        run: docker-compose -f docker-compose.yml up -d
+
+      - name: Check running containers
+        run: docker ps -a
+
+      - name: Check logs
+        run: docker logs health
+
+      - name: Build the docker-compose stack
+        run: docker-compose down
+
+
+
+
diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
index 05cebdd..2ba5831 100644
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -24,17 +24,17 @@
     steps:
       - name: Increase swap space
         if: ${{ matrix.os == 'ubuntu' }}
-        uses: pierotofy/set-swap-space@master
+        uses: meta-introspector/set-swap-space@main
         with:
           swap-size-gb: 10
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main
       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: meta-introspector/setup-python@main
         with:
           python-version: ${{ matrix.python-version }}
       - name: Cache dependencies
-        uses: actions/cache@v3
+        uses: meta-introspector/cache@main
         with:
           path: ~/.cache/pip
           key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}
diff --git a/README.md b/README.md
index 1f410ef..f10374f 100644
--- a/README.md
+++ b/README.md
@@ -229,3 +229,25 @@ _arXiv preprint arXiv:2209.01188,_ 2022.

+
+
+# setup
+
+
+ 1623  sudo cp petals-inference.service /etc/systemd/system/
+ 1634  sudo systemctl daemon-reload
+ 1635  sudo systemctl status petals-inference.service -l
+ 1636  sudo systemctl restart petals-inference.service -l
+
+ 1639  sudo useradd petals
+ 1640  sudo mkdir /home/petals
+ 1641  sudo chown petals: /home/petals/
+ 1643  sudo cp -r ~/.venv/ /home/petals/venv
+ 1644  sudo rm -rf /home/petals/venv
+ 1658  sudo mv ~/.venv/ /home/petals/venv
+ 1659  sudo chown petals: /home/petals/
+
+1670  sudo systemctl status petals-inference.service -l
+ 1674  sudo systemctl restart petals-inference.service -l
+ 1675  sudo systemctl status petals-inference.service -l
+
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..d6fe009
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,111 @@
+#version: "3"
+
+services:
+
+  health:
+    restart: always
+    depends_on:
+      - backbone
+    image: h4ckermike/health.petals:main
+    ports:
+      - "8100:5000"
+    env_file: health.env
+    command: flask run --host=0.0.0.0 --port=5000
+
+  inference :
+    restart: always
+    depends_on:
+      - backbone
+    image: h4ckermike/inference.petals:main
+    ports:
+      - "8000:5000"
+    env_file: health.env
+    command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000
+
+  tinyllamacpu:
+    image: h4ckermike/petals:main
+    depends_on:
+      - backbone
+    command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
+    ports:
+      - "31331:31331"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    restart: always
+
+  tinyllamagpu:
+    image: h4ckermike/petals:main
+    depends_on:
+      - backbone
+    ports:
+      - "31332:31332"
+    command: python -m petals.cli.run_server --port 31332 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    restart: always
+
+  tinyllamatpu:
+    image: h4ckermike/petals:main
+    depends_on:
+      - backbone
+    ports:
+      - "31333:31333"
+    command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
+
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: 1
+    #           capabilities: [gpu]
+    restart: always
+
+  # beluga:
+  #   image: h4ckermike/petals:main
+  #   depends_on:
+  #     - backbone
+  #   deploy:
+  #     resources:
+  #       reservations:
+  #         devices:
+  #           - driver: nvidia
+  #             count: 1
+  #             capabilities: [gpu]
+  #   ports:
+  #     - "31330:31330"
+  #   restart: always
+
+  backbone:
+    image: h4ckermike/petals:main
+    command: python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path /cache/bootstrap1.id
+    volumes:
+      - petals-cache-backbone:/cache
+    network_mode: host
+    ipc: host
+    restart: unless-stopped
+    env_file: health.env
+
+  # # DEbug target
+  # debug_health:
+  #   #environment:
+
+  #   env_file: health.env
+  #   image: h4ckermike/health.petals:main
+  #   command: bash
+  #   stdin_open: true
+  #   tty: true
+
+
+volumes:
+  petals-cache-backbone:
diff --git a/envs/cpu/is1/env.txt b/envs/cpu/is1/env.txt
new file mode 100644
index 0000000..67a88f2
--- /dev/null
+++ b/envs/cpu/is1/env.txt
@@ -0,0 +1,2 @@
+INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
+DEVICE=cpu
diff --git a/envs/gpu/is1/env.txt b/envs/gpu/is1/env.txt
new file mode 100644
index 0000000..5cfce73
--- /dev/null
+++ b/envs/gpu/is1/env.txt
@@ -0,0 +1,5 @@
+INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
+#PJRT_DEVICE=TPU
+DEVICE=cuda
+#DEVICE=cpu
+#DEVICE=tpux
diff --git a/envs/tpu/v3-0/env.txt b/envs/tpu/v3-0/env.txt
new file mode 100644
index 0000000..c0c3c07
--- /dev/null
+++ b/envs/tpu/v3-0/env.txt
@@ -0,0 +1,4 @@
+INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
+#PJRT_DEVICE=TPU
+#DEVICE=xla
+DEVICE=cpu
\ No newline at end of file
diff --git a/etc/petals-inference.service b/etc/petals-inference.service
new file mode 100644
index 0000000..ae08b6f
--- /dev/null
+++ b/etc/petals-inference.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=Petals Inference
+
+[Service]
+User=petals
+Group=petals
+Environment=PJRT_DEVICE=TPU
+ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4
+
+[Install]
+WantedBy=multi-user.target
diff --git a/examples/prompt-tuning-personachat.ipynb b/examples/prompt-tuning-personachat.ipynb
index b9d1bf5..2f3f28a 100644
--- a/examples/prompt-tuning-personachat.ipynb
+++ b/examples/prompt-tuning-personachat.ipynb
@@ -85,10 +85,10 @@
     "# The latter fine-tunes separate prefixes for each transformer block,\n",
     "# so prompt-tuning will take more time but yield better results.\n",
     "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
-    "TUNING_MODE = 'ptune'\n",
+    "TUNING_MODE = \"ptune\"\n",
     "\n",
     "NUM_PREFIX_TOKENS = 16\n",
-    "DEVICE = 'cuda'\n",
+    "DEVICE = \"cuda\"\n",
     "BATCH_SIZE = 8\n",
     "LR = 1e-2\n",
     "WEIGHT_DECAY = 0.0\n",
@@ -113,12 +113,10 @@
    "outputs": [],
    "source": [
     "tokenizer = BloomTokenizerFast.from_pretrained(MODEL_NAME)\n",
-    "tokenizer.padding_side = 'right'\n",
+    "tokenizer.padding_side = \"right\"\n",
     "tokenizer.model_max_length = MODEL_MAX_LENGTH\n",
     "model = DistributedBloomForCausalLM.from_pretrained(\n",
-    "    MODEL_NAME,\n",
-    "    pre_seq_len=NUM_PREFIX_TOKENS, \n",
-    "    tuning_mode=TUNING_MODE\n",
+    "    MODEL_NAME, pre_seq_len=NUM_PREFIX_TOKENS, tuning_mode=TUNING_MODE\n",
     ").to(DEVICE)"
    ]
   },
@@ -150,17 +148,13 @@
     "\n",
     "\n",
     "def tokenize(examples):\n",
-    "    outputs = {\n",
-    "        \"input_ids\": tokenizer(examples[\"chunks\"], padding='max_length', truncation=True)[\"input_ids\"]\n",
-    "    }\n",
+    "    outputs = {\"input_ids\": tokenizer(examples[\"chunks\"], padding=\"max_length\", truncation=True)[\"input_ids\"]}\n",
     "    outputs[\"labels\"] = outputs[\"input_ids\"]\n",
     "    return outputs\n",
     "\n",
     "\n",
-    "tokenized_datasets = (\n",
-    "    dataset\n",
-    "    .map(chunking, batched=True, remove_columns=dataset[\"train\"].column_names)\n",
-    "    .map(tokenize, batched=True, remove_columns=[\"chunks\"])\n",
+    "tokenized_datasets = dataset.map(chunking, batched=True, remove_columns=dataset[\"train\"].column_names).map(\n",
+    "    tokenize, batched=True, remove_columns=[\"chunks\"]\n",
     ")\n",
     "\n",
     "\n",
@@ -241,7 +235,7 @@
     "        \"num_prefix_tokens\": NUM_PREFIX_TOKENS,\n",
     "        \"model_name\": MODEL_NAME,\n",
     "        \"seed\": SEED,\n",
-    "    }\n",
+    "    },\n",
     ")\n",
     "\n",
     "for batch in tqdm(train_dataloader):\n",
@@ -285,7 +279,7 @@
     "    user_phrase = input()\n",
     "    if len(user_phrase) == 0:\n",
     "        break\n",
-    "    inputs = tokenizer([f\"{user_phrase}\\n-----\\n\"], return_tensors='pt')['input_ids'].to(DEVICE)\n",
+    "    inputs = tokenizer([f\"{user_phrase}\\n-----\\n\"], return_tensors=\"pt\")[\"input_ids\"].to(DEVICE)\n",
     "    while True:\n",
     "        outputs = model.generate(\n",
     "            inputs,\n",
diff --git a/examples/prompt-tuning-sst2.ipynb b/examples/prompt-tuning-sst2.ipynb
index b6f2d8a..d4f4f64 100644
--- a/examples/prompt-tuning-sst2.ipynb
+++ b/examples/prompt-tuning-sst2.ipynb
@@ -98,10 +98,10 @@
     "# The latter fine-tunes separate prefixes for each transformer block,\n",
     "# so prompt-tuning will take more time but yield better results.\n",
     "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
-    "TUNING_MODE = 'ptune'\n",
+    "TUNING_MODE = \"ptune\"\n",
     "\n",
     "NUM_PREFIX_TOKENS = 8\n",
-    "DEVICE = 'cuda'\n",
+    "DEVICE = \"cuda\"\n",
     "BATCH_SIZE = 32\n",
     "LR = 1e-2\n",
     "WEIGHT_DECAY = 0.0\n",
@@ -130,14 +130,16 @@
    "outputs": [],
    "source": [
     "tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)\n",
-    "tokenizer.padding_side = 'right'\n",
+    "tokenizer.padding_side = \"right\"\n",
     "tokenizer.model_max_length = MODEL_MAX_LENGTH\n",
     "tokenizer.pad_token = tokenizer.unk_token\n",
-    "model = DistributedLlamaForSequenceClassification.from_pretrained(\n",
-    "    MODEL_NAME,\n",
-    "    pre_seq_len=NUM_PREFIX_TOKENS,\n",
-    "    tuning_mode=TUNING_MODE\n",
-    ").float().to(DEVICE)\n",
+    "model = (\n",
+    "    DistributedLlamaForSequenceClassification.from_pretrained(\n",
+    "        MODEL_NAME, pre_seq_len=NUM_PREFIX_TOKENS, tuning_mode=TUNING_MODE\n",
+    "    )\n",
+    "    .float()\n",
+    "    .to(DEVICE)\n",
+    ")\n",
     "model.config.pad_token_id = tokenizer.pad_token_id"
    ]
   },
@@ -160,12 +162,14 @@
    },
    "outputs": [],
    "source": [
-    "task = 'sst2'\n",
+    "task = \"sst2\"\n",
     "\n",
     "dataset = load_dataset(\"glue\", task)\n",
     "\n",
+    "\n",
     "def preprocess_function(examples):\n",
-    "    return tokenizer(examples[\"sentence\"], padding='max_length', truncation=True, return_token_type_ids=False)\n",
+    "    return tokenizer(examples[\"sentence\"], padding=\"max_length\", truncation=True, return_token_type_ids=False)\n",
+    "\n",
     "\n",
     "tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
     "tokenized_datasets = tokenized_datasets.remove_columns([\"sentence\", \"idx\", \"attention_mask\"])\n",
@@ -198,9 +202,10 @@
    },
    "outputs": [],
    "source": [
-    "metric = load_metric('glue', task)\n",
+    "metric = load_metric(\"glue\", task)\n",
     "\n",
-    "def eval_metrics(model, dataloader, device='cpu'):\n",
+    "\n",
+    "def eval_metrics(model, dataloader, device=\"cpu\"):\n",
     "    model.eval()\n",
     "    for batch in dataloader:\n",
     "        batch = {k: v.to(device) for k, v in batch.items()}\n",
@@ -294,7 +299,7 @@
     "        \"num_prefix_tokens\": NUM_PREFIX_TOKENS,\n",
     "        \"model_name\": MODEL_NAME,\n",
     "        \"seed\": SEED,\n",
-    "    }\n",
+    "    },\n",
     ")\n",
     "\n",
     "scaler = torch.cuda.amp.GradScaler()\n",
@@ -305,7 +310,7 @@
     "    batch = {k: v.to(DEVICE) for k, v in batch.items()}\n",
     "\n",
     "    with torch.autocast(device_type=DEVICE, dtype=torch.float16):\n",
-    "        outputs = model(**batch)\n",
+    "        outputs = model(**batch)\n",
     "    loss = outputs.loss\n",
     "    scaler.scale(loss).backward()\n",
     "\n",
diff --git a/get_peersl.sh b/get_peersl.sh
new file mode 100644
index 0000000..5dcd3ab
--- /dev/null
+++ b/get_peersl.sh
@@ -0,0 +1 @@
+docker logs petals-backbone-1 2>&1 |grep initial_peers |cut "-d " -f18- | sort -u > peers.txt
diff --git a/health.env b/health.env
new file mode 100644
index 0000000..20e15a4
--- /dev/null
+++ b/health.env
@@ -0,0 +1,3 @@
+INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
+PJRT_DEVICE=TPU
+DEVICE=xla
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index cfc991c..6f1475b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.black]
 line-length = 120
-required-version = "22.3.0"
+required-version = "24.3.0"
 
 [tool.isort]
 profile = "black"
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..15bff43
--- /dev/null
+++ b/run.sh
@@ -0,0 +1 @@
+PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10
diff --git a/run2.sh b/run2.sh
new file mode 100644
index 0000000..23c984d
--- /dev/null
+++ b/run2.sh
@@ -0,0 +1 @@
+docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id
diff --git a/src/petals/__init__.py b/src/petals/__init__.py
index f513f65..52e5af1 100644
--- a/src/petals/__init__.py
+++ b/src/petals/__init__.py
@@ -17,13 +17,13 @@ from petals.models import *
 from petals.utils import *
 from petals.utils.logging import initialize_logs as _initialize_logs
 
-__version__ = "2.2.0"
+__version__ = "2.3.0.dev2"
 
 
-if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
-    assert (
-        version.parse("4.32.0") <= version.parse(transformers.__version__) < version.parse("5.0.0")
-    ), "Please install a proper transformers version: pip install transformers>=4.32.0,<5.0.0"
+#if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
+#    assert (
+#        version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
+#    ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
 
 
 def _override_bfloat16_mode_default():