GitHub Actions and Docker Compose

update

update

run

update

adding steps

working as root

run

update

update

Add ARM Dockerfile

Temporarily require peft<0.5.0, transformers<4.32.0 (#470)

Peft 0.5 was recently released and broke some compatibilities. This PR temporarily requires Petals to use the previous stable version of peft while we work on 0.5.0 support.
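For reference, the pin described above can be reproduced locally with a plain pip constraint (a sketch based only on the versions named in this commit, not the exact setup.cfg change):

# stay on the last peft release before 0.5 and a transformers release before 4.32
pip install "peft<0.5.0" "transformers<4.32.0"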

run

update

adding steps

working as root

petals inference

moving to my org

first docker compose

move to our org

format

adding hivemind back into the setup

reformat ipynb

now to test main version

using cpu

now working.

The health server now needs to wait for the other services to come up, but
otherwise it is working.
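A minimal sketch of one way to do that wait, assuming the health container only needs the backbone DHT port from health.env (172.17.0.1:8099) to be reachable and that nc exists in the image; the actual image may handle startup ordering differently:

#!/bin/sh
# wait for the backbone DHT to accept connections, then start the health dashboard
until nc -z 172.17.0.1 8099; do
  echo "waiting for backbone..."
  sleep 2
done
exec flask run --host=0.0.0.0 --port=5000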

adding chat

health

adding new test

run

update

Update run-tests-docker.yaml

tpu starting

running locally in cpu mode.

now we have the basic directory structure for an env, still need to
tag the items properly.

versions
pull/567/head, committed by mike dupont (Ubuntu), parent 0fda7da816, commit 1222e172ef

@@ -9,18 +9,19 @@ jobs:
   black:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: psf/black@stable
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/black@main
         with:
           options: "--check --diff"
           version: "22.3.0"
   isort:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v3
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/setup-python@main
         with:
           python-version: 3.8
-      - uses: isort/isort-action@master
+      - uses: meta-introspector/isort-action@main
         with:
           isortVersion: "5.10.1"

@@ -14,15 +14,15 @@ jobs:
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main
       - name: Docker meta
         id: meta
-        uses: crazy-max/ghaction-docker-meta@v2
+        uses: meta-introspector/metadata-action@main
         with:
           # list of Docker images to use as base name for tags
           images: |
-            learningathome/petals
+            h4ckermike/petals
           # generate Docker tags based on the following events/attributes
           tags: |
             type=ref,event=branch
@@ -33,18 +33,29 @@ jobs:
       - name: Set up Docker Buildx
         id: buildx
-        uses: docker/setup-buildx-action@v1
+        uses: meta-introspector/setup-buildx-action@main
       - name: Login to Docker Hub
         if: github.event_name != 'pull_request'
-        uses: docker/login-action@v1
+        uses: meta-introspector/login-action@main
         with:
           username: ${{ secrets.DOCKER_HUB_USERNAME }}
           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
+      - name: Free disk space on Ubuntu runner
+        uses: meta-introspector/free-disk-space@main
+        with:
+          # found in: https://github.com/docker/build-push-action/issues/968
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
       - name: Build and push
         id: docker_build
-        uses: docker/build-push-action@v2
+        uses: meta-introspector/build-push-action@main
         with:
           context: .
           push: ${{ github.event_name != 'pull_request' }}

@@ -0,0 +1,36 @@
name: Tests in docker compose

on:
  push:
    branches: [ main ]
  pull_request:

jobs:
  run-tests-in-compose:
    # runs-on: ubuntu-latest
    runs-on: self-hosted
    timeout-minutes: 20
    steps:
      - name: Increase swap space
        if: ${{ matrix.os == 'ubuntu' }}
        uses: meta-introspector/set-swap-space@main
        with:
          swap-size-gb: 10
      - name: Checkout
        uses: meta-introspector/checkout@main
      - name: Build the docker-compose stack
        run: docker-compose -f docker-compose.yml up -d
      - name: Check running containers
        run: docker ps -a
      - name: Check logs
        run: docker logs health
      - name: Tear down the docker-compose stack
        run: docker-compose down

@@ -24,17 +24,17 @@ jobs:
     steps:
       - name: Increase swap space
         if: ${{ matrix.os == 'ubuntu' }}
-        uses: pierotofy/set-swap-space@master
+        uses: meta-introspector/set-swap-space@main
         with:
           swap-size-gb: 10
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main
       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: meta-introspector/setup-python@main
         with:
           python-version: ${{ matrix.python-version }}
       - name: Cache dependencies
-        uses: actions/cache@v3
+        uses: meta-introspector/cache@main
         with:
           path: ~/.cache/pip
           key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}

@@ -229,3 +229,25 @@ _arXiv preprint arXiv:2209.01188,_ 2022.
<p align="center">
    <img src="https://petals.dev/bigscience.png" width="150">
</p>

# setup

 1623  sudo cp petals-inference.service /etc/systemd/system/
 1634  sudo systemctl daemon-reload
 1635  sudo systemctl status petals-inference.service -l
 1636  sudo systemctl restart petals-inference.service -l
 1639  sudo useradd petals
 1640  sudo mkdir /home/petals
 1641  sudo chown petals: /home/petals/
 1643  sudo cp -r ~/.venv/ /home/petals/venv
 1644  sudo rm -rf /home/petals/venv
 1658  sudo mv ~/.venv/ /home/petals/venv
 1659  sudo chown petals: /home/petals/
 1670  sudo systemctl status petals-inference.service -l
 1674  sudo systemctl restart petals-inference.service -l
 1675  sudo systemctl status petals-inference.service -l
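Condensed, the history above amounts to roughly the following sequence (a sketch assembled from those commands; the -R on chown is the only addition, the venv path and unit name come straight from the history):

# create the service account and give it the virtualenv
sudo useradd petals
sudo mkdir /home/petals
sudo mv ~/.venv/ /home/petals/venv
sudo chown -R petals: /home/petals/

# install and (re)start the systemd unit
sudo cp petals-inference.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl restart petals-inference.service
sudo systemctl status petals-inference.service -l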

@@ -0,0 +1,111 @@
#version: "3"
services:
  health:
    restart: always
    depends_on:
      - backbone
    image: h4ckermike/health.petals:main
    ports:
      - "8100:5000"
    env_file: health.env
    command: flask run --host=0.0.0.0 --port=5000

  inference:
    restart: always
    depends_on:
      - backbone
    image: h4ckermike/inference.petals:main
    ports:
      - "8000:5000"
    env_file: health.env
    command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000

  tinyllamacpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
    ports:
      - "31331:31331"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: always

  tinyllamagpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    ports:
      - "31332:31332"
    command: python -m petals.cli.run_server --port 31332 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: always

  tinyllamatpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    ports:
      - "31333:31333"
    command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]
    restart: always

  # beluga:
  #   image: h4ckermike/petals:main
  #   depends_on:
  #     - backbone
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   ports:
  #     - "31330:31330"
  #   restart: always

  backbone:
    image: h4ckermike/petals:main
    command: python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path /cache/bootstrap1.id
    volumes:
      - petals-cache-backbone:/cache
    network_mode: host
    ipc: host
    restart: unless-stopped
    env_file: health.env

  # # DEbug target
  # debug_health:
  #   #environment:
  #   env_file: health.env
  #   image: h4ckermike/health.petals:main
  #   command: bash
  #   stdin_open: true
  #   tty: true

volumes:
  petals-cache-backbone:
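A typical local run of this stack looks like the following (a sketch: docker-compose expands $INITIAL_PEERS and $DEVICE from the calling shell when it parses the command: lines, so they are exported here with the values from health.env):

export INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
export DEVICE=cpu
docker-compose up -d     # backbone, tinyllama servers, inference and health dashboard
docker ps -a             # same checks the CI workflow runs
docker logs health       # dashboard is published on host port 8100
docker-compose down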

@@ -0,0 +1,2 @@
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
DEVICE=cpu

@@ -0,0 +1,5 @@
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
#PJRT_DEVICE=TPU
DEVICE=cuda
#DEVICE=cpu
#DEVICE=tpux

@@ -0,0 +1,4 @@
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
#PJRT_DEVICE=TPU
#DEVICE=xla
DEVICE=cpu

@@ -0,0 +1,11 @@
[Unit]
Description=Petals Inference

[Service]
User=petals
Group=petals
Environment=PJRT_DEVICE=TPU
ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4

[Install]
WantedBy=multi-user.target
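To keep the unit running across reboots it can also be enabled, not just restarted; this is a small addition beyond the history in the setup section, not part of the commit:

sudo systemctl enable --now petals-inference.service
journalctl -u petals-inference.service -f    # follow the server logs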

@@ -85,10 +85,10 @@
 "# The latter fine-tunes separate prefixes for each transformer block,\n",
 "# so prompt-tuning will take more time but yield better results.\n",
 "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
-"TUNING_MODE = 'ptune'\n",
+"TUNING_MODE = \"ptune\"\n",
 "\n",
 "NUM_PREFIX_TOKENS = 16\n",
-"DEVICE = 'cuda'\n",
+"DEVICE = \"cuda\"\n",
 "BATCH_SIZE = 8\n",
 "LR = 1e-2\n",
 "WEIGHT_DECAY = 0.0\n",
@@ -113,12 +113,10 @@
 "outputs": [],
 "source": [
 "tokenizer = BloomTokenizerFast.from_pretrained(MODEL_NAME)\n",
-"tokenizer.padding_side = 'right'\n",
+"tokenizer.padding_side = \"right\"\n",
 "tokenizer.model_max_length = MODEL_MAX_LENGTH\n",
 "model = DistributedBloomForCausalLM.from_pretrained(\n",
-"    MODEL_NAME,\n",
-"    pre_seq_len=NUM_PREFIX_TOKENS, \n",
-"    tuning_mode=TUNING_MODE\n",
+"    MODEL_NAME, pre_seq_len=NUM_PREFIX_TOKENS, tuning_mode=TUNING_MODE\n",
 ").to(DEVICE)"
 ]
 },
@@ -150,17 +148,13 @@
 "\n",
 "\n",
 "def tokenize(examples):\n",
-"    outputs = {\n",
-"        \"input_ids\": tokenizer(examples[\"chunks\"], padding='max_length', truncation=True)[\"input_ids\"]\n",
-"    }\n",
+"    outputs = {\"input_ids\": tokenizer(examples[\"chunks\"], padding=\"max_length\", truncation=True)[\"input_ids\"]}\n",
 "    outputs[\"labels\"] = outputs[\"input_ids\"]\n",
 "    return outputs\n",
 "\n",
 "\n",
-"tokenized_datasets = (\n",
-"    dataset\n",
-"    .map(chunking, batched=True, remove_columns=dataset[\"train\"].column_names)\n",
-"    .map(tokenize, batched=True, remove_columns=[\"chunks\"])\n",
+"tokenized_datasets = dataset.map(chunking, batched=True, remove_columns=dataset[\"train\"].column_names).map(\n",
+"    tokenize, batched=True, remove_columns=[\"chunks\"]\n",
 ")\n",
 "\n",
 "\n",
@@ -241,7 +235,7 @@
 "        \"num_prefix_tokens\": NUM_PREFIX_TOKENS,\n",
 "        \"model_name\": MODEL_NAME,\n",
 "        \"seed\": SEED,\n",
-"    }\n",
+"    },\n",
 ")\n",
 "\n",
 "for batch in tqdm(train_dataloader):\n",
@@ -285,7 +279,7 @@
 "    user_phrase = input()\n",
 "    if len(user_phrase) == 0:\n",
 "        break\n",
-"    inputs = tokenizer([f\"{user_phrase}\\n-----\\n\"], return_tensors='pt')['input_ids'].to(DEVICE)\n",
+"    inputs = tokenizer([f\"{user_phrase}\\n-----\\n\"], return_tensors=\"pt\")[\"input_ids\"].to(DEVICE)\n",
 "    while True:\n",
 "        outputs = model.generate(\n",
 "            inputs,\n",

@@ -98,10 +98,10 @@
 "# The latter fine-tunes separate prefixes for each transformer block,\n",
 "# so prompt-tuning will take more time but yield better results.\n",
 "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
-"TUNING_MODE = 'ptune'\n",
+"TUNING_MODE = \"ptune\"\n",
 "\n",
 "NUM_PREFIX_TOKENS = 8\n",
-"DEVICE = 'cuda'\n",
+"DEVICE = \"cuda\"\n",
 "BATCH_SIZE = 32\n",
 "LR = 1e-2\n",
 "WEIGHT_DECAY = 0.0\n",
@@ -130,14 +130,16 @@
 "outputs": [],
 "source": [
 "tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)\n",
-"tokenizer.padding_side = 'right'\n",
+"tokenizer.padding_side = \"right\"\n",
 "tokenizer.model_max_length = MODEL_MAX_LENGTH\n",
 "tokenizer.pad_token = tokenizer.unk_token\n",
-"model = DistributedLlamaForSequenceClassification.from_pretrained(\n",
-"    MODEL_NAME,\n",
-"    pre_seq_len=NUM_PREFIX_TOKENS,\n",
-"    tuning_mode=TUNING_MODE\n",
-").float().to(DEVICE)\n",
+"model = (\n",
+"    DistributedLlamaForSequenceClassification.from_pretrained(\n",
+"        MODEL_NAME, pre_seq_len=NUM_PREFIX_TOKENS, tuning_mode=TUNING_MODE\n",
+"    )\n",
+"    .float()\n",
+"    .to(DEVICE)\n",
+")\n",
 "model.config.pad_token_id = tokenizer.pad_token_id"
 ]
 },
@@ -160,12 +162,14 @@
 },
 "outputs": [],
 "source": [
-"task = 'sst2'\n",
+"task = \"sst2\"\n",
 "\n",
 "dataset = load_dataset(\"glue\", task)\n",
 "\n",
+"\n",
 "def preprocess_function(examples):\n",
-"    return tokenizer(examples[\"sentence\"], padding='max_length', truncation=True, return_token_type_ids=False)\n",
+"    return tokenizer(examples[\"sentence\"], padding=\"max_length\", truncation=True, return_token_type_ids=False)\n",
+"\n",
 "\n",
 "tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
 "tokenized_datasets = tokenized_datasets.remove_columns([\"sentence\", \"idx\", \"attention_mask\"])\n",
@@ -198,9 +202,10 @@
 },
 "outputs": [],
 "source": [
-"metric = load_metric('glue', task)\n",
+"metric = load_metric(\"glue\", task)\n",
 "\n",
-"def eval_metrics(model, dataloader, device='cpu'):\n",
+"\n",
+"def eval_metrics(model, dataloader, device=\"cpu\"):\n",
 "    model.eval()\n",
 "    for batch in dataloader:\n",
 "        batch = {k: v.to(device) for k, v in batch.items()}\n",
@@ -294,7 +299,7 @@
 "        \"num_prefix_tokens\": NUM_PREFIX_TOKENS,\n",
 "        \"model_name\": MODEL_NAME,\n",
 "        \"seed\": SEED,\n",
-"    }\n",
+"    },\n",
 ")\n",
 "\n",
 "scaler = torch.cuda.amp.GradScaler()\n",
@@ -305,7 +310,7 @@
 "    batch = {k: v.to(DEVICE) for k, v in batch.items()}\n",
 "\n",
 "    with torch.autocast(device_type=DEVICE, dtype=torch.float16):\n",
-"         outputs = model(**batch)\n",
+"        outputs = model(**batch)\n",
 "    loss = outputs.loss\n",
 "    scaler.scale(loss).backward()\n",
 "\n",

@@ -0,0 +1 @@
docker logs petals-backbone-1 2>&1 |grep initial_peers |cut "-d " -f18- | sort -u > peers.txt
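A sketch of how the captured address can be fed back into the env file used by the compose stack (assumes peers.txt holds one multiaddr per line, as produced by the command above):

# take the first advertised multiaddr and rewrite health.env with it
PEER=$(head -n1 peers.txt)
printf 'INITIAL_PEERS=%s\nDEVICE=cpu\n' "$PEER" > health.env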

@@ -0,0 +1,3 @@
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
PJRT_DEVICE=TPU
DEVICE=xla

@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 [tool.black]
 line-length = 120
-required-version = "22.3.0"
+required-version = "24.3.0"

 [tool.isort]
 profile = "black"

@@ -0,0 +1 @@
PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10

@@ -0,0 +1 @@
docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id

@@ -17,13 +17,13 @@ from petals.models import *
 from petals.utils import *
 from petals.utils.logging import initialize_logs as _initialize_logs

-__version__ = "2.2.0"
+__version__ = "2.3.0.dev2"

-if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
-    assert (
-        version.parse("4.32.0") <= version.parse(transformers.__version__) < version.parse("5.0.0")
-    ), "Please install a proper transformers version: pip install transformers>=4.32.0,<5.0.0"
+#if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
+#    assert (
+#        version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
+#    ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"

 def _override_bfloat16_mode_default():
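Instead of commenting the check out, the original guard can also be bypassed per run through the environment variable it already reads (a sketch reusing the TPU command from above):

PETALS_IGNORE_DEPENDENCY_VERSION=1 PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10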
