GitHub Actions and Docker Compose
update update run update adding steps working as root run update update Add ARM Dockerfile Temporarily require peft<0.5.0, transformers<4.32.0 (#470) Peft 0.5 recently released and broke some compatibilities. This PR temporarily requires petals to use the previous stable version of peft while we work on 0.5.0 support. run update adding steps working as root petals inference moving to my org first docker compose move to our org format adding hive mind back into the setup reformat ipynb now to test main version using cpu now working. The health server now needs to wait for the others to come up but otherwise it is working. adding chat health adding new test run update Update run-tests-docker.yaml tpu starting running locally in cpu mode, now we have the basic directory structure for an env, still need to tag the items properly. versionspull/567/head
parent
0fda7da816
commit
1222e172ef
@ -0,0 +1,36 @@
|
||||
# CI workflow: bring up the docker-compose stack on a self-hosted runner,
# sanity-check the containers and the health service logs, then tear down.
name: Tests in docker compose

on:
  push:
    branches: [ main ]
  pull_request:

jobs:
  run-tests-in-compose:
    # runs-on: ubuntu-latest
    runs-on: self-hosted
    timeout-minutes: 20
    steps:
      - name: Increase swap space
        # NOTE(review): no `matrix` is defined for this job, so this
        # condition is always false and the step never runs — either
        # define a matrix with an `os` key or drop the condition.
        if: ${{ matrix.os == 'ubuntu' }}
        uses: meta-introspector/set-swap-space@main
        with:
          swap-size-gb: 10

      - name: Checkout
        uses: meta-introspector/checkout@main

      - name: Build the docker-compose stack
        run: docker-compose -f docker-compose.yml up -d

      - name: Check running containers
        run: docker ps -a

      # Expects the `health` container from docker-compose.yml to be up.
      - name: Check logs
        run: docker logs health

      # Renamed: this step previously duplicated the name
      # "Build the docker-compose stack", which made run logs ambiguous.
      - name: Tear down the docker-compose stack
        run: docker-compose down
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,111 @@
|
||||
#version: "3"

services:

  # Health dashboard for the petals swarm, exposed on host port 8100.
  health:
    restart: always
    depends_on:
      - backbone
    image: h4ckermike/health.petals:main
    ports:
      - "8100:5000"
    env_file: health.env
    command: flask run --host=0.0.0.0 --port=5000

  # Fixed: key was written "inference :" — the space before the colon is
  # rejected by yamllint and mis-parsed by some tools.
  inference:
    restart: always
    depends_on:
      - backbone
    image: h4ckermike/inference.petals:main
    ports:
      - "8000:5000"
    env_file: health.env
    command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000

  # TinyLLama server; INITIAL_PEERS and DEVICE come from the environment
  # of the `docker-compose` invocation (no env_file on this service).
  tinyllamacpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
    ports:
      - "31331:31331"
    # NOTE(review): this is the CPU variant, yet it reserved an NVIDIA GPU
    # (apparently copy-pasted from tinyllamagpu). Commented out to match
    # the TPU service below; restore if a GPU really is needed here.
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]
    restart: always

  tinyllamagpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    ports:
      - "31332:31332"
    command: python -m petals.cli.run_server --port 31332 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: always

  tinyllamatpu:
    image: h4ckermike/petals:main
    depends_on:
      - backbone
    ports:
      - "31333:31333"
    command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE

    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]
    restart: always

  # beluga:
  #   image: h4ckermike/petals:main
  #   depends_on:
  #     - backbone
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   ports:
  #     - "31330:31330"
  #   restart: always

  # DHT bootstrap node that every other service depends on; persists its
  # identity in the shared volume so the peer ID is stable across restarts.
  backbone:
    image: h4ckermike/petals:main
    command: python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path /cache/bootstrap1.id
    volumes:
      - petals-cache-backbone:/cache
    network_mode: host
    ipc: host
    restart: unless-stopped
    env_file: health.env

  # # Debug target
  # debug_health:
  #   #environment:
  #   env_file: health.env
  #   image: h4ckermike/health.petals:main
  #   command: bash
  #   stdin_open: true
  #   tty: true

volumes:
  petals-cache-backbone:
|
@ -0,0 +1,2 @@
|
||||
# Bootstrap peer multiaddr; 172.17.0.1 — presumably the default docker0
# bridge gateway — TODO confirm it matches the backbone's listen address.
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
# Run petals servers on CPU.
DEVICE=cpu
|
@ -0,0 +1,5 @@
|
||||
# Bootstrap peer multiaddr (docker bridge gateway address).
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
# Alternative device settings — exactly one DEVICE line should be active.
#PJRT_DEVICE=TPU
DEVICE=cuda
#DEVICE=cpu
# NOTE(review): "tpux" looks like a typo — the TPU configs elsewhere in
# this change use DEVICE=xla with PJRT_DEVICE=TPU.
#DEVICE=tpux
|
@ -0,0 +1,4 @@
|
||||
# Bootstrap peer multiaddr; 10.164.0.22 — presumably an internal network
# address of the backbone host — TODO confirm.
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
# TPU/XLA settings kept for reference; this config runs on CPU.
#PJRT_DEVICE=TPU
#DEVICE=xla
DEVICE=cpu
|
@ -0,0 +1,11 @@
|
||||
# systemd unit: run a Petals inference server for StableBeluga2 on the
# XLA device, serving 4 blocks on port 31330.
[Unit]
Description=Petals Inference

[Service]
# Run as the dedicated unprivileged petals user/group.
User=petals
Group=petals
# PJRT_DEVICE=TPU selects the TPU backend for the XLA device below.
Environment=PJRT_DEVICE=TPU
ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4

[Install]
WantedBy=multi-user.target
|
@ -0,0 +1 @@
|
||||
# Collect the unique initial_peers lines from the backbone container's
# logs (stderr included) and save the multiaddr portion to peers.txt.
docker logs petals-backbone-1 2>&1 | grep initial_peers | cut -d ' ' -f 18- | sort -u > peers.txt
|
@ -0,0 +1,3 @@
|
||||
# Bootstrap peer multiaddr on the internal network.
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
# TPU backend selection for the XLA device.
PJRT_DEVICE=TPU
DEVICE=xla
|
@ -0,0 +1 @@
|
||||
# Launch a Petals server for StableBeluga2 on the XLA device (TPU via
# PJRT), serving 10 blocks on port 31330.
PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10
|
@ -0,0 +1 @@
|
||||
# Start the DHT bootstrap node detached with host networking/IPC, caching
# state in the petals-cache-backbone volume.
# NOTE(review): identity_path here is the relative "bootstrap1.id", while
# the compose file uses "/cache/bootstrap1.id" — confirm these refer to
# the same identity file, otherwise the peer ID will differ.
docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id
|
Loading…
Reference in New Issue