infrastructure

pip freeze

github actions and docker compose

update

update

run

update

adding steps

working as root

run

update

update

Add ARM Dockerfile

Temporarily require peft<0.5.0, transformers<4.32.0 (#470)

Peft 0.5 was recently released and broke some compatibilities. This PR temporarily pins Petals to the previous stable version of peft while we work on 0.5.0 support.
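A minimal sketch of the temporary pin from the title, as a plain pip command (the exact lower bounds stay whatever setup.cfg already allows):

pip install "peft<0.5.0" "transformers<4.32.0"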

run

update

adding steps

working as root

petals inference

moving to my org

first docker compose

move to our org

format

adding hivemind back into the setup

reformat ipynb

now to test main version

using cpu

now working.

The health server now needs to wait for the others to come up but
otherwise it is working.

adding chat

health

adding new test

run

update

Update run-tests-docker.yaml

tpu starting

running locally in cpu mode

now we have the basic directory structure for an env, still need to
tag the items properly.

versions

building locally, same problem

new env

pip

now loading

update

inference

adding path

.env

now the .env file is passed in explicitly. This must be wrong
somewhere.

Usage: `sudo docker compose --profile core --env-file ./.env up`
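For reference, a minimal ./.env for that command, reusing the CPU values from the env files added later in this commit (adjust INITIAL_PEERS to your own swarm):

INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
DEVICE=cpu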
pull/579/head
j mike dupont (author), committed by mike dupont
parent 0ca54a5e76
commit c76e447ac7

@@ -9,18 +9,19 @@ jobs:
black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: psf/black@stable
- uses: meta-introspector/checkout@main
- uses: meta-introspector/black@main
with:
options: "--check --diff"
version: "22.3.0"
isort:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: meta-introspector/checkout@main
- uses: meta-introspector/setup-python@main
with:
python-version: 3.8
- uses: isort/isort-action@master
- uses: meta-introspector/isort-action@main
with:
isortVersion: "5.10.1"

@@ -14,15 +14,15 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
uses: meta-introspector/checkout@main
- name: Docker meta
id: meta
uses: crazy-max/ghaction-docker-meta@v2
uses: meta-introspector/metadata-action@main
with:
# list of Docker images to use as base name for tags
images: |
learningathome/petals
h4ckermike/petals
# generate Docker tags based on the following events/attributes
tags: |
type=ref,event=branch
@@ -33,17 +33,17 @@ jobs:
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
uses: meta-introspector/setup-buildx-action@main
- name: Login to Docker Hub
if: github.event_name != 'pull_request'
uses: docker/login-action@v1
uses: meta-introspector/login-action@main
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Free disk space on Ubuntu runner
uses: kfir4444/free-disk-space@main
uses: meta-introspector/free-disk-space@main
with:
# found in: https://github.com/docker/build-push-action/issues/968
tool-cache: false
@@ -55,7 +55,7 @@ jobs:
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
uses: meta-introspector/build-push-action@main
with:
context: .
push: ${{ github.event_name != 'pull_request' }}

@@ -0,0 +1,36 @@
name: Tests in docker compose
on:
push:
branches: [ main ]
pull_request:
jobs:
run-tests-in-compose:
# runs-on: ubuntu-latest
runs-on: self-hosted
timeout-minutes: 20
steps:
- name: Increase swap space
if: ${{ matrix.os == 'ubuntu' }}
uses: meta-introspector/set-swap-space@main
with:
swap-size-gb: 10
- name: Checkout
uses: meta-introspector/checkout@main
- name: Build the docker-compose stack
run: docker-compose -f docker-compose.yml up -d
- name: Check running containers
run: docker ps -a
- name: Check logs
run: docker logs health
- name: Tear down the docker-compose stack
run: docker-compose down
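The workflow's steps can be reproduced locally with the same compose v1 CLI it uses (the container name health is an assumption carried over from the workflow):

docker-compose -f docker-compose.yml up -d   # bring the stack up detached
docker ps -a                                 # confirm the containers are running
docker logs health                           # inspect the health service output
docker-compose down                          # tear the stack back down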

@@ -24,17 +24,17 @@ jobs:
steps:
- name: Increase swap space
if: ${{ matrix.os == 'ubuntu' }}
uses: pierotofy/set-swap-space@master
uses: meta-introspector/set-swap-space@main
with:
swap-size-gb: 10
- name: Checkout
uses: actions/checkout@v3
uses: meta-introspector/checkout@main
- name: Set up Python
uses: actions/setup-python@v3
uses: meta-introspector/setup-python@main
with:
python-version: ${{ matrix.python-version }}
- name: Cache dependencies
uses: actions/cache@v3
uses: meta-introspector/cache@main
with:
path: ~/.cache/pip
key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}

@@ -1,5 +1,5 @@
FROM nvcr.io/nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04
LABEL maintainer="bigscience-workshop"
LABEL maintainer="meta-introspector"
LABEL repository="petals"
WORKDIR /home
@@ -23,9 +23,17 @@ RUN conda install python~=3.10.12 pip && \
VOLUME /cache
ENV PETALS_CACHE=/cache
COPY pip.freeze petals/pip.freeze
RUN pip install --no-cache-dir -r petals/pip.freeze
COPY pip.freeze2 petals/pip.freeze2
RUN pip install --no-cache-dir -r petals/pip.freeze2
COPY . petals/
ADD tests petals/tests
ADD LICENSE README.md pyproject.toml setup.cfg petals/
ADD src petals/src
RUN pip install --no-cache-dir -e petals
RUN pip install --no-cache-dir --upgrade transformers==4.34.0
WORKDIR /home/petals/
CMD bash
CMD python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0
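A quick smoke test of the resulting image (the petals-dev tag is a made-up name for illustration; the compose file below builds the same image):

docker build -t petals-dev .
docker run --rm -p 31331:31331 petals-dev   # default CMD starts run_server on Maykeye/TinyLLama-v0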

@@ -144,3 +144,25 @@ _arXiv preprint arXiv:2209.01188,_ 2022.
<p align="center">
<img src="https://petals.dev/bigscience.png" width="150">
</p>
# setup
sudo cp petals-inference.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo useradd petals
sudo mkdir /home/petals
sudo mv ~/.venv/ /home/petals/venv
sudo chown petals: /home/petals/
sudo systemctl restart petals-inference.service -l
sudo systemctl status petals-inference.service -l
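To also start the service at boot (standard systemd usage, not part of the history above):

sudo systemctl enable --now petals-inference.service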

@@ -0,0 +1,116 @@
services:
backbone:
profiles: ["core"]
image: h4ckermike/petals:main
command: python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8008 --identity_path /cache/bootstrap1.id
volumes:
- petals-cache-backbone:/cache
network_mode: host
ipc: host
restart: unless-stopped
env_file: ./.env
health:
profiles: ["core"]
restart: always
depends_on:
- backbone
image: h4ckermike/health.petals:main
ports:
- "8009:5000"
command: flask run --host=0.0.0.0 --port=5000
env_file: ./.env
tinyllama_local_gpu:
profiles: ["local","gpu"]
# image: h4ckermike/petals:main
build: .
depends_on:
- backbone
ports:
- "31336:31336"
command: python -m petals.cli.run_server --port 31336 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
restart: always
env_file: ./.env
tinyllama_local_cpu:
profiles: ["local","cpu"]
build: .
depends_on:
- backbone
command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
ports:
- "31331:31331"
restart: always
env_file: ./.env
tinyllamacpu:
profiles: ["tinyllama","cpu"]
image: h4ckermike/petals:main
depends_on:
- backbone
command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
ports:
- "31331:31331"
restart: always
env_file: ./.env
tinyllamagpu:
profiles: ["core"]
image: h4ckermike/petals:main
depends_on:
- backbone
ports:
- "31332:31332"
command: python -m petals.cli.run_server --port 31332 --num_blocks=100 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
restart: always
env_file: ./.env
tinyllamatpu:
profiles: ["tpu"]
image: h4ckermike/petals:main
depends_on:
- backbone
ports:
- "31333:31333"
command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
restart: always
env_file: ./.env
debug_health:
profiles: ["debug"]
image: h4ckermike/health.petals:main
command: bash
stdin_open: true
tty: true
inference:
profiles: ["core"]
restart: always
depends_on:
- backbone
# image: h4ckermike/inference.petals:main
image: petals-inference-test
ports:
- "8010:5000"
env_file: ./.env
command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000
volumes:
petals-cache-backbone:
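Profiles gate which services start. For example, with the .env files shown below:

sudo docker compose --profile core --env-file ./.env up   # backbone, health, tinyllamagpu, inference
sudo docker compose --profile cpu --env-file ./.env up    # the CPU shards (tinyllama_local_cpu, tinyllamacpu)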

@@ -0,0 +1,2 @@
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
DEVICE=cpu

@@ -0,0 +1,2 @@
INITIAL_PEERS=/ip4/216.81.245.26/tcp/8008/p2p/QmR4PcZvHg414Q2HNEaQZLiu69HD4Vs17hcwDgq8qJdJq2
DEVICE=cuda

@@ -0,0 +1 @@
/ip4/216.81.245.26/tcp/8099/p2p/QmR4PcZvHg414Q2HNEaQZLiu69HD4Vs17hcwDgq8qJdJq2

@@ -0,0 +1,5 @@
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
#PJRT_DEVICE=TPU
DEVICE=cuda
#DEVICE=cpu
#DEVICE=tpux

@@ -0,0 +1,4 @@
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
#PJRT_DEVICE=TPU
#DEVICE=xla
DEVICE=cpu

@@ -0,0 +1,11 @@
[Unit]
Description=Petals Inference
[Service]
User=petals
Group=petals
Environment=PJRT_DEVICE=TPU
ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4
[Install]
WantedBy=multi-user.target

@@ -0,0 +1 @@
docker logs petals-backbone-1 2>&1 |grep initial_peers |cut "-d " -f18- | sort -u > peers.txt
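Note that the -f18- field index assumes the multiaddr sits at a fixed position in the hivemind log line; if the log format changes, the cut field needs adjusting.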

@@ -0,0 +1,3 @@
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
PJRT_DEVICE=TPU
DEVICE=xla

@@ -0,0 +1,70 @@
accelerate==0.29.2
async-timeout==4.0.3
base58==2.1.1
bitsandbytes==0.41.1
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
ConfigArgParse==1.7
cpufeature==0.2.1
cryptography==42.0.5
Dijkstar==2.6.0
filelock==3.13.4
fsspec==2024.3.1
grpcio==1.62.1
grpcio-tools==1.62.1
hivemind==1.1.10.post2
#huggingface-hub==0.22.2
humanfriendly==10.0
idna==3.7
Jinja2==3.1.3
MarkupSafe==2.1.5
mpmath==1.3.0
msgpack==1.0.8
multiaddr==0.0.9
netaddr==1.2.1
networkx==3.3
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.1.105
packaging==24.0
peft==0.5.0
prefetch-generator==1.0.3
protobuf==4.25.3
psutil==5.9.8
pycparser==2.22
pydantic==1.10.15
pymultihash==0.8.2
PyYAML==6.0.1
regex==2023.12.25
requests==2.31.0
safetensors==0.4.3
scipy==1.13.0
sentencepiece==0.2.0
six==1.16.0
sortedcontainers==2.4.0
speedtest-cli==2.1.3
sympy==1.12
tensor-parallel==1.0.23
torch==2.2.2
tqdm==4.66.2
#transformers==4.38.2
triton==2.2.0
typing_extensions==4.11.0
urllib3==2.2.1
uvloop==0.19.0
varint==1.0.2

@@ -0,0 +1,3 @@
transformers==4.34.0
tokenizers>=0.14,<0.15
huggingface_hub>=0.16.4,<0.18

@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
[tool.black]
line-length = 120
required-version = "22.3.0"
required-version = "24.3.0"
[tool.isort]
profile = "black"

@@ -0,0 +1 @@
PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10

@@ -0,0 +1 @@
docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id

@@ -20,10 +20,10 @@ from petals.utils.logging import initialize_logs as _initialize_logs
__version__ = "2.3.0.dev2"
if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
assert (
version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
#if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
# assert (
# version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
# ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
def _override_bfloat16_mode_default():
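Rather than commenting the guard out, the same effect is available through the escape hatch the code already checks, e.g.:

PETALS_IGNORE_DEPENDENCY_VERSION=1 python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0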

@@ -0,0 +1 @@
sudo docker compose --profile core --env-file ./envs/gpu/h100.txt up health