infrastructure

pip freeze

github actions and docker compose

update

update

run

update

adding steps

working as root

run

update

update

Add ARM Dockerfile

Temporarily require peft<0.5.0, transformers<4.32.0 (#470)

Peft 0.5 was recently released and broke some compatibilities. This PR temporarily pins Petals to the previous stable version of peft while we work on 0.5.0 support.
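A minimal sketch of the temporary pin from the title, as a plain pip command (the exact lower bounds stay whatever setup.cfg already allows):

pip install "peft<0.5.0" "transformers<4.32.0"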

run

update

adding steps

working as root

petals inference

moving to my org

first docker compose

move to our org

format

adding hivemind back into the setup

reformat ipynb

now to test main version

using cpu

now working.

The health server now needs to wait for the others to come up but
otherwise it is working.

adding chat

health

adding new test

run

update

Update run-tests-docker.yaml

tpu starting

running locally in cpu mode

now we have the basic directory structure for an env, still need to
tag the items properly.

versions

building locally, same problem

new env

pip

now loading

update

inference

adding path

.env

now the .env file is passed in explicitly. This must be wrong
somewhere.

Usage: `sudo docker compose --profile core --env-file ./.env up`
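For reference, a minimal ./.env for that command, reusing the CPU values from the env files added later in this commit (adjust INITIAL_PEERS to your own swarm):

INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
DEVICE=cpu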
pull/579/head
j mike dupont (author), committed by mike dupont
parent 0ca54a5e76
commit c76e447ac7

@@ -9,18 +9,19 @@ jobs:
black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: psf/black@stable
- uses: meta-introspector/checkout@main
- uses: meta-introspector/black@main
with:
options: "--check --diff"
version: "22.3.0"
isort:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: meta-introspector/checkout@main
- uses: meta-introspector/setup-python@main
with:
python-version: 3.8
- uses: isort/isort-action@master
- uses: meta-introspector/isort-action@main
with:
isortVersion: "5.10.1"

@@ -14,15 +14,15 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
uses: meta-introspector/checkout@main
- name: Docker meta
id: meta
uses: crazy-max/ghaction-docker-meta@v2
uses: meta-introspector/metadata-action@main
with:
# list of Docker images to use as base name for tags
images: |
learningathome/petals
h4ckermike/petals
# generate Docker tags based on the following events/attributes
tags: |
type=ref,event=branch
@@ -33,17 +33,17 @@ jobs:
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
uses: meta-introspector/setup-buildx-action@main
- name: Login to Docker Hub
if: github.event_name != 'pull_request'
uses: docker/login-action@v1
uses: meta-introspector/login-action@main
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Free disk space on Ubuntu runner
uses: kfir4444/free-disk-space@main
uses: meta-introspector/free-disk-space@main
with:
# found in: https://github.com/docker/build-push-action/issues/968
tool-cache: false
@@ -55,7 +55,7 @@ jobs:
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
uses: meta-introspector/build-push-action@main
with:
context: .
push: ${{ github.event_name != 'pull_request' }}

@@ -0,0 +1,36 @@
name: Tests in docker compose
on:
push:
branches: [ main ]
pull_request:
jobs:
run-tests-in-compose:
# runs-on: ubuntu-latest
runs-on: self-hosted
timeout-minutes: 20
steps:
- name: Increase swap space
if: ${{ matrix.os == 'ubuntu' }}
uses: meta-introspector/set-swap-space@main
with:
swap-size-gb: 10
- name: Checkout
uses: meta-introspector/checkout@main
- name: Build the docker-compose stack
run: docker-compose -f docker-compose.yml up -d
- name: Check running containers
run: docker ps -a
- name: Check logs
run: docker logs health
- name: Tear down the docker-compose stack
run: docker-compose down
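The workflow's steps can be reproduced locally with the same compose v1 CLI it uses (the container name health is an assumption carried over from the workflow):

docker-compose -f docker-compose.yml up -d   # bring the stack up detached
docker ps -a                                 # confirm the containers are running
docker logs health                           # inspect the health service output
docker-compose down                          # tear the stack back down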

@@ -24,17 +24,17 @@ jobs:
steps:
- name: Increase swap space
if: ${{ matrix.os == 'ubuntu' }}
uses: pierotofy/set-swap-space@master
uses: meta-introspector/set-swap-space@main
with:
swap-size-gb: 10
- name: Checkout
uses: actions/checkout@v3
uses: meta-introspector/checkout@main
- name: Set up Python
uses: actions/setup-python@v3
uses: meta-introspector/setup-python@main
with:
python-version: ${{ matrix.python-version }}
- name: Cache dependencies
uses: actions/cache@v3
uses: meta-introspector/cache@main
with:
path: ~/.cache/pip
key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}

@@ -1,5 +1,5 @@
FROM nvcr.io/nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04
LABEL maintainer="bigscience-workshop"
LABEL maintainer="meta-introspector"
LABEL repository="petals"
WORKDIR /home
@@ -23,9 +23,17 @@ RUN conda install python~=3.10.12 pip && \
VOLUME /cache
ENV PETALS_CACHE=/cache
COPY pip.freeze petals/pip.freeze
RUN pip install --no-cache-dir -r petals/pip.freeze
COPY pip.freeze2 petals/pip.freeze2
RUN pip install --no-cache-dir -r petals/pip.freeze2
COPY . petals/
ADD tests petals/tests
ADD LICENSE README.md pyproject.toml setup.cfg petals/
ADD src petals/src
RUN pip install --no-cache-dir -e petals
RUN pip install --no-cache-dir --upgrade transformers==4.34.0
WORKDIR /home/petals/
CMD bash
CMD python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0
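A quick smoke test of the resulting image (the petals-dev tag is a made-up name for illustration; the compose file below builds the same image):

docker build -t petals-dev .
docker run --rm -p 31331:31331 petals-dev   # default CMD starts run_server on Maykeye/TinyLLama-v0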

@@ -144,3 +144,25 @@ _arXiv preprint arXiv:2209.01188,_ 2022.
<p align="center">
<img src="https://petals.dev/bigscience.png" width="150">
</p>
# setup
sudo cp petals-inference.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo useradd petals
sudo mkdir /home/petals
sudo mv ~/.venv/ /home/petals/venv
sudo chown petals: /home/petals/
sudo systemctl restart petals-inference.service -l
sudo systemctl status petals-inference.service -l
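To also start the service at boot (standard systemd usage, not part of the history above):

sudo systemctl enable --now petals-inference.service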

@@ -0,0 +1,116 @@
services:
backbone:
profiles: ["core"]
image: h4ckermike/petals:main
command: python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8008 --identity_path /cache/bootstrap1.id
volumes:
- petals-cache-backbone:/cache
network_mode: host
ipc: host
restart: unless-stopped
env_file: ./.env
health:
profiles: ["core"]
restart: always
depends_on:
- backbone
image: h4ckermike/health.petals:main
ports:
- "8009:5000"
command: flask run --host=0.0.0.0 --port=5000
env_file: ./.env
tinyllama_local_gpu:
profiles: ["local","gpu"]
# image: h4ckermike/petals:main
build: .
depends_on:
- backbone
ports:
- "31336:31336"
command: python -m petals.cli.run_server --port 31336 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
restart: always
env_file: ./.env
tinyllama_local_cpu:
profiles: ["local","cpu"]
build: .
depends_on:
- backbone
command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
ports:
- "31331:31331"
restart: always
env_file: ./.env
tinyllamacpu:
profiles: ["tinyllama","cpu"]
image: h4ckermike/petals:main
depends_on:
- backbone
command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
ports:
- "31331:31331"
restart: always
env_file: ./.env
tinyllamagpu:
profiles: ["core"]
image: h4ckermike/petals:main
depends_on:
- backbone
ports:
- "31332:31332"
command: python -m petals.cli.run_server --port 31332 --num_blocks=100 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
restart: always
env_file: ./.env
tinyllamatpu:
profiles: ["tpu"]
image: h4ckermike/petals:main
depends_on:
- backbone
ports:
- "31333:31333"
command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
restart: always
env_file: ./.env
debug_health:
profiles: ["debug"]
image: h4ckermike/health.petals:main
command: bash
stdin_open: true
tty: true
inference:
profiles: ["core"]
restart: always
depends_on:
- backbone
# image: h4ckermike/inference.petals:main
image: petals-inference-test
ports:
- "8010:5000"
env_file: ./.env
command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000
volumes:
petals-cache-backbone:
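Profiles gate which services start. For example, with the .env files shown below:

sudo docker compose --profile core --env-file ./.env up   # backbone, health, tinyllamagpu, inference
sudo docker compose --profile cpu --env-file ./.env up    # the CPU shards (tinyllama_local_cpu, tinyllamacpu)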

@@ -0,0 +1,2 @@
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
DEVICE=cpu

@@ -0,0 +1,2 @@
INITIAL_PEERS=/ip4/216.81.245.26/tcp/8008/p2p/QmR4PcZvHg414Q2HNEaQZLiu69HD4Vs17hcwDgq8qJdJq2
DEVICE=cuda

@@ -0,0 +1 @@
/ip4/216.81.245.26/tcp/8099/p2p/QmR4PcZvHg414Q2HNEaQZLiu69HD4Vs17hcwDgq8qJdJq2

@@ -0,0 +1,5 @@
INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
#PJRT_DEVICE=TPU
DEVICE=cuda
#DEVICE=cpu
#DEVICE=tpux

@@ -0,0 +1,4 @@
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
#PJRT_DEVICE=TPU
#DEVICE=xla
DEVICE=cpu

@@ -0,0 +1,11 @@
[Unit]
Description=Petals Inference
[Service]
User=petals
Group=petals
Environment=PJRT_DEVICE=TPU
ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4
[Install]
WantedBy=multi-user.target

@@ -0,0 +1 @@
docker logs petals-backbone-1 2>&1 |grep initial_peers |cut "-d " -f18- | sort -u > peers.txt
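Note that the -f18- field index assumes the multiaddr sits at a fixed position in the hivemind log line; if the log format changes, the cut field needs adjusting.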

@@ -0,0 +1,3 @@
INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
PJRT_DEVICE=TPU
DEVICE=xla

@@ -0,0 +1,70 @@
accelerate==0.29.2
async-timeout==4.0.3
base58==2.1.1
bitsandbytes==0.41.1
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
ConfigArgParse==1.7
cpufeature==0.2.1
cryptography==42.0.5
Dijkstar==2.6.0
filelock==3.13.4
fsspec==2024.3.1
grpcio==1.62.1
grpcio-tools==1.62.1
hivemind==1.1.10.post2
#huggingface-hub==0.22.2
humanfriendly==10.0
idna==3.7
Jinja2==3.1.3
MarkupSafe==2.1.5
mpmath==1.3.0
msgpack==1.0.8
multiaddr==0.0.9
netaddr==1.2.1
networkx==3.3
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.1.105
packaging==24.0
peft==0.5.0
prefetch-generator==1.0.3
protobuf==4.25.3
psutil==5.9.8
pycparser==2.22
pydantic==1.10.15
pymultihash==0.8.2
PyYAML==6.0.1
regex==2023.12.25
requests==2.31.0
safetensors==0.4.3
scipy==1.13.0
sentencepiece==0.2.0
six==1.16.0
sortedcontainers==2.4.0
speedtest-cli==2.1.3
sympy==1.12
tensor-parallel==1.0.23
torch==2.2.2
tqdm==4.66.2
#transformers==4.38.2
triton==2.2.0
typing_extensions==4.11.0
urllib3==2.2.1
uvloop==0.19.0
varint==1.0.2

@@ -0,0 +1,3 @@
transformers==4.34.0
tokenizers>=0.14,<0.15
huggingface_hub>=0.16.4,<0.18

@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
[tool.black]
line-length = 120
required-version = "22.3.0"
required-version = "24.3.0"
[tool.isort]
profile = "black"

@@ -0,0 +1 @@
PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10

@@ -0,0 +1 @@
docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id

@@ -20,10 +20,10 @@ from petals.utils.logging import initialize_logs as _initialize_logs
__version__ = "2.3.0.dev2"
if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
assert (
version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
#if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
# assert (
# version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
# ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
def _override_bfloat16_mode_default():
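Rather than commenting the guard out, the same effect is available through the escape hatch the code already checks, e.g.:

PETALS_IGNORE_DEPENDENCY_VERSION=1 python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0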

@@ -0,0 +1 @@
sudo docker compose --profile core --env-file ./envs/gpu/h100.txt up health