From c98b5328cbb0b76109ead837b044578a74eabef1 Mon Sep 17 00:00:00 2001 From: mike dupont Date: Wed, 17 Apr 2024 12:35:10 -0400 Subject: [PATCH] docker changes --- Dockerfile | 14 +++++++++----- docker-compose.yml | 37 ++++++++++++++++++++++++++++--------- pip.freeze | 15 +++++++++------ pip2.freeze | 6 ++++++ 4 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 pip2.freeze diff --git a/Dockerfile b/Dockerfile index ac4d033..1fe2adb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,17 +23,21 @@ RUN conda install python~=3.10.12 pip && \ VOLUME /cache ENV PETALS_CACHE=/cache -#COPY pip.freeze petals/pip.freeze -#RUN pip install --no-cache-dir -r petals/pip.freeze -#COPY pip.freeze2 petals/pip.freeze2 -#RUN pip install --no-cache-dir -r petals/pip.freeze2 +ADD pip.freeze petals/pip.freeze +RUN pip install --no-cache-dir -r petals/pip.freeze +ADD pip2.freeze petals/pip2.freeze +RUN pip install --no-cache-dir -r petals/pip2.freeze ADD tests petals/tests -ADD LICENSE README.md pyproject.toml setup.cfg petals/ ADD src petals/src +ADD LICENSE README.md pyproject.toml setup.cfg petals/ + RUN pip install --no-cache-dir -e petals #RUN pip install --no-cache-dir --upgrade transformers==4.34.0 WORKDIR /home/petals/ + +RUN pip freeze > pip.freeze.new + CMD python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 7c8f293..a36276c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,7 +8,6 @@ services: network_mode: host ipc: host restart: unless-stopped - env_file: ./.env health: profiles: ["core"] @@ -19,7 +18,6 @@ services: ports: - "8009:5000" command: flask run --host=0.0.0.0 --port=5000 - env_file: ./.env tinyllama_local_gpu: profiles: ["local","gpu"] @@ -38,9 +36,30 @@ services: count: 1 capabilities: [gpu] restart: always - env_file: ./.env # + envmodel_local_gpu: + profiles: ["local","gpu"] + build: . 
+ environment: + - MODEL=${MODEL} + depends_on: + - backbone + # xai-org/grok-1 + # hpcai-tech/grok-1 + # keyfan/grok-1-hf + command: python -m petals.cli.run_server --port 31331 --num_blocks=1 $MODEL --initial_peers $INITIAL_PEERS --device=$DEVICE + ports: + - "31331:31331" + restart: always + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + tinymixtral_local_gpu: profiles: ["local","gpu"] build: . @@ -50,7 +69,7 @@ services: ports: - "31331:31331" restart: always - env_file: ./.env + deploy: resources: reservations: @@ -68,7 +87,7 @@ services: ports: - "31331:31331" restart: always - env_file: ./.env + tinyllamacpu: profiles: ["tinyllama","cpu"] @@ -79,7 +98,7 @@ services: ports: - "31331:31331" restart: always - env_file: ./.env + tinyllamagpu: profiles: ["core"] @@ -97,7 +116,7 @@ services: count: 1 capabilities: [gpu] restart: always - env_file: ./.env + tinyllamatpu: profiles: ["tpu"] @@ -108,7 +127,7 @@ services: - "31333:31333" command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE restart: always - env_file: ./.env + debug_health: profiles: ["debug"] @@ -127,7 +146,7 @@ services: # image: petals-inference-test ports: - "8010:5000" - env_file: ./.env + command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000 diff --git a/pip.freeze b/pip.freeze index 43d0b4a..f18d1c7 100644 --- a/pip.freeze +++ b/pip.freeze @@ -8,16 +8,17 @@ charset-normalizer==3.3.2 ConfigArgParse==1.7 cpufeature==0.2.1 cryptography==42.0.5 + Dijkstar==2.6.0 filelock==3.13.4 fsspec==2024.3.1 grpcio==1.62.1 grpcio-tools==1.62.1 -hivemind==1.1.10.post2 -#huggingface-hub==0.22.2 humanfriendly==10.0 idna==3.7 Jinja2==3.1.3 +jsonpointer==2.1 + MarkupSafe==2.1.5 mpmath==1.3.0 msgpack==1.0.8 @@ -37,6 +38,7 @@ nvidia-cusparse-cu12==12.1.0.106 nvidia-nccl-cu12==2.19.3 
nvidia-nvjitlink-cu12==12.4.127 nvidia-nvtx-cu12==12.1.105 + packaging==24.0 peft==0.5.0 prefetch-generator==1.0.3 @@ -48,6 +50,7 @@ pymultihash==0.8.2 PyYAML==6.0.1 regex==2023.12.25 requests==2.31.0 + safetensors==0.4.3 scipy==1.13.0 sentencepiece==0.2.0 @@ -57,14 +60,10 @@ speedtest-cli==2.1.3 sympy==1.12 tensor-parallel==1.0.23 -torch==2.2.2 tqdm==4.66.2 -#transformers==4.38.2 - triton==2.2.0 typing_extensions==4.11.0 urllib3==2.2.1 uvloop==0.19.0 varint==1.0.2 - diff --git a/pip2.freeze b/pip2.freeze new file mode 100644 index 0000000..3191ffb --- /dev/null +++ b/pip2.freeze @@ -0,0 +1,6 @@ +transformers==4.38.2 +huggingface-hub==0.22.2 +hivemind==1.1.10.post2 +peft==0.5.0 +tokenizers==0.15.2 +torch==2.2.2