|
|
|
@ -8,7 +8,6 @@ services:
|
|
|
|
|
network_mode: host
|
|
|
|
|
ipc: host
|
|
|
|
|
restart: unless-stopped
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
healthcheck:
|
|
|
|
|
profiles: ["core"]
|
|
|
|
@ -19,7 +18,6 @@ services:
|
|
|
|
|
ports:
|
|
|
|
|
- "8009:5000"
|
|
|
|
|
command: flask run --host=0.0.0.0 --port=5000
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
tinyllama_local_gpu:
|
|
|
|
|
profiles: ["local","gpu"]
|
|
|
|
@ -38,9 +36,30 @@ services:
|
|
|
|
|
count: 1
|
|
|
|
|
capabilities: [gpu]
|
|
|
|
|
restart: always
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
envmodel_local_gpu:
|
|
|
|
|
profiles: ["local","gpu"]
|
|
|
|
|
build: .
|
|
|
|
|
environment:
|
|
|
|
|
- MODEL=${MODEL}
|
|
|
|
|
depends_on:
|
|
|
|
|
- backbone
|
|
|
|
|
# xai-org/grok-1
|
|
|
|
|
# hpcai-tech/grok-1
|
|
|
|
|
# keyfan/grok-1-hf
|
|
|
|
|
command: python -m petals.cli.run_server --port 31331 --num_blocks=1 $MODEL --initial_peers $INITIAL_PEERS --device=$DEVICE
|
|
|
|
|
ports:
|
|
|
|
|
- "31331:31331"
|
|
|
|
|
restart: always
|
|
|
|
|
deploy:
|
|
|
|
|
resources:
|
|
|
|
|
reservations:
|
|
|
|
|
devices:
|
|
|
|
|
- driver: nvidia
|
|
|
|
|
count: 1
|
|
|
|
|
capabilities: [gpu]
|
|
|
|
|
|
|
|
|
|
tinymixtral_local_gpu:
|
|
|
|
|
profiles: ["local","gpu"]
|
|
|
|
|
build: .
|
|
|
|
@ -50,7 +69,7 @@ services:
|
|
|
|
|
ports:
|
|
|
|
|
- "31331:31331"
|
|
|
|
|
restart: always
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
deploy:
|
|
|
|
|
resources:
|
|
|
|
|
reservations:
|
|
|
|
@ -68,7 +87,7 @@ services:
|
|
|
|
|
ports:
|
|
|
|
|
- "31331:31331"
|
|
|
|
|
restart: always
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tinyllamacpu:
|
|
|
|
|
profiles: ["tinyllama","cpu"]
|
|
|
|
@ -79,7 +98,7 @@ services:
|
|
|
|
|
ports:
|
|
|
|
|
- "31331:31331"
|
|
|
|
|
restart: always
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tinyllamagpu:
|
|
|
|
|
profiles: ["core"]
|
|
|
|
@ -97,7 +116,7 @@ services:
|
|
|
|
|
count: 1
|
|
|
|
|
capabilities: [gpu]
|
|
|
|
|
restart: always
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tinyllamatpu:
|
|
|
|
|
profiles: ["tpu"]
|
|
|
|
@ -108,7 +127,7 @@ services:
|
|
|
|
|
- "31333:31333"
|
|
|
|
|
command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
|
|
|
|
|
restart: always
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
debug_health:
|
|
|
|
|
profiles: ["debug"]
|
|
|
|
@ -127,7 +146,7 @@ services:
|
|
|
|
|
# image: petals-inference-test
|
|
|
|
|
ports:
|
|
|
|
|
- "8010:5000"
|
|
|
|
|
env_file: ./.env
|
|
|
|
|
|
|
|
|
|
command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|