Refactor to pull Docker Hub images

Also fixes `triton` dependency conflict
pull/10/head
Atinoda 12 months ago
parent b29d617880
commit a0137d40b8

@@ -56,7 +56,11 @@ EXPOSE 7860
 EXPOSE 5000
 EXPOSE 5005
 # Required for Python print statements to appear in logs
 ENV PYTHONUNBUFFERED=1
+# Force variant layers to sync cache by setting --build-arg BUILD_DATE
+ARG BUILD_DATE
+ENV BUILD_DATE=$BUILD_DATE
+RUN echo "$BUILD_DATE" > /build_date.txt
 # Run
 COPY ./scripts/docker-entrypoint.sh /scripts/docker-entrypoint.sh
 RUN chmod +x /scripts/docker-entrypoint.sh
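For reference, a minimal sketch of how the new `BUILD_DATE` argument might be supplied at build time; the timestamp format here is an assumption, since any changing string will do:

```sh
# Supplying a fresh BUILD_DATE invalidates the `RUN echo` layer and every
# layer after it; reusing the same value lets variant builds share cache.
docker build \
  --build-arg BUILD_DATE="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  --target default \
  -t text-generation-webui:local .
```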
@@ -81,6 +85,7 @@ RUN apt-get install --no-install-recommends -y git python3-dev build-essential p
 RUN rm -rf /app/repositories/GPTQ-for-LLaMa && \
     git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b triton /app/repositories/GPTQ-for-LLaMa
 RUN pip3 uninstall -y quant-cuda && \
+    sed -i 's/^safetensors==0\.3\.0$/safetensors/g' /app/repositories/GPTQ-for-LLaMa/requirements.txt && \
     pip3 install -r /app/repositories/GPTQ-for-LLaMa/requirements.txt
 ENV EXTRA_LAUNCH_ARGS=""
 CMD ["python3", "/app/server.py"]

@@ -15,7 +15,7 @@ This project dockerises the deployment of [oobabooga/text-generation-webui](http
 This is the recommended deployment method.
 ### Select variant
-Choose the desired variant by setting the build `target` in `docker-compose.yml` to one of the following options:
+Choose the desired variant by setting the image `:tag` in `docker-compose.yml` to one of the following options:
 | Variant | Description |
 |---|---|
@@ -27,13 +27,6 @@ Choose the desired variant by setting the image `:tag` in `docker-compose.yml`
 *See: [oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md) and [oobabooga/text-generation-webui/blob/main/docs/llama.cpp-models.md](https://github.com/oobabooga/text-generation-webui/blob/main/docs/llama.cpp-models.md) for more information on variants.*
-### Build
-Build the image:
-`docker compose build`
-*If you choose a different variant later, you must **rebuild** the image.*
 ### Deploy
 Deploy the service:
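The deploy step itself is unchanged by this commit; for completeness, a sketch of the usual compose invocation (the detached `-d` form is an optional extra, not part of the documented command):

```sh
docker compose up      # run in the foreground with logs attached
docker compose up -d   # or detach; follow logs with: docker compose logs -f
```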
@@ -69,11 +62,22 @@ Extra launch arguments can be defined in the environment variable `EXTRA_LAUNCH_
 *Launch arguments should be defined as a space-separated list, just like writing them on the command line. These arguments are passed to the `server.py` module.*
 ### Updates
-These projects are moving quickly! To update to the latest version, rebuild the image without cache:
+These projects are moving quickly! To update to the most recent version on Docker Hub, pull the latest image:
+`docker compose pull`
-`docker compose build --no-cache`
+Then recreate the container:
-*When the container is launched, it will print out how many commits behind origin the current build is, so you can decide if you want to update it.*
+`docker compose up`
+*When the container is launched, it will print out how many commits behind origin the current build is, so you can decide if you want to update it. Docker Hub images will be updated periodically, but if you need bleeding-edge versions you must build locally.*
+### Build (optional)
+The provided `docker-compose.yml.build` shows how to build the image locally. You can use it as a reference to modify the original `docker-compose.yml`, or you can rename it and use it as-is. Choose the desired variant to build by setting the build `target`, then run:
+`docker compose build`
+*If you choose a different variant later, you must **rebuild** the image.*
 ### Developers / Advanced Users
 The Dockerfile can be easily modified to compile and run the application from a local source folder. This is useful if you want to do some development or run a custom version. See the Dockerfile itself for instructions on how to do this.
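Taken together, the new update flow might look like this end to end (the service name matches the compose files in this commit; following the logs shows the commits-behind report mentioned above):

```sh
docker compose pull    # fetch the latest published image from Docker Hub
docker compose up -d   # recreate the container on the new image
docker compose logs -f text-generation-webui-docker   # check how far behind origin it is
```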
@@ -83,15 +87,21 @@ The Dockerfile can be easily modified to compile and run the application from a
 ## Standalone Container
 NOT recommended; instructions are included for completeness.
-### Build
-Build the image for the default target:
+### Run
+Run a container (and destroy it upon completion):
+`docker run -it --rm --gpus all -p 7860:7860 atinoda/text-generation-webui:default`
+### Build and run (optional)
+Build the image for the default target and tag it as `local`:
 `docker build --target default -t text-generation-webui:local .`
-### Run
-Run a container (and destroy it upon completion):
+Run the local image (and destroy it upon completion):
-`docker run -it --rm --gpus all -p 7860:7860 text-generation-webui:local`
+`docker run -it --rm -p 7860:7860 text-generation-webui:local`
 # Contributions
 Contributions are welcome - please feel free to submit a PR. More variants (e.g., AMD/ROCm support) and Windows support can help lower the barrier to entry, make this technology accessible to as many people as possible, and push towards democratising a technology that is having severe impacts on our society.
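Because `--rm` destroys the container on exit, the standalone commands above keep no state between runs. A hedged variation that mounts the same host folders the compose file uses, so models and presets persist (paths assume the repo's `config/` layout):

```sh
docker run -it --rm --gpus all -p 7860:7860 \
  -v "$(pwd)/config/models:/app/models" \
  -v "$(pwd)/config/presets:/app/presets" \
  atinoda/text-generation-webui:default
```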

@@ -1,11 +1,7 @@
 version: "3"
 services:
   text-generation-webui-docker:
-    build:
-      context: .
-      target: default # Specify the variant to build
-      # args:
-      #   - LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
+    image: atinoda/text-generation-webui:default # Specify variant as the :tag
     container_name: text-generation-webui
     environment:
       - EXTRA_LAUNCH_ARGS="--listen --verbose" # Custom launch args (e.g., --model MODEL_NAME)
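With the variant now selected by `:tag` instead of a build `target`, switching variants becomes an edit-and-pull operation. A sketch, assuming a `triton` tag is published alongside `default` (the tag names here are illustrative):

```sh
# Point the compose file at a different variant tag (sed used for brevity)
sed -i 's|atinoda/text-generation-webui:default|atinoda/text-generation-webui:triton|' docker-compose.yml
docker compose config | grep 'image:'   # confirm the resolved image
docker compose pull && docker compose up -d
```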

@@ -0,0 +1,35 @@
+version: "3"
+services:
+  text-generation-webui-docker:
+    build:
+      context: .
+      target: default # Specify the variant to build
+      # args:
+      #   - LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
+    container_name: text-generation-webui
+    environment:
+      - EXTRA_LAUNCH_ARGS="--listen --verbose" # Custom launch args (e.g., --model MODEL_NAME)
+    ports:
+      - 7860:7860 # Default web port
+      # - 5000:5000 # Default API port
+      # - 5005:5005 # Default streaming port
+      # - 5001:5001 # Default OpenAI API extension port
+    volumes:
+      - ./config/loras:/app/loras
+      - ./config/models:/app/models
+      - ./config/presets:/app/presets
+      - ./config/prompts:/app/prompts
+      - ./config/softprompts:/app/softprompts
+      - ./config/training:/app/training
+    logging:
+      driver: json-file
+      options:
+        max-file: "3" # number of log files to keep
+        max-size: "10m" # maximum size per log file
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
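This file can be used without renaming it by pointing compose at it explicitly, and the Dockerfile's `BUILD_DATE` argument can be threaded through the same command. A sketch, assuming the file keeps the `docker-compose.yml.build` name:

```sh
# Build the selected target from the alternative compose file...
docker compose -f docker-compose.yml.build build --build-arg BUILD_DATE="$(date -u +%Y-%m-%d)"
# ...then run it from the same file
docker compose -f docker-compose.yml.build up
```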