From 43392402e0a9c07d6dc060a95f401d095a408e6d Mon Sep 17 00:00:00 2001
From: Atinoda <61033436+Atinoda@users.noreply.github.com>
Date: Mon, 28 Aug 2023 15:28:17 +0100
Subject: [PATCH] Deprecate `llama-cublas` variant

`default` already includes CUDA GPU offloading for llama
---
 Dockerfile | 8 --------
 README.md  | 1 -
 2 files changed, 9 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 4d7aebc..128b4d2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -97,14 +97,6 @@ RUN pip3 uninstall -y quant-cuda && \
 ENV EXTRA_LAUNCH_ARGS=""
 CMD ["python3", "/app/server.py"]
 
-FROM base AS llama-cublas
-RUN echo "LLAMA-CUBLAS" >> /variant.txt
-RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
-ENV LLAMA_CUBLAS=1
-RUN pip uninstall -y llama-cpp-python && pip install llama-cpp-python
-ENV EXTRA_LAUNCH_ARGS=""
-CMD ["python3", "/app/server.py"]
-
 FROM base AS monkey-patch
 RUN echo "4-BIT MONKEY-PATCH" >> /variant.txt
 RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
diff --git a/README.md b/README.md
index bb674b1..39ed553 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,6 @@ Each variant has the 'extras' incuded in `default` but has some changes made as
 | `cuda` | Updated `GPTQ-for-llama` using the latest `cuda` branch from `qwopqwop200/GPTQ-for-LLaMa`. *This version is very slow!* |
 | `llama-cpu` | GPU supported is REMOVED from `llama-cpp`. Suitable for systems without a CUDA-capable GPU. *This is only for when GPU acceleration is not available and is a slower way to run models!* |
 | `monkey-patch` | Use LoRAs in 4-Bit `GPTQ-for-llama` mode. ***DEPRECATION WARNING:** This version is outdated, but will remain for now.* |
-| `llama-cublas` | CUDA GPU offloading enabled for `llama-cpp`. Use by setting option `n-gpu-layers` > 0. ***DEPRECATION WARNING:** This capability has been rolled into the default. The variant will be removed if the upstream dependency does not conflict with `default`.* |
 | `{VARIANT}-version` | Build of each {VARIANT} tagged with the release version of the text-generation-webui (e.g., `default-v1.5`). *Visit [obabooga/text-generation-webui/releases](https://github.com/oobabooga/text-generation-webui/releases) for details.* |
 
 *See: [oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md), [obabooga/text-generation-webui/blob/main/docs/llama.cpp-models.md](https://github.com/oobabooga/text-generation-webui/blob/main/docs/llama.cpp-models.md), and [oobabooga/text-generation-webui/blob/main/docs/ExLlama.md](https://github.com/oobabooga/text-generation-webui/blob/main/docs/ExLlama.md) for more information on variants.*
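
For context, a minimal usage sketch of the capability this patch folds into `default`: llama-cpp CUDA offloading is requested at runtime via the `n-gpu-layers` option rather than a separate image variant. The image tag, port, and layer count below are illustrative assumptions, and this presumes the image's launcher passes `EXTRA_LAUNCH_ARGS` (declared via `ENV` above) through to `server.py`:

    # Run the default variant with GPU access; offload 35 layers (illustrative)
    # of the llama-cpp model to the GPU via EXTRA_LAUNCH_ARGS.
    docker run --gpus all -p 7860:7860 \
      -e EXTRA_LAUNCH_ARGS="--n-gpu-layers 35" \
      atinoda/text-generation-webui:default

Leaving `n-gpu-layers` at 0 keeps inference on the CPU, so the merged `default` variant does not regress the CPU-only workflow.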