diff --git a/Dockerfile b/Dockerfile
index cb7c3ab..daa1149 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -33,6 +33,9 @@ RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda /app/repos
 # Build and install default GPTQ ('quant_cuda')
 ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
 RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install
+# Install ExLlama
+RUN pip install safetensors sentencepiece ninja && \
+    cd /app/repositories && git clone https://github.com/turboderp/exllama
 
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS base
 # Runtime pre-reqs
diff --git a/README.md b/README.md
index 6c0e3d4..3972722 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Choose the desired variant by setting the image `:tag` in `docker-compose.yml` t
 
 | Variant | Description |
 |---|---|
-| `default` | Implementation of the vanilla deployment from source. Also includes pre-installed `AutoGPTQ` library from `PanQiWei/AutoGPTQ`. |
+| `default` | Implementation of the vanilla deployment from source. Also includes pre-installed `ExLlama` library from `turboderp/exllama`. |
 | `triton` | Updated `GPTQ-for-llama` using the latest `triton` branch from `qwopqwop200/GPTQ-for-LLaMa`. Suitable for Linux only. |
 | `cuda` | Updated `GPTQ-for-llama` using the latest `cuda` branch from `qwopqwop200/GPTQ-for-LLaMa`. |
 | `monkey-patch` | Use LoRAs in 4-Bit `GPTQ-for-llama` mode. |