|
|
@@ -34,9 +34,6 @@ RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda /app/repositories
|
|
|
|
# Build and install default GPTQ ('quant_cuda')
|
|
|
|
# Build and install default GPTQ ('quant_cuda')
|
|
|
|
# Target CUDA compute capabilities for the extension build (Pascal 6.1 through
# Ampere 8.6, plus PTX for forward compatibility on newer GPUs). Override with
# --build-arg TORCH_CUDA_ARCH_LIST=… to trim build time for a single GPU family.
ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
|
|
|
|
# NOTE(review): duplicate ARG declaration — appears to be a diff-extraction
# artifact (every instruction in this fragment occurs twice); harmless if kept,
# but only one declaration should survive in the real Dockerfile.
ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
|
|
|
|
# Compile and install the GPTQ CUDA kernel extension ('quant_cuda') from the
# repo cloned earlier; presumably setup_cuda.py consumes TORCH_CUDA_ARCH_LIST
# via PyTorch's cpp_extension machinery — TODO confirm.
# NOTE(review): hadolint DL3003 prefers WORKDIR over `RUN cd …`; left as-is
# because WORKDIR would change the working directory for later instructions.
RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install
|
|
|
|
# NOTE(review): duplicate of the quant_cuda build step — appears to be a
# diff-extraction artifact; running setup_cuda.py install twice is redundant
# (and wastes a layer), so only one copy should survive in the real Dockerfile.
RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install
|
|
|
|
# Install ExLlama
|
|
|
|
|
|
|
|
# Install ExLlama: runtime Python deps, then a clone of the upstream repo into
# /app/repositories (matching where GPTQ-for-LLaMa was cloned above).
# --no-cache-dir keeps pip's download cache out of the image layer (hadolint DL3042).
# NOTE(review): safetensors/sentencepiece/ninja versions and the exllama commit
# are unpinned, so this build is not reproducible — pin a version/commit once a
# known-good combination is identified.
RUN pip install --no-cache-dir safetensors sentencepiece ninja && \
    cd /app/repositories && git clone https://github.com/turboderp/exllama
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Runtime stage: start fresh from the pinned CUDA 11.8.0 devel image on
# Ubuntu 22.04 (explicit tag — good for reproducibility).
# NOTE(review): the -devel variant is large; if nothing is compiled at runtime,
# the -runtime variant would shrink the final image — confirm before switching.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS base
|
|
|
|
# NOTE(review): duplicate FROM for the 'base' stage — appears to be a
# diff-extraction artifact; two stages with the same AS name are invalid, so
# only one copy should survive in the real Dockerfile.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS base
|
|
|
|
# Runtime pre-reqs
|
|
|
|
# Runtime pre-reqs
|
|
|
|