gpt4all/gpt4all-api/docker-compose.gpu.yaml


version: "3.8"
services:
gpt4all_gpu:
image: ghcr.io/huggingface/text-generation-inference
container_name: gpt4all_gpu
restart: always #restart on error (usually code compilation from save during bad state)
environment:
- HUGGING_FACE_HUB_TOKEN=token
- USE_FLASH_ATTENTION=false
- MODEL_ID=''
- NUM_SHARD=1
command: --model-id $MODEL_ID --num-shard $NUM_SHARD
volumes:
- ./:/data
ports:
- "8080:80"
shm_size: 1g
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities: [gpu]
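
# Usage sketch (not part of the upstream file; assumes Docker Compose v2 and the
# NVIDIA Container Toolkit are installed, and <model-id> is a placeholder for a
# Hugging Face model id):
#
#   MODEL_ID=<model-id> NUM_SHARD=1 docker compose -f docker-compose.gpu.yaml up -d
#
# Note that $MODEL_ID and $NUM_SHARD in `command:` are substituted by Compose from
# the host environment (or an .env file) when the file is parsed, so they must be
# set in the shell; the values under `environment:` only apply inside the container.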