mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-02 09:40:42 +00:00
8aba2c9009
* feat: local inference server * fix: source to use bash + vars * chore: isort and black * fix: make file + inference mode * chore: logging * refactor: remove old links * fix: add new env vars * feat: hf inference server * refactor: remove old links * test: batch and single response * chore: black + isort * separate gpu and cpu dockerfiles * moved gpu to separate dockerfile * Fixed test endpoints * Edits to API. server won't start due to failed instantiation error * Method signature * fix: gpu_infer * tests: fix tests --------- Co-authored-by: Andriy Mulyar <andriy.mulyar@gmail.com>
24 lines
615 B
YAML
24 lines
615 B
YAML
---
# Docker Compose stack that runs the Hugging Face text-generation-inference
# image as the GPU inference service, published on host port 8080.
#
# Values written as ${NAME...} are substituted by Compose from the invoking
# shell (or an adjacent .env file) when this file is loaded. Entries under
# `environment:` are NOT used for that substitution, so the model settings are
# read from the host once and passed to both the container environment and the
# command line.
#
# Usage: MODEL_ID=<model id> docker compose up
version: "3.8"

services:
  gpt4all_gpu:
    image: ghcr.io/huggingface/text-generation-inference
    container_name: gpt4all_gpu
    # Restart on error (usually code compilation from save during bad state).
    restart: always
    environment:
      # Overridable from the host; falls back to the previous placeholder.
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-token}
      - USE_FLASH_ATTENTION=false
      # No quotes here: in list form they would become part of the value.
      - MODEL_ID=${MODEL_ID:-}
      - NUM_SHARD=${NUM_SHARD:-1}
    # MODEL_ID is mandatory: fail at load time with a clear message instead of
    # starting the container with an empty --model-id and looping on restart.
    command: >-
      --model-id ${MODEL_ID:?set MODEL_ID to the model to serve}
      --num-shard ${NUM_SHARD:-1}
    volumes:
      - ./:/data
    ports:
      - "8080:80"
    shm_size: 1g
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]