# docker-compose.yml
services:
  web:
    build:
      context: .
      args:
        COMPUTE_LEVEL: "86" # Your CUDA GPU compute capability, without the dot (here: 8.6)
        CUDA_VERSION: "12.6.0"
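        # If you are unsure of your GPU's compute capability, recent NVIDIA
        # drivers can report it directly (assumes your nvidia-smi supports the
        # compute_cap query field):
        #   nvidia-smi --query-gpu=compute_cap --format=csv,noheader
        # e.g. an output of "8.6" maps to COMPUTE_LEVEL: "86"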
    image: ghcr.io/katherlab/llmaix:latest
    ports:
      - "19999:${PORT:-5000}"
    volumes:
      # Replace the path before the colon with your local model directory;
      # don't modify the /models path after the colon.
      - /PATH/TO/YOUR/LOCAL/MODEL/DIR:/models
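      # For example (hypothetical host path):
      #   - /home/alice/llm-models:/models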
    environment:
      - SERVER_PATH=${SERVER_PATH:-/build/llama.cpp/llama-server}
      - PORT=${PORT:-5000}
      - HOST=${HOST:-0.0.0.0}
      # Adjust if the model config in your models directory is named something other than config.yml
      - CONFIG_FILE=${CONFIG_FILE:-/models/config.yml}
      - GPU=${GPU:-ALL}
      - LLAMACPP_PORT=${LLAMACPP_PORT:-2929}
      - DEBUG=${DEBUG:-false}
      - MODE=${MODE:-choice}
      - VERBOSE_LLAMA=${VERBOSE_LLAMA:-false}
      - NO_PASSWORD=${NO_PASSWORD:-true}
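      # Each ${VAR:-default} above falls back to its default unless the variable
      # is set in your shell or in a .env file next to this compose file
      # (standard Docker Compose interpolation). A minimal sketch of such a
      # .env file, with hypothetical values:
      #   PORT=8080
      #   DEBUG=true
      #   NO_PASSWORD=false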
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
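            # device_ids: ['0'] pins the container to the first GPU. To reserve
            # all GPUs instead, the Compose spec allows replacing device_ids with:
            #   count: all
# GPU reservations require the NVIDIA Container Toolkit on the host. A typical
# way to build and start the stack (standard Docker Compose commands):
#   docker compose up -d --build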