- Added server-gpu and full-gpu Docker Compose profiles with NVIDIA CUDA support for GPU-accelerated inference
- Created server-gpu.Dockerfile with PyTorch CUDA base image and GPU runtime configuration
- Added compose.gpu.yaml for optional GPU allocation customization (device count, IDs, memory settings)
- Documented GPU setup requirements for Linux (nvidia-container-toolkit), Windows WSL2, and macOS in docker/CLAUDE
30 lines
1.0 KiB
YAML
30 lines
1.0 KiB
YAML
# GPU-specific overrides for Docker Compose
# Usage: docker compose -f compose.yaml -f compose.gpu.yaml --profile server-gpu up
#
# This file provides additional GPU configuration options.
# It is optional - the server-gpu profile in compose.yaml includes sensible defaults.
#
# NOTE(review): Compose merges list-valued keys from multiple files by
# appending. If compose.yaml already declares a device reservation for
# server-gpu, verify the merged result with
#   docker compose -f compose.yaml -f compose.gpu.yaml --profile server-gpu config

services:
  server-gpu:
    # GPU allocation override. Edit the values below to customize how many
    # (or which) GPUs are reserved for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Number of GPUs to reserve. Use 'all' to reserve every
              # available GPU, or give an explicit count.
              count: 1
              # count: all
              capabilities: [gpu]
              # Optionally pin specific device IDs (e.g., on multi-GPU
              # systems). 'count' and 'device_ids' are mutually exclusive:
              # remove/comment out 'count' before enabling 'device_ids'.
              # device_ids: ['0']

    # Additional environment variables for GPU optimization
    environment:
      # PyTorch CUDA settings.
      # Restricts the process to CUDA device index 0. Keep this in sync with
      # the reservation above: with 'count: all' or several 'device_ids',
      # this value still limits the process to a single GPU.
      CUDA_VISIBLE_DEVICES: "0"
      # TF32 override for NVIDIA libraries (relevant on Ampere+ GPUs).
      # NOTE(review): NVIDIA documents this variable only for the value "0"
      # (force-disable TF32); "1" may be a no-op. In PyTorch, TF32 matmul is
      # controlled by torch.backends.cuda.matmul.allow_tf32 /
      # torch.set_float32_matmul_precision - confirm the intended effect.
      NVIDIA_TF32_OVERRIDE: "1"
      # Memory management: option passed to PyTorch's CUDA caching allocator.
      PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True"