Files
noteflow/docker/server-gpu.Dockerfile
2026-01-16 04:18:41 -05:00

131 lines
5.6 KiB
Docker

# syntax=docker/dockerfile:1
# GPU-enabled server Dockerfile with CUDA support
# Use this for systems with NVIDIA GPUs
# =============================================================================
# Python Stage - Get Python 3.12 from official image
# =============================================================================
# Source-only stage: nothing is built here. The `base` stage copies the
# CPython 3.12 interpreter, standard library, headers and libpython out of
# this image with COPY --from=python-base.
FROM python:3.12-slim-bookworm AS python-base
# =============================================================================
# Base Stage - NVIDIA CUDA with cuDNN for GPU-accelerated inference
# =============================================================================
# Using NVIDIA's official CUDA image with cuDNN 9.x for CTranslate2/faster-whisper
# The runtime image includes cuDNN libraries required for GPU inference
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base

# Environment shared by every stage derived from `base`.
#   - PYTHON* / UV_*: interpreter and uv behaviour during build and at runtime.
#   - NVIDIA_*: tell nvidia-container-runtime to inject the GPU into the container.
#   - LD_LIBRARY_PATH: base CUDA path only. PyTorch bundles cuDNN 9.8.0 while the
#     system image ships 9.1.0, so the cuDNN paths are added at runtime (by the
#     entrypoint script) to prioritise PyTorch's bundled version.
#   - PATH: make sure the copied-in Python under /usr/local/bin wins.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64 \
    PATH=/usr/local/bin:$PATH

# Copy Python 3.12 from the official image (avoids PPA network issues).
COPY --from=python-base /usr/local/bin/python3.12 /usr/local/bin/python3.12
COPY --from=python-base /usr/local/bin/python3 /usr/local/bin/python3
COPY --from=python-base /usr/local/bin/pip3 /usr/local/bin/pip3
COPY --from=python-base /usr/local/lib/python3.12 /usr/local/lib/python3.12
COPY --from=python-base /usr/local/include/python3.12 /usr/local/include/python3.12
COPY --from=python-base /usr/local/lib/libpython3.12.so* /usr/local/lib/

# Provide the unversioned `python` / `pip` commands and refresh the linker
# cache so the copied libpython is discoverable.
RUN ln -sf /usr/local/bin/python3.12 /usr/local/bin/python \
    && ln -sf /usr/local/bin/pip3 /usr/local/bin/pip \
    && ldconfig

# Install uv from its published image. The tag is pinned to a release line
# instead of `latest` so that a new uv release cannot silently change the build.
# NOTE(review): tighten this to an exact patch version or digest that matches
# the uv version used to generate uv.lock.
COPY --from=ghcr.io/astral-sh/uv:0.9 /uv /uvx /bin/

# Core build/runtime deps for project packages (sounddevice, asyncpg, cryptography).
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libsndfile1 \
        pkg-config \
        portaudio19-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Copy dependency manifests first for better layer caching: later stages only
# re-install dependencies when these files change.
COPY pyproject.toml uv.lock* ./
# =============================================================================
# Server Stage - GPU Enabled
# =============================================================================
FROM base AS server

# Install third-party dependencies only (the project itself is skipped) so this
# layer stays cached until pyproject.toml / uv.lock change.
# NOTE(review): no extra package index is passed here; with --frozen the torch
# build that gets installed is whatever uv.lock pins — confirm it is the
# CUDA-enabled one.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project --group dev --all-extras

# Copy source code
COPY . .

# Install the project itself
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --group dev --all-extras

# Install spaCy small English model for NER (baked into image)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl

# Report the PyTorch / CUDA / cuDNN versions baked into the image.
# This must run inside the project environment created by `uv sync`: the bare
# `python` on PATH is the interpreter copied into /usr/local, which has none of
# the project dependencies, so the import would always fail. --no-sync keeps
# this step read-only.
# torch.cuda.is_available() may be False during build (no GPU); the step stays
# best-effort (`|| true`) and never fails the build.
RUN uv run --no-sync python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'cuDNN version: {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else \"N/A\"}')" || true

# List cuDNN shared libraries known to the dynamic linker (informational only)
RUN ldconfig -p | grep -i cudnn || echo "cuDNN libraries will be available at runtime"

# Copy the GPU entrypoint script and mark it executable in the same layer
# (avoids a second layer that only changes the file mode).
COPY --chmod=755 docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh

EXPOSE 50051

# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]
# =============================================================================
# Server Production Stage - GPU Enabled with all extras
# =============================================================================
FROM base AS server-full

# NOTE(review): this stage is labelled "production" but installs the `dev`
# dependency group and starts scripts/dev_watch_server.py, exactly like the
# `server` stage. Confirm whether a non-dev install and a production
# entrypoint are intended; behaviour is left unchanged here.

# Install all dependencies including optional extras (project itself skipped
# so this layer stays cached until pyproject.toml / uv.lock change).
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project --group dev --all-extras

# Copy source code
COPY . .

# Install the project itself
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --group dev --all-extras

# Install spaCy small English model for NER (baked into image)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl

# Copy the GPU entrypoint script and mark it executable in the same layer
# (avoids a second layer that only changes the file mode).
COPY --chmod=755 docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh

EXPOSE 50051

# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]