Files
noteflow/docker/server-gpu.Dockerfile

120 lines
4.1 KiB
Docker

# syntax=docker/dockerfile:1
# Global build arguments. They are declared before the first FROM, so they can
# be used in FROM lines; a stage that needs one inside its own instructions has
# to redeclare it with a bare `ARG NAME`.
#
# Python version used for the python-base image tag. The base stage copies
# paths that spell out "python3.12", so overriding this to another minor
# version also requires updating those COPY lines.
ARG PYTHON_VERSION=3.12
# CUDA release used for the nvidia/cuda cuDNN runtime image tag.
ARG CUDA_VERSION=12.4.1
# Wheel URL of the spaCy English model; the build and dev stages redeclare this
# argument and install the wheel with `uv pip install`.
ARG SPACY_MODEL_URL=https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
# =============================================================================
# Python Stage - Get Python 3.12 from official image
# =============================================================================
# No instructions run in this stage; it exists only as a source for the
# `COPY --from=python-base` lines that pull the interpreter into the CUDA image.
FROM python:${PYTHON_VERSION}-slim-bookworm AS python-base
# =============================================================================
# Base Stage - NVIDIA CUDA with cuDNN for GPU-accelerated inference
# =============================================================================
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-runtime-ubuntu22.04 AS base

# Environment shared by every stage derived from this one.
#   PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED: no .pyc writes at runtime,
#     unbuffered stdout/stderr.
#   UV_COMPILE_BYTECODE: uv compiles bytecode at install time instead.
#   UV_LINK_MODE=copy: uv copies files out of its cache rather than linking
#     (the cache is a build-time mount in later stages).
#   NVIDIA_*: expose all GPUs with the compute and utility driver capabilities.
#   LD_LIBRARY_PATH: prepend the CUDA library directory while keeping whatever
#     search path the nvidia/cuda image already exports; the `:+` form avoids a
#     dangling ":" (which would mean "current directory") if it is unset.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
    PATH=/usr/local/bin:$PATH

# Transplant the CPython 3.12 installation from the official Python image.
# The paths are hard-coded to 3.12 and must be kept in step with PYTHON_VERSION.
# NOTE(review): the interpreter was built on Debian bookworm and is copied onto
# Ubuntu 22.04 - confirm that its shared-library dependencies resolve here.
COPY --from=python-base /usr/local/bin/python3.12 /usr/local/bin/python3.12
COPY --from=python-base /usr/local/bin/python3 /usr/local/bin/python3
COPY --from=python-base /usr/local/bin/pip3 /usr/local/bin/pip3
COPY --from=python-base /usr/local/lib/python3.12 /usr/local/lib/python3.12
COPY --from=python-base /usr/local/include/python3.12 /usr/local/include/python3.12
COPY --from=python-base /usr/local/lib/libpython3.12.so* /usr/local/lib/

# Provide unversioned `python` / `pip` names and refresh the linker cache so
# the copied libpython3.12.so is found.
RUN ln -sf /usr/local/bin/python3.12 /usr/local/bin/python \
    && ln -sf /usr/local/bin/pip3 /usr/local/bin/pip \
    && ldconfig

# uv and uvx binaries from the upstream image.
# NOTE(review): the `latest` tag is unpinned, so rebuilds can pick up a
# different uv release; consider pinning a specific version or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Shared libraries for audio I/O (PortAudio) and audio file handling
# (libsndfile); apt lists are removed in the same layer to keep it small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libportaudio2 \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Only the dependency manifests are copied here, so the dependency-install
# layers in later stages stay cached until these files change. The glob keeps
# this COPY from failing when uv.lock is absent; the later
# `uv sync --frozen` steps still need the lockfile.
COPY pyproject.toml uv.lock* ./
# =============================================================================
# Production Server Stage (minimal deps)
# =============================================================================
# Build stage: resolves and installs the production dependency set plus the
# project itself into /workspace; the server stage copies /workspace from here.
FROM base AS build

# Compiler toolchain and PortAudio development headers.
# NOTE(review): presumably required to build native extensions from source
# during `uv sync` - confirm which dependency needs them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        pkg-config \
        portaudio19-dev \
    && rm -rf /var/lib/apt/lists/*

# Install third-party dependencies only (no project, no dev group). This runs
# before the source is copied so the layer is reused when only code changes.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project --no-dev

COPY . .

# Second pass installs the project itself on top of the cached dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev

# Declared right before its only consumer: every RUN after an ARG sees the
# value implicitly, so declaring it at the top of the stage would invalidate
# the apt and `uv sync` layers whenever the model URL changes.
ARG SPACY_MODEL_URL
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install "${SPACY_MODEL_URL}"
# Runtime image: the base stage plus the fully installed /workspace from the
# build stage, without the compiler toolchain.
FROM base AS server

# The environment is already synced at build time with --frozen --no-dev.
# Without this, `uv run` re-locks and re-syncs on start, which would try to
# modify the root-owned virtual environment as the unprivileged user below.
ENV UV_NO_SYNC=1

COPY --from=build /workspace /workspace

# --chmod sets the executable bit during the copy, avoiding a separate
# `RUN chmod` layer that would store the script a second time.
COPY --chmod=755 docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh

# Drop root for the running service.
RUN useradd --create-home --shell /bin/bash noteflow
USER noteflow

# gRPC listening port (documentation only; does not publish the port).
EXPOSE 50051

# Probe the gRPC port by waiting for a channel to become ready. The check must
# use the project's virtual-environment interpreter: the bare `python` on PATH
# is the copied system interpreter, which does not have the packages that
# `uv sync` installed into /workspace/.venv.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD /workspace/.venv/bin/python -c "import grpc; channel = grpc.insecure_channel('localhost:50051'); grpc.channel_ready_future(channel).result(timeout=5)" || exit 1

# The entrypoint script receives CMD as its arguments.
ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
CMD ["uv", "run", "python", "-m", "noteflow.grpc.server"]
# =============================================================================
# Development Server Stage (all extras + dev deps + hot reload)
# =============================================================================
FROM base AS dev

# Compiler toolchain and PortAudio development headers.
# NOTE(review): presumably required to build native extensions from source
# during `uv sync` - confirm which dependency needs them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        pkg-config \
        portaudio19-dev \
    && rm -rf /var/lib/apt/lists/*

# Install third-party dependencies including the dev group and every extra,
# before the source is copied so the layer is reused when only code changes.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project --group dev --all-extras

COPY . .

# Second pass installs the project itself on top of the cached dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --group dev --all-extras

# Declared right before its only consumer: every RUN after an ARG sees the
# value implicitly, so declaring it at the top of the stage would invalidate
# the apt and `uv sync` layers whenever the model URL changes.
ARG SPACY_MODEL_URL
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install "${SPACY_MODEL_URL}"

# --chmod sets the executable bit during the copy, avoiding a separate
# `RUN chmod` layer that would store the script a second time.
COPY --chmod=755 docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh

# gRPC listening port (documentation only; does not publish the port).
EXPOSE 50051

# No USER is set in this stage, so the watcher runs as the base image's
# default user. The entrypoint script receives CMD as its arguments.
ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]