feat: Introduce Gitea CI/CD workflows, refactor Docker deployment with dedicated dev/prod compose files and enhanced ROCm GPU support, and update RAG documentation for new AI and ASR infrastructure.

2026-01-24 14:50:19 +00:00
parent 09d70af58f
commit acfba090e4
29 changed files with 2676 additions and 918 deletions
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -1,16 +1,4 @@
 # syntax=docker/dockerfile:1
-# NoteFlow ROCm Docker Image
-# For AMD GPU support using PyTorch ROCm
-#
-# Build:
-#   docker build -f docker/Dockerfile.rocm -t noteflow:rocm .
-#
-# Run (with GPU access):
-#   docker run --device=/dev/kfd --device=/dev/dri --group-add video --group-add render \
-#     --security-opt seccomp=unconfined \
-#     -v /path/to/models:/workspace/models \
-#     noteflow:rocm
-
 ARG ROCM_VERSION=6.4.1
 ARG ROCM_PYTORCH_RELEASE=2.6.0
 ARG SPACY_MODEL_URL=https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
@@ -29,10 +17,8 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
    UV_LINK_MODE=copy \
    PATH=/usr/local/bin:$PATH

-# Install uv
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

-# Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    pkg-config \
@@ -43,72 +29,51 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    && rm -rf /var/lib/apt/lists/*

-# =============================================================================
-# Server Stage - ROCm
-# =============================================================================
-FROM base AS server
-
 WORKDIR /workspace

-# Copy dependency files first for better layer caching
 COPY pyproject.toml uv.lock* ./
 COPY README.md ./
 COPY src ./src/

-# Create venv with access to system site-packages (ROCm torch)
 ENV VIRTUAL_ENV=/opt/venv
 RUN uv venv --system-site-packages ${VIRTUAL_ENV}
 ENV PATH="${VIRTUAL_ENV}/bin:$PATH"

-# Install NoteFlow with ROCm extras (into venv)
-RUN uv pip install --python ${VIRTUAL_ENV}/bin/python -e ".[rocm,optional]"
-# Improve redis client performance and silence hiredis warning.
-RUN uv pip install --python ${VIRTUAL_ENV}/bin/python hiredis
-
-# Install spaCy small English model for NER (baked into image)
+# =============================================================================
+# Production Server Stage (minimal deps)
+# =============================================================================
+FROM base AS server
 ARG SPACY_MODEL_URL
+
+RUN uv pip install --python ${VIRTUAL_ENV}/bin/python -e ".[rocm]"
 RUN uv pip install --python ${VIRTUAL_ENV}/bin/python ${SPACY_MODEL_URL}

-# Copy remaining files (scripts, configs, etc.)
 COPY . .

-# Environment variables for ROCm
 ENV ROCM_PATH=/opt/rocm \
    HIP_VISIBLE_DEVICES=0 \
    HSA_OVERRIDE_GFX_VERSION="" \
    NOTEFLOW_ASR_DEVICE=rocm \
    NOTEFLOW_FEATURE_ROCM_ENABLED=true

-# gRPC server port
+RUN useradd --create-home --shell /bin/bash noteflow
+USER noteflow
+
 EXPOSE 50051

-# Health check
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import grpc; channel = grpc.insecure_channel('localhost:50051'); grpc.channel_ready_future(channel).result(timeout=5)" || exit 1

-# Run gRPC server
 CMD ["python", "-m", "noteflow.grpc.server"]

 # =============================================================================
-# Server Dev Stage - ROCm (hot reload)
+# Development Server Stage (all extras + hot reload)
 # =============================================================================
-FROM base AS server-dev
-
-WORKDIR /workspace
-
-COPY pyproject.toml uv.lock* ./
-COPY README.md ./
-COPY src ./src/
-
-ENV VIRTUAL_ENV=/opt/venv
-RUN uv venv --system-site-packages ${VIRTUAL_ENV}
-ENV PATH="${VIRTUAL_ENV}/bin:$PATH"
+FROM base AS dev
+ARG SPACY_MODEL_URL

 RUN uv pip install --python ${VIRTUAL_ENV}/bin/python -e ".[rocm,optional]"
-RUN uv pip install --python ${VIRTUAL_ENV}/bin/python hiredis
 RUN uv pip install --python ${VIRTUAL_ENV}/bin/python watchfiles
-
-ARG SPACY_MODEL_URL
 RUN uv pip install --python ${VIRTUAL_ENV}/bin/python ${SPACY_MODEL_URL}

 COPY . .
@@ -122,38 +87,3 @@ ENV ROCM_PATH=/opt/rocm \
 EXPOSE 50051

 CMD ["python", "scripts/dev_watch_server.py"]
-
-# =============================================================================
-# Server Full Stage - ROCm (optional extras)
-# =============================================================================
-FROM base AS server-full
-
-WORKDIR /workspace
-
-COPY pyproject.toml uv.lock* ./
-COPY README.md ./
-COPY src ./src/
-
-ENV VIRTUAL_ENV=/opt/venv
-RUN uv venv --system-site-packages ${VIRTUAL_ENV}
-ENV PATH="${VIRTUAL_ENV}/bin:$PATH"
-
-RUN uv pip install --python ${VIRTUAL_ENV}/bin/python -e ".[rocm,optional]"
-
-ARG SPACY_MODEL_URL
-RUN uv pip install --python ${VIRTUAL_ENV}/bin/python ${SPACY_MODEL_URL}
-
-COPY . .
-
-ENV ROCM_PATH=/opt/rocm \
-    HIP_VISIBLE_DEVICES=0 \
-    HSA_OVERRIDE_GFX_VERSION="" \
-    NOTEFLOW_ASR_DEVICE=rocm \
-    NOTEFLOW_FEATURE_ROCM_ENABLED=true
-
-EXPOSE 50051
-
-HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
-    CMD python -c "import grpc; channel = grpc.insecure_channel('localhost:50051'); grpc.channel_ready_future(channel).result(timeout=5)" || exit 1
-
-CMD ["python", "-m", "noteflow.grpc.server"]
--- a/docker/entrypoint-gpu.sh
+++ b/docker/entrypoint-gpu.sh
@@ -1,21 +1,8 @@
 #!/bin/bash
-# GPU entrypoint script
-# Sets LD_LIBRARY_PATH to prioritize PyTorch's bundled cuDNN 9.8.0 over system cuDNN 9.1.0
+set -e

-# PyTorch bundles cuDNN 9.8.0 in its site-packages
-# We must add these paths FIRST to override system cuDNN 9.1.0
 PYTORCH_NVIDIA_LIBS="/workspace/.venv/lib/python3.12/site-packages/nvidia"

 export LD_LIBRARY_PATH="${PYTORCH_NVIDIA_LIBS}/cudnn/lib:${PYTORCH_NVIDIA_LIBS}/cublas/lib:${PYTORCH_NVIDIA_LIBS}/cuda_runtime/lib:${PYTORCH_NVIDIA_LIBS}/cufft/lib:${PYTORCH_NVIDIA_LIBS}/cusolver/lib:${PYTORCH_NVIDIA_LIBS}/cusparse/lib:${PYTORCH_NVIDIA_LIBS}/nccl/lib:${PYTORCH_NVIDIA_LIBS}/nvtx/lib:/usr/local/cuda/lib64"

-echo "=== GPU Entrypoint ==="
-echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
-echo "Checking cuDNN libraries..."
-ls -la "${PYTORCH_NVIDIA_LIBS}/cudnn/lib/" 2>/dev/null | head -5 || echo "cuDNN libs not found (will be installed on first run)"
-echo "======================"
-
-# Run uv sync to ensure dependencies are installed
-uv sync --frozen --group dev --all-extras
-
-# Execute the command passed to docker run
 exec "$@"
--- a/docker/server-gpu.Dockerfile
+++ b/docker/server-gpu.Dockerfile
@@ -1,35 +1,26 @@
 # syntax=docker/dockerfile:1
-# GPU-enabled server Dockerfile with CUDA support
-# Use this for systems with NVIDIA GPUs
-
+ARG PYTHON_VERSION=3.12
+ARG CUDA_VERSION=12.4.1
+ARG SPACY_MODEL_URL=https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
 # =============================================================================
 # Python Stage - Get Python 3.12 from official image
 # =============================================================================
-FROM python:3.12-slim-bookworm AS python-base
+FROM python:${PYTHON_VERSION}-slim-bookworm AS python-base

 # =============================================================================
 # Base Stage - NVIDIA CUDA with cuDNN for GPU-accelerated inference
 # =============================================================================
-# Using NVIDIA's official CUDA image with cuDNN 9.x for CTranslate2/faster-whisper
-# The runtime image includes cuDNN libraries required for GPU inference
-FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-runtime-ubuntu22.04 AS base

-# CUDA/cuDNN environment variables
-# NOTE: PyTorch bundles cuDNN 9.8.0, but system has 9.1.0
-# We set LD_LIBRARY_PATH at runtime to prioritize PyTorch's bundled cuDNN
 ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
-    # CUDA environment - these tell nvidia-container-runtime to inject GPU
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
-    # Base CUDA path (cuDNN paths added at runtime to use PyTorch's bundled version)
    LD_LIBRARY_PATH=/usr/local/cuda/lib64 \
-    # Python path configuration
    PATH=/usr/local/bin:$PATH

-# Copy Python 3.12 from official image (avoids PPA network issues)
 COPY --from=python-base /usr/local/bin/python3.12 /usr/local/bin/python3.12
 COPY --from=python-base /usr/local/bin/python3 /usr/local/bin/python3
 COPY --from=python-base /usr/local/bin/pip3 /usr/local/bin/pip3
@@ -37,94 +28,92 @@ COPY --from=python-base /usr/local/lib/python3.12 /usr/local/lib/python3.12
 COPY --from=python-base /usr/local/include/python3.12 /usr/local/include/python3.12
 COPY --from=python-base /usr/local/lib/libpython3.12.so* /usr/local/lib/

-# Create symlinks for python/pip commands
 RUN ln -sf /usr/local/bin/python3.12 /usr/local/bin/python \
    && ln -sf /usr/local/bin/pip3 /usr/local/bin/pip \
    && ldconfig

-# Install uv and system dependencies
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

-# Core build/runtime deps for project packages (sounddevice, asyncpg, cryptography).
 RUN apt-get update \
    && apt-get install -y --no-install-recommends \
-        build-essential \
-        pkg-config \
-        portaudio19-dev \
+        libportaudio2 \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

 WORKDIR /workspace

-# Copy dependency files first for better layer caching
 COPY pyproject.toml uv.lock* ./

 # =============================================================================
-# Server Stage - GPU Enabled
+# Production Server Stage (minimal deps)
 # =============================================================================
+FROM base AS build
+ARG SPACY_MODEL_URL
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        pkg-config \
+        portaudio19-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-install-project --no-dev
+
+COPY . .
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-dev
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install "${SPACY_MODEL_URL}"
+
 FROM base AS server

-# Install dependencies with CUDA-enabled PyTorch
-# The --extra-index-url ensures we get CUDA-enabled torch
+COPY --from=build /workspace /workspace
+
+COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
+RUN chmod +x /usr/local/bin/entrypoint-gpu.sh
+
+RUN useradd --create-home --shell /bin/bash noteflow
+USER noteflow
+
+EXPOSE 50051
+
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD /workspace/.venv/bin/python -c "import grpc; channel = grpc.insecure_channel('localhost:50051'); grpc.channel_ready_future(channel).result(timeout=5)" || exit 1
+
+ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
+CMD ["uv", "run", "--no-sync", "python", "-m", "noteflow.grpc.server"]
+
+# =============================================================================
+# Development Server Stage (all extras + dev deps + hot reload)
+# =============================================================================
+FROM base AS dev
+ARG SPACY_MODEL_URL
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        pkg-config \
+        portaudio19-dev \
+    && rm -rf /var/lib/apt/lists/*
+
 RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project --group dev --all-extras

-# Copy source code
 COPY . .

-# Install the project itself
 RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --group dev --all-extras

-# Install spaCy small English model for NER (baked into image)
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+    uv pip install "${SPACY_MODEL_URL}"

-# Verify CUDA and cuDNN are accessible
-# Note: torch.cuda.is_available() may return False during build (no GPU)
-# but cuDNN libraries should be present in the image for runtime
-RUN python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'cuDNN version: {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else \"N/A\"}')" || true
-# Verify cuDNN shared libraries are present
-RUN ldconfig -p | grep -i cudnn || echo "cuDNN libraries will be available at runtime"
-
-# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly
 COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
 RUN chmod +x /usr/local/bin/entrypoint-gpu.sh

 EXPOSE 50051

-# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
-# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
-ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
-CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]
-
-# =============================================================================
-# Server Production Stage - GPU Enabled with all extras
-# =============================================================================
-FROM base AS server-full
-
-# Install all dependencies including optional extras
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --no-install-project --group dev --all-extras
-
-# Copy source code
-COPY . .
-
-# Install the project itself
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --group dev --all-extras
-
-# Install spaCy small English model for NER (baked into image)
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
-
-# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly
-COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
-RUN chmod +x /usr/local/bin/entrypoint-gpu.sh
-
-EXPOSE 50051
-
-# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
-# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
 ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
 CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]
--- a/docker/server.Dockerfile
+++ b/docker/server.Dockerfile
@@ -1,91 +1,88 @@
 # syntax=docker/dockerfile:1
-FROM python:3.12-bookworm AS base
+ARG PYTHON_VERSION=3.12
+ARG SPACY_MODEL_URL=https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+
+FROM python:${PYTHON_VERSION}-slim-bookworm AS base

 ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy

-# Install uv
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

-# Core build/runtime deps for project packages (sounddevice, asyncpg, cryptography).
 RUN apt-get update \
    && apt-get install -y --no-install-recommends \
-        build-essential \
-        pkg-config \
-        portaudio19-dev \
+        libportaudio2 \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

 WORKDIR /workspace

-# Copy dependency files first for better layer caching
 COPY pyproject.toml uv.lock* ./

 # =============================================================================
-# Server Stage
+# Production Server Stage (minimal deps, no dev tooling)
 # =============================================================================
+FROM base AS build
+ARG SPACY_MODEL_URL
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        pkg-config \
+        portaudio19-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-install-project --no-dev
+
+COPY . .
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-dev
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install "${SPACY_MODEL_URL}"
+
 FROM base AS server

-# Install dependencies (server needs dev deps for watchfiles)
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --no-install-project --group dev --all-extras
+COPY --from=build /workspace /workspace

-# Copy source code
-COPY . .
-
-# Install the project itself
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --group dev --all-extras
-
-# Install spaCy small English model for NER (baked into image)
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+RUN useradd --create-home --shell /bin/bash noteflow
+USER noteflow

 EXPOSE 50051

-CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD /workspace/.venv/bin/python -c "import grpc; channel = grpc.insecure_channel('localhost:50051'); grpc.channel_ready_future(channel).result(timeout=5)" || exit 1
+
+CMD ["uv", "run", "--no-sync", "python", "-m", "noteflow.grpc.server"]

 # =============================================================================
-# Server Production Stage (all optional dependencies)
+# Development Server Stage (all extras + dev deps + hot reload)
 # =============================================================================
-FROM base AS server-full
-
-# Install all dependencies including optional extras
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --no-install-project --group dev --all-extras
-
-# Copy source code
-COPY . .
-
-# Install the project itself
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --group dev --all-extras
-
-# Install spaCy small English model for NER (baked into image)
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
-
-EXPOSE 50051
-
-CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
-
-# -----------------------------------------------------------------------------
-# NER stage: Add spaCy model for named entity recognition
-# -----------------------------------------------------------------------------
-FROM base AS with-ner
-
-# Install NER dependencies and download spaCy model
-RUN uv pip install -e ".[ner]" \
-    && uv run python -m spacy download en_core_web_sm
-
-# Verify model is available
-RUN uv run python -c "import spacy; nlp = spacy.load('en_core_web_sm'); print('NER model loaded successfully')"
-
-# -----------------------------------------------------------------------------
-# Development target (default)
-# -----------------------------------------------------------------------------
 FROM base AS dev
+ARG SPACY_MODEL_URL

-CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        pkg-config \
+        portaudio19-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-install-project --group dev --all-extras
+
+COPY . .
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --group dev --all-extras
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install "${SPACY_MODEL_URL}"
+
+EXPOSE 50051
+
+CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]