diff --git a/compose.yaml b/compose.yaml
index 8d2331e..625655d 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -94,8 +94,10 @@ services:
   # - On macOS: GPU passthrough not supported (use CPU profile)
 
   # CPU-only server (default, cross-platform)
+  # Build: docker buildx bake server
   server:
     container_name: noteflow-server
+    image: noteflow-server:latest
     build:
       context: .
       dockerfile: docker/server.Dockerfile
@@ -131,8 +133,10 @@ services:
       - full
 
   # GPU-enabled server (NVIDIA CUDA)
+  # Build: docker buildx bake server-gpu
   server-gpu:
     container_name: noteflow-server
+    image: noteflow-server-gpu:latest
     build:
       context: .
       dockerfile: docker/server-gpu.Dockerfile
diff --git a/docker-bake.hcl b/docker-bake.hcl
new file mode 100644
index 0000000..b103db0
--- /dev/null
+++ b/docker-bake.hcl
@@ -0,0 +1,278 @@
+# docker-bake.hcl
+# Docker Buildx Bake configuration for NoteFlow
+#
+# Usage:
+#   docker buildx bake               # Build default targets
+#   docker buildx bake server        # Build CPU server only
+#   docker buildx bake server-gpu    # Build GPU server only
+#   docker buildx bake servers       # Build all server variants (parallel)
+#   docker buildx bake client        # Build client targets
+#   docker buildx bake all           # Build everything
+#   docker buildx bake --print       # Show build plan without building
+#
+# With specific options:
+#   docker buildx bake server --set server.tags=myregistry/noteflow:v1.0
+#   docker buildx bake --push all    # Build and push all images
+
+# =============================================================================
+# Variables
+# =============================================================================
+
+variable "REGISTRY" {
+  default = ""
+}
+
+variable "IMAGE_PREFIX" {
+  default = "noteflow"
+}
+
+variable "TAG" {
+  default = "latest"
+}
+
+variable "PYTHON_VERSION" {
+  default = "3.12"
+}
+
+variable "CUDA_VERSION" {
+  default = "12.4.1"
+}
+
+variable "SPACY_MODEL_URL" {
+  default = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl"
+}
+
+# =============================================================================
+# Functions
+# =============================================================================
+
+function "tag" {
+  params = [name]
+  result = REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-${name}:${TAG}" : "${IMAGE_PREFIX}-${name}:${TAG}"
+}
+
+function "tags" {
+  params = [name]
+  result = [
+    tag(name),
+    REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-${name}:latest" : "${IMAGE_PREFIX}-${name}:latest"
+  ]
+}
+
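# For example, with the defaults above, tag("server") evaluates to "noteflow-server:latest"
# and tags("server") to ["noteflow-server:latest", "noteflow-server:latest"] (the two entries
# coincide while TAG is "latest"); with REGISTRY = "ghcr.io/acme" and TAG = "v1.2.0" the same
# call yields ["ghcr.io/acme/noteflow-server:v1.2.0", "ghcr.io/acme/noteflow-server:latest"].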
+# =============================================================================
+# Groups - Enable parallel builds
+# =============================================================================
+
+group "default" {
+  targets = ["server"]
+}
+
+group "servers" {
+  targets = ["server", "server-gpu"]
+}
+
+group "servers-full" {
+  targets = ["server", "server-full", "server-gpu", "server-gpu-full"]
+}
+
+group "client" {
+  targets = ["client-build", "client-dev"]
+}
+
+group "all" {
+  targets = ["server", "server-full", "server-gpu", "server-gpu-full", "client-build"]
+}
+
+group "ci" {
+  targets = ["server", "server-gpu", "client-build"]
+}
+
+# =============================================================================
+# Base Targets (inherited)
+# =============================================================================
+
+target "_common" {
+  context = "."
+  labels = {
+    "org.opencontainers.image.source" = "https://github.com/noteflow/noteflow"
+    "org.opencontainers.image.vendor" = "NoteFlow"
+  }
+}
+
+target "_server-common" {
+  inherits = ["_common"]
+  dockerfile = "docker/server.Dockerfile"
+  args = {
+    PYTHON_VERSION = PYTHON_VERSION
+    SPACY_MODEL_URL = SPACY_MODEL_URL
+  }
+  cache-from = [
+    "type=registry,ref=${REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-server:cache" : "${IMAGE_PREFIX}-server:cache"}"
+  ]
+  cache-to = [
+    "type=inline"
+  ]
+}
+
+target "_server-gpu-common" {
+  inherits = ["_common"]
+  dockerfile = "docker/server-gpu.Dockerfile"
+  args = {
+    PYTHON_VERSION = PYTHON_VERSION
+    CUDA_VERSION = CUDA_VERSION
+    SPACY_MODEL_URL = SPACY_MODEL_URL
+  }
+  cache-from = [
+    "type=registry,ref=${REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-server-gpu:cache" : "${IMAGE_PREFIX}-server-gpu:cache"}"
+  ]
+  cache-to = [
+    "type=inline"
+  ]
+}
+
+target "_client-common" {
+  inherits = ["_common"]
+  dockerfile = "docker/client.Dockerfile"
+  cache-from = [
+    "type=registry,ref=${REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-client:cache" : "${IMAGE_PREFIX}-client:cache"}"
+  ]
+  cache-to = [
+    "type=inline"
+  ]
+}
+
+# =============================================================================
+# Server Targets (CPU)
+# =============================================================================
+
+target "server" {
+  inherits = ["_server-common"]
+  target = "server"
+  tags = tags("server")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server (CPU)"
+    "org.opencontainers.image.description" = "NoteFlow gRPC server - CPU-only build"
+  }
+}
+
+target "server-full" {
+  inherits = ["_server-common"]
+  target = "server-full"
+  tags = tags("server-full")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server Full (CPU)"
+    "org.opencontainers.image.description" = "NoteFlow gRPC server with all extras - CPU-only build"
+  }
+}
+
+target "server-dev" {
+  inherits = ["_server-common"]
+  target = "dev"
+  tags = tags("server-dev")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server Dev (CPU)"
+    "org.opencontainers.image.description" = "NoteFlow development server - CPU-only build"
+  }
+}
+
+target "server-ner" {
+  inherits = ["_server-common"]
+  target = "with-ner"
+  tags = tags("server-ner")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server with NER (CPU)"
+    "org.opencontainers.image.description" = "NoteFlow server with spaCy NER - CPU-only build"
+  }
+}
+
+# =============================================================================
+# Server Targets (GPU - NVIDIA CUDA)
+# =============================================================================
+
+target "server-gpu" {
+  inherits = ["_server-gpu-common"]
+  target = "server"
+  tags = tags("server-gpu")
+  platforms = ["linux/amd64"]  # GPU images are x86_64 only
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server (GPU)"
+    "org.opencontainers.image.description" = "NoteFlow gRPC server - NVIDIA CUDA GPU build"
+    "ai.noteflow.cuda.version" = CUDA_VERSION
+  }
+}
+
+target "server-gpu-full" {
+  inherits = ["_server-gpu-common"]
+  target = "server-full"
+  tags = tags("server-gpu-full")
+  platforms = ["linux/amd64"]
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server Full (GPU)"
+    "org.opencontainers.image.description" = "NoteFlow gRPC server with all extras - NVIDIA CUDA GPU build"
+    "ai.noteflow.cuda.version" = CUDA_VERSION
+  }
+}
+
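# Note: every concrete target here is resolved by merging its `inherits` chain
# ("server-gpu" = "_common" + "_server-gpu-common" + its own target/tags/platforms/labels);
# `docker buildx bake --print server-gpu` shows the fully merged result.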
+# =============================================================================
+# Client Targets
+# =============================================================================
+
+target "client-build" {
+  inherits = ["_client-common"]
+  target = "client-build"
+  tags = tags("client")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Client Build"
+    "org.opencontainers.image.description" = "NoteFlow Tauri desktop client build"
+  }
+}
+
+target "client-dev" {
+  inherits = ["_client-common"]
+  target = "client-dev"
+  tags = tags("client-dev")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Client Dev"
+    "org.opencontainers.image.description" = "NoteFlow Tauri client development environment"
+  }
+}
+
+# =============================================================================
+# Multi-Platform Targets (for CPU images)
+# =============================================================================
+
+target "server-multiplatform" {
+  inherits = ["server"]
+  platforms = ["linux/amd64", "linux/arm64"]
+  tags = tags("server-multiplatform")
+}
+
+target "server-full-multiplatform" {
+  inherits = ["server-full"]
+  platforms = ["linux/amd64", "linux/arm64"]
+  tags = tags("server-full-multiplatform")
+}
+
+# =============================================================================
+# CI/CD Specific Targets
+# =============================================================================
+
+target "server-ci" {
+  inherits = ["server"]
+  cache-from = [
+    "type=gha"
+  ]
+  cache-to = [
+    "type=gha,mode=max"
+  ]
+}
+
+target "server-gpu-ci" {
+  inherits = ["server-gpu"]
+  cache-from = [
+    "type=gha"
+  ]
+  cache-to = [
+    "type=gha,mode=max"
+  ]
+}
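Because `REGISTRY`, `TAG`, `PYTHON_VERSION`, and `CUDA_VERSION` are Bake variables, they can also be overridden through environment variables of the same name rather than `--set`. A quick way to sanity-check the image names that will be produced (a sketch; the registry and tag values are illustrative):

```bash
# Local defaults: noteflow-server:latest
docker buildx bake --print server

# Registry-qualified, versioned tags for both server variants
REGISTRY=ghcr.io/acme TAG=v1.2.0 docker buildx bake --print servers
```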
diff --git a/docker/CLAUDE.md b/docker/CLAUDE.md
index 2b1298f..539c306 100644
--- a/docker/CLAUDE.md
+++ b/docker/CLAUDE.md
@@ -56,6 +56,92 @@ sudo systemctl restart docker
 
 ---
 
+## Docker Bake (Parallel Builds)
+
+NoteFlow uses Docker Buildx Bake for efficient parallel builds. Configuration is in `docker-bake.hcl`.
+
+### Available Targets
+
+| Target | Description | Platform |
+|--------|-------------|----------|
+| `server` | CPU-only gRPC server | linux/amd64, linux/arm64 |
+| `server-full` | CPU server with all extras | linux/amd64, linux/arm64 |
+| `server-gpu` | NVIDIA CUDA GPU server | linux/amd64 |
+| `server-gpu-full` | GPU server with all extras | linux/amd64 |
+| `client-build` | Tauri client build | linux/amd64 |
+| `client-dev` | Client development env | linux/amd64 |
+
+### Build Groups (Parallel)
+
+| Group | Targets | Use Case |
+|-------|---------|----------|
+| `default` | server | Quick dev build |
+| `servers` | server, server-gpu | Both CPU/GPU variants |
+| `servers-full` | All server variants | Full production build |
+| `all` | Everything | Complete rebuild |
+| `ci` | server, server-gpu, client-build | CI/CD pipeline |
+
+### Usage Examples
+
+```bash
+# Build default (CPU server)
+docker buildx bake
+
+# Build GPU server only
+docker buildx bake server-gpu
+
+# Build CPU and GPU servers in parallel
+docker buildx bake servers
+
+# Build all targets in parallel
+docker buildx bake all
+
+# Show build plan without building
+docker buildx bake --print servers
+
+# Build with custom registry and tag
+docker buildx bake --set "*.tags=myregistry.io/noteflow:v1.0" servers
+
+# Build and push to registry
+docker buildx bake --push servers
+
+# Use GitHub Actions cache (in CI)
+docker buildx bake server-ci server-gpu-ci
+```
+
+### Variables
+
+Override at build time with `--set`:
+
+```bash
+# Use different CUDA version
+docker buildx bake --set server-gpu.args.CUDA_VERSION=12.5.0 server-gpu
+
+# Use custom registry
+docker buildx bake --set "*.tags=ghcr.io/myorg/noteflow:sha-abc123" all
+```
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `REGISTRY` | (none) | Container registry prefix |
+| `TAG` | latest | Image tag |
+| `PYTHON_VERSION` | 3.12 | Python version |
+| `CUDA_VERSION` | 12.4.1 | CUDA version for GPU builds |
+
+### Integration with Compose
+
+After building with bake, use pre-built images in compose:
+
+```bash
+# Build images
+docker buildx bake servers
+
+# Run with pre-built images (no --build needed)
+docker compose --profile server-gpu --profile infra up -d
+```
+
+---
+
 ## Rule: Minimal Base Images
 
 **Level**: `strict`
diff --git a/docker/entrypoint-gpu.sh b/docker/entrypoint-gpu.sh
new file mode 100644
index 0000000..d852d4c
--- /dev/null
+++ b/docker/entrypoint-gpu.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# GPU entrypoint script
+# Sets LD_LIBRARY_PATH to prioritize PyTorch's bundled cuDNN 9.8.0 over system cuDNN 9.1.0
+
+# PyTorch bundles cuDNN 9.8.0 in its site-packages
+# We must add these paths FIRST to override system cuDNN 9.1.0
+PYTORCH_NVIDIA_LIBS="/workspace/.venv/lib/python3.12/site-packages/nvidia"
+
+export LD_LIBRARY_PATH="${PYTORCH_NVIDIA_LIBS}/cudnn/lib:${PYTORCH_NVIDIA_LIBS}/cublas/lib:${PYTORCH_NVIDIA_LIBS}/cuda_runtime/lib:${PYTORCH_NVIDIA_LIBS}/cufft/lib:${PYTORCH_NVIDIA_LIBS}/cusolver/lib:${PYTORCH_NVIDIA_LIBS}/cusparse/lib:${PYTORCH_NVIDIA_LIBS}/nccl/lib:${PYTORCH_NVIDIA_LIBS}/nvtx/lib:/usr/local/cuda/lib64"
+
+echo "=== GPU Entrypoint ==="
+echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
+echo "Checking cuDNN libraries..."
+ls -la "${PYTORCH_NVIDIA_LIBS}/cudnn/lib/" 2>/dev/null | head -5 || echo "cuDNN libs not found (will be installed on first run)"
+echo "======================"
+
+# Run uv sync to ensure dependencies are installed
+uv sync --frozen --group dev --all-extras
+
+# Execute the command passed to docker run
+exec "$@"
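The ordering inside `LD_LIBRARY_PATH` is what does the work here: the dynamic linker searches those directories left to right before the system's ldconfig cache, so the wheel-provided cuDNN 9.8.0 is found ahead of the cuDNN 9.1.0 shipped in the CUDA base image. To confirm which cuDNN actually gets loaded (a sketch, assuming a GPU host and the `server-gpu` compose service; cuDNN 9.8.x reports as a 908xx integer):

```bash
docker compose run --rm server-gpu \
  uv run python -c "import torch; print(torch.backends.cudnn.version())"
# expect 908xx (PyTorch's bundled 9.8.0) rather than 901xx (system 9.1.0)
```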
diff --git a/docker/server-gpu.Dockerfile b/docker/server-gpu.Dockerfile
index 7f4624f..4e6bc9a 100644
--- a/docker/server-gpu.Dockerfile
+++ b/docker/server-gpu.Dockerfile
@@ -3,19 +3,44 @@
 # Use this for systems with NVIDIA GPUs
 # =============================================================================
 
-# Base Stage - CUDA-enabled Python using official PyTorch CUDA image
+# Python Stage - Get Python 3.12 from official image
 # =============================================================================
-# Using Python 3.12 bookworm with CUDA support via PyTorch's approach
-FROM python:3.12-bookworm AS base
+FROM python:3.12-slim-bookworm AS python-base
 
-# CUDA environment variables (for runtime GPU detection)
+# =============================================================================
+# Base Stage - NVIDIA CUDA with cuDNN for GPU-accelerated inference
+# =============================================================================
+# Using NVIDIA's official CUDA image with cuDNN 9.x for CTranslate2/faster-whisper
+# The runtime image includes cuDNN libraries required for GPU inference
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
+
+# CUDA/cuDNN environment variables
+# NOTE: PyTorch bundles cuDNN 9.8.0, but system has 9.1.0
+# We set LD_LIBRARY_PATH at runtime to prioritize PyTorch's bundled cuDNN
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     UV_COMPILE_BYTECODE=1 \
     UV_LINK_MODE=copy \
     # CUDA environment - these tell nvidia-container-runtime to inject GPU
     NVIDIA_VISIBLE_DEVICES=all \
-    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
+    # Base CUDA path (cuDNN paths added at runtime to use PyTorch's bundled version)
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64 \
+    # Python path configuration
+    PATH=/usr/local/bin:$PATH
+
+# Copy Python 3.12 from official image (avoids PPA network issues)
+COPY --from=python-base /usr/local/bin/python3.12 /usr/local/bin/python3.12
+COPY --from=python-base /usr/local/bin/python3 /usr/local/bin/python3
+COPY --from=python-base /usr/local/bin/pip3 /usr/local/bin/pip3
+COPY --from=python-base /usr/local/lib/python3.12 /usr/local/lib/python3.12
+COPY --from=python-base /usr/local/include/python3.12 /usr/local/include/python3.12
+COPY --from=python-base /usr/local/lib/libpython3.12.so* /usr/local/lib/
+
+# Create symlinks for python/pip commands
+RUN ln -sf /usr/local/bin/python3.12 /usr/local/bin/python \
+    && ln -sf /usr/local/bin/pip3 /usr/local/bin/pip \
+    && ldconfig
 
 # Install uv and system dependencies
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
@@ -55,12 +80,23 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
 
-# Verify CUDA is accessible (will fail build if CUDA libs missing)
-RUN python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" || true
+# Verify CUDA and cuDNN are accessible
+# Note: torch.cuda.is_available() may return False during build (no GPU)
+# but cuDNN libraries should be present in the image for runtime
+RUN python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'cuDNN version: {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else \"N/A\"}')" || true
+# Verify cuDNN shared libraries are present
+RUN ldconfig -p | grep -i cudnn || echo "cuDNN libraries will be available at runtime"
+
+# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly
+COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
+RUN chmod +x /usr/local/bin/entrypoint-gpu.sh
 
 EXPOSE 50051
 
-CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
+# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
+# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
+ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
+CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]
 
 # =============================================================================
 # Server Production Stage - GPU Enabled with all extras
@@ -82,6 +118,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
 
+# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly
+COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
+RUN chmod +x /usr/local/bin/entrypoint-gpu.sh
+
 EXPOSE 50051
 
-CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
+# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
+# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
+ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
+CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]
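Because the interpreter is copied from the slim Python image rather than installed from Ubuntu packages, a quick smoke test of the built image is worthwhile (image name as tagged by the `server-gpu` bake target and the compose file; `--entrypoint` bypasses the uv sync entrypoint):

```bash
docker run --rm --entrypoint python noteflow-server-gpu:latest --version
# Python 3.12.x
```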
python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'cuDNN version: {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else \"N/A\"}')" || true +# Verify cuDNN shared libraries are present +RUN ldconfig -p | grep -i cudnn || echo "cuDNN libraries will be available at runtime" + +# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly +COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh +RUN chmod +x /usr/local/bin/entrypoint-gpu.sh EXPOSE 50051 -CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"] +# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime +# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0 +ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"] +CMD ["uv", "run", "python", "scripts/dev_watch_server.py"] # ============================================================================= # Server Production Stage - GPU Enabled with all extras @@ -82,6 +118,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl +# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly +COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh +RUN chmod +x /usr/local/bin/entrypoint-gpu.sh + EXPOSE 50051 -CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"] +# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime +# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0 +ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"] +CMD ["uv", "run", "python", "scripts/dev_watch_server.py"] diff --git a/src/noteflow/grpc/server/internal/setup.py b/src/noteflow/grpc/server/internal/setup.py index 14a26be..83e5d9d 100644 --- a/src/noteflow/grpc/server/internal/setup.py +++ b/src/noteflow/grpc/server/internal/setup.py @@ -20,8 +20,16 @@ def create_server() -> grpc.aio.Server: IdentityInterceptor(), ] options = [ + # Message size limits ("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB ("grpc.max_receive_message_length", 100 * 1024 * 1024), + # HTTP/2 keep-alive settings to match Rust client configuration + # Client sends pings every 30s with 20s timeout; server must respond + ("grpc.keepalive_time_ms", 30_000), # Send ping every 30s + ("grpc.keepalive_timeout_ms", 20_000), # Wait 20s for ping response + ("grpc.keepalive_permit_without_calls", True), # Allow pings on idle + ("grpc.http2.min_recv_ping_interval_without_data_ms", 10_000), # Min 10s between pings + ("grpc.http2.max_pings_without_data", 0), # Unlimited pings without data ] return grpc.aio.server( interceptors=interceptors, diff --git a/src/noteflow/infrastructure/diarization/session.py b/src/noteflow/infrastructure/diarization/session.py index d2915db..e5c9672 100644 --- a/src/noteflow/infrastructure/diarization/session.py +++ b/src/noteflow/infrastructure/diarization/session.py @@ -223,12 +223,13 @@ class DiarizationSession: """ from pyannote.core import SlidingWindow, SlidingWindowFeature - # Reshape to (channels, samples) - pyannote expects channels-first format - audio_2d = audio.reshape(1, -1) + # Reshape to (samples, channels) - pyannote SlidingWindowFeature interprets + # data.shape[0] as number of frames. 
diff --git a/src/noteflow/infrastructure/diarization/session.py b/src/noteflow/infrastructure/diarization/session.py
index d2915db..e5c9672 100644
--- a/src/noteflow/infrastructure/diarization/session.py
+++ b/src/noteflow/infrastructure/diarization/session.py
@@ -223,12 +223,13 @@ class DiarizationSession:
         """
         from pyannote.core import SlidingWindow, SlidingWindowFeature
 
-        # Reshape to (channels, samples) - pyannote expects channels-first format
-        audio_2d = audio.reshape(1, -1)
+        # Reshape to (samples, channels) - pyannote SlidingWindowFeature interprets
+        # data.shape[0] as number of frames. Using (1, N) caused diart to see 1 frame
+        # instead of N frames, triggering "Expected 160000 samples but got 1" errors.
+        audio_2d = audio.reshape(-1, 1)
 
         # Configure window for per-sample temporal resolution.
         # Each row in the data array represents one audio sample with duration 1/sample_rate.
-        # Using chunk duration here was incorrect and caused frames/weights mismatch warnings.
         sample_duration = 1.0 / sample_rate
         window = SlidingWindow(start=0.0, duration=sample_duration, step=sample_duration)
         return SlidingWindowFeature(audio_2d, window)
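A minimal sketch of why the `(samples, 1)` layout matters, assuming `pyannote.core` is installed and a 10 s mono chunk at 16 kHz (values chosen to match the error message above):

```python
import numpy as np
from pyannote.core import SlidingWindow, SlidingWindowFeature

sample_rate = 16000
audio = np.zeros(10 * sample_rate, dtype=np.float32)  # 10 s mono chunk

step = 1.0 / sample_rate
window = SlidingWindow(start=0.0, duration=step, step=step)

correct = SlidingWindowFeature(audio.reshape(-1, 1), window)  # one frame per sample
wrong = SlidingWindowFeature(audio.reshape(1, -1), window)    # a single 160000-channel frame

print(correct.data.shape[0])  # 160000 frames, what diart expects
print(wrong.data.shape[0])    # 1 frame -> "Expected 160000 samples but got 1"
```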