diff --git a/compose.yaml b/compose.yaml
index 8d2331e..625655d 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -94,8 +94,10 @@ services:
   # - On macOS: GPU passthrough not supported (use CPU profile)
 
   # CPU-only server (default, cross-platform)
+  # Build: docker buildx bake server
   server:
     container_name: noteflow-server
+    image: noteflow-server:latest
     build:
       context: .
       dockerfile: docker/server.Dockerfile
@@ -131,8 +133,10 @@ services:
       - full
 
   # GPU-enabled server (NVIDIA CUDA)
+  # Build: docker buildx bake server-gpu
   server-gpu:
     container_name: noteflow-server
+    image: noteflow-server-gpu:latest
     build:
       context: .
       dockerfile: docker/server-gpu.Dockerfile
diff --git a/docker-bake.hcl b/docker-bake.hcl
new file mode 100644
index 0000000..b103db0
--- /dev/null
+++ b/docker-bake.hcl
@@ -0,0 +1,278 @@
+# docker-bake.hcl
+# Docker Buildx Bake configuration for NoteFlow
+#
+# Usage:
+#   docker buildx bake               # Build default targets
+#   docker buildx bake server        # Build CPU server only
+#   docker buildx bake server-gpu    # Build GPU server only
+#   docker buildx bake servers       # Build all server variants (parallel)
+#   docker buildx bake client        # Build client targets
+#   docker buildx bake all           # Build everything
+#   docker buildx bake --print       # Show build plan without building
+#
+# With specific options:
+#   docker buildx bake server --set server.tags=myregistry/noteflow:v1.0
+#   docker buildx bake --push all    # Build and push all images
+
+# =============================================================================
+# Variables
+# =============================================================================
+
+variable "REGISTRY" {
+  default = ""
+}
+
+variable "IMAGE_PREFIX" {
+  default = "noteflow"
+}
+
+variable "TAG" {
+  default = "latest"
+}
+
+variable "PYTHON_VERSION" {
+  default = "3.12"
+}
+
+variable "CUDA_VERSION" {
+  default = "12.4.1"
+}
+
+variable "SPACY_MODEL_URL" {
+  default = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl"
+}
+
+# =============================================================================
+# Functions
+# =============================================================================
+
+function "tag" {
+  params = [name]
+  result = REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-${name}:${TAG}" : "${IMAGE_PREFIX}-${name}:${TAG}"
+}
+
+function "tags" {
+  params = [name]
+  result = [
+    tag(name),
+    REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-${name}:latest" : "${IMAGE_PREFIX}-${name}:latest"
+  ]
+}
+
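# For example, with the defaults above, tag("server") evaluates to "noteflow-server:latest"
# and tags("server") to ["noteflow-server:latest", "noteflow-server:latest"] (the two entries
# coincide while TAG is "latest"); with REGISTRY = "ghcr.io/acme" and TAG = "v1.2.0" the same
# call yields ["ghcr.io/acme/noteflow-server:v1.2.0", "ghcr.io/acme/noteflow-server:latest"].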
+# =============================================================================
+# Groups - Enable parallel builds
+# =============================================================================
+
+group "default" {
+  targets = ["server"]
+}
+
+group "servers" {
+  targets = ["server", "server-gpu"]
+}
+
+group "servers-full" {
+  targets = ["server", "server-full", "server-gpu", "server-gpu-full"]
+}
+
+group "client" {
+  targets = ["client-build", "client-dev"]
+}
+
+group "all" {
+  targets = ["server", "server-full", "server-gpu", "server-gpu-full", "client-build"]
+}
+
+group "ci" {
+  targets = ["server", "server-gpu", "client-build"]
+}
+
+# =============================================================================
+# Base Targets (inherited)
+# =============================================================================
+
+target "_common" {
+  context = "."
+  labels = {
+    "org.opencontainers.image.source" = "https://github.com/noteflow/noteflow"
+    "org.opencontainers.image.vendor" = "NoteFlow"
+  }
+}
+
+target "_server-common" {
+  inherits = ["_common"]
+  dockerfile = "docker/server.Dockerfile"
+  args = {
+    PYTHON_VERSION = PYTHON_VERSION
+    SPACY_MODEL_URL = SPACY_MODEL_URL
+  }
+  cache-from = [
+    "type=registry,ref=${REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-server:cache" : "${IMAGE_PREFIX}-server:cache"}"
+  ]
+  cache-to = [
+    "type=inline"
+  ]
+}
+
+target "_server-gpu-common" {
+  inherits = ["_common"]
+  dockerfile = "docker/server-gpu.Dockerfile"
+  args = {
+    PYTHON_VERSION = PYTHON_VERSION
+    CUDA_VERSION = CUDA_VERSION
+    SPACY_MODEL_URL = SPACY_MODEL_URL
+  }
+  cache-from = [
+    "type=registry,ref=${REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-server-gpu:cache" : "${IMAGE_PREFIX}-server-gpu:cache"}"
+  ]
+  cache-to = [
+    "type=inline"
+  ]
+}
+
+target "_client-common" {
+  inherits = ["_common"]
+  dockerfile = "docker/client.Dockerfile"
+  cache-from = [
+    "type=registry,ref=${REGISTRY != "" ? "${REGISTRY}/${IMAGE_PREFIX}-client:cache" : "${IMAGE_PREFIX}-client:cache"}"
+  ]
+  cache-to = [
+    "type=inline"
+  ]
+}
+
+# =============================================================================
+# Server Targets (CPU)
+# =============================================================================
+
+target "server" {
+  inherits = ["_server-common"]
+  target = "server"
+  tags = tags("server")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server (CPU)"
+    "org.opencontainers.image.description" = "NoteFlow gRPC server - CPU-only build"
+  }
+}
+
+target "server-full" {
+  inherits = ["_server-common"]
+  target = "server-full"
+  tags = tags("server-full")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server Full (CPU)"
+    "org.opencontainers.image.description" = "NoteFlow gRPC server with all extras - CPU-only build"
+  }
+}
+
+target "server-dev" {
+  inherits = ["_server-common"]
+  target = "dev"
+  tags = tags("server-dev")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server Dev (CPU)"
+    "org.opencontainers.image.description" = "NoteFlow development server - CPU-only build"
+  }
+}
+
+target "server-ner" {
+  inherits = ["_server-common"]
+  target = "with-ner"
+  tags = tags("server-ner")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server with NER (CPU)"
+    "org.opencontainers.image.description" = "NoteFlow server with spaCy NER - CPU-only build"
+  }
+}
+
+# =============================================================================
+# Server Targets (GPU - NVIDIA CUDA)
+# =============================================================================
+
+target "server-gpu" {
+  inherits = ["_server-gpu-common"]
+  target = "server"
+  tags = tags("server-gpu")
+  platforms = ["linux/amd64"]  # GPU images are x86_64 only
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server (GPU)"
+    "org.opencontainers.image.description" = "NoteFlow gRPC server - NVIDIA CUDA GPU build"
+    "ai.noteflow.cuda.version" = CUDA_VERSION
+  }
+}
+
+target "server-gpu-full" {
+  inherits = ["_server-gpu-common"]
+  target = "server-full"
+  tags = tags("server-gpu-full")
+  platforms = ["linux/amd64"]
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Server Full (GPU)"
+    "org.opencontainers.image.description" = "NoteFlow gRPC server with all extras - NVIDIA CUDA GPU build"
+    "ai.noteflow.cuda.version" = CUDA_VERSION
+  }
+}
+
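# Note: every concrete target here is resolved by merging its `inherits` chain
# ("server-gpu" = "_common" + "_server-gpu-common" + its own target/tags/platforms/labels);
# `docker buildx bake --print server-gpu` shows the fully merged result.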
+# =============================================================================
+# Client Targets
+# =============================================================================
+
+target "client-build" {
+  inherits = ["_client-common"]
+  target = "client-build"
+  tags = tags("client")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Client Build"
+    "org.opencontainers.image.description" = "NoteFlow Tauri desktop client build"
+  }
+}
+
+target "client-dev" {
+  inherits = ["_client-common"]
+  target = "client-dev"
+  tags = tags("client-dev")
+  labels = {
+    "org.opencontainers.image.title" = "NoteFlow Client Dev"
+    "org.opencontainers.image.description" = "NoteFlow Tauri client development environment"
+  }
+}
+
+# =============================================================================
+# Multi-Platform Targets (for CPU images)
+# =============================================================================
+
+target "server-multiplatform" {
+  inherits = ["server"]
+  platforms = ["linux/amd64", "linux/arm64"]
+  tags = tags("server-multiplatform")
+}
+
+target "server-full-multiplatform" {
+  inherits = ["server-full"]
+  platforms = ["linux/amd64", "linux/arm64"]
+  tags = tags("server-full-multiplatform")
+}
+
+# =============================================================================
+# CI/CD Specific Targets
+# =============================================================================
+
+target "server-ci" {
+  inherits = ["server"]
+  cache-from = [
+    "type=gha"
+  ]
+  cache-to = [
+    "type=gha,mode=max"
+  ]
+}
+
+target "server-gpu-ci" {
+  inherits = ["server-gpu"]
+  cache-from = [
+    "type=gha"
+  ]
+  cache-to = [
+    "type=gha,mode=max"
+  ]
+}
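Because `REGISTRY`, `TAG`, `PYTHON_VERSION`, and `CUDA_VERSION` are Bake variables, they can also be overridden through environment variables of the same name rather than `--set`. A quick way to sanity-check the image names that will be produced (a sketch; the registry and tag values are illustrative):

```bash
# Local defaults: noteflow-server:latest
docker buildx bake --print server

# Registry-qualified, versioned tags for both server variants
REGISTRY=ghcr.io/acme TAG=v1.2.0 docker buildx bake --print servers
```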
diff --git a/docker/CLAUDE.md b/docker/CLAUDE.md
index 2b1298f..539c306 100644
--- a/docker/CLAUDE.md
+++ b/docker/CLAUDE.md
@@ -56,6 +56,92 @@ sudo systemctl restart docker
 
 ---
 
+## Docker Bake (Parallel Builds)
+
+NoteFlow uses Docker Buildx Bake for efficient parallel builds. Configuration is in `docker-bake.hcl`.
+
+### Available Targets
+
+| Target | Description | Platform |
+|--------|-------------|----------|
+| `server` | CPU-only gRPC server | linux/amd64, linux/arm64 |
+| `server-full` | CPU server with all extras | linux/amd64, linux/arm64 |
+| `server-gpu` | NVIDIA CUDA GPU server | linux/amd64 |
+| `server-gpu-full` | GPU server with all extras | linux/amd64 |
+| `client-build` | Tauri client build | linux/amd64 |
+| `client-dev` | Client development env | linux/amd64 |
+
+### Build Groups (Parallel)
+
+| Group | Targets | Use Case |
+|-------|---------|----------|
+| `default` | server | Quick dev build |
+| `servers` | server, server-gpu | Both CPU/GPU variants |
+| `servers-full` | All server variants | Full production build |
+| `all` | Everything | Complete rebuild |
+| `ci` | server, server-gpu, client-build | CI/CD pipeline |
+
+### Usage Examples
+
+```bash
+# Build default (CPU server)
+docker buildx bake
+
+# Build GPU server only
+docker buildx bake server-gpu
+
+# Build CPU and GPU servers in parallel
+docker buildx bake servers
+
+# Build all targets in parallel
+docker buildx bake all
+
+# Show build plan without building
+docker buildx bake --print servers
+
+# Build with custom registry and tag
+docker buildx bake --set "*.tags=myregistry.io/noteflow:v1.0" servers
+
+# Build and push to registry
+docker buildx bake --push servers
+
+# Use GitHub Actions cache (in CI)
+docker buildx bake server-ci server-gpu-ci
+```
+
+### Variables
+
+Override at build time with `--set`:
+
+```bash
+# Use different CUDA version
+docker buildx bake --set server-gpu.args.CUDA_VERSION=12.5.0 server-gpu
+
+# Use custom registry
+docker buildx bake --set "*.tags=ghcr.io/myorg/noteflow:sha-abc123" all
+```
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `REGISTRY` | (none) | Container registry prefix |
+| `TAG` | latest | Image tag |
+| `PYTHON_VERSION` | 3.12 | Python version |
+| `CUDA_VERSION` | 12.4.1 | CUDA version for GPU builds |
+
+### Integration with Compose
+
+After building with bake, use pre-built images in compose:
+
+```bash
+# Build images
+docker buildx bake servers
+
+# Run with pre-built images (no --build needed)
+docker compose --profile server-gpu --profile infra up -d
+```
+
+---
+
 ## Rule: Minimal Base Images
 
 **Level**: `strict`
diff --git a/docker/entrypoint-gpu.sh b/docker/entrypoint-gpu.sh
new file mode 100644
index 0000000..d852d4c
--- /dev/null
+++ b/docker/entrypoint-gpu.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# GPU entrypoint script
+# Sets LD_LIBRARY_PATH to prioritize PyTorch's bundled cuDNN 9.8.0 over system cuDNN 9.1.0
+
+# PyTorch bundles cuDNN 9.8.0 in its site-packages
+# We must add these paths FIRST to override system cuDNN 9.1.0
+PYTORCH_NVIDIA_LIBS="/workspace/.venv/lib/python3.12/site-packages/nvidia"
+
+export LD_LIBRARY_PATH="${PYTORCH_NVIDIA_LIBS}/cudnn/lib:${PYTORCH_NVIDIA_LIBS}/cublas/lib:${PYTORCH_NVIDIA_LIBS}/cuda_runtime/lib:${PYTORCH_NVIDIA_LIBS}/cufft/lib:${PYTORCH_NVIDIA_LIBS}/cusolver/lib:${PYTORCH_NVIDIA_LIBS}/cusparse/lib:${PYTORCH_NVIDIA_LIBS}/nccl/lib:${PYTORCH_NVIDIA_LIBS}/nvtx/lib:/usr/local/cuda/lib64"
+
+echo "=== GPU Entrypoint ==="
+echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
+echo "Checking cuDNN libraries..."
+ls -la "${PYTORCH_NVIDIA_LIBS}/cudnn/lib/" 2>/dev/null | head -5 || echo "cuDNN libs not found (will be installed on first run)"
+echo "======================"
+
+# Run uv sync to ensure dependencies are installed
+uv sync --frozen --group dev --all-extras
+
+# Execute the command passed to docker run
+exec "$@"
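The ordering inside `LD_LIBRARY_PATH` is what does the work here: the dynamic linker searches those directories left to right before the system's ldconfig cache, so the wheel-provided cuDNN 9.8.0 is found ahead of the cuDNN 9.1.0 shipped in the CUDA base image. To confirm which cuDNN actually gets loaded (a sketch, assuming a GPU host and the `server-gpu` compose service; cuDNN 9.8.x reports as a 908xx integer):

```bash
docker compose run --rm server-gpu \
  uv run python -c "import torch; print(torch.backends.cudnn.version())"
# expect 908xx (PyTorch's bundled 9.8.0) rather than 901xx (system 9.1.0)
```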
diff --git a/docker/server-gpu.Dockerfile b/docker/server-gpu.Dockerfile
index 7f4624f..4e6bc9a 100644
--- a/docker/server-gpu.Dockerfile
+++ b/docker/server-gpu.Dockerfile
@@ -3,19 +3,44 @@
 # Use this for systems with NVIDIA GPUs
 # =============================================================================
 
-# Base Stage - CUDA-enabled Python using official PyTorch CUDA image
+# Python Stage - Get Python 3.12 from official image
 # =============================================================================
-# Using Python 3.12 bookworm with CUDA support via PyTorch's approach
-FROM python:3.12-bookworm AS base
+FROM python:3.12-slim-bookworm AS python-base
 
-# CUDA environment variables (for runtime GPU detection)
+# =============================================================================
+# Base Stage - NVIDIA CUDA with cuDNN for GPU-accelerated inference
+# =============================================================================
+# Using NVIDIA's official CUDA image with cuDNN 9.x for CTranslate2/faster-whisper
+# The runtime image includes cuDNN libraries required for GPU inference
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
+
+# CUDA/cuDNN environment variables
+# NOTE: PyTorch bundles cuDNN 9.8.0, but system has 9.1.0
+# We set LD_LIBRARY_PATH at runtime to prioritize PyTorch's bundled cuDNN
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     UV_COMPILE_BYTECODE=1 \
     UV_LINK_MODE=copy \
     # CUDA environment - these tell nvidia-container-runtime to inject GPU
     NVIDIA_VISIBLE_DEVICES=all \
-    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
+    # Base CUDA path (cuDNN paths added at runtime to use PyTorch's bundled version)
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64 \
+    # Python path configuration
+    PATH=/usr/local/bin:$PATH
+
+# Copy Python 3.12 from official image (avoids PPA network issues)
+COPY --from=python-base /usr/local/bin/python3.12 /usr/local/bin/python3.12
+COPY --from=python-base /usr/local/bin/python3 /usr/local/bin/python3
+COPY --from=python-base /usr/local/bin/pip3 /usr/local/bin/pip3
+COPY --from=python-base /usr/local/lib/python3.12 /usr/local/lib/python3.12
+COPY --from=python-base /usr/local/include/python3.12 /usr/local/include/python3.12
+COPY --from=python-base /usr/local/lib/libpython3.12.so* /usr/local/lib/
+
+# Create symlinks for python/pip commands
+RUN ln -sf /usr/local/bin/python3.12 /usr/local/bin/python \
+    && ln -sf /usr/local/bin/pip3 /usr/local/bin/pip \
+    && ldconfig
 
 # Install uv and system dependencies
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
@@ -55,12 +80,23 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
 
-# Verify CUDA is accessible (will fail build if CUDA libs missing)
-RUN python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" || true
+# Verify CUDA and cuDNN are accessible
+# Note: torch.cuda.is_available() may return False during build (no GPU)
+# but cuDNN libraries should be present in the image for runtime
+RUN python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'cuDNN version: {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else \"N/A\"}')" || true
+# Verify cuDNN shared libraries are present
+RUN ldconfig -p | grep -i cudnn || echo "cuDNN libraries will be available at runtime"
+
+# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly
+COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
+RUN chmod +x /usr/local/bin/entrypoint-gpu.sh
 
 EXPOSE 50051
 
-CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
+# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
+# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
+ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
+CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]
 
 # =============================================================================
 # Server Production Stage - GPU Enabled with all extras
@@ -82,6 +118,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
 
+# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly
+COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
+RUN chmod +x /usr/local/bin/entrypoint-gpu.sh
+
 EXPOSE 50051
 
-CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
+# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
+# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
+ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
+CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]
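Because the interpreter is copied from the slim Python image rather than installed from Ubuntu packages, a quick smoke test of the built image is worthwhile (image name as tagged by the `server-gpu` bake target and the compose file; `--entrypoint` bypasses the uv sync entrypoint):

```bash
docker run --rm --entrypoint python noteflow-server-gpu:latest --version
# Python 3.12.x
```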
python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'cuDNN version: {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else \"N/A\"}')" || true +# Verify cuDNN shared libraries are present +RUN ldconfig -p | grep -i cudnn || echo "cuDNN libraries will be available at runtime" + +# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly +COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh +RUN chmod +x /usr/local/bin/entrypoint-gpu.sh EXPOSE 50051 -CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"] +# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime +# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0 +ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"] +CMD ["uv", "run", "python", "scripts/dev_watch_server.py"] # ============================================================================= # Server Production Stage - GPU Enabled with all extras @@ -82,6 +118,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl +# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly +COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh +RUN chmod +x /usr/local/bin/entrypoint-gpu.sh + EXPOSE 50051 -CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"] +# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime +# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0 +ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"] +CMD ["uv", "run", "python", "scripts/dev_watch_server.py"] diff --git a/src/noteflow/grpc/server/internal/setup.py b/src/noteflow/grpc/server/internal/setup.py index 14a26be..83e5d9d 100644 --- a/src/noteflow/grpc/server/internal/setup.py +++ b/src/noteflow/grpc/server/internal/setup.py @@ -20,8 +20,16 @@ def create_server() -> grpc.aio.Server: IdentityInterceptor(), ] options = [ + # Message size limits ("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB ("grpc.max_receive_message_length", 100 * 1024 * 1024), + # HTTP/2 keep-alive settings to match Rust client configuration + # Client sends pings every 30s with 20s timeout; server must respond + ("grpc.keepalive_time_ms", 30_000), # Send ping every 30s + ("grpc.keepalive_timeout_ms", 20_000), # Wait 20s for ping response + ("grpc.keepalive_permit_without_calls", True), # Allow pings on idle + ("grpc.http2.min_recv_ping_interval_without_data_ms", 10_000), # Min 10s between pings + ("grpc.http2.max_pings_without_data", 0), # Unlimited pings without data ] return grpc.aio.server( interceptors=interceptors, diff --git a/src/noteflow/infrastructure/diarization/session.py b/src/noteflow/infrastructure/diarization/session.py index d2915db..e5c9672 100644 --- a/src/noteflow/infrastructure/diarization/session.py +++ b/src/noteflow/infrastructure/diarization/session.py @@ -223,12 +223,13 @@ class DiarizationSession: """ from pyannote.core import SlidingWindow, SlidingWindowFeature - # Reshape to (channels, samples) - pyannote expects channels-first format - audio_2d = audio.reshape(1, -1) + # Reshape to (samples, channels) - pyannote SlidingWindowFeature interprets + # data.shape[0] as number of frames. 
diff --git a/src/noteflow/infrastructure/diarization/session.py b/src/noteflow/infrastructure/diarization/session.py
index d2915db..e5c9672 100644
--- a/src/noteflow/infrastructure/diarization/session.py
+++ b/src/noteflow/infrastructure/diarization/session.py
@@ -223,12 +223,13 @@ class DiarizationSession:
         """
         from pyannote.core import SlidingWindow, SlidingWindowFeature
 
-        # Reshape to (channels, samples) - pyannote expects channels-first format
-        audio_2d = audio.reshape(1, -1)
+        # Reshape to (samples, channels) - pyannote SlidingWindowFeature interprets
+        # data.shape[0] as number of frames. Using (1, N) caused diart to see 1 frame
+        # instead of N frames, triggering "Expected 160000 samples but got 1" errors.
+        audio_2d = audio.reshape(-1, 1)
 
         # Configure window for per-sample temporal resolution.
         # Each row in the data array represents one audio sample with duration 1/sample_rate.
-        # Using chunk duration here was incorrect and caused frames/weights mismatch warnings.
         sample_duration = 1.0 / sample_rate
         window = SlidingWindow(start=0.0, duration=sample_duration, step=sample_duration)
         return SlidingWindowFeature(audio_2d, window)
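A minimal sketch of why the `(samples, 1)` layout matters, assuming `pyannote.core` is installed and a 10 s mono chunk at 16 kHz (values chosen to match the error message above):

```python
import numpy as np
from pyannote.core import SlidingWindow, SlidingWindowFeature

sample_rate = 16000
audio = np.zeros(10 * sample_rate, dtype=np.float32)  # 10 s mono chunk

step = 1.0 / sample_rate
window = SlidingWindow(start=0.0, duration=step, step=step)

correct = SlidingWindowFeature(audio.reshape(-1, 1), window)  # one frame per sample
wrong = SlidingWindowFeature(audio.reshape(1, -1), window)    # a single 160000-channel frame

print(correct.data.shape[0])  # 160000 frames, what diart expects
print(wrong.data.shape[0])    # 1 frame -> "Expected 160000 samples but got 1"
```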