@@ -56,6 +56,92 @@ sudo systemctl restart docker

---

## Docker Bake (Parallel Builds)

NoteFlow uses Docker Buildx Bake for efficient parallel builds. Configuration is in `docker-bake.hcl`.

### Available Targets

| Target | Description | Platform |
|--------|-------------|----------|
| `server` | CPU-only gRPC server | linux/amd64, linux/arm64 |
| `server-full` | CPU server with all extras | linux/amd64, linux/arm64 |
| `server-gpu` | NVIDIA CUDA GPU server | linux/amd64 |
| `server-gpu-full` | GPU server with all extras | linux/amd64 |
| `client-build` | Tauri client build | linux/amd64 |
| `client-dev` | Client development env | linux/amd64 |
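
The `server` and `server-full` targets above declare both linux/amd64 and linux/arm64. Multi-platform builds typically need a Buildx builder backed by the docker-container driver, since the default docker driver only builds for the host architecture unless the containerd image store is enabled. If a suitable builder already exists, `docker buildx use <name>` selects it; otherwise a one-time setup might look like this (a sketch; the builder name is arbitrary):

```bash
# Create a builder that can produce multi-platform images, and select it
docker buildx create --name noteflow-builder --driver docker-container --use

# Start the BuildKit container and list the platforms it supports
docker buildx inspect --bootstrap
```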
### Build Groups (Parallel)

| Group | Targets | Use Case |
|-------|---------|----------|
| `default` | server | Quick dev build |
| `servers` | server, server-gpu | Both CPU/GPU variants |
| `servers-full` | All server variants | Full production build |
| `all` | Everything | Complete rebuild |
| `ci` | server, server-gpu, client-build | CI/CD pipeline |
### Usage Examples

```bash
# Build default (CPU server)
docker buildx bake

# Build GPU server only
docker buildx bake server-gpu

# Build CPU and GPU servers in parallel
docker buildx bake servers

# Build all targets in parallel
docker buildx bake all

# Show build plan without building
docker buildx bake --print servers

# Build with custom registry and tag
docker buildx bake --set "*.tags=myregistry.io/noteflow:v1.0" servers

# Build and push to registry
docker buildx bake --push servers

# Use GitHub Actions cache (in CI)
docker buildx bake server-ci server-gpu-ci
```
### Variables

Override at build time with `--set`:

```bash
# Use a different CUDA version
docker buildx bake --set server-gpu.args.CUDA_VERSION=12.5.0 server-gpu

# Use a custom registry
docker buildx bake --set "*.tags=ghcr.io/myorg/noteflow:sha-abc123" all
```

| Variable | Default | Description |
|----------|---------|-------------|
| `REGISTRY` | (none) | Container registry prefix |
| `TAG` | latest | Image tag |
| `PYTHON_VERSION` | 3.12 | Python version |
| `CUDA_VERSION` | 12.4.1 | CUDA version for GPU builds |
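
Because these are declared as bake variables in `docker-bake.hcl`, they can also be overridden through same-named environment variables rather than `--set` (a sketch; the resulting image names depend on how the HCL composes `REGISTRY` and `TAG`):

```bash
# Tag the CPU and GPU server images for a private registry in one invocation
REGISTRY=ghcr.io/myorg/ TAG=v1.0.0 docker buildx bake servers

# Pin a specific CUDA toolkit for the GPU variant
CUDA_VERSION=12.5.0 docker buildx bake server-gpu
```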
### Integration with Compose

After building with bake, use pre-built images in compose:

```bash
# Build images
docker buildx bake servers

# Run with pre-built images (no --build needed)
docker compose --profile server-gpu --profile infra up -d
```
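To confirm that compose runs the images bake just produced instead of rebuilding them, compare the image references (a sketch; the service and image names depend on the project's compose file and `docker-bake.hcl`):

```bash
# Image references the GPU profile resolves to
docker compose --profile server-gpu config --images

# Check that those images exist locally and were freshly built
docker image ls --format '{{.Repository}}:{{.Tag}}  {{.CreatedSince}}' | grep noteflow
```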
---

## Rule: Minimal Base Images

**Level**: `strict`

docker/entrypoint-gpu.sh (new file)
@@ -0,0 +1,21 @@
#!/bin/bash
# GPU entrypoint script
# Sets LD_LIBRARY_PATH to prioritize PyTorch's bundled cuDNN 9.8.0 over system cuDNN 9.1.0

# PyTorch bundles cuDNN 9.8.0 in its site-packages
# We must add these paths FIRST to override system cuDNN 9.1.0
PYTORCH_NVIDIA_LIBS="/workspace/.venv/lib/python3.12/site-packages/nvidia"

export LD_LIBRARY_PATH="${PYTORCH_NVIDIA_LIBS}/cudnn/lib:${PYTORCH_NVIDIA_LIBS}/cublas/lib:${PYTORCH_NVIDIA_LIBS}/cuda_runtime/lib:${PYTORCH_NVIDIA_LIBS}/cufft/lib:${PYTORCH_NVIDIA_LIBS}/cusolver/lib:${PYTORCH_NVIDIA_LIBS}/cusparse/lib:${PYTORCH_NVIDIA_LIBS}/nccl/lib:${PYTORCH_NVIDIA_LIBS}/nvtx/lib:/usr/local/cuda/lib64"

echo "=== GPU Entrypoint ==="
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
echo "Checking cuDNN libraries..."
ls -la "${PYTORCH_NVIDIA_LIBS}/cudnn/lib/" 2>/dev/null | head -5 || echo "cuDNN libs not found (will be installed on first run)"
echo "======================"

# Run uv sync to ensure dependencies are installed
uv sync --frozen --group dev --all-extras

# Execute the command passed to docker run
exec "$@"
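
A quick way to confirm the override works at runtime is to ask PyTorch which cuDNN it loaded once the GPU container is up (a sketch; `noteflow-server-gpu` is an assumed compose service name):

```bash
# Expect PyTorch's bundled cuDNN (9.8.x), not the system 9.1.0
docker compose --profile server-gpu exec noteflow-server-gpu \
  uv run python -c "import torch; print(torch.backends.cudnn.version())"
```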
@@ -3,19 +3,44 @@
# Use this for systems with NVIDIA GPUs

# =============================================================================
# Base Stage - CUDA-enabled Python using official PyTorch CUDA image
# Python Stage - Get Python 3.12 from official image
# =============================================================================
# Using Python 3.12 bookworm with CUDA support via PyTorch's approach
FROM python:3.12-bookworm AS base
FROM python:3.12-slim-bookworm AS python-base

# CUDA environment variables (for runtime GPU detection)
# =============================================================================
# Base Stage - NVIDIA CUDA with cuDNN for GPU-accelerated inference
# =============================================================================
# Using NVIDIA's official CUDA image with cuDNN 9.x for CTranslate2/faster-whisper
# The runtime image includes cuDNN libraries required for GPU inference
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base

# CUDA/cuDNN environment variables
# NOTE: PyTorch bundles cuDNN 9.8.0, but system has 9.1.0
# We set LD_LIBRARY_PATH at runtime to prioritize PyTorch's bundled cuDNN
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    # CUDA environment - these tell nvidia-container-runtime to inject GPU
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    # Base CUDA path (cuDNN paths added at runtime to use PyTorch's bundled version)
    LD_LIBRARY_PATH=/usr/local/cuda/lib64 \
    # Python path configuration
    PATH=/usr/local/bin:$PATH

# Copy Python 3.12 from official image (avoids PPA network issues)
COPY --from=python-base /usr/local/bin/python3.12 /usr/local/bin/python3.12
COPY --from=python-base /usr/local/bin/python3 /usr/local/bin/python3
COPY --from=python-base /usr/local/bin/pip3 /usr/local/bin/pip3
COPY --from=python-base /usr/local/lib/python3.12 /usr/local/lib/python3.12
COPY --from=python-base /usr/local/include/python3.12 /usr/local/include/python3.12
COPY --from=python-base /usr/local/lib/libpython3.12.so* /usr/local/lib/

# Create symlinks for python/pip commands
RUN ln -sf /usr/local/bin/python3.12 /usr/local/bin/python \
    && ln -sf /usr/local/bin/pip3 /usr/local/bin/pip \
    && ldconfig

# Install uv and system dependencies
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
@@ -55,12 +80,23 @@ RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl

# Verify CUDA is accessible (will fail build if CUDA libs missing)
RUN python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" || true
# Verify CUDA and cuDNN are accessible
# Note: torch.cuda.is_available() may return False during build (no GPU)
# but cuDNN libraries should be present in the image for runtime
RUN python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'cuDNN version: {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else \"N/A\"}')" || true
# Verify cuDNN shared libraries are present
RUN ldconfig -p | grep -i cudnn || echo "cuDNN libraries will be available at runtime"

# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly
COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
RUN chmod +x /usr/local/bin/entrypoint-gpu.sh

EXPOSE 50051

CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]

# =============================================================================
# Server Production Stage - GPU Enabled with all extras
@@ -82,6 +118,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl

# Copy GPU entrypoint script that sets LD_LIBRARY_PATH correctly
COPY docker/entrypoint-gpu.sh /usr/local/bin/entrypoint-gpu.sh
RUN chmod +x /usr/local/bin/entrypoint-gpu.sh

EXPOSE 50051

CMD ["sh", "-c", "uv sync --frozen --group dev --all-extras && uv run python scripts/dev_watch_server.py"]
# Use entrypoint script to set LD_LIBRARY_PATH correctly at runtime
# This ensures PyTorch's bundled cuDNN 9.8.0 takes priority over system cuDNN 9.1.0
ENTRYPOINT ["/usr/local/bin/entrypoint-gpu.sh"]
CMD ["uv", "run", "python", "scripts/dev_watch_server.py"]