chore: configure devcontainer Python venv persistence and normalize package-lock peer dependencies

- Added bind mount for .venv directory in devcontainer to persist Python virtual environment across container rebuilds
- Enabled updateRemoteUserUID for proper file permissions in devcontainer
- Normalized peer dependency flags in package-lock.json (removed inconsistent "peer": true from core dependencies, added to test-only dependencies)
- Added empty codex file placeholder
- Created comprehensive ROCm support planning documents (implementation checklist, sprint plan, and technical deep dive)
2026-01-17 21:36:22 -05:00
parent c906eea3be
commit d0d4eea847
6 changed files with 2748 additions and 28 deletions


@@ -20,6 +20,10 @@
"DISPLAY": ":1",
"XDG_RUNTIME_DIR": "/tmp/runtime-vscode"
},
"mounts": [
"source=${localWorkspaceFolder}/.venv,target=${containerWorkspaceFolder}/.venv,type=bind,consistency=cached"
],
"updateRemoteUserUID": true,
"postCreateCommand": ".devcontainer/postCreate.sh",
"remoteUser": "vscode",
"customizations": {


@@ -482,7 +482,6 @@
}
],
"license": "MIT",
"peer": true,
"engines": {
"node": ">=18"
},
@@ -524,7 +523,6 @@
}
],
"license": "MIT",
"peer": true,
"engines": {
"node": ">=18"
}
@@ -4531,7 +4529,8 @@
"version": "5.0.4",
"resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz",
"integrity": "sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==",
"license": "MIT"
"license": "MIT",
"peer": true
},
"node_modules/@types/chai": {
"version": "5.2.3",
@@ -4665,7 +4664,6 @@
"integrity": "sha512-qm+G8HuG6hOHQigsi7VGuLjUVu6TtBo/F05zvX04Mw2uCg9Dv0Qxy3Qw7j41SidlTcl5D/5yg0SEZqOB+EqZnQ==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~6.21.0"
}
@@ -4690,7 +4688,6 @@
"integrity": "sha512-cisd7gxkzjBKU2GgdYrTdtQx1SORymWyaAFhaxQPK9bYO9ot3Y5OikQRvY0VYQtvwjeQnizCINJAenh/V7MK2w==",
"devOptional": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@types/prop-types": "*",
"csstype": "^3.2.2"
@@ -4702,7 +4699,6 @@
"integrity": "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ==",
"devOptional": true,
"license": "MIT",
"peer": true,
"peerDependencies": {
"@types/react": "^18.0.0"
}
@@ -4811,7 +4807,6 @@
"integrity": "sha512-npiaib8XzbjtzS2N4HlqPvlpxpmZ14FjSJrteZpPxGUaYPlvhzlzUZ4mZyABo0EFrOWnvyd0Xxroq//hKhtAWg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@typescript-eslint/scope-manager": "8.53.0",
"@typescript-eslint/types": "8.53.0",
@@ -5276,7 +5271,6 @@
"integrity": "sha512-OmwPKV8c5ecLqo+EkytN7oUeYfNmRI4uOXGIR1ybP7AK5Zz+l9R0dGfoadEuwi1aZXAL0vwuhtq3p0OL3dfqHQ==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=18.20.0"
},
@@ -5331,7 +5325,6 @@
"integrity": "sha512-HdzDrRs+ywAqbXGKqe1i/bLtCv47plz4TvsHFH3j729OooT5VH38ctFn5aLXgECmiAKDkmH/A6kOq2Zh5DIxww==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"chalk": "^5.1.2",
"loglevel": "^1.6.0",
@@ -5582,7 +5575,6 @@
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"dev": true,
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@@ -6091,7 +6083,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.9.0",
"caniuse-lite": "^1.0.30001759",
@@ -6852,7 +6843,6 @@
"resolved": "https://registry.npmjs.org/date-fns/-/date-fns-3.6.0.tgz",
"integrity": "sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==",
"license": "MIT",
"peer": true,
"funding": {
"type": "github",
"url": "https://github.com/sponsors/kossnocorp"
@@ -6997,7 +6987,8 @@
"version": "0.5.16",
"resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz",
"integrity": "sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==",
"license": "MIT"
"license": "MIT",
"peer": true
},
"node_modules/dom-helpers": {
"version": "5.2.1",
@@ -7208,8 +7199,7 @@
"version": "8.6.0",
"resolved": "https://registry.npmjs.org/embla-carousel/-/embla-carousel-8.6.0.tgz",
"integrity": "sha512-SjWyZBHJPbqxHOzckOfo8lHisEaJWmwd23XppYFYVh10bU66/Pn5tkVkbkCMZVdbUE5eTCI2nD8OyIP4Z+uwkA==",
"license": "MIT",
"peer": true
"license": "MIT"
},
"node_modules/embla-carousel-react": {
"version": "8.6.0",
@@ -7398,7 +7388,6 @@
"integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.8.0",
"@eslint-community/regexpp": "^4.12.1",
@@ -7735,7 +7724,6 @@
"integrity": "sha512-gQHqfI6SmtYBIkTeMizpHThdpXh6ej2Hk68oKZneFM6iu99ZGXvOPnmhd8VDus3xOWhVDDdf4sLsMV2/o+X6Yg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@vitest/snapshot": "^4.0.16",
"deep-eql": "^5.0.2",
@@ -9133,7 +9121,6 @@
"resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.7.tgz",
"integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==",
"license": "MIT",
"peer": true,
"bin": {
"jiti": "bin/jiti.js"
}
@@ -9162,7 +9149,6 @@
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-27.4.0.tgz",
"integrity": "sha512-mjzqwWRD9Y1J1KUi7W97Gja1bwOOM5Ug0EZ6UDK3xS7j7mndrkwozHtSblfomlzyB4NepioNt+B2sOSzczVgtQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@acemir/cssom": "^0.9.28",
"@asamuzakjp/dom-selector": "^6.7.6",
@@ -9608,6 +9594,7 @@
"resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz",
"integrity": "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==",
"license": "MIT",
"peer": true,
"bin": {
"lz-string": "bin/bin.js"
}
@@ -10572,7 +10559,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"nanoid": "^3.3.11",
"picocolors": "^1.1.1",
@@ -10739,6 +10725,7 @@
"resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz",
"integrity": "sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"ansi-regex": "^5.0.1",
"ansi-styles": "^5.0.0",
@@ -10907,7 +10894,6 @@
"resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
"integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"loose-envify": "^1.1.0"
},
@@ -10934,7 +10920,6 @@
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
"integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
"license": "MIT",
"peer": true,
"dependencies": {
"loose-envify": "^1.1.0",
"scheduler": "^0.23.2"
@@ -10948,7 +10933,6 @@
"resolved": "https://registry.npmjs.org/react-hook-form/-/react-hook-form-7.71.1.tgz",
"integrity": "sha512-9SUJKCGKo8HUSsCO+y0CtqkqI5nNuaDqTxyqPsZPqIwudpj4rCrAz/jZV+jn57bx5gtZKOh3neQu94DXMc+w5w==",
"license": "MIT",
"peer": true,
"engines": {
"node": ">=18.0.0"
},
@@ -10964,7 +10948,8 @@
"version": "17.0.2",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz",
"integrity": "sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==",
"license": "MIT"
"license": "MIT",
"peer": true
},
"node_modules/react-remove-scroll": {
"version": "2.7.2",
@@ -12204,7 +12189,6 @@
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.19.tgz",
"integrity": "sha512-3ofp+LL8E+pK/JuPLPggVAIaEuhvIz4qNcf3nA1Xn2o/7fb7s/TYpHhwGDv1ZU3PkBluUVaF8PyCHcm48cKLWQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"@alloc/quick-lru": "^5.2.0",
"arg": "^5.0.2",
@@ -13056,7 +13040,6 @@
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@@ -13286,7 +13269,6 @@
"resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz",
"integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==",
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.27.0",
"fdir": "^6.5.0",
@@ -14047,7 +14029,6 @@
"integrity": "sha512-Y5y4jpwHvuduUfup+gXTuCU6AROn/k6qOba3st0laFluKHY+q5SHOpQAJdS8acYLwE8caDQ2dXJhmXyxuJrm0Q==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@types/node": "^20.11.30",
"@types/sinonjs__fake-timers": "^8.1.5",

File diff suppressed because it is too large


@@ -0,0 +1,282 @@
# ROCm Support Implementation Checklist
This checklist tracks the implementation progress for Sprint 18.5.
---
## Phase 1: Device Abstraction Layer
### 1.1 GPU Detection Module
- [ ] Create `src/noteflow/infrastructure/gpu/__init__.py`
- [ ] Create `src/noteflow/infrastructure/gpu/detection.py`
- [ ] Implement `GpuBackend` enum (NONE, CUDA, ROCM, MPS)
- [ ] Implement `GpuInfo` dataclass
- [ ] Implement `detect_gpu_backend()` function
- [ ] Implement `get_gpu_info()` function
- [ ] Add ROCm version detection via `torch.version.hip`
- [ ] Create `tests/infrastructure/gpu/test_detection.py`
- [ ] Test no-torch case
- [ ] Test CUDA detection
- [ ] Test ROCm detection (HIP check)
- [ ] Test MPS detection
- [ ] Test CPU fallback
### 1.2 Domain Types
- [ ] Create `src/noteflow/domain/ports/gpu.py`
- [ ] Export `GpuBackend` enum
- [ ] Export `GpuInfo` type
- [ ] Define `GpuDetectionProtocol`
### 1.3 ASR Device Types
- [ ] Update `src/noteflow/application/services/asr_config/types.py`
- [ ] Add `ROCM = "rocm"` to `AsrDevice` enum
- [ ] Add ROCm entry to `DEVICE_COMPUTE_TYPES` mapping
- [ ] Update `AsrCapabilities` dataclass with `rocm_available` and `gpu_backend` fields
### 1.4 Diarization Device Mixin
- [ ] Update `src/noteflow/infrastructure/diarization/engine/_device_mixin.py`
- [ ] Add ROCm detection in `_detect_available_device()`
- [ ] Maintain backward compatibility with "cuda" device string
### 1.5 System Metrics
- [ ] Update `src/noteflow/infrastructure/metrics/system_resources.py`
- [ ] Handle ROCm VRAM queries (same API as CUDA via HIP)
- [ ] Add `gpu_backend` field to metrics
### 1.6 gRPC Proto
- [ ] Update `src/noteflow/grpc/proto/noteflow.proto`
- [ ] Add `ASR_DEVICE_ROCM = 3` to `AsrDevice` enum
- [ ] Add `rocm_available` field to `AsrConfiguration`
- [ ] Add `gpu_backend` field to `AsrConfiguration`
- [ ] Regenerate Python stubs
- [ ] Run `scripts/patch_grpc_stubs.py`
### 1.7 Phase 1 Tests
- [ ] Run `pytest tests/infrastructure/gpu/`
- [ ] Run `make quality-py`
- [ ] Verify no regressions in CUDA detection
---
## Phase 2: ASR Engine Protocol
### 2.1 Engine Protocol Definition
- [ ] Extend `src/noteflow/infrastructure/asr/protocols.py` (or relocate to `domain/ports`)
- [ ] Reuse `AsrResult` / `WordTiming` from `infrastructure/asr/dto.py`
- [ ] Add `device` property (logical device: cpu/cuda/rocm)
- [ ] Add `compute_type` property
- [ ] Confirm `model_size` + `is_loaded` already covered
- [ ] Add optional `transcribe_file()` helper (if needed)
### 2.2 Refactor FasterWhisperEngine
- [ ] Update `src/noteflow/infrastructure/asr/engine.py`
- [ ] Ensure compliance with `AsrEngine`
- [ ] Add explicit type annotations
- [ ] Document as CUDA/CPU backend
- [ ] Create `tests/infrastructure/asr/test_protocol_compliance.py`
- [ ] Verify `FasterWhisperEngine` implements protocol
### 2.3 PyTorch Whisper Engine (Fallback)
- [ ] Create `src/noteflow/infrastructure/asr/pytorch_engine.py`
- [ ] Implement `WhisperPyTorchEngine` class
- [ ] Implement all protocol methods
- [ ] Handle device placement (cuda/rocm/cpu)
- [ ] Support all compute types
- [ ] Create `tests/infrastructure/asr/test_pytorch_engine.py`
- [ ] Test model loading
- [ ] Test transcription
- [ ] Test device handling
### 2.4 Engine Factory
- [ ] Create `src/noteflow/infrastructure/asr/factory.py`
- [ ] Implement `create_asr_engine()` function
- [ ] Implement `_resolve_device()` helper
- [ ] Implement `_create_cpu_engine()` helper
- [ ] Implement `_create_cuda_engine()` helper
- [ ] Implement `_create_rocm_engine()` helper
- [ ] Define `EngineCreationError` exception
- [ ] Create `tests/infrastructure/asr/test_factory.py`
- [ ] Test auto device resolution
- [ ] Test explicit device selection
- [ ] Test fallback behavior
- [ ] Test error cases
### 2.5 Update Engine Manager
- [ ] Update `src/noteflow/application/services/asr_config/_engine_manager.py`
- [ ] Add `detect_rocm_available()` method
- [ ] Update `build_capabilities()` for ROCm
- [ ] Update `check_configuration()` for ROCm validation
- [ ] Use factory for engine creation in `build_engine_for_job()`
- [ ] Update `tests/application/test_asr_config_service.py`
- [ ] Add ROCm detection tests
- [ ] Add ROCm validation tests
### 2.6 Phase 2 Tests
- [ ] Run full ASR test suite
- [ ] Run `make quality-py`
- [ ] Verify CUDA path unchanged
---
## Phase 3: ROCm-Specific Engine
### 3.1 ROCm Engine Implementation
- [ ] Create `src/noteflow/infrastructure/asr/rocm_engine.py`
- [ ] Implement `FasterWhisperRocmEngine` class
- [ ] Handle CTranslate2-ROCm import with fallback
- [ ] Implement all protocol methods
- [ ] Add ROCm-specific optimizations
- [ ] Create `tests/infrastructure/asr/test_rocm_engine.py`
- [ ] Test import fallback behavior
- [ ] Test engine creation (mock)
- [ ] Test protocol compliance
### 3.2 Update Factory for ROCm
- [ ] Update `src/noteflow/infrastructure/asr/factory.py`
- [ ] Add ROCm engine import with graceful fallback
- [ ] Log warning when falling back to PyTorch
- [ ] Update factory tests for ROCm path
### 3.3 ROCm Installation Detection
- [ ] Update `src/noteflow/infrastructure/gpu/detection.py`
- [ ] Add `is_ctranslate2_rocm_available()` function
- [ ] Add `get_rocm_version()` function
- [ ] Add corresponding tests
### 3.4 Phase 3 Tests
- [ ] Run ROCm-specific tests (skip if no ROCm)
- [ ] Run `make quality-py`
- [ ] Test on AMD hardware (if available)
---
## Phase 4: Configuration & Distribution
### 4.1 Feature Flag
- [ ] Update `src/noteflow/config/settings/_features.py`
- [ ] Add `NOTEFLOW_FEATURE_ROCM_ENABLED` flag
- [ ] Document in settings
- [ ] Update any feature flag guards
### 4.2 gRPC Config Handlers
- [ ] Update `src/noteflow/grpc/mixins/asr_config.py`
- [ ] Handle ROCm device in `GetAsrConfiguration()`
- [ ] Handle ROCm device in `UpdateAsrConfiguration()`
- [ ] Add ROCm to capabilities response
- [ ] Update tests in `tests/grpc/test_asr_config.py`
### 4.3 Dependencies
- [ ] Update `pyproject.toml`
- [ ] Add `rocm` extras group
- [ ] Add `openai-whisper` as optional dependency
- [ ] Document ROCm installation in comments
- [ ] Create `requirements-rocm.txt` (optional)
### 4.4 Docker ROCm Image
- [ ] Create `docker/Dockerfile.rocm`
- [ ] Base on `rocm/pytorch` image
- [ ] Install NoteFlow with ROCm extras
- [ ] Configure for GPU access
- [ ] Update `compose.yaml` (and/or add `compose.rocm.yaml`) with ROCm profile
- [ ] Test Docker image build
### 4.5 Documentation
- [ ] Create `docs/installation/rocm.md`
- [ ] System requirements
- [ ] PyTorch ROCm installation
- [ ] CTranslate2-ROCm installation (optional)
- [ ] Docker usage
- [ ] Troubleshooting
- [ ] Update main README with ROCm section
- [ ] Update `CLAUDE.md` with ROCm notes
### 4.6 Phase 4 Tests
- [ ] Run full test suite
- [ ] Run `make quality`
- [ ] Build ROCm Docker image
- [ ] Test on AMD hardware
---
## Final Validation
### Quality Gates
- [ ] `pytest tests/quality/` passes
- [ ] `make quality-py` passes
- [ ] `make quality` passes (full stack)
- [ ] Proto regenerated correctly
- [ ] No type errors (`basedpyright`)
- [ ] No lint errors (`ruff`)
### Functional Validation
- [ ] CUDA path works (no regression)
- [ ] CPU path works (no regression)
- [ ] ROCm detection works
- [ ] PyTorch fallback works
- [ ] gRPC configuration works
- [ ] Device switching works
### Documentation
- [ ] Sprint README complete
- [ ] Implementation checklist complete
- [ ] Installation guide complete
- [ ] API documentation updated
---
## Notes
### Files Created
| File | Status |
|------|--------|
| `src/noteflow/domain/ports/gpu.py` | ❌ |
| `src/noteflow/domain/ports/asr.py` | optional (only if relocating protocol) |
| `src/noteflow/infrastructure/gpu/__init__.py` | ❌ |
| `src/noteflow/infrastructure/gpu/detection.py` | ❌ |
| `src/noteflow/infrastructure/asr/pytorch_engine.py` | ❌ |
| `src/noteflow/infrastructure/asr/rocm_engine.py` | ❌ |
| `src/noteflow/infrastructure/asr/factory.py` | ❌ |
| `docker/Dockerfile.rocm` | ❌ |
| `docs/installation/rocm.md` | ❌ |
### Files Modified
| File | Status |
|------|--------|
| `application/services/asr_config/types.py` | ❌ |
| `application/services/asr_config/_engine_manager.py` | ❌ |
| `infrastructure/diarization/engine/_device_mixin.py` | ❌ |
| `infrastructure/metrics/system_resources.py` | ❌ |
| `infrastructure/asr/engine.py` | ❌ |
| `infrastructure/asr/protocols.py` | ❌ |
| `grpc/proto/noteflow.proto` | ❌ |
| `grpc/mixins/asr_config.py` | ❌ |
| `config/settings/_features.py` | ❌ |
| `pyproject.toml` | ❌ |


@@ -0,0 +1,786 @@
# Sprint 18.5: ROCm GPU Backend Support
> **Size**: L | **Owner**: Backend | **Prerequisites**: None
> **Phase**: 5 - Platform Evolution
---
## Validation Status (2025-01-17)
### Research Complete — Implementation Ready
Note: Hardware/driver compatibility and ROCm wheel availability are time-sensitive.
Re-verify against AMD ROCm compatibility matrices and PyTorch ROCm install guidance
before implementation.
### Repo Alignment Notes (current tree)
- ASR protocol already exists at `src/noteflow/infrastructure/asr/protocols.py` and
returns `AsrResult` from `src/noteflow/infrastructure/asr/dto.py`. Extend these
instead of adding parallel `domain/ports/asr.py` types unless we plan a broader
layering refactor.
- gRPC mixins live under `src/noteflow/grpc/mixins/` (not `_mixins`).
- Tests live under `tests/infrastructure/` and `tests/application/` (no `tests/unit/`).
| Prerequisite | Status | Impact |
|--------------|--------|--------|
| PyTorch ROCm support | ✅ Available | PyTorch HIP layer works with existing `torch.cuda` API |
| CTranslate2 ROCm support | ⚠️ Community fork | No official support; requires alternative engine strategy |
| pyannote.audio ROCm support | ✅ Available | Pure PyTorch, works out of box |
| diart ROCm support | ✅ Available | Pure PyTorch, works out of box |

| Component | Status | Notes |
|-----------|--------|-------|
| Device abstraction layer | ❌ Not implemented | Need `GpuBackend` enum |
| ASR engine protocol | ⚠️ Partial | AsrEngine exists; extend with device/compute metadata |
| ROCm detection | ❌ Not implemented | Need `torch.version.hip` check |
| gRPC proto updates | ❌ Not implemented | Need `ASR_DEVICE_ROCM` |
| PyTorch Whisper fallback | ❌ Not implemented | Fallback for universal compatibility |
**Action required**: Implement device abstraction layer and engine protocol pattern.
---
## Objective
Enable NoteFlow backend to run on AMD GPUs via ROCm, providing GPU-accelerated ASR and diarization for users without NVIDIA hardware. This extends hardware support while maintaining full CUDA compatibility.
---
## Feasibility Analysis
### Component Compatibility Matrix
| Component | Library | CUDA | ROCm | Notes |
|-----------|---------|------|------|-------|
| ASR (Speech-to-Text) | faster-whisper (CTranslate2) | ✅ Official | ⚠️ Fork | [CTranslate2-ROCm](https://github.com/arlo-phoenix/CTranslate2-rocm) |
| Diarization (Streaming) | diart (PyTorch) | ✅ | ✅* | HIP layer transparent; validate in our stack |
| Diarization (Offline) | pyannote.audio (PyTorch) | ✅ | ✅ | [AMD tested](https://rocm.blogs.amd.com/artificial-intelligence/speech_models/README.html) |
| NER | spaCy transformers (PyTorch) | ✅ | ✅ | HIP layer transparent |
| Metrics/VRAM | torch.cuda.* | ✅ | ✅ | Works via HIP |
### Hardware Support (Source of Truth)
ROCm GPU support is version- and distro-specific. Use AMD's official compatibility
matrices and the Radeon ROCm guide as the source of truth.
- Datacenter (Instinct/MI): Refer to the ROCm compatibility matrices for exact
ROCm release + GPU + OS combinations.
- Radeon/Workstation: Official Radeon ROCm support is narrower; check the latest
Radeon ROCm guide for the exact RX series and Linux distro support. Older Radeon
generations are often not listed and may require overrides or community builds.
### Key Technical Insights
1. **ROCm HIP Layer**: PyTorch's ROCm build [reuses the `torch.cuda` interface](https://docs.pytorch.org/docs/stable/notes/hip.html). Existing code using `torch.cuda.is_available()` and `torch.device("cuda")` works without modification (there is no `torch.device("rocm")`).
2. **CTranslate2 Blocker**: faster-whisper's [CTranslate2](https://github.com/OpenNMT/CTranslate2) dependency has no official ROCm support. Options:
- Use [CTranslate2-ROCm fork](https://github.com/arlo-phoenix/CTranslate2-rocm) (community maintained)
- Implement PyTorch Whisper fallback (official, slower)
- Use [insanely-fast-whisper-rocm](https://github.com/beecave-homelab/insanely-fast-whisper-rocm) (community project)
3. **Detection Logic**: ROCm can be detected via:
```python
if torch.cuda.is_available() and hasattr(torch.version, 'hip') and torch.version.hip:
    backend = "rocm"  # ROCm/HIP backend
```
---
## What Already Exists
### Device Detection Infrastructure
| Asset | Location | Reuse Strategy |
|-------|----------|----------------|
| CUDA detection | `application/services/asr_config/_engine_manager.py:61-72` | Extend with ROCm check |
| Device preference | `infrastructure/diarization/engine/_device_mixin.py:18-36` | Add ROCm to detection cascade |
| Device types | `application/services/asr_config/types.py:22-27` | Add `ROCM` enum value |
| Compute types | `application/services/asr_config/types.py:37-44` | Add ROCm compute types |
| ASR protocol | `infrastructure/asr/protocols.py` | Extend existing `AsrEngine` (avoid duplicate protocol) |
| ASR DTOs | `infrastructure/asr/dto.py` | Reuse `AsrResult`/`WordTiming` (avoid duplicate types) |
### ASR Engine Architecture
| Asset | Location | Reuse Strategy |
|-------|----------|----------------|
| FasterWhisperEngine | `infrastructure/asr/engine.py` | Keep as CUDA/CPU backend |
| AsrEngine protocol (existing) | `infrastructure/asr/protocols.py` | Extend with device/compute metadata |
| Engine manager | `application/services/asr_config/_engine_manager.py` | Extend for engine selection |
### gRPC Configuration
| Asset | Location | Reuse Strategy |
|-------|----------|----------------|
| ASR device enum | `grpc/proto/noteflow.proto` | Add `ASR_DEVICE_ROCM` |
| ASR config messages | `grpc/proto/noteflow.proto` | Add `rocm_available` field |
| Config RPC handlers | `grpc/mixins/asr_config.py` | Extend for ROCm |
---
## Architecture Decision: Engine Protocol Pattern
### Decision
Extend the existing `AsrEngine` protocol (or move it to `domain/ports` if we want a
cleaner layering boundary) to abstract ASR backends and enable swappable implementations.
### Rationale
1. **Separation of Concerns**: Hardware-specific code isolated in engine implementations
2. **Testability**: Mock engines for testing without GPU
3. **Extensibility**: Future backends (TPU, Apple Neural Engine) follow same pattern
4. **Gradual Migration**: Existing code continues working while new engines added
### Engine Hierarchy
```
AsrEngine (Protocol)
├── FasterWhisperEngine (CUDA/CPU) — existing, extend protocol
├── FasterWhisperRocmEngine (ROCm) — new, uses CTranslate2-ROCm fork
└── WhisperPyTorchEngine (Universal) — new, pure PyTorch fallback
```
---
## Scope
### Phase 1: Device Abstraction Layer (M)
| Task | Effort | Files |
|------|--------|-------|
| Add `GpuBackend` enum | S | `domain/ports/gpu.py` (new) |
| Add ROCm detection utility | S | `infrastructure/gpu/detection.py` (new) |
| Extend `AsrDevice` enum | S | `application/services/asr_config/types.py` |
| Update device mixin | S | `infrastructure/diarization/engine/_device_mixin.py` |
| Update gRPC proto | S | `grpc/proto/noteflow.proto` |
### Phase 2: ASR Engine Protocol (M)
| Task | Effort | Files |
|------|--------|-------|
| Extend `AsrEngine` protocol (or relocate) | M | `infrastructure/asr/protocols.py` (extend) or `domain/ports/asr.py` (move) |
| Refactor `FasterWhisperEngine` to protocol | M | `infrastructure/asr/engine.py` |
| Create `WhisperPyTorchEngine` fallback | M | `infrastructure/asr/pytorch_engine.py` (new) |
| Create engine factory | S | `infrastructure/asr/factory.py` (new) |
| Update engine manager | M | `application/services/asr_config/_engine_manager.py` |
### Phase 3: ROCm-Specific Engine (M)
| Task | Effort | Files |
|------|--------|-------|
| Create `FasterWhisperRocmEngine` | M | `infrastructure/asr/rocm_engine.py` (new) |
| Add conditional CTranslate2-ROCm import | S | `infrastructure/asr/rocm_engine.py` |
| Update engine factory for ROCm | S | `infrastructure/asr/factory.py` |
| Add ROCm installation detection | S | `infrastructure/gpu/detection.py` |
### Phase 4: Configuration & Distribution (L)
| Task | Effort | Files |
|------|--------|-------|
| Add ROCm feature flag | S | `config/settings/_features.py` |
| Update gRPC config handlers | M | `grpc/mixins/asr_config.py` |
| Create ROCm Docker image | M | `docker/Dockerfile.rocm` (new) |
| Update pyproject.toml extras | S | `pyproject.toml` |
| Add ROCm installation docs | M | `docs/installation/rocm.md` (new) |
---
## Deliverables
### New Files
| File | Purpose |
|------|---------|
| `src/noteflow/domain/ports/gpu.py` | GPU backend types and detection protocol |
| `src/noteflow/infrastructure/gpu/__init__.py` | GPU package init |
| `src/noteflow/infrastructure/gpu/detection.py` | GPU detection utilities |
| `src/noteflow/infrastructure/asr/pytorch_engine.py` | Pure PyTorch Whisper engine |
| `src/noteflow/infrastructure/asr/rocm_engine.py` | ROCm-specific faster-whisper engine |
| `src/noteflow/infrastructure/asr/factory.py` | Engine factory for backend selection |
| `docker/Dockerfile.rocm` | ROCm-enabled Docker image |
| `docs/installation/rocm.md` | ROCm setup documentation |
### Modified Files
| File | Changes |
|------|---------|
| `application/services/asr_config/types.py` | Add `ROCM` to `AsrDevice`, ROCm compute types |
| `application/services/asr_config/_engine_manager.py` | Use engine factory, extend detection |
| `infrastructure/diarization/engine/_device_mixin.py` | Add ROCm to detection cascade |
| `infrastructure/metrics/system_resources.py` | Handle ROCm VRAM queries |
| `infrastructure/asr/protocols.py` | Extend `AsrEngine` with device/compute_type metadata |
| `grpc/proto/noteflow.proto` | Add `ASR_DEVICE_ROCM`, `rocm_available`, `gpu_backend` |
| `grpc/mixins/asr_config.py` | Map ROCm device and capabilities |
| `config/settings/_features.py` | Add `NOTEFLOW_FEATURE_ROCM_ENABLED` |
| `pyproject.toml` | Add `rocm` extras group |
---
## Implementation Details
### 1. GPU Detection Module
**File**: `src/noteflow/infrastructure/gpu/detection.py`
```python
"""GPU backend detection utilities."""
from __future__ import annotations
from noteflow.domain.ports.gpu import GpuBackend, GpuInfo
def detect_gpu_backend() -> GpuBackend:
"""Detect the available GPU backend.
Returns:
GpuBackend enum indicating the detected backend.
"""
try:
import torch
except ImportError:
return GpuBackend.NONE
if not torch.cuda.is_available():
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
return GpuBackend.MPS
return GpuBackend.NONE
# Check if this is ROCm masquerading as CUDA
if hasattr(torch.version, 'hip') and torch.version.hip:
return GpuBackend.ROCM
return GpuBackend.CUDA
def get_gpu_info() -> GpuInfo | None:
"""Get detailed GPU information.
Returns:
GpuInfo if a GPU is available, None otherwise.
"""
backend = detect_gpu_backend()
if backend == GpuBackend.NONE:
return None
import torch
if backend in (GpuBackend.CUDA, GpuBackend.ROCM):
props = torch.cuda.get_device_properties(0)
vram_mb = props.total_memory // (1024 * 1024)
driver_version = ""
if backend == GpuBackend.ROCM:
driver_version = getattr(torch.version, 'hip', 'unknown')
else:
driver_version = torch.version.cuda or 'unknown'
return GpuInfo(
backend=backend,
device_name=props.name,
vram_total_mb=vram_mb,
driver_version=driver_version,
)
if backend == GpuBackend.MPS:
return GpuInfo(
backend=backend,
device_name="Apple Metal",
vram_total_mb=0, # MPS doesn't expose VRAM
driver_version="mps",
)
return None
```
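The module above imports `GpuBackend` and `GpuInfo` from `noteflow.domain.ports.gpu`, which does not exist yet. A minimal sketch of what that file could contain, with the enum members from Phase 1 and field names inferred from `get_gpu_info()` above (both are assumptions until the file is written):
```python
"""GPU backend types (sketch; shape inferred from the detection module above)."""
from __future__ import annotations

from dataclasses import dataclass
from enum import Enum


class GpuBackend(str, Enum):
    """Detected GPU runtime backend."""

    NONE = "none"
    CUDA = "cuda"
    ROCM = "rocm"
    MPS = "mps"


@dataclass(frozen=True)
class GpuInfo:
    """Static facts about the detected GPU."""

    backend: GpuBackend
    device_name: str
    vram_total_mb: int
    driver_version: str
```
Making the enum string-valued keeps the proto `gpu_backend` mapping trivial (`backend.value`).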
### 2. ASR Engine Protocol
**File**: `src/noteflow/infrastructure/asr/protocols.py` (extend)
```python
"""ASR protocols defining contracts for ASR components."""
from __future__ import annotations
from collections.abc import Iterator
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from pathlib import Path
import numpy as np
from numpy.typing import NDArray
from noteflow.infrastructure.asr.dto import AsrResult
class AsrEngine(Protocol):
"""Protocol for ASR transcription engine implementations."""
@property
def device(self) -> str:
"""Return the requested device ("cpu", "cuda", "rocm")."""
...
@property
def compute_type(self) -> str:
"""Return the compute type (int8, float16, float32)."""
...
@property
def model_size(self) -> str | None:
"""Return the loaded model size, or None if not loaded."""
...
@property
def is_loaded(self) -> bool:
"""Return True if model is loaded."""
...
def load_model(self, model_size: str = "base") -> None:
"""Load the ASR model."""
...
def transcribe(
self,
audio: NDArray[np.float32],
language: str | None = None,
) -> Iterator[AsrResult]:
"""Transcribe audio data."""
...
def unload(self) -> None:
"""Unload the model and free resources."""
...
def transcribe_file(
self,
audio_path: Path,
*,
language: str | None = None,
) -> Iterator[AsrResult]:
"""Optional helper for file-based transcription."""
...
```
### 3. Engine Factory
**File**: `src/noteflow/infrastructure/asr/factory.py`
```python
"""ASR engine factory for backend selection."""
from __future__ import annotations
from typing import TYPE_CHECKING
from noteflow.infrastructure.gpu.detection import GpuBackend, detect_gpu_backend
from noteflow.infrastructure.logging import get_logger
if TYPE_CHECKING:
from noteflow.infrastructure.asr.protocols import AsrEngine
logger = get_logger(__name__)
class EngineCreationError(Exception):
"""Raised when engine creation fails."""
def create_asr_engine(
device: str = "auto",
compute_type: str = "int8",
*,
prefer_faster_whisper: bool = True,
) -> AsrEngine:
"""Create an ASR engine for the specified device.
Args:
device: Target device ("auto", "cpu", "cuda", "rocm").
compute_type: Compute precision ("int8", "float16", "float32").
prefer_faster_whisper: If True, prefer faster-whisper over PyTorch.
Returns:
An ASR engine implementing AsrEngine.
Raises:
EngineCreationError: If engine creation fails.
"""
resolved_device = _resolve_device(device)
logger.info(
"Creating ASR engine",
device=device,
resolved_device=resolved_device,
compute_type=compute_type,
)
if resolved_device == "cpu":
return _create_cpu_engine(compute_type)
if resolved_device == "cuda":
return _create_cuda_engine(compute_type, prefer_faster_whisper)
if resolved_device == "rocm":
return _create_rocm_engine(compute_type, prefer_faster_whisper)
msg = f"Unsupported device: {resolved_device}"
raise EngineCreationError(msg)
def _resolve_device(device: str) -> str:
"""Resolve 'auto' device to actual backend."""
if device != "auto":
return device
backend = detect_gpu_backend()
if backend == GpuBackend.CUDA:
return "cuda"
if backend == GpuBackend.ROCM:
return "rocm"
return "cpu"
def _create_cpu_engine(compute_type: str) -> AsrEngine:
"""Create CPU engine (always uses faster-whisper)."""
from noteflow.infrastructure.asr.engine import FasterWhisperEngine
# CPU only supports int8 and float32
if compute_type == "float16":
compute_type = "float32"
return FasterWhisperEngine(device="cpu", compute_type=compute_type)
def _create_cuda_engine(
compute_type: str,
prefer_faster_whisper: bool,
) -> AsrEngine:
"""Create CUDA engine."""
if prefer_faster_whisper:
from noteflow.infrastructure.asr.engine import FasterWhisperEngine
return FasterWhisperEngine(device="cuda", compute_type=compute_type)
from noteflow.infrastructure.asr.pytorch_engine import WhisperPyTorchEngine
return WhisperPyTorchEngine(device="cuda", compute_type=compute_type)
def _create_rocm_engine(
compute_type: str,
prefer_faster_whisper: bool,
) -> AsrEngine:
"""Create ROCm engine.
Attempts to use CTranslate2-ROCm fork if available,
falls back to PyTorch Whisper otherwise.
"""
if prefer_faster_whisper:
try:
from noteflow.infrastructure.asr.rocm_engine import FasterWhisperRocmEngine
return FasterWhisperRocmEngine(compute_type=compute_type)
except ImportError:
logger.warning(
"CTranslate2-ROCm not installed, falling back to PyTorch Whisper"
)
from noteflow.infrastructure.asr.pytorch_engine import WhisperPyTorchEngine
return WhisperPyTorchEngine(device="cuda", compute_type=compute_type)
```
### 4. Extended Device Types
**File**: `src/noteflow/application/services/asr_config/types.py` (modifications)
```python
class AsrDevice(str, Enum):
"""Supported ASR devices."""
CPU = "cpu"
CUDA = "cuda"
ROCM = "rocm"
DEVICE_COMPUTE_TYPES: Final[dict[AsrDevice, tuple[AsrComputeType, ...]]] = {
AsrDevice.CPU: (AsrComputeType.INT8, AsrComputeType.FLOAT32),
AsrDevice.CUDA: (
AsrComputeType.INT8,
AsrComputeType.FLOAT16,
AsrComputeType.FLOAT32,
),
AsrDevice.ROCM: (
AsrComputeType.INT8,
AsrComputeType.FLOAT16,
AsrComputeType.FLOAT32,
),
}
```
### 5. Updated Device Mixin
**File**: `src/noteflow/infrastructure/diarization/engine/_device_mixin.py` (modifications)
```python
def _detect_available_device(self) -> str:
"""Detect the best available device for computation."""
if self._device_preference != "auto":
return self._device_preference
import torch
if torch.cuda.is_available():
# ROCm uses the same torch.cuda device string; keep "cuda" here
# and expose backend (cuda vs rocm) via a separate detection helper.
return "cuda"
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
return "mps"
return "cpu"
```
### 6. Proto Updates
**File**: `src/noteflow/grpc/proto/noteflow.proto` (additions)
```protobuf
enum AsrDevice {
ASR_DEVICE_UNSPECIFIED = 0;
ASR_DEVICE_CPU = 1;
ASR_DEVICE_CUDA = 2;
ASR_DEVICE_ROCM = 3; // NEW
}
message AsrConfiguration {
// ... existing fields ...
bool rocm_available = 8; // NEW: ROCm/HIP detected
string gpu_backend = 9; // NEW: "cuda", "rocm", "mps", or "none"
}
```
Note: `gpu_backend` reflects detected hardware/runtime, while `device` reflects the
configured ASR target (which may still be "cuda" even on ROCm).
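To make that distinction concrete, a small sketch of how the new capability fields could be populated from detection, independent of the configured device (assumes the string-valued `GpuBackend` sketched earlier; the surrounding handler code in `grpc/mixins/asr_config.py` is not shown):
```python
from noteflow.domain.ports.gpu import GpuBackend
from noteflow.infrastructure.gpu.detection import detect_gpu_backend


def gpu_capability_fields() -> dict[str, object]:
    """Values for the new AsrConfiguration fields (rocm_available, gpu_backend)."""
    backend = detect_gpu_backend()
    return {
        "rocm_available": backend is GpuBackend.ROCM,
        "gpu_backend": backend.value,  # "cuda", "rocm", "mps", or "none"
    }
```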
---
## Quality Gates
- [ ] `pytest tests/quality/` passes
- [ ] `make quality-py` passes
- [ ] Proto regenerated and stubs patched
- [ ] ROCm detection works on AMD hardware
- [ ] PyTorch Whisper fallback functional
- [ ] CUDA path unchanged (no regression)
- [ ] Docker ROCm image builds successfully
- [ ] Meets `docs/sprints/QUALITY_STANDARDS.md`
---
## Test Plan
### Unit Tests
**File**: `tests/infrastructure/gpu/test_detection.py`
```python
import pytest
from unittest.mock import patch, MagicMock
from noteflow.infrastructure.gpu.detection import (
GpuBackend,
detect_gpu_backend,
get_gpu_info,
)
class TestDetectGpuBackend:
"""Tests for GPU backend detection."""
def test_no_torch_returns_none(self) -> None:
"""Return NONE when torch not installed."""
with patch.dict("sys.modules", {"torch": None}):
# Force reimport
from importlib import reload
from noteflow.infrastructure.gpu import detection
reload(detection)
assert detection.detect_gpu_backend() == GpuBackend.NONE
def test_cuda_available_returns_cuda(self) -> None:
"""Return CUDA when CUDA available."""
mock_torch = MagicMock()
mock_torch.cuda.is_available.return_value = True
mock_torch.version.hip = None
with patch.dict("sys.modules", {"torch": mock_torch}):
assert detect_gpu_backend() == GpuBackend.CUDA
def test_hip_available_returns_rocm(self) -> None:
"""Return ROCM when HIP version present."""
mock_torch = MagicMock()
mock_torch.cuda.is_available.return_value = True
mock_torch.version.hip = "6.0.0"
with patch.dict("sys.modules", {"torch": mock_torch}):
assert detect_gpu_backend() == GpuBackend.ROCM
def test_mps_available_returns_mps(self) -> None:
"""Return MPS on Apple Silicon."""
mock_torch = MagicMock()
mock_torch.cuda.is_available.return_value = False
mock_torch.backends.mps.is_available.return_value = True
with patch.dict("sys.modules", {"torch": mock_torch}):
assert detect_gpu_backend() == GpuBackend.MPS
```
### Integration Tests
**File**: `tests/integration/test_engine_factory.py`
```python
import pytest
from unittest.mock import patch
from noteflow.infrastructure.asr.factory import create_asr_engine
@pytest.mark.integration
class TestEngineFactory:
"""Integration tests for ASR engine factory."""
def test_auto_device_selects_available(self) -> None:
"""Auto device resolves to available backend."""
engine = create_asr_engine(device="auto")
# Note: ROCm may still report "cuda" at the torch device level.
assert engine.device in ("cpu", "cuda", "rocm")
def test_cpu_engine_always_works(self) -> None:
"""CPU engine works regardless of GPU."""
engine = create_asr_engine(device="cpu")
assert engine.device == "cpu"
@pytest.mark.skipif(
not _rocm_available(),
reason="ROCm not available",
)
def test_rocm_engine_creation(self) -> None:
"""ROCm engine creates on AMD hardware."""
engine = create_asr_engine(device="rocm")
assert engine.device == "rocm"
def _rocm_available() -> bool:
"""Check if ROCm is available for testing."""
try:
import torch
return (
torch.cuda.is_available()
and hasattr(torch.version, 'hip')
and torch.version.hip is not None
)
except ImportError:
return False
```
---
## Failure Modes
| Failure | Detection | Recovery |
|---------|-----------|----------|
| CTranslate2-ROCm not installed | Import error in factory | Fall back to PyTorch Whisper |
| ROCm driver version mismatch | HIP error at runtime | Log error, suggest driver update |
| GPU OOM on large model | CUDA/HIP OOM error | Reduce model size or use CPU |
| Unsupported GPU architecture | "invalid device function" | Fall back to CPU with warning |
| PyTorch ROCm not installed | Import error | CPU-only mode |
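The import-time failures are already covered by the factory fallback. For the runtime OOM row, a hedged sketch of one possible recovery path, using the factory and engine APIs proposed above (the helper name is illustrative):
```python
import torch

from noteflow.infrastructure.asr.factory import create_asr_engine


def load_with_oom_fallback(model_size: str = "large-v3"):
    """Try the requested model on the detected GPU; drop to a small CPU model on OOM."""
    engine = create_asr_engine(device="auto")
    try:
        engine.load_model(model_size)
    except RuntimeError as exc:  # CUDA and HIP OOM both surface as RuntimeError
        if "out of memory" not in str(exc).lower():
            raise
        torch.cuda.empty_cache()
        engine = create_asr_engine(device="cpu")
        engine.load_model("base")
    return engine
```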
---
## Dependencies
### Python Packages
| Package | Version | Purpose | Notes |
|---------|---------|---------|-------|
| `torch` | >=2.0 | PyTorch with ROCm | Version must match ROCm runtime |
| `torchaudio` | >=2.0 | Audio processing | Version must match ROCm runtime |
| `faster-whisper` | >=1.0 | CUDA/CPU ASR | Existing |
| `ctranslate2-rocm` | (fork) | ROCm ASR | Optional, install from GitHub fork |
| `openai-whisper` | >=20231117 | PyTorch fallback | Optional (slower) |
### System Requirements
| Requirement | Minimum | Notes |
|-------------|---------|-------|
| ROCm | Per AMD docs | Must match PyTorch ROCm wheel version |
| Linux kernel | Per AMD docs | Depends on ROCm release + distro |
| AMD GPU driver | amdgpu | Included with ROCm packages |
---
## Installation Guide
### PyTorch ROCm Installation
```bash
# Example (replace <rocm-version> with your installed ROCm version)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm<rocm-version>
```
### CTranslate2-ROCm Installation (Optional)
```bash
# Install from fork for faster-whisper ROCm support
pip install git+https://github.com/arlo-phoenix/CTranslate2-rocm.git
pip install faster-whisper
```
### Docker ROCm Image
```dockerfile
# Example base image (pin to a ROCm/PyTorch combo that matches your host)
FROM rocm/pytorch:rocm6.x_ubuntu22.04_py3.10_pytorch_2.x.x
WORKDIR /app
# Install NoteFlow
COPY . .
RUN pip install -e ".[rocm]"
CMD ["python", "-m", "noteflow.grpc.server"]
```
---
## Post-Sprint
- ROCm performance benchmarking vs CUDA
- Multi-GPU support (device index selection)
- ROCm-specific memory optimization
- Apple Neural Engine support investigation
- Windows ROCm support (if/when officially available)
---
## References
### Official Documentation
- [PyTorch HIP Semantics](https://docs.pytorch.org/docs/stable/notes/hip.html)
- [ROCm Compatibility Matrix](https://rocm.docs.amd.com/en/latest/compatibility/compatibility-matrix.html)
- [AMD ROCm PyTorch Installation](https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/install/installrad/native_linux/install-pytorch.html)
### AMD Blog Posts
- [Speech-to-Text on AMD GPU with Whisper](https://rocm.blogs.amd.com/artificial-intelligence/whisper/README.html)
- [CTranslate2 on AMD GPUs](https://rocm.blogs.amd.com/artificial-intelligence/ctranslate2/README.html)
- [Fine-tuning Speech Models on ROCm](https://rocm.blogs.amd.com/artificial-intelligence/speech_models/README.html)
### Community Projects
- [CTranslate2-ROCm Fork](https://github.com/arlo-phoenix/CTranslate2-rocm)
- [insanely-fast-whisper-rocm](https://github.com/beecave-homelab/insanely-fast-whisper-rocm)
- [wyoming-faster-whisper-rocm](https://github.com/Donkey545/wyoming-faster-whisper-rocm)
- [ROCM SDK Builder](https://github.com/lamikr/rocm_sdk_builder)


@@ -0,0 +1,528 @@
# ROCm Support: Technical Deep Dive
This document provides detailed technical analysis of the ROCm integration, including architecture decisions, code patterns, and potential challenges.
Alignment note: the current codebase already defines `AsrEngine` in
`src/noteflow/infrastructure/asr/protocols.py` and uses `AsrResult` from
`src/noteflow/infrastructure/asr/dto.py`. Prefer extending those instead of
adding parallel protocol/DTO types unless we plan a broader refactor.
---
## Table of Contents
1. [ROCm/HIP Architecture Overview](#rocmhip-architecture-overview)
2. [PyTorch HIP Transparency](#pytorch-hip-transparency)
3. [CTranslate2 Challenge](#ctranslate2-challenge)
4. [Engine Protocol Design](#engine-protocol-design)
5. [Detection Strategy](#detection-strategy)
6. [Memory Management](#memory-management)
7. [Performance Considerations](#performance-considerations)
8. [Migration Path](#migration-path)
---
## ROCm/HIP Architecture Overview
### What is ROCm?
ROCm (Radeon Open Compute) is AMD's open-source software platform for GPU computing. It includes:
- **HIP (Heterogeneous-compute Interface for Portability)**: A C++ runtime API and kernel language that allows developers to write portable code for AMD and NVIDIA GPUs.
- **rocBLAS/rocFFT/etc.**: Optimized math libraries equivalent to cuBLAS/cuFFT.
- **MIOpen**: Deep learning primitives library (equivalent to cuDNN).
### HIP Compilation Modes
```
┌─────────────────────────────────────────────────────────────────┐
│ HIP Source Code │
└─────────────────────────────────────────────────────────────────┘
┌───────────────┴───────────────┐
▼ ▼
┌─────────────────────────┐ ┌─────────────────────────┐
│ hipcc (AMD path) │ │ hipcc (NVIDIA path) │
│ Compiles to ROCm │ │ Compiles to CUDA │
└─────────────────────────┘ └─────────────────────────┘
│ │
▼ ▼
┌─────────────────────────┐ ┌─────────────────────────┐
│ AMD GPU binary │ │ NVIDIA GPU binary │
│ (runs on Radeon/MI) │ │ (runs on GeForce/A) │
└─────────────────────────┘ └─────────────────────────┘
```
### Key Insight for NoteFlow
PyTorch's ROCm build uses HIP to expose the same `torch.cuda.*` API. From Python's perspective, the code is identical:
```python
# This works on both CUDA and ROCm PyTorch builds
device = torch.device("cuda")
tensor = torch.randn(1000, 1000, device=device)
```
---
## PyTorch HIP Transparency
### How PyTorch Achieves Compatibility
PyTorch's ROCm build maps CUDA APIs to HIP:
| CUDA API | HIP API (internal) | Python API (unchanged) |
|----------|-------------------|------------------------|
| `cudaMalloc` | `hipMalloc` | `torch.cuda.memory_allocated()` |
| `cudaMemcpy` | `hipMemcpy` | `tensor.cuda()` |
| `cudaDeviceGetAttribute` | `hipDeviceGetAttribute` | `torch.cuda.get_device_properties()` |
| `cudaGetDeviceCount` | `hipGetDeviceCount` | `torch.cuda.device_count()` |
### Detection via torch.version.hip
```python
import torch
# Standard CUDA check (works on both CUDA and ROCm)
if torch.cuda.is_available():
# Distinguish between CUDA and ROCm
if hasattr(torch.version, 'hip') and torch.version.hip:
print(f"ROCm {torch.version.hip}")
backend = "rocm"
else:
print(f"CUDA {torch.version.cuda}")
backend = "cuda"
```
### Implications for NoteFlow
**Components that work unchanged:**
| Component | Why It Works |
|-----------|--------------|
| pyannote.audio | Pure PyTorch, uses `torch.cuda` API |
| diart | Pure PyTorch, uses `torch.cuda` API |
| spaCy transformers | Uses PyTorch backend |
| Diarization engine | Uses `torch.device("cuda")` |
| VRAM monitoring | Uses `torch.cuda.mem_get_info()` |
**Components that need modification:**
| Component | Issue | Solution |
|-----------|-------|----------|
| faster-whisper | Uses CTranslate2, no ROCm | Engine abstraction + fallback |
| Device detection | Doesn't distinguish ROCm | Add `torch.version.hip` check |
| gRPC config | No ROCM device type | Add to proto enum |
---
## CTranslate2 Challenge
### Why CTranslate2 Doesn't Support ROCm
CTranslate2 is a C++ inference engine optimized for transformer models. Its CUDA support is:
1. **Compiled against CUDA toolkit**: Uses cuBLAS, cuDNN directly
2. **No HIP port**: Would require significant rewrite
3. **Performance-critical paths**: Hand-optimized CUDA kernels
### Community Fork Analysis
The [CTranslate2-ROCm fork](https://github.com/arlo-phoenix/CTranslate2-rocm) uses hipify to convert CUDA code:
```bash
# How the fork was created (simplified)
hipify-perl -inplace -print-stats cuda_file.cu # Convert CUDA to HIP
hipcc -o output hip_file.cpp # Compile with hipcc
```
**Fork Status (as of research):**
| Aspect | Status |
|--------|--------|
| Maintenance | Active, community-driven |
| ROCm versions | 5.7 - 6.x tested |
| Performance | Reported faster than whisper.cpp (verify on our hardware) |
| Whisper support | Full (via faster-whisper) |
| Stability | Community reports vary; validate for our workloads |
### Why We Need the Protocol Pattern
```
┌─────────────────────────────────────────────────────────────────┐
│ AsrEngine │
│ - transcribe(audio) -> segments │
│ - load_model(size) │
│ - device, compute_type, is_loaded │
└─────────────────────────────────────────────────────────────────┘
┌───────────────────┼───────────────────┐
│ │ │
▼ ▼ ▼
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│FasterWhisper │ │FasterWhisper │ │WhisperPyTorch │
│Engine │ │RocmEngine │ │Engine │
│(CUDA/CPU) │ │(ROCm fork) │ │(Universal) │
│ │ │ │ │ │
│Uses: ctranslate2│ │Uses: ctranslate2│ │Uses: openai- │
│ (official) │ │ -rocm │ │ whisper │
└─────────────────┘ └─────────────────┘ └─────────────────┘
│ │ │
▼ ▼ ▼
NVIDIA GPU AMD GPU Any PyTorch
Device
```
---
## Engine Protocol Design
### Protocol Definition Rationale
The `AsrEngine` protocol uses Python's `Protocol` for structural subtyping:
```python
from typing import Protocol, runtime_checkable
@runtime_checkable
class AsrEngine(Protocol):
"""All ASR engines must implement this interface."""
@property
def device(self) -> str: ...
@property
def is_loaded(self) -> bool: ...
def transcribe(self, audio: NDArray[np.float32]) -> Iterator[AsrResult]: ...
```
**Why Protocol over ABC?**
| Approach | Pros | Cons |
|----------|------|------|
| `Protocol` | Structural typing, no inheritance required | Harder to enforce at definition time |
| `ABC` | Explicit contract, IDE support | Requires inheritance, tighter coupling |
We choose `Protocol` because:
1. `FasterWhisperEngine` already exists without base class
2. Third-party engines can implement without importing our types
3. `@runtime_checkable` enables `isinstance()` checks
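To make point 3 concrete, a self-contained sketch of the structural check (the protocol is trimmed and `FakeEngine` is hypothetical):
```python
from typing import Iterator, Protocol, runtime_checkable


@runtime_checkable
class AsrEngineLike(Protocol):
    """Trimmed stand-in for AsrEngine, just for the isinstance illustration."""

    @property
    def device(self) -> str: ...

    @property
    def is_loaded(self) -> bool: ...

    def transcribe(self, audio: object) -> Iterator[str]: ...


class FakeEngine:
    """Never inherits from the protocol; it only matches structurally."""

    device = "cpu"
    is_loaded = False

    def transcribe(self, audio: object) -> Iterator[str]:
        yield "stub segment"


assert isinstance(FakeEngine(), AsrEngineLike)  # passes: required members are present
```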
### Factory Pattern
```python
def create_asr_engine(device: str = "auto") -> AsrEngine:
"""Factory function for ASR engine creation.
Encapsulates:
1. Device detection (CUDA vs ROCm vs CPU)
2. Library availability (CTranslate2-ROCm installed?)
3. Fallback logic (PyTorch Whisper if no native engine)
"""
```
**Why factory function over class?**
- Simpler API: `create_asr_engine("auto")` vs `AsrEngineFactory().create("auto")`
- No state needed between creations
- Easier to test with mocks
---
## Detection Strategy
### Multi-Layer Detection
```python
def get_full_device_info() -> DeviceInfo:
"""Complete device detection with all relevant information."""
# Layer 1: GPU Backend
backend = detect_gpu_backend() # CUDA, ROCm, MPS, or NONE
# Layer 2: Library Availability
has_ctranslate2 = _check_ctranslate2()
has_ctranslate2_rocm = _check_ctranslate2_rocm()
has_openai_whisper = _check_openai_whisper()
# Layer 3: Hardware Capabilities
if backend in (GpuBackend.CUDA, GpuBackend.ROCM):
props = torch.cuda.get_device_properties(0)
compute_capability = (props.major, props.minor)
vram_gb = props.total_memory / (1024**3)
return DeviceInfo(
backend=backend,
has_ctranslate2=has_ctranslate2,
has_ctranslate2_rocm=has_ctranslate2_rocm,
has_openai_whisper=has_openai_whisper,
compute_capability=compute_capability,
vram_gb=vram_gb,
)
```
### GPU Architecture Detection for ROCm
AMD GPU architectures use "gfx" naming:
| Architecture | gfx ID | Example GPUs |
|--------------|--------|--------------|
| RDNA 3 | gfx1100, gfx1101, gfx1102 | RX 7900 XTX, RX 7600 |
| RDNA 2 | gfx1030, gfx1031, gfx1032 | RX 6800 XT, RX 6600 |
| CDNA 2 | gfx90a | MI210, MI250 |
| CDNA 3 | gfx942 | MI300X |
```python
def get_amd_gpu_architecture() -> str | None:
    """Get the AMD GPU architecture string (e.g., "gfx1100")."""
    if not _is_rocm():
        return None
    props = torch.cuda.get_device_properties(0)
    # ROCm builds expose the architecture via gcnArchName (often with feature
    # suffixes such as ":sramecc+:xnack-"); fall back to the device name.
    arch = getattr(props, "gcnArchName", props.name)
    return arch.split(":")[0]
```
### Handling Unsupported GPUs
ROCm builds ship with kernels for specific architectures. If your GPU's gfx ID isn't supported:
```
RuntimeError: HIP error: invalid device function
```
**Detection strategy:**
```python
import os

OFFICIALLY_SUPPORTED_GFX = {
"gfx900", "gfx906", "gfx908", "gfx90a", # MI series
"gfx1030", "gfx1100", "gfx1101", "gfx1102", # Consumer RDNA
}
def check_rocm_support(gfx_id: str) -> tuple[bool, str]:
"""Check if GPU architecture is officially supported."""
if gfx_id in OFFICIALLY_SUPPORTED_GFX:
return True, "Officially supported"
# Check for compatible override (HSA_OVERRIDE_GFX_VERSION)
if os.environ.get("HSA_OVERRIDE_GFX_VERSION"):
return True, "Override enabled (may be unstable)"
return False, f"Architecture {gfx_id} not officially supported"
```
---
## Memory Management
### VRAM Monitoring
ROCm exposes memory through the same PyTorch API:
```python
def get_gpu_memory() -> tuple[int, int]:
"""Get (free, total) GPU memory in bytes."""
if not torch.cuda.is_available():
return (0, 0)
# Works identically for CUDA and ROCm
free, total = torch.cuda.mem_get_info(device=0)
return (free, total)
```
### Memory Pools
ROCm's memory allocator differs from CUDA:
| Aspect | CUDA | ROCm |
|--------|------|------|
| Allocator | cudaMallocAsync (11.2+) | hipMallocAsync (5.2+) |
| Memory pools | Yes | Yes (but different behavior) |
| Unified memory | Full support | Limited support |
**Recommendation:** Use PyTorch's caching allocator abstraction rather than direct HIP calls.
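A short illustration of that recommendation; these allocator queries behave the same on CUDA and ROCm builds:
```python
import torch

if torch.cuda.is_available():
    allocated = torch.cuda.memory_allocated(0)  # bytes currently held by tensors
    reserved = torch.cuda.memory_reserved(0)    # bytes held by the caching allocator
    free, total = torch.cuda.mem_get_info(0)    # raw device numbers (HIP-backed on ROCm)
    print(f"allocated={allocated} reserved={reserved} free={free}/{total}")
```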
### Model Memory Requirements
Approximate figures; validate with our model sizes, batch sizes, and backend.
| Model | VRAM (float16) | VRAM (int8) |
|-------|----------------|-------------|
| tiny | ~1 GB | ~0.5 GB |
| base | ~1.5 GB | ~0.8 GB |
| small | ~2 GB | ~1 GB |
| medium | ~5 GB | ~2.5 GB |
| large-v3 | ~10 GB | ~5 GB |
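A sketch of turning the table into a selection helper (thresholds copied from the table; the 20% headroom factor is an assumption):
```python
def pick_model_for_vram(vram_gb: float, compute_type: str = "float16") -> str:
    """Pick the largest Whisper model that should fit, per the approximate table above."""
    budgets = {
        "float16": [("large-v3", 10.0), ("medium", 5.0), ("small", 2.0), ("base", 1.5), ("tiny", 1.0)],
        "int8": [("large-v3", 5.0), ("medium", 2.5), ("small", 1.0), ("base", 0.8), ("tiny", 0.5)],
    }
    for model, needed_gb in budgets.get(compute_type, budgets["float16"]):
        if vram_gb >= needed_gb * 1.2:  # keep ~20% headroom for activations
            return model
    return "tiny"
```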
---
## Performance Considerations
### Expected Performance Comparison
Based on community benchmarks (directional only; verify locally):
| Configuration | Relative Speed | Notes |
|---------------|----------------|-------|
| CUDA + CTranslate2 | 1.0x (baseline) | Fastest |
| ROCm + CTranslate2-fork | 0.9x | ~10% slower |
| ROCm + PyTorch Whisper | 0.5x | Pure PyTorch overhead |
| CPU + CTranslate2 (int8) | 0.3x | Reasonable for short audio |
### ROCm-Specific Optimizations
1. **Kernel Compilation Cache:**
```bash
export MIOPEN_USER_DB_PATH=/path/to/cache
export MIOPEN_FIND_MODE=3 # Fast compilation
```
2. **Memory Optimization:**
```python
# Force garbage collection between transcriptions
torch.cuda.empty_cache()
gc.collect()
```
3. **Compute Type Selection:**
- Use `float16` on MI-series (native support)
- Use `float32` on RDNA if `float16` shows artifacts
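Following on from that guidance, a possible default-selection helper keyed on the architecture table earlier in this document (the gfx-prefix mapping is an assumption; benchmark before relying on it):
```python
def default_compute_type(gfx_id: str) -> str:
    """Pick a starting compute type from the GPU architecture string."""
    if gfx_id.startswith("gfx9"):  # CDNA / MI-series per the architecture table
        return "float16"
    return "float32"  # RDNA consumer cards: a safer default if float16 misbehaves
```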
### Batch Processing Recommendations
Example values only; tune for each GPU and model size.
```python
# ROCm benefits from larger batches due to kernel launch overhead
BATCH_SIZES = {
"cuda": 16,
"rocm": 32, # Larger batch amortizes launch cost
"cpu": 8,
}
```
---
## Migration Path
### Step 1: Non-Breaking Detection (v1)
Add ROCm detection without changing behavior:
```python
# In _engine_manager.py
def detect_gpu_backend(self) -> str:
"""Detect GPU backend for logging/telemetry."""
if not torch.cuda.is_available():
return "none"
if hasattr(torch.version, 'hip') and torch.version.hip:
return "rocm"
return "cuda"
```
### Step 2: Feature-Flagged Engine Selection (v2)
```python
# Behind NOTEFLOW_FEATURE_ROCM_ENABLED flag
if settings.feature_flags.rocm_enabled:
engine = create_asr_engine(device="auto") # Uses factory
else:
engine = FasterWhisperEngine(device=device) # Legacy path
```
### Step 3: Full Integration (v3)
- Remove feature flag, factory becomes default
- Add ROCm to gRPC configuration
- Document ROCm installation
### Rollback Strategy
Each phase can be disabled independently:
```python
# Emergency rollback
NOTEFLOW_FEATURE_ROCM_ENABLED=false # Disables ROCm path
NOTEFLOW_ASR_DEVICE=cpu # Forces CPU
```
---
## Testing Strategy
### Unit Tests (No GPU Required)
```python
class TestGpuDetection:
"""Mock-based tests for detection logic."""
def test_rocm_detection_with_hip_version(self, mock_torch):
mock_torch.version.hip = "6.0.0"
mock_torch.cuda.is_available.return_value = True
assert detect_gpu_backend() == GpuBackend.ROCM
```
### Integration Tests (GPU Required)
```python
@pytest.mark.skipif(
not _rocm_available(),
reason="ROCm not available"
)
class TestRocmEngine:
"""Tests that require actual ROCm hardware."""
def test_model_loading(self):
engine = create_asr_engine(device="rocm")
engine.load_model("tiny")
assert engine.is_loaded
```
### CI/CD Considerations
```yaml
# GitHub Actions example
jobs:
test-rocm:
runs-on: [self-hosted, rocm] # Custom runner with AMD GPU
steps:
- uses: actions/checkout@v4
- run: pytest -m rocm tests/
```
---
## Troubleshooting Guide
### Common Issues
| Error | Cause | Solution |
|-------|-------|----------|
| `HIP error: invalid device function` | GPU architecture not supported | Set `HSA_OVERRIDE_GFX_VERSION` or use CPU |
| `ImportError: libamdhip64.so` | ROCm not installed | Install ROCm or use CPU |
| `CUDA out of memory` (on ROCm) | VRAM exhausted | Reduce model size or batch |
| Model loads but transcription fails | Driver mismatch | Update ROCm to match PyTorch version |
### Environment Variables
```bash
# Debug ROCm issues
export AMD_LOG_LEVEL=3 # Verbose logging
export HIP_VISIBLE_DEVICES=0 # Limit to first GPU
export HSA_OVERRIDE_GFX_VERSION=11.0.0 # Override gfx check
# Performance tuning
export MIOPEN_FIND_MODE=3 # Fast kernel selection
export MIOPEN_USER_DB_PATH=/cache # Kernel cache location
```
---
## References
- [PyTorch HIP Documentation](https://docs.pytorch.org/docs/stable/notes/hip.html)
- [ROCm GitHub Repository](https://github.com/RadeonOpenCompute/ROCm)
- [CTranslate2 GitHub](https://github.com/OpenNMT/CTranslate2)
- [CTranslate2-ROCm Fork](https://github.com/arlo-phoenix/CTranslate2-rocm)
- [AMD ROCm Documentation](https://rocm.docs.amd.com/)