diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..fd2c135 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,50 @@ +FROM mcr.microsoft.com/devcontainers/python:3.12-bookworm + +# System packages for UI (Flet/Flutter), tray (pystray), hotkeys (pynput), and audio (sounddevice). +RUN apt-get update \ && apt-get install -y --no-install-recommends \ build-essential \ pkg-config \ meson \ ninja-build \ dbus-x11 \ libgtk-3-0 \ libgirepository-1.0-1 \ libgirepository1.0-dev \ gobject-introspection \ libcairo2-dev \ libglib2.0-dev \ gir1.2-gtk-3.0 \ libayatana-appindicator3-1 \ gir1.2-ayatanaappindicator3-0.1 \ libnss3 \ libx11-xcb1 \ libxcomposite1 \ libxdamage1 \ libxrandr2 \ libxext6 \ libxi6 \ libxtst6 \ libxfixes3 \ libxrender1 \ libxinerama1 \ libxcursor1 \ libxss1 \ libxkbcommon0 \ libxkbcommon-x11-0 \ libgl1 \ libegl1 \ libgbm1 \ libdrm2 \ libasound2 \ libpulse0 \ libportaudio2 \ portaudio19-dev \ libsndfile1 \ libpango-1.0-0 \ libpangocairo-1.0-0 \ libatk1.0-0 \ libatk-bridge2.0-0 \ libgdk-pixbuf2.0-0 \ && rm -rf /var/lib/apt/lists/* + diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..56cbd6e --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,40 @@ +{ + "name": "noteflow", + "build": { + "dockerfile": "Dockerfile" + }, + "features": { + "ghcr.io/devcontainers/features/desktop-lite:1": { + "webPort": "6080" + }, + "ghcr.io/devcontainers/features/docker-outside-of-docker:1": {} + }, + "forwardPorts": [6080], + "portsAttributes": { + "6080": { + "label": "Desktop (noVNC)", + "onAutoForward": "notify" + } + }, + "containerEnv": { + "DISPLAY": ":1", + "XDG_RUNTIME_DIR": "/tmp/runtime-vscode" + }, + "postCreateCommand": ".devcontainer/postCreate.sh", + "remoteUser": "vscode", + "customizations": { + "vscode": { + "settings": { + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.analysis.typeCheckingMode": "strict", + "python.analysis.autoSearchPaths": true, + "python.analysis.diagnosticMode": "workspace" + }, + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "charliermarsh.ruff" + ] + } + } +} diff --git a/.devcontainer/postCreate.sh b/.devcontainer/postCreate.sh new file mode 100755 index 0000000..4b0aecb --- /dev/null +++ b/.devcontainer/postCreate.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euo pipefail + +python -m pip install --upgrade pip +python -m pip install -e ".[dev]" + +# Enable pystray GTK/AppIndicator backend on Linux (optional but recommended for tray UI). +if ! python -m pip install pygobject; then + echo "pygobject install failed; pystray will fall back to X11 backend." >&2
+fi diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..151d7d5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,19 @@ +.git +.gitignore +.venv +__pycache__ +**/__pycache__ +*.pyc +*.pyo +*.pyd +.pytest_cache +.mypy_cache +.ruff_cache +.DS_Store +.env +.env.* +logs/ +.spikes/ +.spike_cache/ +spikes/__pycache__ +spikes/*/__pycache__ diff --git a/.repomixignore b/.repomixignore new file mode 100644 index 0000000..c63e5bc --- /dev/null +++ b/.repomixignore @@ -0,0 +1,4 @@ +# Add patterns to ignore here, one per line +# Example: +# *.log +# tmp/ diff --git a/.serena/.gitignore b/.serena/.gitignore new file mode 100644 index 0000000..14d86ad --- /dev/null +++ b/.serena/.gitignore @@ -0,0 +1 @@ +/cache diff --git a/.serena/memories/completion_checklist.md b/.serena/memories/completion_checklist.md new file mode 100644 index 0000000..f518cd9 --- /dev/null +++ b/.serena/memories/completion_checklist.md @@ -0,0 +1,6 @@ +# Completion checklist + +- Run relevant tests: `pytest` (or `pytest -m "not integration"`). +- Lint: `ruff check .` (optionally `ruff check --fix .`). +- Type check: `mypy src/noteflow` (optional: `basedpyright`). +- If packaging changes: `python -m build`. diff --git a/.serena/memories/project_overview.md b/.serena/memories/project_overview.md new file mode 100644 index 0000000..604bb70 --- /dev/null +++ b/.serena/memories/project_overview.md @@ -0,0 +1,15 @@ +# NoteFlow project overview + +- Purpose: Local-first meeting capture/transcription app with gRPC server + Flet client UI, with persistence, summarization, and diarization support. +- Tech stack: Python 3.12; gRPC; Flet for UI; SQLAlchemy + Alembic for persistence; asyncpg/PostgreSQL for DB; Ruff for lint; mypy/basedpyright for typing; hatchling for packaging. +- Structure: + - `src/noteflow/` main package + - `domain/` entities + ports + - `application/` services/use-cases + - `infrastructure/` audio, ASR, persistence, security, diarization + - `grpc/` proto, server, client + - `client/` Flet UI + - `config/` settings + - `src/noteflow/infrastructure/persistence/migrations/` Alembic migrations + - `tests/` mirrors package areas with `tests/fixtures/` + - `docs/` specs/milestones; `spikes/` experiments; `logs/` local-only diff --git a/.serena/memories/style_conventions.md b/.serena/memories/style_conventions.md new file mode 100644 index 0000000..43dbdf8 --- /dev/null +++ b/.serena/memories/style_conventions.md @@ -0,0 +1,6 @@ +# Style & conventions + +- Python 3.12, 4-space indentation, 100-char line length (Ruff). +- Naming: `snake_case` modules/functions, `PascalCase` classes, `UPPER_SNAKE_CASE` constants. +- Keep typing explicit; compatible with strict mypy. +- Generated `*_pb2.py` files excluded from lint.
diff --git a/.serena/memories/suggested_commands.md b/.serena/memories/suggested_commands.md new file mode 100644 index 0000000..b941363 --- /dev/null +++ b/.serena/memories/suggested_commands.md @@ -0,0 +1,9 @@ +# Suggested commands + +- Install dev deps: `python -m pip install -e ".[dev]"` +- Run gRPC server: `python -m noteflow.grpc.server --help` +- Run Flet client: `python -m noteflow.client.app --help` +- Tests: `pytest` (or `pytest -m "not integration"` to skip external services) +- Lint: `ruff check .` (autofix: `ruff check --fix .`) +- Type check: `mypy src/noteflow` (optional: `basedpyright`) +- Build wheel: `python -m build` diff --git a/.serena/project.yml b/.serena/project.yml new file mode 100644 index 0000000..7986cb0 --- /dev/null +++ b/.serena/project.yml @@ -0,0 +1,84 @@ +# list of languages for which language servers are started; choose from: +# al bash clojure cpp csharp csharp_omnisharp +# dart elixir elm erlang fortran go +# haskell java julia kotlin lua markdown +# nix perl php python python_jedi r +# rego ruby ruby_solargraph rust scala swift +# terraform typescript typescript_vts yaml zig +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# Special requirements: +# - csharp: Requires the presence of a .sln file in the project folder. +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. +languages: +- python + +# the encoding used by text files in the project +# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings +encoding: "utf-8" + +# whether to use the project's gitignore file to ignore files +# Added on 2025-04-07 +ignore_all_files_in_gitignore: true + +# list of additional paths to ignore +# same syntax as gitignore, so you can use * and ** +# Was previously called `ignored_dirs`, please update your config if you are using that. +# Added (renamed) on 2025-04-07 +ignored_paths: [] + +# whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 +read_only: false + +# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details. +# Below is the complete list of tools for convenience. +# To make sure you have the latest list of tools, and to view their descriptions, +# execute `uv run scripts/print_tool_overview.py`. +# +# * `activate_project`: Activates a project by name. +# * `check_onboarding_performed`: Checks whether project onboarding was already performed. +# * `create_text_file`: Creates/overwrites a file in the project directory. +# * `delete_lines`: Deletes a range of lines within a file. +# * `delete_memory`: Deletes a memory from Serena's project-specific memory store. +# * `execute_shell_command`: Executes a shell command. +# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced. +# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type). +# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type). 
+# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. +# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. +# * `initial_instructions`: Gets the initial instructions for the current project. +# Should only be used in settings where the system prompt cannot be set, +# e.g. in clients you have no control over, like Claude Desktop. +# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. +# * `insert_at_line`: Inserts content at a given line in a file. +# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. +# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). +# * `list_memories`: Lists memories in Serena's project-specific memory store. +# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). +# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context). +# * `read_file`: Reads a file within the project directory. +# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store. +# * `remove_project`: Removes a project from the Serena configuration. +# * `replace_lines`: Replaces a range of lines within a file with new content. +# * `replace_symbol_body`: Replaces the full definition of a symbol. +# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen. +# * `search_for_pattern`: Performs a search for a pattern in the project. +# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase. +# * `switch_modes`: Activates modes by providing a list of their names +# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information. +# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task. +# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed. +# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store. +excluded_tools: [] + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). +initial_prompt: "" + +project_name: "noteflow" +included_optional_tools: [] diff --git a/Dockerfile.dev b/Dockerfile.dev new file mode 100644 index 0000000..a5006e2 --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,25 @@ +FROM python:3.12-bookworm + +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +# Core build/runtime deps for project packages (sounddevice, asyncpg, cryptography). +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + pkg-config \ + portaudio19-dev \ + libsndfile1 \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +COPY . /workspace + +RUN python -m pip install --upgrade pip \ + && python -m pip install -e ".[dev]" watchfiles + +EXPOSE 50051 + +CMD ["python", "scripts/dev_watch_server.py"] diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..39509c4 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,31 @@ +services: + server: + build: + context: . 
+      dockerfile: Dockerfile.dev +    ports: +      - "50051:50051" +    environment: +      NOTEFLOW_DATABASE_URL: postgresql+asyncpg://noteflow:noteflow@db:5432/noteflow +    volumes: +      - .:/workspace +    depends_on: +      db: +        condition: service_healthy + +  db: +    image: pgvector/pgvector:pg16 +    environment: +      POSTGRES_DB: noteflow +      POSTGRES_USER: noteflow +      POSTGRES_PASSWORD: noteflow +    volumes: +      - noteflow_pg_data:/var/lib/postgresql/data +    healthcheck: +      test: ["CMD-SHELL", "pg_isready -U noteflow -d noteflow"] +      interval: 5s +      timeout: 5s +      retries: 10 + +volumes: +  noteflow_pg_data: diff --git a/docs/milestones.md b/docs/milestones.md index 1e7688e..5927b02 100644 --- a/docs/milestones.md +++ b/docs/milestones.md @@ -3,7 +3,7 @@ **Architecture:** Client-Server with gRPC (evolved from original single-process design) **Core principles:** Local-first, mic capture baseline, partial→final transcripts, evidence-linked summaries with strict citation enforcement. -**Last updated:** December 2024 +**Last updated:** December 2025 --- @@ -103,11 +103,11 @@ --- -### Milestone 3 — Partial→Final transcription + transcript persistence ⚠️ PARTIAL +### Milestone 3 — Partial→Final transcription + transcript persistence ✅ COMPLETE **Goal:** near real-time transcription with stability rules. -**Deliverables:** +**Deliverables:** ✅ ALL COMPLETE * [x] ASR wrapper service (faster-whisper with word timestamps) * Location: `src/noteflow/infrastructure/asr/engine.py` @@ -115,37 +115,28 @@ * [x] VAD + segment finalization logic * EnergyVad: `src/noteflow/infrastructure/asr/streaming_vad.py` * Segmenter: `src/noteflow/infrastructure/asr/segmenter.py` -* [ ] **Partial transcript feed to UI** ← GAP +* [x] Partial transcript feed to UI + * Server: `_maybe_emit_partial()` called during streaming (`service.py:601`) + * 2-second cadence with text deduplication + * Client: Handles `is_final=False` in `client.py:458-467` + * UI: `[LIVE]` row with blue styling (`transcript.py:182-219`) * [x] Final segments persisted to PostgreSQL + pgvector * Repository: `src/noteflow/infrastructure/persistence/repositories/segment.py` * [x] Post-meeting transcript view * Component: `src/noteflow/client/components/transcript.py` -**Current status:** +**Implementation details:** -Final segments are emitted and persisted. **Partial streaming is proto-defined but not wired end-to-end.** +* Server emits `UPDATE_TYPE_PARTIAL` every 2 seconds during speech activity +* Minimum 0.5 seconds of audio before partial inference +* Partial text deduplicated (only emitted when changed) +* Client renders partials with `is_final=False` flag +* UI displays `[LIVE]` indicator with blue background, grey italic text +* Partial row cleared when final segment arrives -**What exists for partials:** -* Proto: `UPDATE_TYPE_PARTIAL` defined in `noteflow.proto` (line 80) -* Proto: `TranscriptUpdate.partial_text` field defined (line 69) -* Client: `TranscriptSegment.is_final` field ready to distinguish partials -* Server: `_maybe_emit_partial()` method exists but not invoked during streaming +**Exit criteria:** ✅ ALL MET -**Remaining work to complete partials:** - -1. **Server** (`src/noteflow/grpc/service.py`): - * In `StreamTranscription`, call `_maybe_emit_partial()` every 2 seconds during speech - * Yield `TranscriptUpdate` with `update_type=UPDATE_TYPE_PARTIAL` and `partial_text` - * Complexity: Low (infrastructure exists) - -2.
**Client** (`src/noteflow/client/components/transcript.py`): - * Render partial text in grey at bottom of transcript list - * Replace on each partial update; clear on final segment - * Complexity: Low (client already handles `is_final` field) - -**Exit criteria:** - -* [ ] Live view shows partial text that settles into final segments +* [x] Live view shows partial text that settles into final segments * [x] After restart, final segments are still present and searchable within the meeting --- @@ -185,7 +176,7 @@ Final segments are emitted and persisted. **Partial streaming is proto-defined b --- -### Milestone 5 — Smart triggers (confidence model) + snooze/suppression ⚠️ DESIGNED, NOT INTEGRATED +### Milestone 5 — Smart triggers (confidence model) + snooze/suppression ⚠️ PARTIALLY INTEGRATED **Goal:** prompts that are helpful, not annoying. @@ -197,29 +188,42 @@ Final segments are emitted and persisted. **Partial streaming is proto-defined b * `TriggerSignal`, `TriggerDecision`, `TriggerAction` (IGNORE, NOTIFY, AUTO_START) * [x] `SignalProvider` protocol defined * Location: `src/noteflow/domain/triggers/ports.py` -* [ ] **Foreground app detector integration** ← GAP - * Infrastructure exists: `src/noteflow/infrastructure/triggers/foreground_app.py` - * Not wired to client -* [ ] **Audio activity detector integration** ← GAP - * Infrastructure exists: `src/noteflow/infrastructure/triggers/audio_activity.py` - * Not wired to client +* [x] Foreground app detector integration + * Infrastructure: `src/noteflow/infrastructure/triggers/foreground_app.py` + * Wired via `TriggerMixin`: `src/noteflow/client/_trigger_mixin.py` +* [x] Audio activity detector integration + * Infrastructure: `src/noteflow/infrastructure/triggers/audio_activity.py` + * Wired via `TriggerMixin`: `src/noteflow/client/_trigger_mixin.py` * [ ] Optional calendar connector stub (disabled by default) -* [ ] **Prompt notification + snooze + suppress per-app** ← GAP - * Application logic complete in `TriggerService` - * No UI integration (no tray prompts) +* [x] Trigger prompts + snooze (AlertDialog, not system notifications) + * `TriggerMixin._show_trigger_prompt()` displays AlertDialog + * Snooze button integrated + * Rate limiting active +* [ ] **System tray integration** ← GAP +* [ ] **Global hotkeys** ← GAP * [x] Settings for sensitivity and auto-start opt-in (in `TriggerService`) -**What exists (application layer complete):** +**Current integration status:** + +* Client app inherits from `TriggerMixin` (`app.py:65`) +* Signal providers initialized in `_initialize_triggers()` method +* Background trigger check loop runs via `_trigger_check_loop()` +* Handles NOTIFY and AUTO_START actions +* Prompts shown via Flet AlertDialog (not system notifications) + +**What works:** * Confidence scoring with configurable thresholds (0.40 notify, 0.80 auto-start) * Rate limiting between triggers * Snooze functionality with remaining time tracking * Per-app suppression config +* Foreground app detection (PyWinCtl) +* Audio activity detection (RMS sliding window) **Remaining work:** 1. **System Tray Integration** (New file: `src/noteflow/client/tray.py`) * Integrate pystray for minimize-to-tray - * Show trigger prompts as notifications + * Show trigger prompts as system notifications * Recording indicator icon * Complexity: Medium (spike validated in `spikes/spike_01_ui_tray_hotkeys/`) @@ -227,14 +231,12 @@ Final segments are emitted and persisted. 
**Partial streaming is proto-defined b * Integrate pynput for start/stop/annotation hotkeys * Complexity: Medium (spike validated) -3. **Wire Signal Providers to Client** - * Connect `AudioActivitySignalProvider` + `ForegroundAppSignalProvider` in `app.py` - * Complexity: Medium - **Exit criteria:** -* [ ] Trigger prompts happen when expected and can be snoozed -* [ ] Prompt rate-limited to prevent spam +* [x] Trigger prompts happen when expected and can be snoozed +* [x] Prompt rate-limited to prevent spam +* [ ] System tray notifications (currently AlertDialog only) +* [ ] Global hotkeys for quick actions --- @@ -341,21 +343,64 @@ Final segments are emitted and persisted. **Partial streaming is proto-defined b --- -### Milestone 8 (Optional pre‑release) — Post-meeting anonymous diarization ❌ NOT STARTED +### Milestone 8 (Optional pre‑release) — Post-meeting anonymous diarization ✅ COMPLETE **Goal:** "Speaker A/B/C" best-effort labeling. **Deliverables:** -* [ ] Background diarization job -* [ ] Align speaker turns to transcript -* [ ] UI display + rename speakers per meeting +* [x] Diarization engine with streaming + offline modes + * Location: `src/noteflow/infrastructure/diarization/engine.py` (315 lines) + * Streaming: `diart` library for real-time processing + * Offline: `pyannote.audio` for post-meeting refinement + * Device support: auto, cpu, cuda, mps +* [x] Speaker assignment logic + * Location: `src/noteflow/infrastructure/diarization/assigner.py` + * `assign_speaker()` maps time ranges via maximum overlap + * `assign_speakers_batch()` for bulk assignment + * Confidence scoring based on overlap duration +* [x] Data transfer objects + * Location: `src/noteflow/infrastructure/diarization/dto.py` + * `SpeakerTurn` with validation and overlap methods +* [x] Domain entity updates + * `Segment.speaker_id: str | None` and `speaker_confidence: float` +* [x] Proto/gRPC definitions + * `FinalSegment.speaker_id` and `speaker_confidence` fields + * `ServerInfo.diarization_enabled` and `diarization_ready` flags + * `RefineSpeakerDiarization` and `RenameSpeaker` RPCs +* [x] gRPC refinement RPC + * `refine_speaker_diarization()` in `service.py` for post-meeting processing + * `rename_speaker()` for user-friendly speaker labels +* [x] Configuration/settings + * `diarization_enabled`, `diarization_hf_token`, `diarization_device` + * `diarization_streaming_latency`, `diarization_min/max_speakers` +* [x] Dependencies added + * Optional extra `[diarization]`: pyannote.audio, diart, torch +* [x] UI display + * Speaker labels with color coding in `transcript.py` + * "Analyze Speakers" and "Rename Speakers" buttons in `meeting_library.py` +* [x] Server initialization + * `DiarizationEngine` wired in `server.py` with CLI args + * `--diarization`, `--diarization-hf-token`, `--diarization-device` flags +* [x] Client integration + * `refine_speaker_diarization()` and `rename_speaker()` methods in `client.py` + * `DiarizationResult` and `RenameSpeakerResult` DTOs +* [x] Tests + * 24 unit tests in `tests/infrastructure/test_diarization.py` + * Covers `SpeakerTurn`, `assign_speaker()`, `assign_speakers_batch()` -**Status:** Not implemented. Marked as optional pre-release feature. 
+**Deferred (optional future enhancement):** -**Exit criteria:** +* [ ] **Streaming integration** - Real-time speaker labels during recording + * Feed audio chunks to diarization during `StreamTranscription` + * Emit speaker changes in real-time + * Complexity: High (requires significant latency tuning) -* [ ] If diarization fails, app degrades gracefully to "Unknown." +**Exit criteria:** ✅ ALL MET + +* [x] If diarization fails, app degrades gracefully to "Unknown." +* [x] Post-meeting diarization refinement works end-to-end +* [ ] (Optional) Streaming diarization shows live speaker labels — deferred --- @@ -899,12 +944,12 @@ class Job(Protocol): | M0 Spikes | ✅ Complete | 100% | | M1 Repo Foundation | ✅ Complete | 100% | | M2 Meeting Lifecycle | ✅ Complete | 100% | -| M3 Transcription | ⚠️ Partial | 80% (finals done, partials not wired) | +| M3 Transcription | ✅ Complete | 100% | | M4 Review UX | ✅ Complete | 100% | -| M5 Triggers | ⚠️ Designed | 50% (application layer done, UI not) | +| M5 Triggers | ⚠️ Partial | 70% (integrated via mixin, tray/hotkeys not) | | M6 Summarization | ✅ Complete | 100% | | M7 Packaging | ⚠️ Partial | 40% (retention done, packaging not) | -| M8 Diarization | ❌ Not Started | 0% (optional) | +| M8 Diarization | ✅ Complete | 100% (streaming integration deferred) | ### Layer-by-Layer Status @@ -926,24 +971,25 @@ - [x] `ExportService` - Markdown, HTML - [x] `RecoveryService` - crash recovery -**Infrastructure Layer** ✅ 95% +**Infrastructure Layer** ✅ 98% - [x] Audio: capture, ring buffer, levels, playback, encrypted writer/reader - [x] ASR: faster-whisper engine, VAD, segmenter - [x] Persistence: SQLAlchemy + pgvector, Alembic migrations - [x] Security: AES-256-GCM, keyring keystore - [x] Summarization: Mock, Ollama, Cloud providers + citation verifier - [x] Export: Markdown, HTML formatters -- [ ] Triggers: signal providers exist but not integrated +- [x] Triggers: signal providers wired via TriggerMixin +- [x] Diarization: engine, assigner, DTOs, server wiring via CLI flags -**gRPC Layer** ✅ 95% +**gRPC Layer** ✅ 100% - [x] Proto definitions with bidirectional streaming - [x] Server: StreamTranscription, CreateMeeting, StopMeeting, etc.
- [x] Client wrapper with connection management - [x] Meeting store (in-memory + DB modes) - [x] GenerateSummary RPC wired to SummarizationService -- [ ] Partial transcript streaming not emitted +- [x] Partial transcript streaming (2-second cadence, deduplication) -**Client Layer** ✅ 80% +**Client Layer** ✅ 85% - [x] Flet app with state management - [x] VU meter, recording timer, transcript - [x] Playback controls + sync controller @@ -951,6 +997,7 @@ class Job(Protocol): - [x] Meeting library - [x] Summary panel with clickable citations - [x] Connection panel with auto-reconnect +- [x] Trigger detection via TriggerMixin (AlertDialog prompts) - [ ] System tray integration (spike validated, not integrated) - [ ] Global hotkeys (spike validated, not integrated) ## 11) Remaining Work Summary -### High Priority (Core UX Gaps) - -| # | Task | Files | Complexity | Blocker For | -|---|------|-------|------------|-------------| -| 1 | **Partial Transcript Streaming** | `src/noteflow/grpc/service.py` | Low | Real-time UX | -| | Emit `UPDATE_TYPE_PARTIAL` during speech at 2-second cadence | | | | - ### Medium Priority (Platform Features) | # | Task | Files | Complexity | Blocker For | |---|------|-------|------------|-------------| -| 3 | **System Tray Integration** | New: `src/noteflow/client/tray.py` | Medium | M5 triggers | -| | Integrate pystray for minimize-to-tray, recording indicator | | | | -| 4 | **Global Hotkeys** | New: `src/noteflow/client/hotkeys.py` | Medium | M5 triggers | +| 1 | **System Tray Integration** | New: `src/noteflow/client/tray.py` | Medium | M5 completion | +| | Integrate pystray for minimize-to-tray, system notifications, recording indicator | | | | +| 2 | **Global Hotkeys** | New: `src/noteflow/client/hotkeys.py` | Medium | M5 completion | | | Integrate pynput for start/stop/annotation hotkeys | | | | -| 5 | **Trigger Signal Integration** | `src/noteflow/client/app.py` | Medium | M5 completion | -| | Wire AudioActivity + ForegroundApp signal providers | | | | + +### Medium Priority (Diarization Polish) + +| # | Task | Files | Complexity | Blocker For | +|---|------|-------|------------|-------------| +| 3 | **Diarization Application Service** | New: `application/services/diarization_service.py` | Medium | M8 polish | +| | Orchestrate diarization workflow, model management | | | | +| 4 | **Streaming Diarization Integration** | `src/noteflow/grpc/service.py` | High | Optional (deferred) | +| | Live speaker labels during recording | | | | +| 5 | **Diarization Engine Tests** | New: `tests/infrastructure/diarization/` | Medium | M8 stability | +| | Engine-level tests (assigner/DTO unit tests already exist) | | | | ### Lower Priority (Shipping) @@ -991,11 +1040,10 @@ class Job(Protocol): ### Recommended Implementation Order -1. **Partial Transcript Streaming** (Low effort, high impact on UX) -2. **System Tray + Hotkeys** (Can be done in parallel) -3. **Trigger Signal Integration** (Depends on tray) -4. **PyInstaller Packaging** (Enables distribution) -5. **Remaining M7 items** (Polish for release) +1. **System Tray + Hotkeys** (Can be done in parallel, completes M5) +2. **Diarization Polish** (application service + engine tests) +3. **PyInstaller Packaging** (Enables distribution) +4. **Remaining M7 items** (Polish for release) --- diff --git a/docs/triage.md b/docs/triage.md new file mode 100644 index 0000000..f502d63 --- /dev/null +++ b/docs/triage.md @@ -0,0 +1,265 @@ +This is a comprehensive code review of the `NoteFlow` repository.
+ +Overall, this codebase demonstrates a high level of engineering maturity. It effectively utilizes Clean Architecture concepts (Entities, Use Cases, Ports/Adapters), leveraging strong typing, Pydantic for validation, and SQLAlchemy/Alembic for persistence. The integration test setup using `testcontainers` is particularly robust. + +However, there are critical performance bottlenecks regarding async/sync bridging in the ASR engine, potential concurrency issues in the UI state management, and specific security considerations regarding the encryption implementation. + +Below is the review categorized into actionable feedback, formatted to be convertible into Git issues. + +--- + +## 1. Critical Architecture & Performance Issues + +### Issue 1: Blocking ASR Inference in Async gRPC Server +**Severity:** Critical +**Location:** `src/noteflow/grpc/service.py`, `src/noteflow/infrastructure/asr/engine.py` + +**The Problem:** +The `NoteFlowServer` uses `grpc.aio` (AsyncIO), but the `FasterWhisperEngine.transcribe` method is blocking (a synchronous CPU-bound operation). +In `NoteFlowServicer._maybe_emit_partial` and `_process_audio_segment`, the code calls: +```python +# src/noteflow/grpc/service.py +partial_text = " ".join(result.text for result in self._asr_engine.transcribe(combined)) +``` +Since `transcribe` performs heavy computation, executing it directly within an `async def` method freezes the entire Python AsyncIO event loop. This blocks heartbeats, other RPC calls, and other concurrent meeting streams until inference completes. + +**Actionable Solution:** +Offload the transcription to a separate thread pool executor. + +1. Modify `FasterWhisperEngine` to remain synchronous (it wraps CTranslate2, which releases the GIL often, but it is still blocking from an asyncio perspective). +2. Update `NoteFlowServicer` to run transcription in an executor. + +```python +# In NoteFlowServicer +import asyncio +from functools import partial + +# Helper method +async def _run_transcription(self, audio): + loop = asyncio.get_running_loop() + # Use a ThreadPoolExecutor specifically for compute-heavy tasks + return await loop.run_in_executor( + None, + partial(list, self._asr_engine.transcribe(audio)) + ) + +# Usage in _maybe_emit_partial +results = await self._run_transcription(combined) +partial_text = " ".join(r.text for r in results) +``` + +### Issue 2: Synchronous `sounddevice` Callbacks in Async Client App +**Severity:** High +**Location:** `src/noteflow/infrastructure/audio/capture.py` + +**The Problem:** +The `sounddevice` library calls the Python callback from a C-level background thread. In `SoundDeviceCapture._stream_callback`, you are invoking the user-provided callback: +```python +self._callback(audio_data, timestamp) +``` +In `app.py`, this callback (`_on_audio_frames`) interacts with `self._audio_activity.update` and `self._client.send_audio`. While `queue.put` is thread-safe, any heavy logic or object allocation here happens in the real-time audio thread. If Python garbage collection pauses this thread, audio artifacts (dropouts) will occur. + +**Actionable Solution:** +The callback should strictly put bytes into a thread-safe queue and return immediately. A separate consumer thread/task should process the VAD, VU meter logic, and network sending.
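+
+A minimal sketch of that handoff, assuming a bounded `queue.Queue` (the `_frame_queue` field and `_consume_frames` worker are illustrative names, not existing code):
+
+```python
+import queue
+import threading
+
+class SoundDeviceCapture:
+    def __init__(self, callback) -> None:
+        self._callback = callback
+        # Bounded queue: the audio thread must never block on a slow consumer.
+        self._frame_queue: queue.Queue[tuple[bytes, float]] = queue.Queue(maxsize=256)
+        self._consumer = threading.Thread(target=self._consume_frames, daemon=True)
+        self._consumer.start()
+
+    def _stream_callback(self, indata, frames, time_info, status) -> None:
+        # Real-time audio thread: copy and enqueue only, no application logic.
+        try:
+            self._frame_queue.put_nowait((bytes(indata), time_info.currentTime))
+        except queue.Full:
+            pass  # Dropping one frame is safer than stalling the audio thread.
+
+    def _consume_frames(self) -> None:
+        # Ordinary worker thread: VAD updates, VU meter, and network send happen here.
+        while True:
+            audio_bytes, timestamp = self._frame_queue.get()
+            self._callback(audio_bytes, timestamp)
+```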
+ +### Issue 3: Encryption Key Material in Memory +**Severity:** Medium +**Location:** `src/noteflow/infrastructure/security/crypto.py` + +**The Problem:** +The `AesGcmCryptoBox` keeps the master key in memory via `_get_master_cipher`. While inevitable for operation, `secrets.token_bytes` creates immutable bytes objects which cannot be zeroed out (wiped) from memory when no longer needed. Python's GC handles cleanup, but the key lingers in RAM. + +**Actionable Solution:** +While strict memory zeroing is hard in Python, you should minimize the lifespan of the `dek` (Data Encryption Key). +1. In `MeetingAudioWriter`, the `dek` is stored as an instance attribute: `self._dek`. This keeps the unencrypted key in memory for the duration of the meeting. +2. Consider refactoring `ChunkedAssetWriter` to store the `cipher` object (the `AESGCM` context) rather than the raw bytes of the `dek` if the underlying C library handles memory better, though strictly speaking, the key is still in RAM. +3. Keep `writer.close()` setting `self._dek = None` immediately on close; it already does this, and that behavior should be preserved. + +--- + +## 2. Domain & Infrastructure Logic + +### Issue 4: Fallback Logic in `SummarizationService` +**Severity:** Low +**Location:** `src/noteflow/application/services/summarization_service.py` + +**The Problem:** +The method `_get_provider_with_fallback` iterates through a hardcoded `fallback_order = [SummarizationMode.LOCAL, SummarizationMode.MOCK]`. This ignores the configuration order or user preference if they added new providers. + +**Actionable Solution:** +Allow `SummarizationServiceSettings` to define a `fallback_chain: list[SummarizationMode]`. + +### Issue 5: Race Condition in `MeetingStore` (In-Memory) +**Severity:** Medium +**Location:** `src/noteflow/grpc/meeting_store.py` + +**The Problem:** +The `MeetingStore` uses `threading.RLock`. However, the methods return the actual `Meeting` object reference. +```python +def get(self, meeting_id: str) -> Meeting | None: + with self._lock: + return self._meetings.get(meeting_id) +``` +The caller gets a reference to the mutable `Meeting` entity. If two threads get the meeting and modify it (e.g., `meeting.state = ...`), the `MeetingStore` lock does nothing to protect the entity itself, only the dictionary lookups. + +**Actionable Solution:** +1. Return deep copies of the Meeting object (performance impact). +2. Or, implement specific atomic update methods on the Store (e.g., `update_status(id, status)`), rather than returning the whole object for modification.
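+
+A sketch of the second option (`update_state` is a hypothetical method name; `_lock` and `_meetings` follow the snippet above):
+
+```python
+def update_state(self, meeting_id: str, state: MeetingState) -> bool:
+    """Atomically mutate a meeting while holding the store lock."""
+    with self._lock:
+        meeting = self._meetings.get(meeting_id)
+        if meeting is None:
+            return False
+        meeting.state = state
+        return True
+```
+
+Callers never receive the shared mutable entity, so every state change happens under the same lock that guards the dictionary.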
+ +### Issue 6: `pgvector` Dependency Management +**Severity:** Low +**Location:** `src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py` + +**The Problem:** +The migration blindly executes `CREATE EXTENSION IF NOT EXISTS vector`. On managed database services (like RDS or standard Docker Postgres images), the user might not have superuser privileges to install extensions, or the extension binaries might be missing. + +**Actionable Solution:** +Wrap the extension creation in a try/catch block or check capabilities. For the integration tests, ensure the `pgvector/pgvector:pg16` image is strictly pinned (which you have done, good job). + +--- + +## 3. Client & UI (Flet) + +### Issue 7: Massive `app.py` File Size +**Severity:** Medium +**Location:** `src/noteflow/client/app.py` + +**The Problem:** +`app.py` is orchestrating too much. It handles UI layout, audio capture orchestration, gRPC client events, and state updates. It serves as a "God Class" Controller. + +**Actionable Solution:** +Refactor into a `ClientController` class separate from the UI layout construction. +1. `src/noteflow/client/controller.py`: Handles `NoteFlowClient`, `SoundDeviceCapture`, and updates `AppState`. +2. `src/noteflow/client/views.py`: Accepts `AppState` and renders UI. + +### Issue 8: Re-rendering Efficiency in Transcript +**Severity:** Medium +**Location:** `src/noteflow/client/components/transcript.py` + +**The Problem:** +`_render_final_segment` appends controls to `self._list_view.controls`. In Flet, modifying a large list of controls can become slow as the transcript grows (hundreds of segments). + +**Actionable Solution:** +1. Implement a "virtualized" list or pagination if Flet supports it efficiently. +2. If not, implement a sliding-window rendering approach where only the last N segments plus visible segments are rendered, though this is complex in Flet. +3. **Immediate fix:** Ensure `auto_scroll` is handled efficiently. The current implementation clears and re-adds specific rows during search, which is heavy. + +--- + +## 4. Specific Code Feedback (Nitpicks & Bugs) + +### 1. Hardcoded Audio Constants +**File:** `src/noteflow/infrastructure/asr/segmenter.py` +The `SegmenterConfig` defaults to `sample_rate=16000`. +The `SoundDeviceCapture` defaults to `16000`. +**Risk:** If the server is configured for 44.1 kHz, the client currently defaults to 16 kHz hardcoded in several places. +**Fix:** Ensure `DEFAULT_SAMPLE_RATE` from `src/noteflow/config/constants.py` is used everywhere. + +### 2. Exception Swallowing in Audio Writer +**File:** `src/noteflow/grpc/service.py` -> `_write_audio_chunk_safe` +```python +except Exception as e: + logger.error("Failed to write audio chunk: %s", e) +``` +If the disk fills up or permissions change, the audio writer fails silently (just logging), but the meeting continues. The user might lose the audio recording entirely while thinking it's safe. +**Fix:** This error should probably trigger a circuit breaker that stops the recording or notifies the client via a gRPC status update or a metadata stream update. + +### 3. Trigger Service Rate Limiting Logic +**File:** `src/noteflow/application/services/trigger_service.py` +In `_determine_action`: +```python +if self._last_prompt is not None: + elapsed = now - self._last_prompt + if elapsed < self._settings.rate_limit_seconds: + return TriggerAction.IGNORE +``` +This logic ignores *all* triggers within the rate limit. If a **high-confidence** trigger (auto-start) comes in 10 seconds after a low-confidence prompt, it gets ignored. +**Fix:** The rate limit should likely apply to `NOTIFY` actions, but `AUTO_START` might need to bypass the rate limit or have a shorter one.
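+
+A minimal sketch of that fix, assuming an `auto_start_threshold` setting alongside the existing `rate_limit_seconds`:
+
+```python
+def _determine_action(self, confidence: float, now: float) -> TriggerAction:
+    # High-confidence triggers may auto-start regardless of the prompt rate limit.
+    if confidence >= self._settings.auto_start_threshold:
+        return TriggerAction.AUTO_START
+    if self._last_prompt is not None:
+        elapsed = now - self._last_prompt
+        if elapsed < self._settings.rate_limit_seconds:
+            return TriggerAction.IGNORE
+    return TriggerAction.NOTIFY
+```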
+ +### 4. Database Session Lifecycle in UoW +**File:** `src/noteflow/infrastructure/persistence/unit_of_work.py` +The `__init__` does not create the session; `__aenter__` does. This is correct. However, `SqlAlchemyUnitOfWork` caches repositories: +```python +self._annotations_repo = SqlAlchemyAnnotationRepository(self._session) +``` +If `__aenter__` is called, `__aexit__` closes the session. If the same UoW instance is reused (calling `async with uow:` again), it creates a *new* session but overwrites the repo references. This is generally safe, but verify that `SqlAlchemyUnitOfWork` instances are intended to be reusable or disposable. Currently, they look reusable, which is fine. + +### 5. Frontend Polling vs Events +**File:** `src/noteflow/client/components/playback_sync.py` +`POSITION_POLL_INTERVAL = 0.1`. +Using a thread to poll `self._state.playback.current_position` every 100 ms is CPU-inefficient in Python (due to the GIL). +**Suggestion:** Use the `sounddevice` stream callback time info to update the position state only when audio is actually playing, rather than a separate `while True` loop. + +--- + +## 5. Security Review + +### 1. Keyring Headless Failure +**File:** `src/noteflow/infrastructure/security/keystore.py` +**Risk:** The app crashes if `keyring` cannot find a backend (common in Docker/headless Linux servers). +**Fix:** +```python +except keyring.errors.KeyringError: + logger.warning("Keyring unavailable, falling back to environment variable or temporary key") + # Implement a fallback strategy or explicit failure +``` +Currently, it raises `RuntimeError`, which crashes the server startup. + +### 2. DEK Handling +**Analysis:** You generate a DEK, wrap it, and store `wrapped_dek` in the DB. The `dek` stays in memory during the stream. +**Verdict:** This is standard envelope encryption practice. Acceptable for this application tier. + +--- + +## 6. Generated Issues for Git + +### Issue: Asynchronous Transcription Processing +**Title:** Refactor ASR Engine to run in ThreadPoolExecutor +**Description:** +The gRPC server uses `asyncio`, but `FasterWhisperEngine.transcribe` is blocking. This freezes the event loop during transcription segments. +**Task:** +1. Inject `asyncio.get_running_loop()` into `NoteFlowServicer`. +2. Wrap `self._asr_engine.transcribe` calls in `loop.run_in_executor`. + +### Issue: Client Audio Callback Optimization +**Title:** Optimize Audio Capture Callback +**Description:** +`SoundDeviceCapture` callback executes application logic (network sending, VAD updates) in the audio thread. +**Task:** +1. Change callback to only `queue.put_nowait()`. +2. Move logic to a dedicated consumer worker thread. + +### Issue: Handle Write Errors in Audio Stream +**Title:** Critical Error Handling for Audio Writer +**Description:** +`_write_audio_chunk_safe` catches exceptions and logs them, potentially resulting in data loss without user feedback. +**Task:** +1. If writing fails, update the meeting state to `ERROR`. +2. Send an error message back to the client via the transcript stream if possible, or terminate the connection. + +### Issue: Database Extension Installation Check +**Title:** Graceful degradation for `pgvector` +**Description:** +Migration script `6a9d9f408f40` attempts to create an extension. This fails if the DB user isn't a superuser. +**Task:** +1. Check if the extension exists or if the user has permissions. +2. If not, fail with a clear message about required database setup steps.
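+
+A sketch of that guard in the migration's `upgrade()` (the error message is illustrative; `pg_available_extensions` is the standard catalog view for this check):
+
+```python
+from alembic import op
+from sqlalchemy import text
+
+def upgrade() -> None:
+    conn = op.get_bind()
+    available = conn.execute(
+        text("SELECT count(*) FROM pg_available_extensions WHERE name = 'vector'")
+    ).scalar()
+    if not available:
+        raise RuntimeError(
+            "pgvector is not available on this PostgreSQL server; install it "
+            "(or use the pgvector/pgvector:pg16 image) before running migrations."
+        )
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+```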
+ +### Issue: Foreground App Window Detection on Linux/Headless +**Title:** Handle `pywinctl` dependencies +**Description:** +`pywinctl` requires X11/display headers on Linux. The server might run headless. +**Task:** +1. Wrap `ForegroundAppProvider` imports in try/except blocks. +2. Ensure the app doesn't crash if `pywinctl` fails to load. + +--- + +## 7. Packaging & Deployment (Future) + +Since you mentioned packaging is a WIP: +1. **Dependencies:** Separating `server` deps (torch, faster-whisper) from `client` deps (flet, sounddevice) is crucial. Use `pyproject.toml` extras: `pip install noteflow[server]` vs `noteflow[client]`. +2. **Model Management:** The Docker image for the server will be huge due to Torch/Whisper. Consider a build stage that pre-downloads the "base" model so the container starts faster. + +## Conclusion + +The code is high quality, well-typed, and structurally sound. Fixing the **Blocking ASR** issue is the only mandatory change before any serious load testing or deployment. The rest are robustness and architectural improvements. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 14660b8..69d4ea8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,6 +116,7 @@ asyncio_default_fixture_loop_scope = "function" markers = [ "slow: marks tests as slow (model loading)", "integration: marks tests requiring external services", + "stress: marks stress/concurrency tests", ] filterwarnings = [ "ignore:The @wait_container_is_ready decorator is deprecated.*:DeprecationWarning:testcontainers.core.waiting_utils", @@ -124,4 +125,5 @@ filterwarnings = [ [dependency-groups] dev = [ "ruff>=0.14.9", + "watchfiles>=1.1.1", ] diff --git a/repomix-output.md b/repomix-output.md new file mode 100644 index 0000000..ba72a62 --- /dev/null +++ b/repomix-output.md @@ -0,0 +1,30841 @@ +This file is a merged representation of a subset of the codebase, containing specifically included files, combined into a single document by Repomix. + +# File Summary + +## Purpose +This file contains a packed representation of a subset of the repository's contents that is considered the most important context. +It is designed to be easily consumable by AI systems for analysis, code review, +or other automated processes. + +## File Format +The content is organized as follows: +1. This summary section +2. Repository information +3. Directory structure +4. Repository files (if enabled) +5. Multiple file entries, each consisting of: + a. A header with the file path (## File: path/to/file) + b. The full contents of the file in a code block + +## Usage Guidelines +- This file should be treated as read-only. Any changes should be made to the + original repository files, not this packed version. +- When processing this file, use the file path to distinguish + between different files in the repository. +- Be aware that this file may contain sensitive information. Handle it with + the same level of security as you would the original repository. + +## Notes +- Some files may have been excluded based on .gitignore rules and Repomix's configuration +- Binary files are not included in this packed representation.
Please refer to the Repository Structure section for a complete list of file paths, including binary files +- Only files matching these patterns are included: src/, tests/ +- Files matching patterns in .gitignore are excluded +- Files matching default ignore patterns are excluded +- Files are sorted by Git change count (files with more changes are at the bottom) + +# Directory Structure +``` +src/ + noteflow/ + application/ + services/ + __init__.py + export_service.py + meeting_service.py + recovery_service.py + retention_service.py + summarization_service.py + trigger_service.py + __init__.py + cli/ + __init__.py + __main__.py + retention.py + client/ + components/ + __init__.py + _async_mixin.py + _thread_mixin.py + annotation_display.py + annotation_toolbar.py + connection_panel.py + meeting_library.py + playback_controls.py + playback_sync.py + recording_timer.py + summary_panel.py + transcript.py + vu_meter.py + __init__.py + _trigger_mixin.py + app.py + state.py + config/ + __init__.py + constants.py + settings.py + core/ + __init__.py + domain/ + entities/ + __init__.py + annotation.py + meeting.py + segment.py + summary.py + ports/ + __init__.py + repositories.py + unit_of_work.py + summarization/ + __init__.py + ports.py + triggers/ + __init__.py + entities.py + ports.py + __init__.py + value_objects.py + grpc/ + proto/ + __init__.py + noteflow_pb2_grpc.py + noteflow_pb2.py + noteflow_pb2.pyi + noteflow.proto + __init__.py + client.py + meeting_store.py + server.py + service.py + infrastructure/ + asr/ + __init__.py + dto.py + engine.py + protocols.py + segmenter.py + streaming_vad.py + audio/ + __init__.py + capture.py + dto.py + levels.py + playback.py + protocols.py + reader.py + ring_buffer.py + writer.py + converters/ + __init__.py + asr_converters.py + orm_converters.py + diarization/ + __init__.py + assigner.py + dto.py + engine.py + export/ + __init__.py + _formatting.py + html.py + markdown.py + protocols.py + persistence/ + migrations/ + versions/ + 6a9d9f408f40_initial_schema.py + b5c3e8a2d1f0_add_annotations_table.py + c7d4e9f3a2b1_add_speaker_fields_to_segments.py + __init__.py + env.py + README + script.py.mako + repositories/ + __init__.py + _base.py + annotation_repo.py + meeting_repo.py + segment_repo.py + summary_repo.py + __init__.py + database.py + models.py + unit_of_work.py + security/ + __init__.py + crypto.py + keystore.py + protocols.py + summarization/ + __init__.py + _parsing.py + citation_verifier.py + cloud_provider.py + factory.py + mock_provider.py + ollama_provider.py + triggers/ + __init__.py + app_audio.py + audio_activity.py + calendar.py + foreground_app.py + __init__.py + __init__.py + noteflow_pb2.py +tests/ + application/ + __init__.py + test_export_service.py + test_meeting_service.py + test_recovery_service.py + test_retention_service.py + test_summarization_service.py + test_trigger_service.py + client/ + test_async_mixin.py + test_summary_panel.py + test_transcript_component.py + domain/ + __init__.py + test_annotation.py + test_meeting.py + test_segment.py + test_summary.py + test_triggers.py + test_value_objects.py + grpc/ + __init__.py + test_diarization_refine.py + test_generate_summary.py + test_partial_transcription.py + infrastructure/ + asr/ + __init__.py + test_dto.py + test_engine.py + test_segmenter.py + test_streaming_vad.py + audio/ + __init__.py + conftest.py + test_capture.py + test_dto.py + test_levels.py + test_reader.py + test_ring_buffer.py + test_writer.py + export/ + test_formatting.py + test_html.py + 
test_markdown.py + security/ + test_crypto.py + test_keystore.py + summarization/ + test_citation_verifier.py + test_cloud_provider.py + test_mock_provider.py + test_ollama_provider.py + triggers/ + conftest.py + test_audio_activity.py + test_foreground_app.py + __init__.py + test_converters.py + test_diarization.py + integration/ + __init__.py + conftest.py + test_repositories.py + test_trigger_settings.py + test_unit_of_work.py + __init__.py + conftest.py +``` + +# Files + +## File: src/noteflow/config/constants.py +````python +"""Centralized constants for NoteFlow. + +This module provides shared constants used across the codebase to avoid +magic numbers and ensure consistency. +""" + +from __future__ import annotations + +from typing import Final + +# Audio constants +DEFAULT_SAMPLE_RATE: Final[int] = 16000 +"""Default audio sample rate in Hz (16 kHz).""" + +# gRPC constants +DEFAULT_GRPC_PORT: Final[int] = 50051 +"""Default gRPC server port.""" + +MAX_GRPC_MESSAGE_SIZE: Final[int] = 100 * 1024 * 1024 +"""Maximum gRPC message size in bytes (100 MB).""" +```` + +## File: src/noteflow/infrastructure/triggers/app_audio.py +````python +"""App audio activity provider. + +Detects audio activity from system output while whitelisted meeting apps are active. +This is a best-effort heuristic: it combines (a) system output activity and +(b) presence of whitelisted app windows to infer a likely meeting. +""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource +from noteflow.infrastructure.audio.levels import RmsLevelProvider +from noteflow.infrastructure.triggers.audio_activity import ( + AudioActivityProvider, + AudioActivitySettings, +) + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + +logger = logging.getLogger(__name__) + + +@dataclass +class AppAudioSettings: + """Configuration for app audio detection. + + Attributes: + enabled: Whether app audio detection is enabled. + threshold_db: Minimum dB level to consider as activity. + window_seconds: Time window for sustained activity detection. + min_active_ratio: Minimum ratio of active samples in window. + min_samples: Minimum samples required before evaluation. + max_history: Maximum samples retained in history. + weight: Confidence weight contributed by this provider. + meeting_apps: Set of app name substrings to match (lowercase). + suppressed_apps: App substrings to ignore even if matched. + sample_rate: Sample rate for system output capture. + sample_duration_seconds: Duration of each sampling read. + chunk_duration_seconds: Duration of sub-chunks for activity history updates. 
+ """ + + enabled: bool + threshold_db: float + window_seconds: float + min_active_ratio: float + min_samples: int + max_history: int + weight: float + meeting_apps: set[str] = field(default_factory=set) + suppressed_apps: set[str] = field(default_factory=set) + sample_rate: int = 16000 + sample_duration_seconds: float = 0.5 + chunk_duration_seconds: float = 0.1 + + def __post_init__(self) -> None: + self.meeting_apps = {app.lower() for app in self.meeting_apps} + self.suppressed_apps = {app.lower() for app in self.suppressed_apps} + + +class _SystemOutputSampler: + """Best-effort system output sampler using sounddevice.""" + + def __init__(self, sample_rate: int, channels: int = 1) -> None: + self._sample_rate = sample_rate + self._channels = channels + self._stream = None + self._extra_settings = None + self._device = None + self._available: bool | None = None + + def _select_device(self) -> None: + try: + import sounddevice as sd + except ImportError: + return self._extracted_from__select_device_5( + "sounddevice not available - app audio detection disabled" + ) + # Default to output device and WASAPI loopback when available (Windows) + try: + default_output = sd.default.device[1] + except (TypeError, IndexError): + default_output = None + + try: + hostapi_index = sd.default.hostapi + hostapi = sd.query_hostapis(hostapi_index) if hostapi_index is not None else None + except Exception: + hostapi = None + + if hostapi and hostapi.get("type") == "Windows WASAPI" and default_output is not None: + # On WASAPI, loopback devices appear as separate input devices + # Fall through to monitor/loopback device detection below + pass + + # Fallback: look for monitor/loopback devices (Linux/PulseAudio) + try: + devices = sd.query_devices() + except Exception: + return self._extracted_from__select_device_5( + "Failed to query audio devices for app audio detection" + ) + for idx, dev in enumerate(devices): + name = str(dev.get("name", "")).lower() + if int(dev.get("max_input_channels", 0)) <= 0: + continue + if "monitor" in name or "loopback" in name: + return self._extracted_from__select_device_24(idx) + self._available = False + logger.warning("No loopback audio device found - app audio detection disabled") + + # TODO Rename this here and in `_select_device` + def _extracted_from__select_device_24(self, arg0): + self._device = arg0 + self._available = True + return + + # TODO Rename this here and in `_select_device` + def _extracted_from__select_device_5(self, arg0): + self._available = False + logger.warning(arg0) + return + + def _ensure_stream(self) -> bool: + if self._available is False: + return False + + if self._available is None: + self._select_device() + if self._available is False: + return False + + if self._stream is not None: + return True + + try: + import sounddevice as sd + + self._stream = sd.InputStream( + device=self._device, + channels=self._channels, + samplerate=self._sample_rate, + dtype="float32", + extra_settings=self._extra_settings, + ) + self._stream.start() + return True + except Exception as exc: + logger.warning("Failed to start system output capture: %s", exc) + self._stream = None + self._available = False + return False + + def read_frames(self, duration_seconds: float) -> NDArray[np.float32] | None: + if not self._ensure_stream(): + return None + + if self._stream is None: + return None + + frames = max(1, int(self._sample_rate * duration_seconds)) + try: + data, _ = self._stream.read(frames) + except Exception as exc: + logger.debug("System output read failed: %s", 
exc) + return None + + return data.reshape(-1).astype("float32") + + def close(self) -> None: + if self._stream is None: + return + try: + self._stream.stop() + self._stream.close() + except Exception: + logger.debug("Failed to close system output stream", exc_info=True) + finally: + self._stream = None + + +class AppAudioProvider: + """Detect app audio activity from whitelisted meeting apps.""" + + def __init__(self, settings: AppAudioSettings) -> None: + self._settings = settings + self._sampler = _SystemOutputSampler(sample_rate=settings.sample_rate) + self._level_provider = RmsLevelProvider() + self._audio_activity = AudioActivityProvider( + self._level_provider, + AudioActivitySettings( + enabled=settings.enabled, + threshold_db=settings.threshold_db, + window_seconds=settings.window_seconds, + min_active_ratio=settings.min_active_ratio, + min_samples=settings.min_samples, + max_history=settings.max_history, + weight=settings.weight, + ), + ) + + @property + def source(self) -> TriggerSource: + return TriggerSource.AUDIO_ACTIVITY + + @property + def max_weight(self) -> float: + return self._settings.weight + + def is_enabled(self) -> bool: + return self._settings.enabled + + def get_signal(self) -> TriggerSignal | None: + if not self.is_enabled(): + return None + if not self._settings.meeting_apps: + return None + + app_title = self._detect_meeting_app() + if not app_title: + return None + + frames = self._sampler.read_frames(self._settings.sample_duration_seconds) + if frames is None or frames.size == 0: + return None + + self._update_activity_history(frames) + if self._audio_activity.get_signal() is None: + return None + + return TriggerSignal( + source=self.source, + weight=self.max_weight, + app_name=app_title, + ) + + def _update_activity_history(self, frames: NDArray[np.float32]) -> None: + chunk_size = max(1, int(self._settings.sample_rate * self._settings.chunk_duration_seconds)) + now = time.monotonic() + for offset in range(0, len(frames), chunk_size): + chunk = frames[offset : offset + chunk_size] + if chunk.size == 0: + continue + self._audio_activity.update(chunk, now) + + def _detect_meeting_app(self) -> str | None: + try: + import pywinctl + except ImportError: + return None + + titles: list[str] = [] + try: + if hasattr(pywinctl, "getAllWindows"): + windows = pywinctl.getAllWindows() + titles = [w.title for w in windows if getattr(w, "title", None)] + elif hasattr(pywinctl, "getAllTitles"): + titles = [t for t in pywinctl.getAllTitles() if t] + except Exception as exc: + logger.debug("Failed to list windows for app detection: %s", exc) + return None + + for title in titles: + title_lower = title.lower() + if any(suppressed in title_lower for suppressed in self._settings.suppressed_apps): + continue + if any(app in title_lower for app in self._settings.meeting_apps): + return title + + return None + + def close(self) -> None: + """Release system audio resources.""" + self._sampler.close() +```` + +## File: src/noteflow/infrastructure/triggers/calendar.py +````python +"""Calendar trigger provider. + +Best-effort calendar integration using configured event windows. 
+""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import TYPE_CHECKING + +from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource + +if TYPE_CHECKING: + from collections.abc import Iterable + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class CalendarEvent: + """Simple calendar event window.""" + + start: datetime + end: datetime + title: str | None = None + + +@dataclass +class CalendarSettings: + """Configuration for calendar trigger detection.""" + + enabled: bool + weight: float + lookahead_minutes: int + lookbehind_minutes: int + events: list[CalendarEvent] + + +class CalendarProvider: + """Provide trigger signal based on calendar proximity.""" + + def __init__(self, settings: CalendarSettings) -> None: + self._settings = settings + + @property + def source(self) -> TriggerSource: + return TriggerSource.CALENDAR + + @property + def max_weight(self) -> float: + return self._settings.weight + + def is_enabled(self) -> bool: + return self._settings.enabled + + def get_signal(self) -> TriggerSignal | None: + if not self.is_enabled(): + return None + + if not self._settings.events: + return None + + now = datetime.now(timezone.utc) + window_start = now - timedelta(minutes=self._settings.lookbehind_minutes) + window_end = now + timedelta(minutes=self._settings.lookahead_minutes) + + return next( + ( + TriggerSignal( + source=self.source, + weight=self.max_weight, + app_name=event.title, + ) + for event in self._settings.events + if self._event_overlaps_window(event, window_start, window_end) + ), + None, + ) + + @staticmethod + def _event_overlaps_window( + event: CalendarEvent, + window_start: datetime, + window_end: datetime, + ) -> bool: + event_start = _ensure_tz(event.start) + event_end = _ensure_tz(event.end) + return event_start <= window_end and event_end >= window_start + + +def parse_calendar_events(raw_events: object) -> list[CalendarEvent]: + """Parse calendar events from config/env payloads.""" + if raw_events is None: + return [] + + if isinstance(raw_events, str): + raw_events = _load_events_from_json(raw_events) + + if isinstance(raw_events, dict): + raw_events = [raw_events] + + if not isinstance(raw_events, Iterable): + return [] + + events: list[CalendarEvent] = [] + for item in raw_events: + if isinstance(item, CalendarEvent): + events.append(item) + continue + if isinstance(item, dict): + start = _parse_datetime(item.get("start")) + end = _parse_datetime(item.get("end")) + if start and end: + events.append(CalendarEvent(start=start, end=end, title=item.get("title"))) + return events + + +def _load_events_from_json(raw: str) -> list[dict[str, object]]: + try: + parsed = json.loads(raw) + except json.JSONDecodeError: + logger.debug("Failed to parse calendar events JSON") + return [] + if isinstance(parsed, list): + return [item for item in parsed if isinstance(item, dict)] + return [parsed] if isinstance(parsed, dict) else [] + + +def _parse_datetime(value: object) -> datetime | None: + if isinstance(value, datetime): + return value + if not isinstance(value, str) or not value: + return None + cleaned = value.strip() + if cleaned.endswith("Z"): + cleaned = f"{cleaned[:-1]}+00:00" + try: + return datetime.fromisoformat(cleaned) + except ValueError: + return None + + +def _ensure_tz(value: datetime) -> datetime: + if value.tzinfo is None: + return value.replace(tzinfo=timezone.utc) + return 
value.astimezone(timezone.utc) +```` + +## File: tests/grpc/test_diarization_refine.py +````python +"""Tests for RefineSpeakerDiarization RPC guards.""" + +from __future__ import annotations + +import pytest + +from noteflow.grpc.proto import noteflow_pb2 +from noteflow.grpc.service import NoteFlowServicer + + +class _DummyContext: + """Minimal gRPC context that raises if abort is invoked.""" + + async def abort(self, code, details): # type: ignore[override] + raise AssertionError(f"abort called: {code} - {details}") + + +@pytest.mark.asyncio +async def test_refine_speaker_diarization_rejects_active_meeting() -> None: + """Refinement should be blocked while a meeting is still recording.""" + servicer = NoteFlowServicer(diarization_engine=object()) + store = servicer._get_memory_store() + + meeting = store.create("Active meeting") + meeting.start_recording() + store.update(meeting) + + response = await servicer.RefineSpeakerDiarization( + noteflow_pb2.RefineSpeakerDiarizationRequest(meeting_id=str(meeting.id)), + _DummyContext(), + ) + + assert response.segments_updated == 0 + assert response.error_message + assert "stopped" in response.error_message.lower() +```` + +## File: tests/infrastructure/test_diarization.py +````python +"""Tests for speaker diarization infrastructure. + +Tests the SpeakerTurn DTO and speaker assignment utilities. +""" + +from __future__ import annotations + +import pytest + +from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker, assign_speakers_batch + + +class TestSpeakerTurn: + """Tests for the SpeakerTurn dataclass.""" + + def test_create_valid_turn(self) -> None: + """Create a valid speaker turn.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0) + assert turn.speaker == "SPEAKER_00" + assert turn.start == 0.0 + assert turn.end == 5.0 + assert turn.confidence == 1.0 + + def test_create_turn_with_confidence(self) -> None: + """Create a turn with custom confidence.""" + turn = SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0, confidence=0.85) + assert turn.confidence == 0.85 + + def test_invalid_end_before_start_raises(self) -> None: + """End time before start time raises ValueError.""" + with pytest.raises(ValueError, match=r"end.*<.*start"): + SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=5.0) + + def test_invalid_confidence_negative_raises(self) -> None: + """Negative confidence raises ValueError.""" + with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"): + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=-0.1) + + def test_invalid_confidence_above_one_raises(self) -> None: + """Confidence above 1.0 raises ValueError.""" + with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"): + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=1.5) + + def test_duration_property(self) -> None: + """Duration property calculates correctly.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=2.5, end=7.5) + assert turn.duration == 5.0 + + def test_overlaps_returns_true_for_overlap(self) -> None: + """overlaps() returns True when ranges overlap.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlaps(3.0, 7.0) + assert turn.overlaps(7.0, 12.0) + assert turn.overlaps(5.0, 10.0) + assert turn.overlaps(0.0, 15.0) + + def test_overlaps_returns_false_for_no_overlap(self) -> None: + """overlaps() returns False when ranges don't overlap.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert not turn.overlaps(0.0, 5.0) + 
assert not turn.overlaps(10.0, 15.0) + assert not turn.overlaps(0.0, 3.0) + assert not turn.overlaps(12.0, 20.0) + + def test_overlap_duration_full_overlap(self) -> None: + """overlap_duration() for full overlap returns turn duration.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlap_duration(0.0, 15.0) == 5.0 + + def test_overlap_duration_partial_overlap_left(self) -> None: + """overlap_duration() for partial overlap on left side.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlap_duration(3.0, 7.0) == 2.0 + + def test_overlap_duration_partial_overlap_right(self) -> None: + """overlap_duration() for partial overlap on right side.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlap_duration(8.0, 15.0) == 2.0 + + def test_overlap_duration_contained(self) -> None: + """overlap_duration() when range is contained within turn.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=20.0) + assert turn.overlap_duration(5.0, 10.0) == 5.0 + + def test_overlap_duration_no_overlap(self) -> None: + """overlap_duration() returns 0.0 when no overlap.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlap_duration(0.0, 3.0) == 0.0 + assert turn.overlap_duration(12.0, 20.0) == 0.0 + + +class TestAssignSpeaker: + """Tests for the assign_speaker function.""" + + def test_empty_turns_returns_none(self) -> None: + """Empty turns list returns None with 0 confidence.""" + speaker, confidence = assign_speaker(0.0, 5.0, []) + assert speaker is None + assert confidence == 0.0 + + def test_zero_duration_segment_returns_none(self) -> None: + """Zero duration segment returns None.""" + turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] + speaker, confidence = assign_speaker(5.0, 5.0, turns) + assert speaker is None + assert confidence == 0.0 + + def test_single_turn_full_overlap(self) -> None: + """Single turn with full overlap returns high confidence.""" + turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] + speaker, confidence = assign_speaker(2.0, 8.0, turns) + assert speaker == "SPEAKER_00" + assert confidence == 1.0 + + def test_single_turn_partial_overlap(self) -> None: + """Single turn with partial overlap returns proportional confidence.""" + turns = [SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)] + speaker, confidence = assign_speaker(0.0, 10.0, turns) + assert speaker == "SPEAKER_00" + assert confidence == 0.5 + + def test_multiple_turns_chooses_dominant_speaker(self) -> None: + """Multiple turns chooses speaker with most overlap.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0), + SpeakerTurn(speaker="SPEAKER_01", start=3.0, end=10.0), + ] + speaker, confidence = assign_speaker(0.0, 10.0, turns) + assert speaker == "SPEAKER_01" + assert confidence == 0.7 + + def test_no_overlap_returns_none(self) -> None: + """No overlapping turns returns None.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), + SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0), + ] + speaker, confidence = assign_speaker(6.0, 9.0, turns) + assert speaker is None + assert confidence == 0.0 + + def test_equal_overlap_chooses_first_encountered(self) -> None: + """Equal overlap chooses first speaker encountered.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), + SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0), + ] + speaker, confidence = assign_speaker(3.0, 7.0, turns) + # 
SPEAKER_00: overlap 2.0, SPEAKER_01: overlap 2.0 + # First one wins since > not >= + assert speaker == "SPEAKER_00" + assert confidence == 0.5 + + +class TestAssignSpeakersBatch: + """Tests for the assign_speakers_batch function.""" + + def test_empty_segments(self) -> None: + """Empty segments list returns empty results.""" + turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] + results = assign_speakers_batch([], turns) + assert results == [] + + def test_empty_turns(self) -> None: + """Empty turns returns all None speakers.""" + segments = [(0.0, 5.0), (5.0, 10.0)] + results = assign_speakers_batch(segments, []) + assert len(results) == 2 + assert all(speaker is None for speaker, _ in results) + assert all(conf == 0.0 for _, conf in results) + + def test_batch_assignment(self) -> None: + """Batch assignment processes all segments.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), + SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0), + SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=15.0), + ] + segments = [(0.0, 5.0), (5.0, 10.0), (10.0, 15.0)] + results = assign_speakers_batch(segments, turns) + assert len(results) == 3 + assert results[0] == ("SPEAKER_00", 1.0) + assert results[1] == ("SPEAKER_01", 1.0) + assert results[2] == ("SPEAKER_00", 1.0) + + def test_batch_with_gaps(self) -> None: + """Batch assignment handles gaps between turns.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0), + SpeakerTurn(speaker="SPEAKER_01", start=7.0, end=10.0), + ] + segments = [(0.0, 3.0), (3.0, 7.0), (7.0, 10.0)] + results = assign_speakers_batch(segments, turns) + assert results[0] == ("SPEAKER_00", 1.0) + assert results[1] == (None, 0.0) + assert results[2] == ("SPEAKER_01", 1.0) +```` + +## File: src/noteflow/application/services/export_service.py +````python +"""Export application service. + +Orchestrates transcript export to various formats. +""" + +from __future__ import annotations + +from enum import Enum +from pathlib import Path +from typing import TYPE_CHECKING + +from noteflow.infrastructure.export import HtmlExporter, MarkdownExporter, TranscriptExporter + +if TYPE_CHECKING: + from noteflow.domain.entities import Meeting, Segment + from noteflow.domain.ports.unit_of_work import UnitOfWork + from noteflow.domain.value_objects import MeetingId + + +class ExportFormat(Enum): + """Supported export formats.""" + + MARKDOWN = "markdown" + HTML = "html" + + +class ExportService: + """Application service for transcript export operations. + + Provides use cases for exporting meeting transcripts to various formats. + """ + + def __init__(self, uow: UnitOfWork) -> None: + """Initialize the export service. + + Args: + uow: Unit of work for persistence. + """ + self._uow = uow + self._exporters: dict[ExportFormat, TranscriptExporter] = { + ExportFormat.MARKDOWN: MarkdownExporter(), + ExportFormat.HTML: HtmlExporter(), + } + + def _get_exporter(self, fmt: ExportFormat) -> TranscriptExporter: + """Get exporter for format. + + Args: + fmt: Export format. + + Returns: + Exporter instance. + + Raises: + ValueError: If format is not supported. + """ + exporter = self._exporters.get(fmt) + if exporter is None: + raise ValueError(f"Unsupported export format: {fmt}") + return exporter + + async def export_transcript( + self, + meeting_id: MeetingId, + fmt: ExportFormat = ExportFormat.MARKDOWN, + ) -> str: + """Export meeting transcript to string. + + Args: + meeting_id: Meeting identifier. + fmt: Export format. 
+ + Returns: + Formatted transcript string. + + Raises: + ValueError: If meeting not found. + """ + async with self._uow: + meeting = await self._uow.meetings.get(meeting_id) + if meeting is None: + raise ValueError(f"Meeting {meeting_id} not found") + + segments = await self._uow.segments.get_by_meeting(meeting_id) + exporter = self._get_exporter(fmt) + return exporter.export(meeting, segments) + + async def export_to_file( + self, + meeting_id: MeetingId, + output_path: Path, + fmt: ExportFormat | None = None, + ) -> Path: + """Export meeting transcript to file. + + Args: + meeting_id: Meeting identifier. + output_path: Output file path (extension determines format if not specified). + fmt: Export format (optional, inferred from extension if not provided). + + Returns: + Path to the exported file. + + Raises: + ValueError: If meeting not found or format cannot be determined. + """ + # Determine format from extension if not provided + if fmt is None: + fmt = self._infer_format_from_extension(output_path.suffix) + + content = await self.export_transcript(meeting_id, fmt) + + # Ensure correct extension + exporter = self._get_exporter(fmt) + if output_path.suffix != exporter.file_extension: + output_path = output_path.with_suffix(exporter.file_extension) + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(content, encoding="utf-8") + return output_path + + def _infer_format_from_extension(self, extension: str) -> ExportFormat: + """Infer export format from file extension. + + Args: + extension: File extension (e.g., '.md', '.html'). + + Returns: + Inferred export format. + + Raises: + ValueError: If extension is not recognized. + """ + extension_map = { + ".md": ExportFormat.MARKDOWN, + ".markdown": ExportFormat.MARKDOWN, + ".html": ExportFormat.HTML, + ".htm": ExportFormat.HTML, + } + fmt = extension_map.get(extension.lower()) + if fmt is None: + raise ValueError( + f"Cannot infer format from extension '{extension}'. " + f"Supported: {', '.join(extension_map.keys())}" + ) + return fmt + + def get_supported_formats(self) -> list[tuple[str, str]]: + """Get list of supported export formats. + + Returns: + List of (format_name, file_extension) tuples. + """ + return [(e.format_name, e.file_extension) for e in self._exporters.values()] + + async def preview_export( + self, + meeting: Meeting, + segments: list[Segment], + fmt: ExportFormat = ExportFormat.MARKDOWN, + ) -> str: + """Preview export without fetching from database. + + Useful for previewing exports with in-memory data. + + Args: + meeting: Meeting entity. + segments: List of segments. + fmt: Export format. + + Returns: + Formatted transcript string. + """ + exporter = self._get_exporter(fmt) + return exporter.export(meeting, segments) +```` + +## File: src/noteflow/application/services/recovery_service.py +````python +"""Recovery service for crash recovery on startup. + +Detect and recover meetings left in active states after server restart. +""" + +from __future__ import annotations + +import logging +from datetime import UTC, datetime +from typing import TYPE_CHECKING, ClassVar + +from noteflow.domain.value_objects import MeetingState + +if TYPE_CHECKING: + from noteflow.domain.entities import Meeting + from noteflow.domain.ports.unit_of_work import UnitOfWork + +logger = logging.getLogger(__name__) + + +class RecoveryService: + """Recover meetings from crash states on server startup. + + Find meetings left in RECORDING or STOPPING state and mark them as ERROR. 
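+    Recovered meetings are tagged with crash metadata (crash_recovered,
+    crash_recovery_time, crash_previous_state) for later inspection.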
+ This handles the case where the server crashed during an active meeting. + """ + + ACTIVE_STATES: ClassVar[list[MeetingState]] = [ + MeetingState.RECORDING, + MeetingState.STOPPING, + ] + + def __init__(self, uow: UnitOfWork) -> None: + """Initialize recovery service. + + Args: + uow: Unit of work for persistence. + """ + self._uow = uow + + async def recover_crashed_meetings(self) -> list[Meeting]: + """Find and recover meetings left in active states. + + Mark all meetings in RECORDING or STOPPING state as ERROR + with metadata explaining the crash recovery. + + Returns: + List of recovered meetings. + """ + async with self._uow: + # Find all meetings in active states + meetings, total = await self._uow.meetings.list_all( + states=self.ACTIVE_STATES, + limit=1000, # Handle up to 1000 crashed meetings + ) + + if total == 0: + logger.info("No crashed meetings found during recovery") + return [] + + logger.warning( + "Found %d meetings in active state during startup, marking as ERROR", + total, + ) + + recovered: list[Meeting] = [] + recovery_time = datetime.now(UTC).isoformat() + + for meeting in meetings: + previous_state = meeting.state.name + meeting.mark_error() + + # Add crash recovery metadata + meeting.metadata["crash_recovered"] = "true" + meeting.metadata["crash_recovery_time"] = recovery_time + meeting.metadata["crash_previous_state"] = previous_state + + await self._uow.meetings.update(meeting) + recovered.append(meeting) + + logger.info( + "Recovered crashed meeting: id=%s, previous_state=%s", + meeting.id, + previous_state, + ) + + await self._uow.commit() + logger.info("Crash recovery complete: %d meetings recovered", len(recovered)) + return recovered + + async def count_crashed_meetings(self) -> int: + """Count meetings currently in crash states. + + Returns: + Number of meetings in RECORDING or STOPPING state. + """ + async with self._uow: + total = 0 + for state in self.ACTIVE_STATES: + total += await self._uow.meetings.count_by_state(state) + return total +```` + +## File: src/noteflow/application/services/trigger_service.py +````python +"""Trigger evaluation and decision service. + +Orchestrate trigger detection with rate limiting and snooze support. +""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from noteflow.domain.triggers.entities import TriggerAction, TriggerDecision, TriggerSignal + +if TYPE_CHECKING: + from noteflow.domain.triggers.ports import SignalProvider + +logger = logging.getLogger(__name__) + + +@dataclass +class TriggerServiceSettings: + """Configuration for trigger service. + + Attributes: + enabled: Whether trigger detection is enabled. + auto_start_enabled: Whether to auto-start recording at high confidence. + rate_limit_seconds: Minimum seconds between trigger prompts. + snooze_seconds: Default snooze duration. + threshold_ignore: Confidence below which triggers are ignored. + threshold_auto_start: Confidence at or above which auto-start is allowed. + """ + + enabled: bool + auto_start_enabled: bool + rate_limit_seconds: int + snooze_seconds: int + threshold_ignore: float + threshold_auto_start: float + + def __post_init__(self) -> None: + if self.threshold_auto_start < self.threshold_ignore: + msg = "threshold_auto_start must be >= threshold_ignore" + raise ValueError(msg) + + +class TriggerService: + """Orchestrate trigger detection with rate limiting and snooze. 
+ + Evaluates all signal providers and determines the appropriate action + based on combined confidence scores, rate limits, and snooze state. + + Threshold behavior is driven by TriggerServiceSettings: + - Confidence < threshold_ignore: IGNORE + - Confidence >= threshold_auto_start: AUTO_START (if enabled, else NOTIFY) + - Otherwise: NOTIFY + """ + + def __init__( + self, + providers: list[SignalProvider], + settings: TriggerServiceSettings, + ) -> None: + """Initialize trigger service. + + Args: + providers: List of signal providers to evaluate. + settings: Configuration settings for trigger behavior. + """ + self._providers = providers + self._settings = settings + self._last_prompt: float | None = None + self._snoozed_until: float | None = None + + @property + def is_enabled(self) -> bool: + """Check if trigger service is enabled.""" + return self._settings.enabled + + @property + def is_snoozed(self) -> bool: + """Check if triggers are currently snoozed.""" + if self._snoozed_until is None: + return False + return time.monotonic() < self._snoozed_until + + @property + def snooze_remaining_seconds(self) -> float: + """Get remaining snooze time in seconds, or 0 if not snoozed.""" + if self._snoozed_until is None: + return 0.0 + remaining = self._snoozed_until - time.monotonic() + return max(0.0, remaining) + + def evaluate(self) -> TriggerDecision: + """Evaluate all providers and determine action. + + Returns: + TriggerDecision with action and confidence details. + """ + now = time.monotonic() + + # Check if disabled + if not self._settings.enabled: + return self._make_decision(TriggerAction.IGNORE, 0.0, ()) + + # Check if snoozed + if self._snoozed_until is not None and now < self._snoozed_until: + return self._make_decision(TriggerAction.IGNORE, 0.0, ()) + + # Collect signals from all enabled providers + signals = [] + for provider in self._providers: + if not provider.is_enabled(): + continue + if signal := provider.get_signal(): + signals.append(signal) + + # Calculate total confidence + confidence = sum(s.weight for s in signals) + + # Determine action + action = self._determine_action(confidence, now) + + # Record prompt time for rate limiting + if action in (TriggerAction.NOTIFY, TriggerAction.AUTO_START): + self._last_prompt = now + logger.info( + "Trigger %s: confidence=%.2f, signals=%d", + action.value, + confidence, + len(signals), + ) + + return self._make_decision(action, confidence, tuple(signals)) + + def _determine_action(self, confidence: float, now: float) -> TriggerAction: + """Determine action based on confidence and rate limits. + + Args: + confidence: Total confidence from all signals. + now: Current monotonic time. + + Returns: + TriggerAction to take. 
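+
+        Example:
+            With threshold_ignore=0.3, threshold_auto_start=0.8, and
+            auto-start enabled: confidence 0.2 -> IGNORE, 0.5 -> NOTIFY
+            (or IGNORE while rate limited), 0.9 -> AUTO_START.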
+ """ + # Check threshold_ignore first + if confidence < self._settings.threshold_ignore: + return TriggerAction.IGNORE + + # AUTO_START bypasses rate limit (high-confidence trigger should not be delayed) + if confidence >= self._settings.threshold_auto_start and self._settings.auto_start_enabled: + return TriggerAction.AUTO_START + + # Rate limit applies only to NOTIFY actions + if self._last_prompt is not None: + elapsed = now - self._last_prompt + if elapsed < self._settings.rate_limit_seconds: + return TriggerAction.IGNORE + + return TriggerAction.NOTIFY + + def _make_decision( + self, + action: TriggerAction, + confidence: float, + signals: tuple[TriggerSignal, ...], + ) -> TriggerDecision: + """Create a TriggerDecision with the given parameters.""" + return TriggerDecision( + action=action, + confidence=confidence, + signals=signals, + ) + + def snooze(self, seconds: int | None = None) -> None: + """Snooze triggers for the specified duration. + + Args: + seconds: Snooze duration in seconds (uses default if None). + """ + duration = seconds if seconds is not None else self._settings.snooze_seconds + self._snoozed_until = time.monotonic() + duration + logger.info("Triggers snoozed for %d seconds", duration) + + def clear_snooze(self) -> None: + """Clear any active snooze.""" + if self._snoozed_until is not None: + self._snoozed_until = None + logger.info("Trigger snooze cleared") + + def set_enabled(self, enabled: bool) -> None: + """Enable or disable trigger detection. + + Args: + enabled: Whether triggers should be enabled. + """ + self._settings.enabled = enabled + logger.info("Triggers %s", "enabled" if enabled else "disabled") + + def set_auto_start(self, enabled: bool) -> None: + """Enable or disable auto-start on high confidence. + + Args: + enabled: Whether auto-start should be enabled. + """ + self._settings.auto_start_enabled = enabled + logger.info("Auto-start %s", "enabled" if enabled else "disabled") +```` + +## File: src/noteflow/application/__init__.py +````python +"""NoteFlow application layer. + +Contains application services that orchestrate use cases. +""" +```` + +## File: src/noteflow/cli/__init__.py +````python +"""NoteFlow CLI tools.""" +```` + +## File: src/noteflow/cli/__main__.py +````python +"""Main entry point for NoteFlow CLI.""" + +from noteflow.cli.retention import main + +if __name__ == "__main__": + main() +```` + +## File: src/noteflow/client/components/_thread_mixin.py +````python +"""Mixin for background worker thread lifecycle management. + +Provides standardized thread start/stop patterns for UI components +that need background polling or timer threads. +""" + +from __future__ import annotations + +import threading +from collections.abc import Callable + + +class BackgroundWorkerMixin: + """Mixin providing background worker thread lifecycle management. + + Manages thread creation, start, stop, and cleanup for components + that need background polling loops. + + Usage: + class MyComponent(BackgroundWorkerMixin): + def __init__(self): + self._init_worker() + + def start_polling(self): + self._start_worker(self._poll_loop, "MyPoller") + + def stop_polling(self): + self._stop_worker() + + def _poll_loop(self): + while self._should_run(): + # Do work + self._wait_interval(0.1) + """ + + _worker_thread: threading.Thread | None + _stop_event: threading.Event + + def _init_worker(self) -> None: + """Initialize worker attributes. + + Call this in __init__ of classes using this mixin. 
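+        Without it, `_start_worker` and `_should_run` raise AttributeError
+        because `_worker_thread` and `_stop_event` are never assigned.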
+ """ + self._worker_thread = None + self._stop_event = threading.Event() + + def _start_worker(self, target: Callable[[], None], name: str) -> None: + """Start background worker thread. + + No-op if worker is already running. + + Args: + target: Callable to run in background thread. + name: Thread name for debugging. + """ + if self._worker_thread and self._worker_thread.is_alive(): + return + + self._stop_event.clear() + self._worker_thread = threading.Thread( + target=target, + daemon=True, + name=name, + ) + self._worker_thread.start() + + def _stop_worker(self, timeout: float = 1.0) -> None: + """Stop background worker thread. + + Signals stop event and waits for thread to finish. + + Args: + timeout: Maximum seconds to wait for thread join. + """ + self._stop_event.set() + if self._worker_thread: + self._worker_thread.join(timeout=timeout) + self._worker_thread = None + + def _should_run(self) -> bool: + """Check if worker loop should continue. + + Returns: + True if worker should continue, False if stop requested. + """ + return not self._stop_event.is_set() + + def _wait_interval(self, seconds: float) -> None: + """Wait for interval, returning early if stop requested. + + Use this instead of time.sleep() in worker loops. + + Args: + seconds: Seconds to wait (returns early if stop signaled). + """ + self._stop_event.wait(seconds) +```` + +## File: src/noteflow/client/components/connection_panel.py +````python +"""Server connection management panel. + +Uses NoteFlowClient directly (not wrapped) and follows same callback pattern. +Does not recreate any types - imports and uses existing ones. +""" + +from __future__ import annotations + +import logging +import threading +from collections.abc import Callable +from typing import TYPE_CHECKING, Final + +import flet as ft + +# REUSE existing types - do not recreate +from noteflow.grpc.client import NoteFlowClient, ServerInfo + +if TYPE_CHECKING: + from noteflow.client.state import AppState + +logger = logging.getLogger(__name__) + +RECONNECT_ATTEMPTS: Final[int] = 3 +RECONNECT_DELAY_SECONDS: Final[float] = 2.0 + + +class ConnectionPanelComponent: + """Server connection management panel. + + Uses NoteFlowClient directly (not wrapped) and follows same callback pattern. + """ + + def __init__( + self, + state: AppState, + on_connected: Callable[[NoteFlowClient, ServerInfo], None] | None = None, + on_disconnected: Callable[[], None] | None = None, + on_transcript_callback: Callable[..., None] | None = None, + on_connection_change_callback: Callable[[bool, str], None] | None = None, + ) -> None: + """Initialize connection panel. + + Args: + state: Centralized application state. + on_connected: Callback when connected with client and server info. + on_disconnected: Callback when disconnected. + on_transcript_callback: Callback to pass to NoteFlowClient for transcripts. + on_connection_change_callback: Callback to pass to NoteFlowClient for connection changes. 
+ """ + self._state = state + self._on_connected = on_connected + self._on_disconnected = on_disconnected + self._on_transcript_callback = on_transcript_callback + self._on_connection_change_callback = on_connection_change_callback + self._client: NoteFlowClient | None = None + self._manual_disconnect = False + self._auto_reconnect_enabled = False + self._reconnect_thread: threading.Thread | None = None + self._reconnect_stop_event = threading.Event() + self._reconnect_lock = threading.Lock() + self._reconnect_in_progress = False + self._suppress_connection_events = False + + self._server_field: ft.TextField | None = None + self._connect_btn: ft.ElevatedButton | None = None + self._status_text: ft.Text | None = None + self._server_info_text: ft.Text | None = None + + @property + def client(self) -> NoteFlowClient | None: + """Get current gRPC client instance.""" + return self._client + + def build(self) -> ft.Column: + """Build connection panel UI. + + Returns: + Column containing connection controls and status. + """ + self._status_text = ft.Text( + "Not connected", + size=14, + color=ft.Colors.GREY_600, + ) + self._server_info_text = ft.Text( + "", + size=12, + color=ft.Colors.GREY_500, + ) + + self._server_field = ft.TextField( + value=self._state.server_address, + label="Server Address", + width=300, + on_change=self._on_server_change, + ) + self._connect_btn = ft.ElevatedButton( + "Connect", + on_click=self._on_connect_click, + icon=ft.Icons.CLOUD_OFF, + ) + + return ft.Column( + [ + self._status_text, + self._server_info_text, + ft.Row([self._server_field, self._connect_btn]), + ], + spacing=10, + ) + + def update_button_state(self) -> None: + """Update connect button state based on connection status.""" + if self._connect_btn: + if self._state.connected: + self._connect_btn.text = "Disconnect" + self._connect_btn.icon = ft.Icons.CLOUD_DONE + else: + self._connect_btn.text = "Connect" + self._connect_btn.icon = ft.Icons.CLOUD_OFF + self._state.request_update() + + def disconnect(self) -> None: + """Disconnect from server.""" + self._manual_disconnect = True + self._auto_reconnect_enabled = False + self._cancel_reconnect() + if self._client: + self._suppress_connection_events = True + try: + self._client.disconnect() + finally: + self._suppress_connection_events = False + self._client = None + + self._state.connected = False + self._state.server_info = None + + self._update_status("Disconnected", ft.Colors.GREY_600) + self.update_button_state() + + # Follow NoteFlowClient callback pattern with error handling + if self._on_disconnected: + try: + self._on_disconnected() + except Exception as e: + logger.error("on_disconnected callback error: %s", e) + + def _on_server_change(self, e: ft.ControlEvent) -> None: + """Handle server address change. + + Args: + e: Control event. + """ + self._state.server_address = str(e.control.value) + + def _on_connect_click(self, e: ft.ControlEvent) -> None: + """Handle connect/disconnect button click. + + Args: + e: Control event. 
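+
+        Note:
+            Connecting runs on a daemon thread so the UI stays responsive.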
+ """ + if self._state.connected: + self.disconnect() + else: + self._manual_disconnect = False + self._cancel_reconnect() + threading.Thread(target=self._connect, daemon=True).start() + + def _connect(self) -> None: + """Connect to server (background thread).""" + self._update_status("Connecting...", ft.Colors.ORANGE) + + try: + if self._client: + self._suppress_connection_events = True + try: + self._client.disconnect() + finally: + self._suppress_connection_events = False + + # Create client with callbacks - use NoteFlowClient directly + self._client = NoteFlowClient( + server_address=self._state.server_address, + on_transcript=self._on_transcript_callback, + on_connection_change=self._handle_connection_change, + ) + + if self._client.connect(timeout=10.0): + if info := self._client.get_server_info(): + self._state.connected = True + self._state.server_info = info + self._state.run_on_ui_thread(lambda: self._on_connect_success(info)) + else: + self._update_status("Failed to get server info", ft.Colors.RED) + if self._client: + self._suppress_connection_events = True + try: + self._client.disconnect() + finally: + self._suppress_connection_events = False + self._client = None + self._state.connected = False + self._state.run_on_ui_thread(self.update_button_state) + else: + self._update_status("Connection failed", ft.Colors.RED) + except Exception as exc: + logger.error("Connection error: %s", exc) + self._update_status(f"Error: {exc}", ft.Colors.RED) + + def _handle_connection_change(self, connected: bool, message: str) -> None: + """Handle connection state change from NoteFlowClient. + + Args: + connected: Connection state. + message: Status message. + """ + if self._suppress_connection_events: + return + + self._state.connected = connected + + if connected: + self._auto_reconnect_enabled = True + self._manual_disconnect = False + self._reconnect_stop_event.set() + self._reconnect_in_progress = False + self._state.run_on_ui_thread( + lambda: self._update_status(f"Connected: {message}", ft.Colors.GREEN) + ) + elif self._manual_disconnect or not self._auto_reconnect_enabled: + self._state.run_on_ui_thread( + lambda: self._update_status(f"Disconnected: {message}", ft.Colors.RED) + ) + elif not self._reconnect_in_progress: + self._start_reconnect_loop(message) + + self._state.run_on_ui_thread(self.update_button_state) + + # Forward to external callback if provided + if (callback := self._on_connection_change_callback) is not None: + try: + self._state.run_on_ui_thread(lambda: callback(connected, message)) + except Exception as e: + logger.error("on_connection_change callback error: %s", e) + + def _on_connect_success(self, info: ServerInfo) -> None: + """Handle successful connection (UI thread). + + Args: + info: Server info from connection. 
+ """ + self._auto_reconnect_enabled = True + self._reconnect_stop_event.set() + self._reconnect_in_progress = False + self.update_button_state() + self._update_status("Connected", ft.Colors.GREEN) + + # Update server info display + if self._server_info_text: + asr_status = "ready" if info.asr_ready else "not ready" + self._server_info_text.value = ( + f"Server v{info.version} | " + f"ASR: {info.asr_model} ({asr_status}) | " + f"Active meetings: {info.active_meetings}" + ) + + self._state.request_update() + + # Follow NoteFlowClient callback pattern with error handling + if self._on_connected and self._client: + try: + self._on_connected(self._client, info) + except Exception as e: + logger.error("on_connected callback error: %s", e) + + def _start_reconnect_loop(self, message: str) -> None: + """Start background reconnect attempts.""" + with self._reconnect_lock: + if self._reconnect_in_progress: + return + + self._reconnect_in_progress = True + self._reconnect_stop_event.clear() + self._reconnect_thread = threading.Thread( + target=self._reconnect_worker, + args=(message,), + daemon=True, + ) + self._reconnect_thread.start() + + def _reconnect_worker(self, message: str) -> None: + """Attempt to reconnect several times before giving up.""" + if not self._client: + self._reconnect_in_progress = False + return + + # Stop streaming here to avoid audio queue growth while reconnecting. + self._client.stop_streaming() + + for attempt in range(1, RECONNECT_ATTEMPTS + 1): + if self._reconnect_stop_event.is_set(): + self._reconnect_in_progress = False + return + + warning = f"Disconnected: {message}. Reconnecting ({attempt}/{RECONNECT_ATTEMPTS})" + if self._state.recording: + warning += " - recording will stop if not reconnected." + self._update_status(warning, ft.Colors.ORANGE) + + if self._attempt_reconnect(): + self._reconnect_in_progress = False + return + + self._reconnect_stop_event.wait(RECONNECT_DELAY_SECONDS) + + self._reconnect_in_progress = False + self._auto_reconnect_enabled = False + if self._state.recording: + final_message = "Reconnection failed. Recording stopped." + else: + final_message = "Reconnection failed." + self._finalize_disconnect(final_message) + + def _attempt_reconnect(self) -> bool: + """Attempt a single reconnect. + + Returns: + True if reconnected successfully. 
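+
+        Note:
+            Connection events from tearing down the old channel are
+            suppressed, so a failed attempt does not re-trigger the
+            reconnect loop.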
+ """ + if not self._client: + return False + + self._suppress_connection_events = True + try: + self._client.disconnect() + finally: + self._suppress_connection_events = False + + if not self._client.connect(timeout=10.0): + return False + + info = self._client.get_server_info() + if not info: + self._suppress_connection_events = True + try: + self._client.disconnect() + finally: + self._suppress_connection_events = False + return False + + self._state.connected = True + self._state.server_info = info + self._state.run_on_ui_thread(lambda: self._on_connect_success(info)) + return True + + def _finalize_disconnect(self, message: str) -> None: + """Finalize disconnect after failed reconnect attempts.""" + self._state.connected = False + self._state.server_info = None + self._update_status(message, ft.Colors.RED) + self._state.run_on_ui_thread(self.update_button_state) + + def handle_disconnect() -> None: + if self._on_disconnected: + try: + self._on_disconnected() + except Exception as e: + logger.error("on_disconnected callback error: %s", e) + + if self._client: + threading.Thread(target=self._disconnect_client, daemon=True).start() + + self._state.run_on_ui_thread(handle_disconnect) + + def _disconnect_client(self) -> None: + """Disconnect client without triggering connection callbacks.""" + if not self._client: + return + + self._suppress_connection_events = True + try: + self._client.disconnect() + finally: + self._suppress_connection_events = False + self._client = None + + def _cancel_reconnect(self) -> None: + """Stop any in-progress reconnect attempt.""" + self._reconnect_stop_event.set() + + def _update_status(self, message: str, color: str) -> None: + """Update status text. + + Args: + message: Status message. + color: Text color. + """ + + def update() -> None: + if self._status_text: + self._status_text.value = message + self._status_text.color = color + self._state.request_update() + + self._state.run_on_ui_thread(update) +```` + +## File: src/noteflow/client/components/meeting_library.py +````python +"""Meeting library component for browsing and exporting meetings. + +Uses MeetingInfo, ExportResult from grpc.client and format_datetime from _formatting. +Does not recreate any types - imports and uses existing ones. +""" + +from __future__ import annotations + +import logging +import threading +import time +from collections.abc import Callable +from datetime import datetime +from typing import TYPE_CHECKING + +import flet as ft + +# REUSE existing formatting - do not recreate +from noteflow.infrastructure.export._formatting import format_datetime + +if TYPE_CHECKING: + from noteflow.client.state import AppState + from noteflow.grpc.client import MeetingInfo, NoteFlowClient + +logger = logging.getLogger(__name__) + + +class MeetingLibraryComponent: + """Meeting library for browsing and exporting meetings. + + Uses NoteFlowClient.list_meetings() and export_transcript() for data. + """ + + DIARIZATION_POLL_INTERVAL_SECONDS: float = 2.0 + + def __init__( + self, + state: AppState, + get_client: Callable[[], NoteFlowClient | None], + on_meeting_selected: Callable[[MeetingInfo], None] | None = None, + ) -> None: + """Initialize meeting library. + + Args: + state: Centralized application state. + get_client: Callable that returns current gRPC client or None. + on_meeting_selected: Callback when a meeting is selected. 
+ """ + self._state = state + self._get_client = get_client + self._on_meeting_selected = on_meeting_selected + + # UI elements + self._search_field: ft.TextField | None = None + self._list_view: ft.ListView | None = None + self._export_btn: ft.ElevatedButton | None = None + self._analyze_btn: ft.ElevatedButton | None = None + self._rename_btn: ft.ElevatedButton | None = None + self._refresh_btn: ft.IconButton | None = None + self._column: ft.Column | None = None + + # Export dialog + self._export_dialog: ft.AlertDialog | None = None + self._format_dropdown: ft.Dropdown | None = None + + # Analyze speakers dialog + self._analyze_dialog: ft.AlertDialog | None = None + self._num_speakers_field: ft.TextField | None = None + + # Rename speakers dialog + self._rename_dialog: ft.AlertDialog | None = None + self._rename_fields: dict[str, ft.TextField] = {} + + def build(self) -> ft.Column: + """Build meeting library UI. + + Returns: + Column containing search, list, and export controls. + """ + self._search_field = ft.TextField( + label="Search meetings", + prefix_icon=ft.Icons.SEARCH, + on_change=self._on_search_change, + expand=True, + ) + self._refresh_btn = ft.IconButton( + icon=ft.Icons.REFRESH, + tooltip="Refresh meetings", + on_click=self._on_refresh_click, + ) + self._export_btn = ft.ElevatedButton( + "Export", + icon=ft.Icons.DOWNLOAD, + on_click=self._show_export_dialog, + disabled=True, + ) + self._analyze_btn = ft.ElevatedButton( + "Refine Speakers", + icon=ft.Icons.RECORD_VOICE_OVER, + on_click=self._show_analyze_dialog, + disabled=True, + ) + self._rename_btn = ft.ElevatedButton( + "Rename Speakers", + icon=ft.Icons.EDIT, + on_click=self._show_rename_dialog, + disabled=True, + ) + + self._list_view = ft.ListView( + spacing=5, + padding=10, + height=200, + ) + + self._column = ft.Column( + [ + ft.Row([self._search_field, self._refresh_btn]), + ft.Container( + content=self._list_view, + border=ft.border.all(1, ft.Colors.GREY_400), + border_radius=8, + ), + ft.Row( + [self._analyze_btn, self._rename_btn, self._export_btn], + alignment=ft.MainAxisAlignment.END, + spacing=10, + ), + ], + spacing=10, + ) + return self._column + + def refresh_meetings(self) -> None: + """Refresh meeting list from server.""" + client = self._get_client() + if not client: + logger.warning("No gRPC client available") + return + + try: + meetings = client.list_meetings(limit=50) + self._state.meetings = meetings + self._state.run_on_ui_thread(self._render_meetings) + except Exception as exc: + logger.error("Error fetching meetings: %s", exc) + + def _on_search_change(self, e: ft.ControlEvent) -> None: + """Handle search field change.""" + self._render_meetings() + + def _on_refresh_click(self, e: ft.ControlEvent) -> None: + """Handle refresh button click.""" + self.refresh_meetings() + + def _render_meetings(self) -> None: + """Render meeting list (UI thread only).""" + if not self._list_view: + return + + self._list_view.controls.clear() + + # Filter by search query + search_query = (self._search_field.value or "").lower() if self._search_field else "" + filtered_meetings = [m for m in self._state.meetings if search_query in m.title.lower()] + + for meeting in filtered_meetings: + self._list_view.controls.append(self._create_meeting_row(meeting)) + + self._state.request_update() + + def _create_meeting_row(self, meeting: MeetingInfo) -> ft.Container: + """Create a row for a meeting. + + Args: + meeting: Meeting info to display. + + Returns: + Container with meeting details. 
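+
+        Note:
+            The click handler captures the meeting via a lambda default
+            argument to avoid late-binding the loop variable.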
+ """ + # Format datetime from timestamp + created_dt = datetime.fromtimestamp(meeting.created_at) if meeting.created_at else None + date_str = format_datetime(created_dt) + + # Format duration + duration = meeting.duration_seconds + duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else "--:--" + + is_selected = self._state.selected_meeting and self._state.selected_meeting.id == meeting.id + + row = ft.Row( + [ + ft.Column( + [ + ft.Text(meeting.title, weight=ft.FontWeight.BOLD, size=14), + ft.Text( + f"{date_str} | {meeting.state} | {meeting.segment_count} segments | {duration_str}", + size=11, + color=ft.Colors.GREY_600, + ), + ], + spacing=2, + expand=True, + ), + ] + ) + + return ft.Container( + content=row, + padding=10, + border_radius=4, + bgcolor=ft.Colors.BLUE_50 if is_selected else None, + on_click=lambda e, m=meeting: self._on_meeting_click(m), + ink=True, + ) + + def _on_meeting_click(self, meeting: MeetingInfo) -> None: + """Handle meeting row click. + + Args: + meeting: Selected meeting. + """ + self._state.selected_meeting = meeting + + # Enable action buttons + if self._export_btn: + self._export_btn.disabled = False + if self._analyze_btn: + self._analyze_btn.disabled = not self._can_refine_speakers(meeting) + if self._rename_btn: + self._rename_btn.disabled = not self._can_refine_speakers(meeting) + + # Re-render to update selection + self._render_meetings() + + # Notify callback + if self._on_meeting_selected: + self._on_meeting_selected(meeting) + + def _show_export_dialog(self, e: ft.ControlEvent) -> None: + """Show export format selection dialog.""" + if not self._state.selected_meeting: + return + + self._format_dropdown = ft.Dropdown( + label="Export Format", + options=[ + ft.dropdown.Option("markdown", "Markdown (.md)"), + ft.dropdown.Option("html", "HTML (.html)"), + ], + value="markdown", + width=200, + ) + + self._export_dialog = ft.AlertDialog( + title=ft.Text("Export Transcript"), + content=ft.Column( + [ + ft.Text(f"Meeting: {self._state.selected_meeting.title}"), + self._format_dropdown, + ], + spacing=10, + tight=True, + ), + actions=[ + ft.TextButton("Cancel", on_click=self._close_export_dialog), + ft.ElevatedButton("Export", on_click=self._do_export), + ], + actions_alignment=ft.MainAxisAlignment.END, + ) + + if self._state._page: + self._state._page.dialog = self._export_dialog + self._export_dialog.open = True + self._state.request_update() + + def _close_export_dialog(self, e: ft.ControlEvent | None = None) -> None: + """Close the export dialog.""" + if self._export_dialog: + self._export_dialog.open = False + self._state.request_update() + + def _do_export(self, e: ft.ControlEvent) -> None: + """Perform the export.""" + if not self._state.selected_meeting or not self._format_dropdown: + return + + format_name = self._format_dropdown.value or "markdown" + meeting_id = self._state.selected_meeting.id + + self._close_export_dialog() + + client = self._get_client() + if not client: + logger.warning("No gRPC client available for export") + return + + try: + if result := client.export_transcript(meeting_id, format_name): + self._save_export(result.content, result.file_extension) + else: + logger.error("Export failed - no result returned") + except Exception as exc: + logger.error("Error exporting transcript: %s", exc) + + def _save_export(self, content: str, extension: str) -> None: + """Save exported content to file. + + Args: + content: Export content. + extension: File extension. 
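+
+        Note:
+            The suggested filename is derived from the meeting title;
+            characters outside alphanumerics, spaces, hyphens, and
+            underscores are replaced with underscores.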
+ """ + if not self._state.selected_meeting: + return + + # Create filename from meeting title + safe_title = "".join( + c if c.isalnum() or c in " -_" else "_" for c in self._state.selected_meeting.title + ) + filename = f"{safe_title}.{extension}" + + # Use FilePicker for save dialog + if self._state._page: + + def on_save(e: ft.FilePickerResultEvent) -> None: + if e.path: + try: + with open(e.path, "w", encoding="utf-8") as f: + f.write(content) + logger.info("Exported to: %s", e.path) + except OSError as exc: + logger.error("Error saving export: %s", exc) + + picker = ft.FilePicker(on_result=on_save) + self._state._page.overlay.append(picker) + self._state._page.update() + picker.save_file( + file_name=filename, + allowed_extensions=[extension], + ) + + # ========================================================================= + # Speaker Refinement Methods + # ========================================================================= + + def _show_analyze_dialog(self, e: ft.ControlEvent) -> None: + """Show speaker refinement dialog.""" + if not self._state.selected_meeting: + return + + if not self._can_refine_speakers(self._state.selected_meeting): + self._show_simple_dialog( + "Meeting still active", + ft.Text("Stop the meeting before refining speakers."), + ) + return + + self._num_speakers_field = ft.TextField( + label="Number of speakers (optional)", + hint_text="Leave empty for auto-detect", + width=200, + keyboard_type=ft.KeyboardType.NUMBER, + ) + + self._analyze_dialog = ft.AlertDialog( + title=ft.Text("Refine Speakers"), + content=ft.Column( + [ + ft.Text(f"Meeting: {self._state.selected_meeting.title}"), + ft.Text( + "Refine speaker labels using offline diarization.", + size=12, + color=ft.Colors.GREY_600, + ), + self._num_speakers_field, + ], + spacing=10, + tight=True, + ), + actions=[ + ft.TextButton("Cancel", on_click=self._close_analyze_dialog), + ft.ElevatedButton("Analyze", on_click=self._do_analyze), + ], + actions_alignment=ft.MainAxisAlignment.END, + ) + + if self._state._page: + self._state._page.dialog = self._analyze_dialog + self._analyze_dialog.open = True + self._state.request_update() + + def _close_analyze_dialog(self, e: ft.ControlEvent | None = None) -> None: + """Close the analyze dialog.""" + if self._analyze_dialog: + self._analyze_dialog.open = False + self._state.request_update() + + def _do_analyze(self, e: ft.ControlEvent) -> None: + """Perform speaker analysis.""" + if not self._state.selected_meeting: + return + + # Parse number of speakers (optional) + num_speakers: int | None = None + if self._num_speakers_field and self._num_speakers_field.value: + try: + num_speakers = int(self._num_speakers_field.value) + if num_speakers < 1: + num_speakers = None + except ValueError: + logger.debug("Invalid speaker count input '%s', using auto-detection", self._num_speakers_field.value) + + meeting_id = self._state.selected_meeting.id + self._close_analyze_dialog() + + client = self._get_client() + if not client: + logger.warning("No gRPC client available for analysis") + return + + # Show progress indicator + self._show_analysis_progress("Starting...") + + try: + result = client.refine_speaker_diarization(meeting_id, num_speakers) + except Exception as exc: + logger.error("Error analyzing speakers: %s", exc) + self._show_analysis_error(str(exc)) + return + + if not result: + self._show_analysis_error("Analysis failed - no response from server") + return + + if result.is_terminal: + if result.success: + self._show_analysis_result(result.segments_updated, 
result.speaker_ids) + else: + self._show_analysis_error(result.error_message or "Analysis failed") + return + + if not result.job_id: + self._show_analysis_error(result.error_message or "Server did not return job ID") + return + + # Job queued/running - poll for completion + self._show_analysis_progress(self._format_job_status(result.status)) + self._start_diarization_poll(result.job_id) + + def _show_analysis_progress(self, status: str = "Refining...") -> None: + """Show refinement in progress indicator.""" + if self._analyze_btn: + self._analyze_btn.disabled = True + self._analyze_btn.text = status + self._state.request_update() + + def _show_analysis_result(self, segments_updated: int, speaker_ids: list[str]) -> None: + """Show refinement success result. + + Args: + segments_updated: Number of segments with speaker labels. + speaker_ids: List of detected speaker IDs. + """ + if self._analyze_btn: + self._analyze_btn.disabled = False + self._analyze_btn.text = "Refine Speakers" + + speaker_list = ", ".join(speaker_ids) if speaker_ids else "None found" + + result_dialog = ft.AlertDialog( + title=ft.Text("Refinement Complete"), + content=ft.Column( + [ + ft.Text(f"Segments updated: {segments_updated}"), + ft.Text(f"Speakers found: {speaker_list}"), + ft.Text( + "Reload the meeting to see speaker labels.", + size=12, + color=ft.Colors.GREY_600, + italic=True, + ), + ], + spacing=5, + tight=True, + ), + actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))], + ) + + if self._state._page: + self._state._page.dialog = result_dialog + result_dialog.open = True + self._state.request_update() + + def _show_analysis_error(self, error_message: str) -> None: + """Show analysis error. + + Args: + error_message: Error description. + """ + if self._analyze_btn: + self._analyze_btn.disabled = False + self._analyze_btn.text = "Refine Speakers" + self._show_simple_dialog("Refinement Failed", ft.Text(error_message)) + + def _close_result_dialog(self, e: ft.ControlEvent) -> None: + """Close any result dialog.""" + if self._state._page and self._state._page.dialog: + self._state._page.dialog.open = False + self._state.request_update() + + def _start_diarization_poll(self, job_id: str) -> None: + """Start polling for diarization job completion.""" + page = self._state._page + if page and hasattr(page, "run_thread"): + page.run_thread(lambda: self._poll_diarization_job(job_id)) + return + + threading.Thread( + target=self._poll_diarization_job, + args=(job_id,), + daemon=True, + name="diarization-poll", + ).start() + + def _poll_diarization_job(self, job_id: str) -> None: + """Poll background diarization job until completion.""" + client = self._get_client() + if not client: + self._state.run_on_ui_thread( + lambda: self._show_analysis_error("No gRPC client available for polling") + ) + return + + while True: + result = client.get_diarization_job_status(job_id) + if not result: + self._state.run_on_ui_thread( + lambda: self._show_analysis_error("Failed to fetch diarization status") + ) + return + + if result.is_terminal: + if result.success: + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_result( + r.segments_updated, + r.speaker_ids, + ) + ) + else: + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_error( + r.error_message or "Diarization failed" + ) + ) + return + + # Update status text while running + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_progress(self._format_job_status(r.status)) + ) + 
time.sleep(self.DIARIZATION_POLL_INTERVAL_SECONDS) + + @staticmethod + def _format_job_status(status: str) -> str: + """Format job status for button label.""" + return { + "queued": "Queued...", + "running": "Refining...", + }.get(status, "Refining...") + + def _show_simple_dialog(self, title: str, content: ft.Control) -> None: + """Show a simple dialog with title, content, and OK button. + + Args: + title: Dialog title. + content: Dialog content control. + """ + dialog = ft.AlertDialog( + title=ft.Text(title), + content=content, + actions=[ft.TextButton("OK", on_click=self._close_result_dialog)], + ) + if self._state._page: + self._state._page.dialog = dialog + dialog.open = True + self._state.request_update() + + # ========================================================================= + # Speaker Rename Methods + # ========================================================================= + + def _show_rename_dialog(self, e: ft.ControlEvent) -> None: + """Show speaker rename dialog with current speaker IDs.""" + if not self._state.selected_meeting: + return + + if not self._can_refine_speakers(self._state.selected_meeting): + self._show_simple_dialog( + "Meeting still active", + ft.Text("Stop the meeting before renaming speakers."), + ) + return + + client = self._get_client() + if not client: + logger.warning("No gRPC client available") + return + + # Get segments to extract distinct speaker IDs + meeting_id = self._state.selected_meeting.id + segments = client.get_meeting_segments(meeting_id) + + # Extract distinct speaker IDs + speaker_ids = sorted({s.speaker_id for s in segments if s.speaker_id}) + + if not speaker_ids: + self._show_no_speakers_message() + return + + # Create text fields for each speaker + self._rename_fields.clear() + speaker_controls: list[ft.Control] = [] + + for speaker_id in speaker_ids: + field = ft.TextField( + label=f"{speaker_id}", + hint_text="Enter new name", + width=200, + ) + self._rename_fields[speaker_id] = field + speaker_controls.append( + ft.Row( + [ + ft.Text(speaker_id, width=120, size=12), + ft.Icon(ft.Icons.ARROW_RIGHT, size=16), + field, + ], + alignment=ft.MainAxisAlignment.START, + ) + ) + + self._rename_dialog = ft.AlertDialog( + title=ft.Text("Rename Speakers"), + content=ft.Column( + [ + ft.Text(f"Meeting: {self._state.selected_meeting.title}"), + ft.Text( + "Enter new names for speakers (leave blank to keep current):", + size=12, + color=ft.Colors.GREY_600, + ), + ft.Divider(), + *speaker_controls, + ], + spacing=10, + scroll=ft.ScrollMode.AUTO, + height=300, + ), + actions=[ + ft.TextButton("Cancel", on_click=self._close_rename_dialog), + ft.ElevatedButton("Apply", on_click=self._do_rename), + ], + actions_alignment=ft.MainAxisAlignment.END, + ) + + if self._state._page: + self._state._page.dialog = self._rename_dialog + self._rename_dialog.open = True + self._state.request_update() + + def _close_rename_dialog(self, e: ft.ControlEvent | None = None) -> None: + """Close the rename dialog.""" + if self._rename_dialog: + self._rename_dialog.open = False + self._state.request_update() + + def _show_no_speakers_message(self) -> None: + """Show message when no speakers found.""" + self._show_simple_dialog( + "No Speakers Found", + ft.Text( + "This meeting has no speaker labels. " + "Run 'Refine Speakers' first to identify speakers." 
+ ), + ) + + def _do_rename(self, e: ft.ControlEvent) -> None: + """Apply speaker renames.""" + if not self._state.selected_meeting: + return + + client = self._get_client() + if not client: + logger.warning("No gRPC client available") + return + + meeting_id = self._state.selected_meeting.id + self._close_rename_dialog() + + # Collect renames (only non-empty values) + renames: list[tuple[str, str]] = [] + for old_id, field in self._rename_fields.items(): + new_name = (field.value or "").strip() + if new_name and new_name != old_id: + renames.append((old_id, new_name)) + + if not renames: + return + + # Apply renames + total_updated = 0 + errors: list[str] = [] + + for old_id, new_name in renames: + try: + result = client.rename_speaker(meeting_id, old_id, new_name) + if result and result.success: + total_updated += result.segments_updated + else: + errors.append(f"{old_id}: rename failed") + except Exception as exc: + logger.error("Error renaming speaker %s: %s", old_id, exc) + errors.append(f"{old_id}: {exc}") + + # Show result + if errors: + self._show_rename_errors(errors) + else: + self._show_rename_success(total_updated, len(renames)) + + def _show_rename_success(self, segments_updated: int, speakers_renamed: int) -> None: + """Show rename success message. + + Args: + segments_updated: Total number of segments updated. + speakers_renamed: Number of speakers renamed. + """ + success_dialog = ft.AlertDialog( + title=ft.Text("Rename Complete"), + content=ft.Column( + [ + ft.Text(f"Renamed {speakers_renamed} speaker(s)"), + ft.Text(f"Updated {segments_updated} segment(s)"), + ft.Text( + "Reload the meeting to see the new speaker names.", + size=12, + color=ft.Colors.GREY_600, + italic=True, + ), + ], + spacing=5, + tight=True, + ), + actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))], + ) + + if self._state._page: + self._state._page.dialog = success_dialog + success_dialog.open = True + self._state.request_update() + + def _show_rename_errors(self, errors: list[str]) -> None: + """Show rename errors. + + Args: + errors: List of error messages. + """ + self._show_simple_dialog("Rename Errors", ft.Text("\n".join(errors))) + + @staticmethod + def _can_refine_speakers(meeting: MeetingInfo) -> bool: + """Return True when meeting is stopped/completed and safe to refine/rename.""" + return meeting.state in {"stopped", "completed", "error"} +```` + +## File: src/noteflow/client/components/playback_sync.py +````python +"""Playback-transcript synchronization controller. + +Polls playback position and updates transcript highlight state. +Follows RecordingTimerComponent pattern for background threading. +""" + +from __future__ import annotations + +import logging +import threading +from collections.abc import Callable +from typing import TYPE_CHECKING, Final + +from noteflow.infrastructure.audio import PlaybackState + +if TYPE_CHECKING: + from noteflow.client.state import AppState + +logger = logging.getLogger(__name__) + +POSITION_POLL_INTERVAL: Final[float] = 0.1 # 100ms for smooth highlighting + + +class PlaybackSyncController: + """Synchronize playback position with transcript highlighting. + + Polls playback position and updates state.highlighted_segment_index. + Triggers UI updates via state.run_on_ui_thread(). + """ + + def __init__( + self, + state: AppState, + on_highlight_change: Callable[[int | None], None] | None = None, + ) -> None: + """Initialize sync controller. + + Args: + state: Centralized application state. 
+ on_highlight_change: Callback when highlighted segment changes. + """ + self._state = state + self._on_highlight_change = on_highlight_change + self._sync_thread: threading.Thread | None = None + self._stop_event = threading.Event() + + def start(self) -> None: + """Start position sync polling.""" + if self._sync_thread and self._sync_thread.is_alive(): + return + + self._stop_event.clear() + self._sync_thread = threading.Thread( + target=self._sync_loop, + daemon=True, + name="PlaybackSyncController", + ) + self._sync_thread.start() + logger.debug("Started playback sync controller") + + def stop(self) -> None: + """Stop position sync polling.""" + self._stop_event.set() + if self._sync_thread: + self._sync_thread.join(timeout=2.0) + self._sync_thread = None + logger.debug("Stopped playback sync controller") + + def _sync_loop(self) -> None: + """Background sync loop - polls position and updates highlight.""" + while not self._stop_event.is_set(): + playback = self._state.playback + + if playback.state == PlaybackState.PLAYING: + position = playback.current_position + self._update_position(position) + elif playback.state == PlaybackState.STOPPED: + # Clear highlight when stopped + if self._state.highlighted_segment_index is not None: + self._state.highlighted_segment_index = None + self._state.run_on_ui_thread(self._notify_highlight_change) + + self._stop_event.wait(POSITION_POLL_INTERVAL) + + def _update_position(self, position: float) -> None: + """Update state with current position and find matching segment.""" + self._state.playback_position = position + + new_index = self._state.find_segment_at_position(position) + old_index = self._state.highlighted_segment_index + + if new_index != old_index: + self._state.highlighted_segment_index = new_index + self._state.run_on_ui_thread(self._notify_highlight_change) + + def _notify_highlight_change(self) -> None: + """Notify UI of highlight change (UI thread only).""" + if self._on_highlight_change: + try: + self._on_highlight_change(self._state.highlighted_segment_index) + except Exception as e: + logger.error("Highlight change callback error: %s", e) + + self._state.request_update() + + def seek_to_segment(self, segment_index: int) -> bool: + """Seek playback to start of specified segment. + + Args: + segment_index: Index into state.transcript_segments. + + Returns: + True if seek was successful. + """ + segments = self._state.transcript_segments + if not (0 <= segment_index < len(segments)): + logger.warning("Invalid segment index: %d", segment_index) + return False + + playback = self._state.playback + segment = segments[segment_index] + + if playback.seek(segment.start_time): + self._state.highlighted_segment_index = segment_index + self._state.playback_position = segment.start_time + self._state.run_on_ui_thread(self._notify_highlight_change) + return True + + return False +```` + +## File: src/noteflow/client/components/vu_meter.py +````python +"""VU meter component for audio level visualization. + +Uses RmsLevelProvider from AppState (not a new instance). +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import flet as ft +import numpy as np +from numpy.typing import NDArray + +if TYPE_CHECKING: + from noteflow.client.state import AppState + + +class VuMeterComponent: + """Audio level visualization component. + + Uses RmsLevelProvider from AppState (not a new instance). + """ + + def __init__(self, state: AppState) -> None: + """Initialize VU meter component. 
+ + Args: + state: Centralized application state with level_provider. + """ + self._state = state + # REUSE level_provider from state - do not create new instance + self._progress_bar: ft.ProgressBar | None = None + self._label: ft.Text | None = None + + def build(self) -> ft.Row: + """Build VU meter UI elements. + + Returns: + Row containing progress bar and level label. + """ + self._progress_bar = ft.ProgressBar( + value=0, + width=300, + bar_height=20, + color=ft.Colors.GREEN, + bgcolor=ft.Colors.GREY_300, + ) + self._label = ft.Text("-60 dB", size=12, width=60) + + return ft.Row( + [ + ft.Text("Level:", size=12), + self._progress_bar, + self._label, + ] + ) + + def on_audio_frames(self, frames: NDArray[np.float32]) -> None: + """Process incoming audio frames for level metering. + + Uses state.level_provider.get_db() - existing RmsLevelProvider method. + + Args: + frames: Audio samples as float32 array. + """ + # REUSE existing RmsLevelProvider from state + db_level = self._state.level_provider.get_db(frames) + self._state.current_db_level = db_level + self._state.run_on_ui_thread(self._update_display) + + def _update_display(self) -> None: + """Update VU meter display (UI thread only).""" + if not self._progress_bar or not self._label: + return + + db = self._state.current_db_level + # Convert dB to 0-1 range (-60 to 0 dB) + normalized = max(0.0, min(1.0, (db + 60) / 60)) + + self._progress_bar.value = normalized + self._progress_bar.color = ( + ft.Colors.RED if db > -6 else ft.Colors.YELLOW if db > -20 else ft.Colors.GREEN + ) + self._label.value = f"{db:.0f} dB" + + self._state.request_update() +```` + +## File: src/noteflow/client/__init__.py +````python +"""NoteFlow client application.""" +```` + +## File: src/noteflow/client/_trigger_mixin.py +````python +"""Trigger detection mixin for NoteFlow client. + +Extracts trigger detection logic from app.py to keep file under 750 lines. +Handles meeting detection triggers via app audio activity and calendar proximity. +""" + +from __future__ import annotations + +import asyncio +import logging +from typing import TYPE_CHECKING, Protocol + +import flet as ft + +from noteflow.application.services import TriggerService, TriggerServiceSettings +from noteflow.config.settings import TriggerSettings, get_trigger_settings +from noteflow.domain.triggers import TriggerAction, TriggerDecision +from noteflow.infrastructure.triggers import ( + AppAudioProvider, + AppAudioSettings, + CalendarProvider, + CalendarSettings, +) +from noteflow.infrastructure.triggers.calendar import parse_calendar_events + +if TYPE_CHECKING: + from noteflow.client.state import AppState + +logger = logging.getLogger(__name__) + + +class TriggerHost(Protocol): + """Protocol for app hosting trigger mixin.""" + + _state: AppState + _trigger_settings: TriggerSettings | None + _trigger_service: TriggerService | None + _app_audio: AppAudioProvider | None + _calendar_provider: CalendarProvider | None + _trigger_poll_interval: float + _trigger_task: asyncio.Task | None + + def _start_recording(self) -> None: + """Start recording audio.""" + ... + + def _ensure_audio_capture(self) -> bool: + """Ensure audio capture is running.""" + ... + + +class TriggerMixin: + """Mixin providing trigger detection functionality. + + Requires host to implement TriggerHost protocol. 
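+
+    Illustrative wiring (a sketch only; the host class and everything beyond
+    the mixin and TriggerHost members are assumptions):
+
+        class NoteFlowApp(TriggerMixin):
+            def __init__(self, state: AppState) -> None:
+                self._state = state
+                self._initialize_triggers()
+                self._trigger_task = asyncio.create_task(self._trigger_check_loop())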
+ """ + + def _initialize_triggers(self: TriggerHost) -> None: + """Initialize trigger settings, providers, and service.""" + self._trigger_settings = get_trigger_settings() + self._state.trigger_enabled = self._trigger_settings.trigger_enabled + self._trigger_poll_interval = self._trigger_settings.trigger_poll_interval_seconds + meeting_apps = {app.lower() for app in self._trigger_settings.trigger_meeting_apps} + suppressed_apps = {app.lower() for app in self._trigger_settings.trigger_suppressed_apps} + + app_audio_settings = AppAudioSettings( + enabled=self._trigger_settings.trigger_audio_enabled, + threshold_db=self._trigger_settings.trigger_audio_threshold_db, + window_seconds=self._trigger_settings.trigger_audio_window_seconds, + min_active_ratio=self._trigger_settings.trigger_audio_min_active_ratio, + min_samples=self._trigger_settings.trigger_audio_min_samples, + max_history=self._trigger_settings.trigger_audio_max_history, + weight=self._trigger_settings.trigger_weight_audio, + meeting_apps=meeting_apps, + suppressed_apps=suppressed_apps, + ) + calendar_settings = CalendarSettings( + enabled=self._trigger_settings.trigger_calendar_enabled, + weight=self._trigger_settings.trigger_weight_calendar, + lookahead_minutes=self._trigger_settings.trigger_calendar_lookahead_minutes, + lookbehind_minutes=self._trigger_settings.trigger_calendar_lookbehind_minutes, + events=parse_calendar_events(self._trigger_settings.trigger_calendar_events), + ) + + self._app_audio = AppAudioProvider(app_audio_settings) + self._calendar_provider = CalendarProvider(calendar_settings) + self._trigger_service = TriggerService( + providers=[self._app_audio, self._calendar_provider], + settings=TriggerServiceSettings( + enabled=self._trigger_settings.trigger_enabled, + auto_start_enabled=self._trigger_settings.trigger_auto_start, + rate_limit_seconds=self._trigger_settings.trigger_rate_limit_minutes * 60, + snooze_seconds=self._trigger_settings.trigger_snooze_minutes * 60, + threshold_ignore=self._trigger_settings.trigger_confidence_ignore, + threshold_auto_start=self._trigger_settings.trigger_confidence_auto, + ), + ) + + def _should_keep_capture_running(self: TriggerHost) -> bool: + """Return True if background audio capture should remain active.""" + return False + + async def _trigger_check_loop(self: TriggerHost) -> None: + """Background loop to check trigger conditions. + + Runs every poll interval while not recording. + """ + check_interval = self._trigger_poll_interval + try: + while True: + await asyncio.sleep(check_interval) + + # Skip if recording or trigger pending + if self._state.recording or self._state.trigger_pending: + continue + + # Skip if triggers disabled + if not self._state.trigger_enabled or not self._trigger_service: + continue + + # Evaluate triggers + decision = self._trigger_service.evaluate() + self._state.trigger_decision = decision + + if decision.action == TriggerAction.IGNORE: + continue + + if decision.action == TriggerAction.AUTO_START: + # Auto-start if connected + if self._state.connected: + logger.info( + "Auto-starting recording (confidence=%.2f)", decision.confidence + ) + self._start_recording() + elif decision.action == TriggerAction.NOTIFY: + # Show prompt to user + self._show_trigger_prompt(decision) + except asyncio.CancelledError: + logger.debug("Trigger loop cancelled") + raise + + def _show_trigger_prompt(self: TriggerHost, decision: TriggerDecision) -> None: + """Show trigger notification prompt to user. 
+ + Args: + decision: Trigger decision with confidence and signals. + """ + self._state.trigger_pending = True + + # Build signal description + signal_desc = ", ".join(s.app_name or s.source.value for s in decision.signals) + + def handle_start(_: ft.ControlEvent) -> None: + self._state.trigger_pending = False + if dialog.open: + dialog.open = False + self._state.request_update() + if self._state.connected: + self._start_recording() + + def handle_snooze(_: ft.ControlEvent) -> None: + self._state.trigger_pending = False + if self._trigger_service: + self._trigger_service.snooze() + if dialog.open: + dialog.open = False + self._state.request_update() + + def handle_dismiss(_: ft.ControlEvent) -> None: + self._state.trigger_pending = False + if dialog.open: + dialog.open = False + self._state.request_update() + + dialog = ft.AlertDialog( + title=ft.Text("Meeting Detected"), + content=ft.Text( + "Detected: " + f"{signal_desc}\n" + f"Confidence: {decision.confidence:.0%}\n\n" + "Start recording?" + ), + actions=[ + ft.TextButton("Start", on_click=handle_start), + ft.TextButton("Snooze", on_click=handle_snooze), + ft.TextButton("Dismiss", on_click=handle_dismiss), + ], + actions_alignment=ft.MainAxisAlignment.END, + ) + + if self._state._page: + self._state._page.dialog = dialog + dialog.open = True + self._state.request_update() +```` + +## File: src/noteflow/core/__init__.py +````python +"""Core types and protocols for NoteFlow.""" +```` + +## File: src/noteflow/domain/entities/__init__.py +````python +"""Domain entities for NoteFlow.""" + +from .annotation import Annotation +from .meeting import Meeting +from .segment import Segment, WordTiming +from .summary import ActionItem, KeyPoint, Summary + +__all__ = [ + "ActionItem", + "Annotation", + "KeyPoint", + "Meeting", + "Segment", + "Summary", + "WordTiming", +] +```` + +## File: src/noteflow/domain/entities/annotation.py +````python +"""Annotation entity for user-created annotations during recording. + +Distinct from LLM-extracted ActionItem/KeyPoint in summaries. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId + + +@dataclass +class Annotation: + """User-created annotation during recording. + + Evidence-linked to specific transcript segments for navigation. + Unlike ActionItem/KeyPoint (LLM-extracted from Summary), annotations + are created in real-time during recording and belong directly to Meeting. 
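+
+    Illustrative construction (values are assumptions):
+
+        note = Annotation(
+            id=annotation_id,
+            meeting_id=meeting_id,
+            annotation_type=annotation_type,
+            text="Follow up on budget",
+            start_time=12.5,
+            end_time=18.0,
+            segment_ids=[3, 4],
+        )
+        note.duration  # 5.5 seconds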
+ """ + + id: AnnotationId + meeting_id: MeetingId + annotation_type: AnnotationType + text: str + start_time: float + end_time: float + segment_ids: list[int] = field(default_factory=list) + created_at: datetime = field(default_factory=datetime.now) + + # Database primary key (set after persistence) + db_id: int | None = None + + def __post_init__(self) -> None: + """Validate annotation data.""" + if self.end_time < self.start_time: + raise ValueError( + f"end_time ({self.end_time}) must be >= start_time ({self.start_time})" + ) + + @property + def duration(self) -> float: + """Annotation duration in seconds.""" + return self.end_time - self.start_time + + def has_segments(self) -> bool: + """Check if annotation is linked to transcript segments.""" + return len(self.segment_ids) > 0 +```` + +## File: src/noteflow/domain/entities/meeting.py +````python +"""Meeting aggregate root entity.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from typing import TYPE_CHECKING +from uuid import UUID, uuid4 + +from noteflow.domain.value_objects import MeetingId, MeetingState + +if TYPE_CHECKING: + from noteflow.domain.entities.segment import Segment + from noteflow.domain.entities.summary import Summary + + +@dataclass +class Meeting: + """Meeting aggregate root. + + The central entity representing a recorded meeting with its + transcript segments and optional summary. + """ + + id: MeetingId + title: str + state: MeetingState = MeetingState.CREATED + created_at: datetime = field(default_factory=datetime.now) + started_at: datetime | None = None + ended_at: datetime | None = None + segments: list[Segment] = field(default_factory=list) + summary: Summary | None = None + metadata: dict[str, str] = field(default_factory=dict) + wrapped_dek: bytes | None = None # Encrypted data encryption key + + @classmethod + def create( + cls, + title: str = "", + metadata: dict[str, str] | None = None, + ) -> Meeting: + """Factory method to create a new meeting. + + Args: + title: Optional meeting title. + metadata: Optional metadata dictionary. + + Returns: + New Meeting instance. + """ + meeting_id = MeetingId(uuid4()) + now = datetime.now() + + if not title: + title = f"Meeting {now.strftime('%Y-%m-%d %H:%M')}" + + return cls( + id=meeting_id, + title=title, + state=MeetingState.CREATED, + created_at=now, + metadata=metadata or {}, + ) + + @classmethod + def from_uuid_str( + cls, + uuid_str: str, + title: str = "", + state: MeetingState = MeetingState.CREATED, + created_at: datetime | None = None, + started_at: datetime | None = None, + ended_at: datetime | None = None, + metadata: dict[str, str] | None = None, + wrapped_dek: bytes | None = None, + ) -> Meeting: + """Create meeting with existing UUID string. + + Args: + uuid_str: UUID string for meeting ID. + title: Meeting title. + state: Meeting state. + created_at: Creation timestamp. + started_at: Start timestamp. + ended_at: End timestamp. + metadata: Meeting metadata. + wrapped_dek: Encrypted data encryption key. + + Returns: + Meeting instance with specified ID. + """ + meeting_id = MeetingId(UUID(uuid_str)) + return cls( + id=meeting_id, + title=title, + state=state, + created_at=created_at or datetime.now(), + started_at=started_at, + ended_at=ended_at, + metadata=metadata or {}, + wrapped_dek=wrapped_dek, + ) + + def start_recording(self) -> None: + """Transition to recording state. + + Raises: + ValueError: If transition is not valid. 
+ """ + if not self.state.can_transition_to(MeetingState.RECORDING): + raise ValueError(f"Cannot start recording from state {self.state.name}") + self.state = MeetingState.RECORDING + self.started_at = datetime.now() + + def begin_stopping(self) -> None: + """Transition to stopping state for graceful shutdown. + + This intermediate state allows audio writers and other resources + to flush and close properly before the meeting is fully stopped. + + Raises: + ValueError: If transition is not valid. + """ + if not self.state.can_transition_to(MeetingState.STOPPING): + raise ValueError(f"Cannot begin stopping from state {self.state.name}") + self.state = MeetingState.STOPPING + + def stop_recording(self) -> None: + """Transition to stopped state (from STOPPING). + + Raises: + ValueError: If transition is not valid. + """ + if not self.state.can_transition_to(MeetingState.STOPPED): + raise ValueError(f"Cannot stop recording from state {self.state.name}") + self.state = MeetingState.STOPPED + if self.ended_at is None: + self.ended_at = datetime.now() + + def complete(self) -> None: + """Transition to completed state. + + Raises: + ValueError: If transition is not valid. + """ + if not self.state.can_transition_to(MeetingState.COMPLETED): + raise ValueError(f"Cannot complete from state {self.state.name}") + self.state = MeetingState.COMPLETED + + def mark_error(self) -> None: + """Transition to error state.""" + self.state = MeetingState.ERROR + + def add_segment(self, segment: Segment) -> None: + """Add a transcript segment. + + Args: + segment: Segment to add. + """ + self.segments.append(segment) + + def set_summary(self, summary: Summary) -> None: + """Set the meeting summary. + + Args: + summary: Summary to set. + """ + self.summary = summary + + @property + def duration_seconds(self) -> float: + """Calculate meeting duration in seconds.""" + if self.ended_at and self.started_at: + return (self.ended_at - self.started_at).total_seconds() + if self.started_at: + return (datetime.now() - self.started_at).total_seconds() + return 0.0 + + @property + def next_segment_id(self) -> int: + """Get the next available segment ID.""" + return max(s.segment_id for s in self.segments) + 1 if self.segments else 0 + + @property + def segment_count(self) -> int: + """Number of transcript segments.""" + return len(self.segments) + + @property + def full_transcript(self) -> str: + """Concatenate all segment text.""" + return " ".join(s.text for s in self.segments) + + def is_active(self) -> bool: + """Check if meeting is in an active state (created or recording). + + Note: STOPPING is not considered active as it's transitioning to stopped. + """ + return self.state in (MeetingState.CREATED, MeetingState.RECORDING) + + def has_summary(self) -> bool: + """Check if meeting has a summary.""" + return self.summary is not None +```` + +## File: src/noteflow/domain/entities/summary.py +````python +"""Summary-related entities for meeting summaries.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from noteflow.domain.value_objects import MeetingId + + +@dataclass +class KeyPoint: + """A key point extracted from the meeting. + + Evidence-linked to specific transcript segments for verification. 
+ """ + + text: str + segment_ids: list[int] = field(default_factory=list) + start_time: float = 0.0 + end_time: float = 0.0 + + # Database primary key (set after persistence) + db_id: int | None = None + + def has_evidence(self) -> bool: + """Check if key point is backed by transcript evidence.""" + return len(self.segment_ids) > 0 + + +@dataclass +class ActionItem: + """An action item extracted from the meeting. + + Evidence-linked to specific transcript segments for verification. + """ + + text: str + assignee: str = "" + due_date: datetime | None = None + priority: int = 0 # 0=unspecified, 1=low, 2=medium, 3=high + segment_ids: list[int] = field(default_factory=list) + + # Database primary key (set after persistence) + db_id: int | None = None + + def has_evidence(self) -> bool: + """Check if action item is backed by transcript evidence.""" + return len(self.segment_ids) > 0 + + def is_assigned(self) -> bool: + """Check if action item has an assignee.""" + return bool(self.assignee) + + def has_due_date(self) -> bool: + """Check if action item has a due date.""" + return self.due_date is not None + + +@dataclass +class Summary: + """Meeting summary entity. + + Contains executive summary, key points, and action items, + all evidence-linked to transcript segments. + """ + + meeting_id: MeetingId + executive_summary: str = "" + key_points: list[KeyPoint] = field(default_factory=list) + action_items: list[ActionItem] = field(default_factory=list) + generated_at: datetime | None = None + model_version: str = "" + + # Database primary key (set after persistence) + db_id: int | None = None + + def all_points_have_evidence(self) -> bool: + """Check if all key points have transcript evidence.""" + return all(kp.has_evidence() for kp in self.key_points) + + def all_actions_have_evidence(self) -> bool: + """Check if all action items have transcript evidence.""" + return all(ai.has_evidence() for ai in self.action_items) + + def is_fully_evidenced(self) -> bool: + """Check if entire summary is backed by transcript evidence.""" + return self.all_points_have_evidence() and self.all_actions_have_evidence() + + @property + def key_point_count(self) -> int: + """Number of key points.""" + return len(self.key_points) + + @property + def action_item_count(self) -> int: + """Number of action items.""" + return len(self.action_items) + + @property + def unevidenced_points(self) -> list[KeyPoint]: + """Key points without transcript evidence.""" + return [kp for kp in self.key_points if not kp.has_evidence()] + + @property + def unevidenced_actions(self) -> list[ActionItem]: + """Action items without transcript evidence.""" + return [ai for ai in self.action_items if not ai.has_evidence()] +```` + +## File: src/noteflow/domain/ports/__init__.py +````python +"""Domain ports (interfaces) for NoteFlow.""" + +from .repositories import ( + AnnotationRepository, + MeetingRepository, + SegmentRepository, + SummaryRepository, +) +from .unit_of_work import UnitOfWork + +__all__ = [ + "AnnotationRepository", + "MeetingRepository", + "SegmentRepository", + "SummaryRepository", + "UnitOfWork", +] +```` + +## File: src/noteflow/domain/ports/unit_of_work.py +````python +"""Unit of Work protocol for transaction management.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol, Self + +if TYPE_CHECKING: + from .repositories import ( + AnnotationRepository, + MeetingRepository, + SegmentRepository, + SummaryRepository, + ) + + +class UnitOfWork(Protocol): + """Unit of Work protocol for 
managing transactions across repositories. + + Provides transactional consistency when operating on multiple + aggregates. Use as a context manager for automatic commit/rollback. + + Example: + async with uow: + meeting = await uow.meetings.get(meeting_id) + await uow.segments.add(meeting_id, segment) + await uow.commit() + """ + + annotations: AnnotationRepository + meetings: MeetingRepository + segments: SegmentRepository + summaries: SummaryRepository + + async def __aenter__(self) -> Self: + """Enter the unit of work context. + + Returns: + Self for use in async with statement. + """ + ... + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: object, + ) -> None: + """Exit the unit of work context. + + Rolls back on exception, otherwise commits. + + Args: + exc_type: Exception type if raised. + exc_val: Exception value if raised. + exc_tb: Exception traceback if raised. + """ + ... + + async def commit(self) -> None: + """Commit the current transaction. + + Persists all changes made within the unit of work. + """ + ... + + async def rollback(self) -> None: + """Rollback the current transaction. + + Discards all changes made within the unit of work. + """ + ... +```` + +## File: src/noteflow/domain/summarization/__init__.py +````python +"""Summarization domain module. + +Provides protocols and data transfer objects for meeting summarization. +""" + +from noteflow.domain.summarization.ports import ( + CitationVerificationResult, + CitationVerifier, + InvalidResponseError, + ProviderUnavailableError, + SummarizationError, + SummarizationRequest, + SummarizationResult, + SummarizationTimeoutError, + SummarizerProvider, +) + +__all__ = [ + "CitationVerificationResult", + "CitationVerifier", + "InvalidResponseError", + "ProviderUnavailableError", + "SummarizationError", + "SummarizationRequest", + "SummarizationResult", + "SummarizationTimeoutError", + "SummarizerProvider", +] +```` + +## File: src/noteflow/domain/summarization/ports.py +````python +"""Summarization provider port protocols.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from collections.abc import Sequence + + from noteflow.domain.entities import Segment, Summary + from noteflow.domain.value_objects import MeetingId + + +@dataclass(frozen=True) +class SummarizationRequest: + """Request for meeting summarization. + + Contains the meeting context needed for summary generation. + """ + + meeting_id: MeetingId + segments: Sequence[Segment] + max_key_points: int = 5 + max_action_items: int = 10 + + @property + def transcript_text(self) -> str: + """Concatenate all segment text into a single transcript.""" + return " ".join(seg.text for seg in self.segments) + + @property + def segment_count(self) -> int: + """Number of segments in the request.""" + return len(self.segments) + + @property + def total_duration(self) -> float: + """Total duration of all segments in seconds.""" + if not self.segments: + return 0.0 + return self.segments[-1].end_time - self.segments[0].start_time + + +@dataclass(frozen=True) +class SummarizationResult: + """Result from summarization provider. + + Contains the generated summary along with metadata. 
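+
+    Illustrative check (names are assumptions):
+
+        if result.is_success:
+            logger.info(
+                "%s/%s took %.0f ms",
+                result.provider_name, result.model_name, result.latency_ms,
+            )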
+ """ + + summary: Summary + model_name: str + provider_name: str + tokens_used: int | None = None + latency_ms: float = 0.0 + + @property + def is_success(self) -> bool: + """Check if summarization succeeded with content.""" + return bool(self.summary.executive_summary) + + +@dataclass(frozen=True) +class CitationVerificationResult: + """Result of citation verification. + + Identifies which citations are valid and which are invalid. + """ + + is_valid: bool + invalid_key_point_indices: tuple[int, ...] = field(default_factory=tuple) + invalid_action_item_indices: tuple[int, ...] = field(default_factory=tuple) + missing_segment_ids: tuple[int, ...] = field(default_factory=tuple) + + @property + def invalid_count(self) -> int: + """Total number of invalid citations.""" + return len(self.invalid_key_point_indices) + len(self.invalid_action_item_indices) + + +class SummarizerProvider(Protocol): + """Protocol for LLM summarization providers. + + Implementations must provide async summarization with evidence linking. + """ + + @property + def provider_name(self) -> str: + """Provider identifier (e.g., 'mock', 'ollama', 'openai').""" + ... + + @property + def is_available(self) -> bool: + """Check if provider is configured and available.""" + ... + + @property + def requires_cloud_consent(self) -> bool: + """Return True if data is sent to external services. + + Cloud providers must return True to ensure explicit user consent. + """ + ... + + async def summarize(self, request: SummarizationRequest) -> SummarizationResult: + """Generate evidence-linked summary from transcript segments. + + Args: + request: Summarization request with segments and constraints. + + Returns: + SummarizationResult with generated summary and metadata. + + Raises: + SummarizationError: If summarization fails. + """ + ... + + +class CitationVerifier(Protocol): + """Protocol for verifying evidence citations. + + Validates that segment_ids in summaries reference actual segments. + """ + + def verify_citations( + self, + summary: Summary, + segments: Sequence[Segment], + ) -> CitationVerificationResult: + """Verify all segment_ids exist in the transcript. + + Args: + summary: Summary with key points and action items to verify. + segments: Available transcript segments. + + Returns: + CitationVerificationResult with validation status and details. + """ + ... + + +class SummarizationError(Exception): + """Base exception for summarization errors.""" + + pass + + +class ProviderUnavailableError(SummarizationError): + """Provider is not available or not configured.""" + + pass + + +class SummarizationTimeoutError(SummarizationError): + """Summarization operation timed out.""" + + pass + + +class InvalidResponseError(SummarizationError): + """Provider returned an invalid or unparseable response.""" + + pass +```` + +## File: src/noteflow/domain/triggers/__init__.py +````python +"""Trigger domain package.""" + +from noteflow.domain.triggers.entities import ( + TriggerAction, + TriggerDecision, + TriggerSignal, + TriggerSource, +) +from noteflow.domain.triggers.ports import SignalProvider + +__all__ = [ + "SignalProvider", + "TriggerAction", + "TriggerDecision", + "TriggerSignal", + "TriggerSource", +] +```` + +## File: src/noteflow/domain/triggers/ports.py +````python +"""Trigger signal provider port protocol. + +Define the interface for signal providers that detect meeting conditions. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource + + +class SignalProvider(Protocol): + """Protocol for trigger signal providers. + + Signal providers detect specific conditions (audio activity, foreground app, etc.) + and return weighted signals used in trigger evaluation. + + Each provider: + - Has a specific source type + - Has a maximum weight contribution + - Can be enabled/disabled + - Returns a signal when conditions are met, None otherwise + """ + + @property + def source(self) -> TriggerSource: + """Get the source type for this provider.""" + ... + + @property + def max_weight(self) -> float: + """Get the maximum weight this provider can contribute.""" + ... + + def get_signal(self) -> TriggerSignal | None: + """Get current signal if conditions are met. + + Returns: + TriggerSignal if provider conditions are satisfied, None otherwise. + """ + ... + + def is_enabled(self) -> bool: + """Check if this provider is enabled. + + Returns: + True if provider is enabled and can produce signals. + """ + ... +```` + +## File: src/noteflow/domain/__init__.py +````python +"""NoteFlow domain layer.""" + +from .value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState + +__all__ = ["AnnotationId", "AnnotationType", "MeetingId", "MeetingState"] +```` + +## File: src/noteflow/grpc/proto/__init__.py +````python +"""Generated protobuf and gRPC code.""" +```` + +## File: src/noteflow/grpc/__init__.py +````python +"""NoteFlow gRPC server and client components.""" + +from noteflow.domain.value_objects import MeetingState + +from .client import ( + AnnotationInfo, + DiarizationResult, + ExportResult, + MeetingInfo, + NoteFlowClient, + RenameSpeakerResult, + ServerInfo, + TranscriptSegment, +) +from .meeting_store import MeetingStore +from .service import NoteFlowServicer + +__all__ = [ + "AnnotationInfo", + "DiarizationResult", + "ExportResult", + "MeetingInfo", + "MeetingState", + "MeetingStore", + "NoteFlowClient", + "NoteFlowServicer", + "RenameSpeakerResult", + "ServerInfo", + "TranscriptSegment", +] +```` + +## File: src/noteflow/infrastructure/asr/__init__.py +````python +"""ASR infrastructure module. + +Provides speech-to-text transcription using faster-whisper. +""" + +from noteflow.infrastructure.asr.dto import ( + AsrResult, + PartialUpdate, + VadEvent, + VadEventType, + WordTiming, +) +from noteflow.infrastructure.asr.engine import FasterWhisperEngine +from noteflow.infrastructure.asr.protocols import AsrEngine +from noteflow.infrastructure.asr.segmenter import ( + AudioSegment, + Segmenter, + SegmenterConfig, + SegmenterState, +) +from noteflow.infrastructure.asr.streaming_vad import ( + EnergyVad, + EnergyVadConfig, + StreamingVad, + VadEngine, +) + +__all__ = [ + "AsrEngine", + "AsrResult", + "AudioSegment", + "EnergyVad", + "EnergyVadConfig", + "FasterWhisperEngine", + "PartialUpdate", + "Segmenter", + "SegmenterConfig", + "SegmenterState", + "StreamingVad", + "VadEngine", + "VadEvent", + "VadEventType", + "WordTiming", +] +```` + +## File: src/noteflow/infrastructure/asr/dto.py +````python +"""Data Transfer Objects for ASR. + +These DTOs define the data structures used by ASR components. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum + + +@dataclass(frozen=True) +class WordTiming: + """Word-level timing information.""" + + word: str + start: float # Start time in seconds + end: float # End time in seconds + probability: float # Confidence (0.0-1.0) + + def __post_init__(self) -> None: + """Validate timing data.""" + if self.end < self.start: + raise ValueError(f"Word end ({self.end}) < start ({self.start})") + if not 0.0 <= self.probability <= 1.0: + raise ValueError(f"Probability must be 0.0-1.0, got {self.probability}") + + +@dataclass(frozen=True) +class AsrResult: + """ASR transcription result for a segment.""" + + text: str + start: float # Start time in seconds + end: float # End time in seconds + words: tuple[WordTiming, ...] = field(default_factory=tuple) + language: str = "en" + language_probability: float = 1.0 + avg_logprob: float = 0.0 + no_speech_prob: float = 0.0 + + def __post_init__(self) -> None: + """Validate result data.""" + if self.end < self.start: + raise ValueError(f"Segment end ({self.end}) < start ({self.start})") + + @property + def duration(self) -> float: + """Duration of the segment in seconds.""" + return self.end - self.start + + +@dataclass +class PartialUpdate: + """Unstable partial transcript (may be replaced).""" + + text: str + start: float + end: float + + def __post_init__(self) -> None: + """Validate partial data.""" + if self.end < self.start: + raise ValueError(f"Partial end ({self.end}) < start ({self.start})") + + +class VadEventType(Enum): + """Voice Activity Detection event types.""" + + SPEECH_START = "speech_start" + SPEECH_END = "speech_end" + + +@dataclass(frozen=True) +class VadEvent: + """Voice Activity Detection event. + + Represents a speech/silence transition detected by VAD. + """ + + event_type: VadEventType + timestamp: float # Seconds from stream start + confidence: float = 1.0 # Detection confidence (0.0-1.0) + + def __post_init__(self) -> None: + """Validate event data.""" + if self.timestamp < 0: + raise ValueError(f"Timestamp must be non-negative, got {self.timestamp}") + if not 0.0 <= self.confidence <= 1.0: + raise ValueError(f"Confidence must be 0.0-1.0, got {self.confidence}") +```` + +## File: src/noteflow/infrastructure/asr/engine.py +````python +"""ASR engine implementation using faster-whisper. + +Provides Whisper-based transcription with word-level timestamps. +""" + +from __future__ import annotations + +import asyncio +import logging +from collections.abc import Iterator +from functools import partial +from typing import TYPE_CHECKING, Final + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + +from noteflow.infrastructure.asr.dto import AsrResult, WordTiming + +logger = logging.getLogger(__name__) + +# Available model sizes +VALID_MODEL_SIZES: Final[tuple[str, ...]] = ( + "tiny", + "tiny.en", + "base", + "base.en", + "small", + "small.en", + "medium", + "medium.en", + "large-v1", + "large-v2", + "large-v3", +) + + +class FasterWhisperEngine: + """faster-whisper based ASR engine. + + Uses CTranslate2 for efficient Whisper inference on CPU or GPU. + """ + + def __init__( + self, + compute_type: str = "int8", + device: str = "cpu", + num_workers: int = 1, + ) -> None: + """Initialize the engine. + + Args: + compute_type: Computation type ("int8", "float16", "float32"). + device: Device to use ("cpu" or "cuda"). + num_workers: Number of worker threads. 
+ """ + self._compute_type = compute_type + self._device = device + self._num_workers = num_workers + self._model = None + self._model_size: str | None = None + + def load_model(self, model_size: str = "base") -> None: + """Load the ASR model. + + Args: + model_size: Model size (e.g., "tiny", "base", "small"). + + Raises: + ValueError: If model_size is invalid. + RuntimeError: If model loading fails. + """ + from faster_whisper import WhisperModel + + if model_size not in VALID_MODEL_SIZES: + raise ValueError( + f"Invalid model size: {model_size}. Valid sizes: {', '.join(VALID_MODEL_SIZES)}" + ) + + logger.info( + "Loading Whisper model '%s' on %s with %s compute...", + model_size, + self._device, + self._compute_type, + ) + + try: + self._model = WhisperModel( + model_size, + device=self._device, + compute_type=self._compute_type, + num_workers=self._num_workers, + ) + self._model_size = model_size + logger.info("Model loaded successfully") + except Exception as e: + raise RuntimeError(f"Failed to load model: {e}") from e + + def transcribe( + self, + audio: NDArray[np.float32], + language: str | None = None, + ) -> Iterator[AsrResult]: + """Transcribe audio and yield results. + + Args: + audio: Audio samples as float32 array (16kHz mono, normalized). + language: Optional language code (e.g., "en"). + + Yields: + AsrResult segments with word-level timestamps. + """ + if self._model is None: + raise RuntimeError("Model not loaded. Call load_model() first.") + + # Transcribe with word timestamps + segments, info = self._model.transcribe( + audio, + language=language, + word_timestamps=True, + beam_size=5, + vad_filter=True, # Filter out non-speech + ) + + logger.debug( + "Detected language: %s (prob: %.2f)", + info.language, + info.language_probability, + ) + + for segment in segments: + # Convert word info to WordTiming objects + words: list[WordTiming] = [] + if segment.words: + words = [ + WordTiming( + word=word.word, + start=word.start, + end=word.end, + probability=word.probability, + ) + for word in segment.words + ] + + yield AsrResult( + text=segment.text.strip(), + start=segment.start, + end=segment.end, + words=tuple(words), + language=info.language, + language_probability=info.language_probability, + avg_logprob=segment.avg_logprob, + no_speech_prob=segment.no_speech_prob, + ) + + async def transcribe_async( + self, + audio: NDArray[np.float32], + language: str | None = None, + ) -> list[AsrResult]: + """Transcribe audio asynchronously using executor. + + Offloads blocking transcription to a thread pool executor to avoid + blocking the asyncio event loop. + + Args: + audio: Audio samples as float32 array (16kHz mono, normalized). + language: Optional language code (e.g., "en"). + + Returns: + List of AsrResult segments with word-level timestamps. 
+ """ + loop = asyncio.get_running_loop() + return await loop.run_in_executor( + None, + partial(lambda a, lang: list(self.transcribe(a, lang)), audio, language), + ) + + @property + def is_loaded(self) -> bool: + """Return True if model is loaded.""" + return self._model is not None + + @property + def model_size(self) -> str | None: + """Return the loaded model size, or None if not loaded.""" + return self._model_size + + def unload(self) -> None: + """Unload the model to free memory.""" + self._model = None + self._model_size = None + logger.info("Model unloaded") + + @property + def compute_type(self) -> str: + """Return the compute type.""" + return self._compute_type + + @property + def device(self) -> str: + """Return the device.""" + return self._device +```` + +## File: src/noteflow/infrastructure/asr/protocols.py +````python +"""ASR protocols defining contracts for ASR components.""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + + from noteflow.infrastructure.asr.dto import AsrResult + + +class AsrEngine(Protocol): + """Protocol for ASR transcription engine. + + Implementations should handle model loading, caching, and inference. + """ + + def load_model(self, model_size: str = "base") -> None: + """Load the ASR model. + + Downloads the model if not cached. + + Args: + model_size: Model size ("tiny", "base", "small", "medium", "large"). + + Raises: + ValueError: If model_size is invalid. + RuntimeError: If model loading fails. + """ + ... + + def transcribe( + self, + audio: NDArray[np.float32], + language: str | None = None, + ) -> Iterator[AsrResult]: + """Transcribe audio and yield results. + + Args: + audio: Audio samples as float32 array (16kHz mono, normalized). + language: Optional language code (e.g., "en"). Auto-detected if None. + + Yields: + AsrResult segments. + + Raises: + RuntimeError: If model not loaded. + """ + ... + + @property + def is_loaded(self) -> bool: + """Return True if model is loaded.""" + ... + + @property + def model_size(self) -> str | None: + """Return the loaded model size, or None if not loaded.""" + ... + + def unload(self) -> None: + """Unload the model to free memory.""" + ... +```` + +## File: src/noteflow/infrastructure/audio/capture.py +````python +"""Audio capture implementation using sounddevice. + +Provide cross-platform audio input capture with device handling. +""" + +from __future__ import annotations + +import logging +import time +from typing import TYPE_CHECKING + +import numpy as np +import sounddevice as sd + +from noteflow.infrastructure.audio.dto import AudioDeviceInfo, AudioFrameCallback + +if TYPE_CHECKING: + from numpy.typing import NDArray + +logger = logging.getLogger(__name__) + + +class SoundDeviceCapture: + """sounddevice-based implementation of AudioCapture. + + Handle device enumeration, stream management, and device change detection. + Use PortAudio under the hood for cross-platform audio capture. + """ + + def __init__(self) -> None: + """Initialize the capture instance.""" + self._stream: sd.InputStream | None = None + self._callback: AudioFrameCallback | None = None + self._device_id: int | None = None + self._sample_rate: int = 16000 + self._channels: int = 1 + + def list_devices(self) -> list[AudioDeviceInfo]: + """List available audio input devices. + + Returns: + List of AudioDeviceInfo for all available input devices. 
+ """ + devices: list[AudioDeviceInfo] = [] + device_list = sd.query_devices() + + # Get default input device index + try: + default_input = sd.default.device[0] # Input device index + except (TypeError, IndexError): + default_input = -1 + + devices.extend( + AudioDeviceInfo( + device_id=idx, + name=dev["name"], + channels=int(dev["max_input_channels"]), + sample_rate=int(dev["default_samplerate"]), + is_default=(idx == default_input), + ) + for idx, dev in enumerate(device_list) + if int(dev["max_input_channels"]) > 0 + ) + return devices + + def get_default_device(self) -> AudioDeviceInfo | None: + """Get the default input device. + + Returns: + Default input device info, or None if no input devices available. + """ + devices = self.list_devices() + for dev in devices: + if dev.is_default: + return dev + return devices[0] if devices else None + + def start( + self, + device_id: int | None, + on_frames: AudioFrameCallback, + sample_rate: int = 16000, + channels: int = 1, + chunk_duration_ms: int = 100, + ) -> None: + """Start capturing audio from the specified device. + + Args: + device_id: Device ID to capture from, or None for default device. + on_frames: Callback receiving (frames, timestamp) for each chunk. + sample_rate: Sample rate in Hz (default 16kHz for ASR). + channels: Number of channels (default 1 for mono). + chunk_duration_ms: Duration of each audio chunk in milliseconds. + + Raises: + RuntimeError: If already capturing. + ValueError: If device_id is invalid. + """ + if self._stream is not None: + raise RuntimeError("Already capturing audio") + + self._callback = on_frames + self._device_id = device_id + self._sample_rate = sample_rate + self._channels = channels + + # Calculate block size from chunk duration + blocksize = int(sample_rate * chunk_duration_ms / 1000) + + def _stream_callback( + indata: NDArray[np.float32], + frames: int, + time_info: object, # cffi CData from sounddevice, unused + status: sd.CallbackFlags, + ) -> None: + """Internal sounddevice callback.""" + # Suppress unused parameter warnings + _ = frames, time_info + + if status: + logger.warning("Audio stream status: %s", status) + + if self._callback is not None: + # Copy the data and flatten to 1D array + audio_data = indata.copy().flatten().astype(np.float32) + timestamp = time.monotonic() + self._callback(audio_data, timestamp) + + try: + self._stream = sd.InputStream( + device=device_id, + channels=channels, + samplerate=sample_rate, + blocksize=blocksize, + dtype=np.float32, + callback=_stream_callback, + ) + self._stream.start() + logger.info( + "Started audio capture: device=%s, rate=%d, channels=%d, blocksize=%d", + device_id, + sample_rate, + channels, + blocksize, + ) + except sd.PortAudioError as e: + self._stream = None + self._callback = None + raise RuntimeError(f"Failed to start audio capture: {e}") from e + + def stop(self) -> None: + """Stop audio capture. + + Safe to call even if not capturing. + """ + if self._stream is not None: + try: + self._stream.stop() + self._stream.close() + except sd.PortAudioError as e: + logger.warning("Error stopping audio stream: %s", e) + finally: + self._stream = None + self._callback = None + logger.info("Stopped audio capture") + + def is_capturing(self) -> bool: + """Check if currently capturing audio. + + Returns: + True if capture is active. 
+ """ + return self._stream is not None and self._stream.active + + @property + def current_device_id(self) -> int | None: + """Get the current device ID being used for capture.""" + return self._device_id + + @property + def sample_rate(self) -> int: + """Get the current sample rate.""" + return self._sample_rate + + @property + def channels(self) -> int: + """Get the current number of channels.""" + return self._channels +```` + +## File: src/noteflow/infrastructure/audio/dto.py +````python +"""Data Transfer Objects for audio capture. + +Define data structures used by audio capture components. +""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass + +import numpy as np +from numpy.typing import NDArray + + +@dataclass(frozen=True) +class AudioDeviceInfo: + """Information about an audio input device.""" + + device_id: int + name: str + channels: int + sample_rate: int + is_default: bool + + +@dataclass +class TimestampedAudio: + """Audio frames with capture timestamp.""" + + frames: NDArray[np.float32] + timestamp: float # Monotonic time when captured + duration: float # Duration in seconds + + def __post_init__(self) -> None: + """Validate audio data.""" + if self.duration < 0: + raise ValueError("Duration must be non-negative") + if self.timestamp < 0: + raise ValueError("Timestamp must be non-negative") + + +# Type alias for audio frame callback +AudioFrameCallback = Callable[[NDArray[np.float32], float], None] +```` + +## File: src/noteflow/infrastructure/audio/playback.py +````python +"""Audio playback implementation using sounddevice. + +Provide cross-platform audio output playback from ring buffer audio. +""" + +from __future__ import annotations + +import logging +import threading +from enum import Enum, auto +from typing import TYPE_CHECKING + +import numpy as np +import sounddevice as sd +from numpy.typing import NDArray + +if TYPE_CHECKING: + from noteflow.infrastructure.audio.dto import TimestampedAudio + +logger = logging.getLogger(__name__) + + +class PlaybackState(Enum): + """Playback state machine states.""" + + STOPPED = auto() + PLAYING = auto() + PAUSED = auto() + + +class SoundDevicePlayback: + """sounddevice-based implementation of AudioPlayback. + + Handle audio output playback with position tracking and state management. + Thread-safe for UI callbacks. + """ + + def __init__(self, sample_rate: int = 16000, channels: int = 1) -> None: + """Initialize the playback instance. + + Args: + sample_rate: Sample rate in Hz (default 16kHz for ASR audio). + channels: Number of channels (default 1 for mono). + """ + self._sample_rate = sample_rate + self._channels = channels + + # Playback state + self._state = PlaybackState.STOPPED + self._lock = threading.Lock() + + # Audio data + self._audio_data: NDArray[np.float32] | None = None + self._total_samples: int = 0 + self._current_sample: int = 0 + + # Stream + self._stream: sd.OutputStream | None = None + + def play(self, audio: list[TimestampedAudio]) -> None: + """Start playback of audio chunks. + + Args: + audio: List of TimestampedAudio chunks to play, ordered oldest to newest. 
+ """ + if not audio: + logger.warning("No audio chunks to play") + return + + with self._lock: + # Stop any existing playback + self._stop_internal() + + # Concatenate all audio frames + frames = [chunk.frames for chunk in audio] + self._audio_data = np.concatenate(frames).astype(np.float32) + self._total_samples = len(self._audio_data) + self._current_sample = 0 + + # Create and start stream + self._start_stream() + self._state = PlaybackState.PLAYING + + logger.info( + "Started playback: %d samples (%.2f seconds)", + self._total_samples, + self.total_duration, + ) + + def pause(self) -> None: + """Pause playback. + + Safe to call even if not playing. + """ + with self._lock: + if self._state == PlaybackState.PLAYING and self._stream is not None: + self._stream.stop() + self._state = PlaybackState.PAUSED + logger.debug("Paused playback at %.2f seconds", self.current_position) + + def resume(self) -> None: + """Resume paused playback. + + No-op if not paused. + """ + with self._lock: + if self._state == PlaybackState.PAUSED and self._stream is not None: + self._stream.start() + self._state = PlaybackState.PLAYING + logger.debug("Resumed playback from %.2f seconds", self.current_position) + + def stop(self) -> None: + """Stop playback and reset position. + + Safe to call even if not playing. + """ + with self._lock: + self._stop_internal() + + def _stop_internal(self) -> None: + """Internal stop without lock (caller must hold lock).""" + if self._stream is not None: + try: + self._stream.stop() + self._stream.close() + except sd.PortAudioError as e: + logger.warning("Error stopping playback stream: %s", e) + finally: + self._stream = None + + self._state = PlaybackState.STOPPED + self._current_sample = 0 + self._audio_data = None + self._total_samples = 0 + logger.debug("Stopped playback") + + def _start_stream(self) -> None: + """Start the output stream (caller must hold lock).""" + + def _stream_callback( + outdata: NDArray[np.float32], + frames: int, + time_info: object, + status: sd.CallbackFlags, + ) -> None: + """Internal sounddevice output callback.""" + _ = time_info # Unused + + if status: + logger.warning("Playback stream status: %s", status) + + with self._lock: + if self._audio_data is None or self._state != PlaybackState.PLAYING: + # Output silence + outdata.fill(0) + return + + # Calculate how many samples we can provide + available = self._total_samples - self._current_sample + to_copy = min(frames, available) + + if to_copy > 0: + # Copy audio data to output buffer + outdata[:to_copy, 0] = self._audio_data[ + self._current_sample : self._current_sample + to_copy + ] + self._current_sample += to_copy + + # Fill remaining with silence + if to_copy < frames: + outdata[to_copy:] = 0 + + # Check if playback is complete + if self._current_sample >= self._total_samples: + # Schedule stop on another thread to avoid deadlock + threading.Thread(target=self._on_playback_complete, daemon=True).start() + + try: + self._stream = sd.OutputStream( + channels=self._channels, + samplerate=self._sample_rate, + dtype=np.float32, + callback=_stream_callback, + ) + self._stream.start() + except sd.PortAudioError as e: + self._stream = None + raise RuntimeError(f"Failed to start playback stream: {e}") from e + + def _on_playback_complete(self) -> None: + """Handle playback completion.""" + logger.info("Playback completed") + self.stop() + + def seek(self, position: float) -> bool: + """Seek to a specific position in the audio. + + Thread-safe. Can be called from any thread. 
+ + Args: + position: Position in seconds from start of audio. + + Returns: + True if seek was successful, False if no audio loaded or position out of bounds. + """ + with self._lock: + if self._audio_data is None: + logger.warning("Cannot seek: no audio loaded") + return False + + # Clamp position to valid range + max_position = self._total_samples / self._sample_rate + clamped_position = max(0.0, min(position, max_position)) + + # Convert to sample position + self._current_sample = int(clamped_position * self._sample_rate) + + logger.debug( + "Seeked to %.2f seconds (sample %d)", + clamped_position, + self._current_sample, + ) + return True + + def is_playing(self) -> bool: + """Check if currently playing audio. + + Returns: + True if playback is active (not paused or stopped). + """ + with self._lock: + return self._state == PlaybackState.PLAYING + + @property + def current_position(self) -> float: + """Current playback position in seconds from start of loaded audio.""" + with self._lock: + return self._current_sample / self._sample_rate + + @property + def total_duration(self) -> float: + """Total duration of loaded audio in seconds.""" + with self._lock: + return self._total_samples / self._sample_rate + + @property + def state(self) -> PlaybackState: + """Current playback state.""" + with self._lock: + return self._state + + @property + def sample_rate(self) -> int: + """Sample rate in Hz.""" + return self._sample_rate + + @property + def channels(self) -> int: + """Number of channels.""" + return self._channels +```` + +## File: src/noteflow/infrastructure/audio/protocols.py +````python +"""Audio protocols defining contracts for audio components. + +Define Protocol interfaces for audio capture, level metering, and buffering. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + + from noteflow.infrastructure.audio.dto import ( + AudioDeviceInfo, + AudioFrameCallback, + TimestampedAudio, + ) + + +class AudioCapture(Protocol): + """Protocol for audio input capture. + + Implementations should handle device enumeration, stream management, + and device change detection. + """ + + def list_devices(self) -> list[AudioDeviceInfo]: + """List available audio input devices. + + Returns: + List of AudioDeviceInfo for all available input devices. + """ + ... + + def start( + self, + device_id: int | None, + on_frames: AudioFrameCallback, + sample_rate: int = 16000, + channels: int = 1, + chunk_duration_ms: int = 100, + ) -> None: + """Start capturing audio from the specified device. + + Args: + device_id: Device ID to capture from, or None for default device. + on_frames: Callback receiving (frames, timestamp) for each chunk. + sample_rate: Sample rate in Hz (default 16kHz for ASR). + channels: Number of channels (default 1 for mono). + chunk_duration_ms: Duration of each audio chunk in milliseconds. + + Raises: + RuntimeError: If already capturing. + ValueError: If device_id is invalid. + """ + ... + + def stop(self) -> None: + """Stop audio capture. + + Safe to call even if not capturing. + """ + ... + + def is_capturing(self) -> bool: + """Check if currently capturing audio. + + Returns: + True if capture is active. + """ + ... + + +class AudioLevelProvider(Protocol): + """Protocol for computing audio levels (VU meter data).""" + + def get_rms(self, frames: NDArray[np.float32]) -> float: + """Calculate RMS level from audio frames. 
+ + Args: + frames: Audio samples as float32 array (normalized -1.0 to 1.0). + + Returns: + RMS level normalized to 0.0-1.0 range. + """ + ... + + def get_db(self, frames: NDArray[np.float32]) -> float: + """Calculate dB level from audio frames. + + Args: + frames: Audio samples as float32 array (normalized -1.0 to 1.0). + + Returns: + Level in dB (typically -60 to 0 range). + """ + ... + + +class RingBuffer(Protocol): + """Protocol for timestamped audio ring buffer. + + Ring buffers store recent audio with timestamps for ASR processing + and playback sync. + """ + + def push(self, audio: TimestampedAudio) -> None: + """Add audio to the buffer. + + Old audio is discarded if buffer exceeds max_duration. + + Args: + audio: Timestamped audio chunk to add. + """ + ... + + def get_window(self, duration_seconds: float) -> list[TimestampedAudio]: + """Get the last N seconds of audio. + + Args: + duration_seconds: How many seconds of audio to retrieve. + + Returns: + List of TimestampedAudio chunks, ordered oldest to newest. + """ + ... + + def clear(self) -> None: + """Clear all audio from the buffer.""" + ... + + @property + def duration(self) -> float: + """Total duration of buffered audio in seconds.""" + ... + + @property + def max_duration(self) -> float: + """Maximum buffer duration in seconds.""" + ... + + +class AudioPlayback(Protocol): + """Protocol for audio output playback. + + Implementations should handle output device management, playback state, + and position tracking for sync with UI. + """ + + def play(self, audio: list[TimestampedAudio]) -> None: + """Start playback of audio chunks. + + Args: + audio: List of TimestampedAudio chunks to play, ordered oldest to newest. + """ + ... + + def pause(self) -> None: + """Pause playback. + + Safe to call even if not playing. + """ + ... + + def resume(self) -> None: + """Resume paused playback. + + No-op if not paused. + """ + ... + + def stop(self) -> None: + """Stop playback and reset position. + + Safe to call even if not playing. + """ + ... + + def is_playing(self) -> bool: + """Check if currently playing audio. + + Returns: + True if playback is active (not paused or stopped). + """ + ... + + @property + def current_position(self) -> float: + """Current playback position in seconds from start of loaded audio.""" + ... + + @property + def total_duration(self) -> float: + """Total duration of loaded audio in seconds.""" + ... +```` + +## File: src/noteflow/infrastructure/audio/ring_buffer.py +````python +"""Timestamped audio ring buffer implementation. + +Store recent audio with timestamps for ASR processing and playback sync. +""" + +from __future__ import annotations + +from collections import deque +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from noteflow.infrastructure.audio.dto import TimestampedAudio + + +class TimestampedRingBuffer: + """Ring buffer for timestamped audio chunks. + + Automatically discard old audio when the buffer exceeds max_duration. + Thread-safe for single-producer, single-consumer use. + """ + + def __init__(self, max_duration: float = 30.0) -> None: + """Initialize ring buffer. + + Args: + max_duration: Maximum audio duration to keep in seconds. + + Raises: + ValueError: If max_duration is not positive. + """ + if max_duration <= 0: + raise ValueError("max_duration must be positive") + + self._max_duration = max_duration + self._buffer: deque[TimestampedAudio] = deque() + self._total_duration: float = 0.0 + + def push(self, audio: TimestampedAudio) -> None: + """Add audio to the buffer. 
+
+        Old audio is discarded if buffer exceeds max_duration.
+
+        Args:
+            audio: Timestamped audio chunk to add.
+        """
+        self._buffer.append(audio)
+        self._total_duration += audio.duration
+
+        # Evict old chunks if over capacity
+        while self._total_duration > self._max_duration and self._buffer:
+            old = self._buffer.popleft()
+            self._total_duration -= old.duration
+
+    def get_window(self, duration_seconds: float) -> list[TimestampedAudio]:
+        """Get the last N seconds of audio.
+
+        Args:
+            duration_seconds: How many seconds of audio to retrieve.
+
+        Returns:
+            List of TimestampedAudio chunks, ordered oldest to newest.
+        """
+        if duration_seconds <= 0:
+            return []
+
+        result: list[TimestampedAudio] = []
+        accumulated_duration = 0.0
+
+        # Iterate from newest to oldest
+        for audio in reversed(self._buffer):
+            result.append(audio)
+            accumulated_duration += audio.duration
+            if accumulated_duration >= duration_seconds:
+                break
+
+        # Return in chronological order (oldest first)
+        result.reverse()
+        return result
+
+    def get_all(self) -> list[TimestampedAudio]:
+        """Get all buffered audio.
+
+        Returns:
+            List of all TimestampedAudio chunks, ordered oldest to newest.
+        """
+        return list(self._buffer)
+
+    def clear(self) -> None:
+        """Clear all audio from the buffer."""
+        self._buffer.clear()
+        self._total_duration = 0.0
+
+    @property
+    def duration(self) -> float:
+        """Total duration of buffered audio in seconds."""
+        return self._total_duration
+
+    @property
+    def max_duration(self) -> float:
+        """Maximum buffer duration in seconds."""
+        return self._max_duration
+
+    @property
+    def chunk_count(self) -> int:
+        """Number of audio chunks in the buffer."""
+        return len(self._buffer)
+
+    def __len__(self) -> int:
+        """Return number of chunks in buffer."""
+        return len(self._buffer)
+````
+
+## File: src/noteflow/infrastructure/audio/writer.py
+````python
+"""Streaming encrypted audio file writer for meetings."""
+
+from __future__ import annotations
+
+import json
+import logging
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from noteflow.infrastructure.security.crypto import ChunkedAssetWriter
+
+if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
+    from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
+
+logger = logging.getLogger(__name__)
+
+
+class MeetingAudioWriter:
+    """Write audio chunks to an encrypted meeting file.
+
+    Manage meeting directory creation, manifest file, and encrypted audio storage.
+    Uses ChunkedAssetWriter for the actual encryption.
+
+    Directory structure:
+        ~/.noteflow/meetings/<meeting_id>/
+        ├── manifest.json      # Meeting metadata + wrapped DEK
+        └── audio.enc          # Encrypted PCM16 chunks (NFAE format)
+    """
+
+    def __init__(
+        self,
+        crypto: AesGcmCryptoBox,
+        meetings_dir: Path,
+    ) -> None:
+        """Initialize audio writer.
+
+        Args:
+            crypto: CryptoBox instance for encryption operations.
+            meetings_dir: Root directory for all meetings (e.g., ~/.noteflow/meetings).
+        """
+        self._crypto = crypto
+        self._meetings_dir = meetings_dir
+        self._asset_writer: ChunkedAssetWriter | None = None
+        self._meeting_dir: Path | None = None
+        self._sample_rate: int = 16000
+        self._chunk_count: int = 0
+
+    def open(
+        self,
+        meeting_id: str,
+        dek: bytes,
+        wrapped_dek: bytes,
+        sample_rate: int = 16000,
+    ) -> None:
+        """Open meeting for audio writing.
+
+        Create meeting directory, write manifest, open encrypted audio file.
+
+        Args:
+            meeting_id: Meeting UUID string.
+            dek: Unwrapped data encryption key (32 bytes).
+ wrapped_dek: Encrypted DEK to store in manifest. + sample_rate: Audio sample rate (default 16000 Hz). + + Raises: + RuntimeError: If already open. + OSError: If directory creation fails. + """ + if self._asset_writer is not None: + raise RuntimeError("Writer already open") + + # Create meeting directory + self._meeting_dir = self._meetings_dir / meeting_id + self._meeting_dir.mkdir(parents=True, exist_ok=True) + + # Write manifest.json + manifest = { + "meeting_id": meeting_id, + "created_at": datetime.now(UTC).isoformat(), + "sample_rate": sample_rate, + "channels": 1, + "format": "pcm16", + "wrapped_dek": wrapped_dek.hex(), # Store as hex string + } + manifest_path = self._meeting_dir / "manifest.json" + manifest_path.write_text(json.dumps(manifest, indent=2)) + + # Open encrypted audio file + audio_path = self._meeting_dir / "audio.enc" + self._asset_writer = ChunkedAssetWriter(self._crypto) + self._asset_writer.open(audio_path, dek) + + self._sample_rate = sample_rate + self._chunk_count = 0 + + logger.info( + "Opened audio writer: meeting=%s, dir=%s", + meeting_id, + self._meeting_dir, + ) + + def write_chunk(self, audio: NDArray[np.float32]) -> None: + """Write audio chunk (convert float32 → PCM16). + + Args: + audio: Audio samples as float32 array (-1.0 to 1.0). + + Raises: + RuntimeError: If not open. + """ + if self._asset_writer is None or not self._asset_writer.is_open: + raise RuntimeError("Writer not open") + + # Convert float32 [-1.0, 1.0] to int16 [-32768, 32767] + # Clamp to prevent overflow on conversion + audio_clamped = np.clip(audio, -1.0, 1.0) + pcm16 = (audio_clamped * 32767.0).astype(np.int16) + + # Write as raw bytes (platform-native endianness, typically little-endian) + self._asset_writer.write_chunk(pcm16.tobytes()) + self._chunk_count += 1 + + def close(self) -> None: + """Close audio writer and finalize files. + + Safe to call if already closed or never opened. 
+ """ + if self._asset_writer is not None: + bytes_written = self._asset_writer.bytes_written + self._asset_writer.close() + self._asset_writer = None + + logger.info( + "Closed audio writer: dir=%s, chunks=%d, bytes=%d", + self._meeting_dir, + self._chunk_count, + bytes_written, + ) + + self._meeting_dir = None + self._chunk_count = 0 + + @property + def is_open(self) -> bool: + """Check if writer is currently open for writing.""" + return self._asset_writer is not None and self._asset_writer.is_open + + @property + def bytes_written(self) -> int: + """Total encrypted bytes written to audio.enc file.""" + return 0 if self._asset_writer is None else self._asset_writer.bytes_written + + @property + def chunk_count(self) -> int: + """Number of audio chunks written.""" + return self._chunk_count + + @property + def meeting_dir(self) -> Path | None: + """Current meeting directory, or None if not open.""" + return self._meeting_dir +```` + +## File: src/noteflow/infrastructure/converters/__init__.py +````python +"""Infrastructure converters for data transformation between layers.""" + +from noteflow.infrastructure.converters.asr_converters import AsrConverter +from noteflow.infrastructure.converters.orm_converters import OrmConverter + +__all__ = [ + "AsrConverter", + "OrmConverter", +] +```` + +## File: src/noteflow/infrastructure/converters/asr_converters.py +````python +"""Convert ASR DTOs to domain entities.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from noteflow.domain.entities import WordTiming + +if TYPE_CHECKING: + from noteflow.infrastructure.asr import dto + from noteflow.infrastructure.asr.dto import AsrResult + + +class AsrConverter: + """Convert ASR DTOs to domain entities.""" + + @staticmethod + def word_timing_to_domain(asr_word: dto.WordTiming) -> WordTiming: + """Convert ASR WordTiming DTO to domain WordTiming entity. + + Map field names from ASR convention (start/end) to domain + convention (start_time/end_time). + + Args: + asr_word: ASR WordTiming DTO from faster-whisper engine. + + Returns: + Domain WordTiming entity with validated timing. + + Raises: + ValueError: If timing validation fails. + """ + return WordTiming( + word=asr_word.word, + start_time=asr_word.start, + end_time=asr_word.end, + probability=asr_word.probability, + ) + + @staticmethod + def result_to_domain_words(result: AsrResult) -> list[WordTiming]: + """Convert all words from ASR result to domain entities. + + Args: + result: ASR transcription result with word timings. + + Returns: + List of domain WordTiming entities. + """ + return [AsrConverter.word_timing_to_domain(word) for word in result.words] +```` + +## File: src/noteflow/infrastructure/diarization/__init__.py +````python +"""Speaker diarization infrastructure module. + +Provides speaker diarization using pyannote.audio (offline) and diart (streaming). +""" + +from noteflow.infrastructure.diarization.assigner import ( + assign_speaker, + assign_speakers_batch, +) +from noteflow.infrastructure.diarization.dto import SpeakerTurn +from noteflow.infrastructure.diarization.engine import DiarizationEngine + +__all__ = [ + "DiarizationEngine", + "SpeakerTurn", + "assign_speaker", + "assign_speakers_batch", +] +```` + +## File: src/noteflow/infrastructure/diarization/assigner.py +````python +"""Speaker assignment utilities for mapping diarization to segments. + +Provides functions to assign speaker labels to transcript segments based on +diarization output using timestamp overlap matching. 
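+
+Example (hypothetical values; imports omitted):
+
+    turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=4.0)]
+    speaker, confidence = assign_speaker(1.0, 3.0, turns)
+    # -> ("SPEAKER_00", 1.0): the turn covers the entire 2-second segment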
+""" + +from __future__ import annotations + +from collections.abc import Sequence + +from noteflow.infrastructure.diarization.dto import SpeakerTurn + + +def assign_speaker( + start_time: float, + end_time: float, + turns: Sequence[SpeakerTurn], +) -> tuple[str | None, float]: + """Assign a speaker to a time range based on diarization turns. + + Uses maximum overlap duration to determine the dominant speaker + for the given time range. + + Args: + start_time: Segment start time in seconds. + end_time: Segment end time in seconds. + turns: Sequence of speaker turns from diarization. + + Returns: + Tuple of (speaker_id, confidence) where speaker_id is None if + no overlapping turns found. Confidence is the ratio of overlap + duration to segment duration. + """ + if not turns: + return None, 0.0 + + segment_duration = end_time - start_time + if segment_duration <= 0: + return None, 0.0 + + best_speaker: str | None = None + best_overlap: float = 0.0 + + for turn in turns: + overlap = turn.overlap_duration(start_time, end_time) + if overlap > best_overlap: + best_overlap = overlap + best_speaker = turn.speaker + + if best_speaker is None: + return None, 0.0 + + confidence = best_overlap / segment_duration + return best_speaker, confidence + + +def assign_speakers_batch( + segments: Sequence[tuple[float, float]], + turns: Sequence[SpeakerTurn], +) -> list[tuple[str | None, float]]: + """Assign speakers to multiple segments in batch. + + Args: + segments: Sequence of (start_time, end_time) tuples. + turns: Sequence of speaker turns from diarization. + + Returns: + List of (speaker_id, confidence) tuples, one per segment. + """ + return [assign_speaker(start, end, turns) for start, end in segments] +```` + +## File: src/noteflow/infrastructure/diarization/dto.py +````python +"""Data Transfer Objects for speaker diarization. + +These DTOs define the data structures used by diarization components. +""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class SpeakerTurn: + """Speaker turn from diarization output. + + Represents a time segment where a specific speaker is talking. + """ + + speaker: str # Speaker label (e.g., "SPEAKER_00") + start: float # Start time in seconds + end: float # End time in seconds + confidence: float = 1.0 # Confidence score (0.0-1.0) + + def __post_init__(self) -> None: + """Validate turn data.""" + if self.end < self.start: + raise ValueError(f"Turn end ({self.end}) < start ({self.start})") + if not 0.0 <= self.confidence <= 1.0: + raise ValueError(f"Confidence must be 0.0-1.0, got {self.confidence}") + + @property + def duration(self) -> float: + """Duration of the turn in seconds.""" + return self.end - self.start + + def overlaps(self, start: float, end: float) -> bool: + """Check if this turn overlaps with a time range. + + Args: + start: Range start time in seconds. + end: Range end time in seconds. + + Returns: + True if there is any overlap. + """ + return self.start < end and self.end > start + + def overlap_duration(self, start: float, end: float) -> float: + """Calculate overlap duration with a time range. + + Args: + start: Range start time in seconds. + end: Range end time in seconds. + + Returns: + Overlap duration in seconds (0.0 if no overlap). 
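+
+        Example:
+            A turn spanning 2.0-6.0s overlaps the query range 4.0-10.0s by
+            min(6.0, 10.0) - max(2.0, 4.0) = 2.0 seconds.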
+ """ + overlap_start = max(self.start, start) + overlap_end = min(self.end, end) + return max(0.0, overlap_end - overlap_start) +```` + +## File: src/noteflow/infrastructure/diarization/engine.py +````python +"""Diarization engine implementation using pyannote.audio and diart. + +Provides speaker diarization for both streaming (real-time) and +offline (post-meeting) processing. + +Requires optional dependencies: pip install noteflow[diarization] +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from noteflow.infrastructure.diarization.dto import SpeakerTurn + +if TYPE_CHECKING: + from collections.abc import Sequence + + import numpy as np + from numpy.typing import NDArray + from pyannote.core import Annotation + +logger = logging.getLogger(__name__) + + +class DiarizationEngine: + """Speaker diarization engine using pyannote.audio and diart. + + Supports both streaming (real-time via diart) and offline + (post-meeting via pyannote.audio) diarization modes. + """ + + def __init__( + self, + device: str = "auto", + hf_token: str | None = None, + streaming_latency: float = 0.5, + min_speakers: int = 1, + max_speakers: int = 10, + ) -> None: + """Initialize the diarization engine. + + Args: + device: Device to use ("auto", "cpu", "cuda", "mps"). + "auto" selects CUDA > MPS > CPU based on availability. + hf_token: HuggingFace token for pyannote model access. + streaming_latency: Latency for streaming diarization in seconds. + min_speakers: Minimum expected speakers for offline diarization. + max_speakers: Maximum expected speakers for offline diarization. + """ + self._device_preference = device + self._device: str | None = None + self._hf_token = hf_token + self._streaming_latency = streaming_latency + self._min_speakers = min_speakers + self._max_speakers = max_speakers + + # Lazy-loaded models + self._streaming_pipeline = None + self._offline_pipeline = None + + def _resolve_device(self) -> str: + """Resolve the actual device to use based on availability. + + Returns: + Device string ("cuda", "mps", or "cpu"). + """ + if self._device is not None: + return self._device + + import torch + + if self._device_preference == "auto": + if torch.cuda.is_available(): + self._device = "cuda" + elif torch.backends.mps.is_available(): + self._device = "mps" + else: + self._device = "cpu" + else: + self._device = self._device_preference + + logger.info("Diarization device resolved to: %s", self._device) + return self._device + + def load_streaming_model(self) -> None: + """Load the streaming diarization model (diart). + + Raises: + RuntimeError: If model loading fails. + ValueError: If HuggingFace token is not provided. 
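+
+        Example (the token value is a placeholder):
+            engine = DiarizationEngine(hf_token="hf_xxx")
+            engine.load_streaming_model()  # no-op if already loaded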
+ """ + if self._streaming_pipeline is not None: + logger.debug("Streaming model already loaded") + return + + if not self._hf_token: + raise ValueError("HuggingFace token required for pyannote models") + + device = self._resolve_device() + + logger.info( + "Loading streaming diarization model on %s with latency %.2fs...", + device, + self._streaming_latency, + ) + + try: + from diart import SpeakerDiarization, SpeakerDiarizationConfig + from diart.models import EmbeddingModel, SegmentationModel + + segmentation = SegmentationModel.from_pretrained( + "pyannote/segmentation-3.0", + use_hf_token=self._hf_token, + ) + embedding = EmbeddingModel.from_pretrained( + "pyannote/wespeaker-voxceleb-resnet34-LM", + use_hf_token=self._hf_token, + ) + + config = SpeakerDiarizationConfig( + segmentation=segmentation, + embedding=embedding, + step=self._streaming_latency, + latency=self._streaming_latency, + device=device, + ) + + self._streaming_pipeline = SpeakerDiarization(config) + logger.info("Streaming diarization model loaded successfully") + + except Exception as e: + raise RuntimeError(f"Failed to load streaming diarization model: {e}") from e + + def load_offline_model(self) -> None: + """Load the offline diarization model (pyannote.audio). + + Raises: + RuntimeError: If model loading fails. + ValueError: If HuggingFace token is not provided. + """ + if self._offline_pipeline is not None: + logger.debug("Offline model already loaded") + return + + if not self._hf_token: + raise ValueError("HuggingFace token required for pyannote models") + + device = self._resolve_device() + + logger.info("Loading offline diarization model on %s...", device) + + try: + import torch + from pyannote.audio import Pipeline + + self._offline_pipeline = Pipeline.from_pretrained( + "pyannote/speaker-diarization-3.1", + use_auth_token=self._hf_token, + ) + + torch_device = torch.device(device) + self._offline_pipeline.to(torch_device) + + logger.info("Offline diarization model loaded successfully") + + except Exception as e: + raise RuntimeError(f"Failed to load offline diarization model: {e}") from e + + def process_chunk( + self, + audio: NDArray[np.float32], + sample_rate: int = 16000, + ) -> Sequence[SpeakerTurn]: + """Process an audio chunk for streaming diarization. + + Args: + audio: Audio samples as float32 array (mono). + sample_rate: Audio sample rate in Hz. + + Returns: + Sequence of speaker turns detected in this chunk. + + Raises: + RuntimeError: If streaming model not loaded. + """ + if self._streaming_pipeline is None: + raise RuntimeError("Streaming model not loaded. Call load_streaming_model() first.") + + from pyannote.core import SlidingWindowFeature + + # Reshape audio for diart: (samples,) -> (1, samples) + if audio.ndim == 1: + audio = audio.reshape(1, -1) + + # Create SlidingWindowFeature for diart + from pyannote.core import SlidingWindow + + duration = audio.shape[1] / sample_rate + window = SlidingWindow(start=0.0, duration=duration, step=duration) + waveform = SlidingWindowFeature(audio, window) + + # Process through pipeline + results = self._streaming_pipeline([waveform]) + + turns: list[SpeakerTurn] = [] + for annotation, _ in results: + turns.extend(self._annotation_to_turns(annotation)) + + return turns + + def diarize_full( + self, + audio: NDArray[np.float32], + sample_rate: int = 16000, + num_speakers: int | None = None, + ) -> Sequence[SpeakerTurn]: + """Diarize a complete audio recording. + + Args: + audio: Audio samples as float32 array (mono). 
+ sample_rate: Audio sample rate in Hz. + num_speakers: Known number of speakers (None for auto-detect). + + Returns: + Sequence of speaker turns for the full recording. + + Raises: + RuntimeError: If offline model not loaded. + """ + if self._offline_pipeline is None: + raise RuntimeError("Offline model not loaded. Call load_offline_model() first.") + + import torch + + # Prepare audio tensor: (samples,) -> (channels, samples) + if audio.ndim == 1: + audio_tensor = torch.from_numpy(audio).unsqueeze(0) + else: + audio_tensor = torch.from_numpy(audio) + + # Create waveform dict for pyannote + waveform = {"waveform": audio_tensor, "sample_rate": sample_rate} + + logger.debug( + "Running offline diarization on %.2fs audio", + audio_tensor.shape[1] / sample_rate, + ) + + # Run diarization with speaker hints + if num_speakers is not None: + annotation = self._offline_pipeline(waveform, num_speakers=num_speakers) + else: + annotation = self._offline_pipeline( + waveform, + min_speakers=self._min_speakers, + max_speakers=self._max_speakers, + ) + + return self._annotation_to_turns(annotation) + + def _annotation_to_turns(self, annotation: Annotation) -> list[SpeakerTurn]: + """Convert pyannote Annotation to SpeakerTurn list. + + Args: + annotation: Pyannote diarization annotation. + + Returns: + List of SpeakerTurn objects. + """ + turns: list[SpeakerTurn] = [] + + # itertracks(yield_label=True) returns 3-tuples: (segment, track, label) + for track in annotation.itertracks(yield_label=True): + # Unpack with len check for type safety with pyannote's union return + if len(track) == 3: + segment, _, speaker = track + turns.append( + SpeakerTurn( + speaker=str(speaker), + start=segment.start, + end=segment.end, + ) + ) + + return turns + + def reset_streaming(self) -> None: + """Reset streaming pipeline state for a new recording.""" + if self._streaming_pipeline is not None: + self._streaming_pipeline.reset() + logger.debug("Streaming pipeline state reset") + + def unload(self) -> None: + """Unload all models to free memory.""" + self._streaming_pipeline = None + self._offline_pipeline = None + self._device = None + logger.info("Diarization models unloaded") + + @property + def is_streaming_loaded(self) -> bool: + """Return True if streaming model is loaded.""" + return self._streaming_pipeline is not None + + @property + def is_offline_loaded(self) -> bool: + """Return True if offline model is loaded.""" + return self._offline_pipeline is not None + + @property + def device(self) -> str | None: + """Return the resolved device, or None if not yet resolved.""" + return self._device +```` + +## File: src/noteflow/infrastructure/export/__init__.py +````python +"""Export infrastructure module. + +Provide transcript export functionality to various file formats. +""" + +from noteflow.infrastructure.export.html import HtmlExporter +from noteflow.infrastructure.export.markdown import MarkdownExporter +from noteflow.infrastructure.export.protocols import TranscriptExporter + +__all__ = [ + "HtmlExporter", + "MarkdownExporter", + "TranscriptExporter", +] +```` + +## File: src/noteflow/infrastructure/export/_formatting.py +````python +"""Shared formatting utilities for export modules.""" + +from __future__ import annotations + +from datetime import datetime + + +def format_timestamp(seconds: float) -> str: + """Format seconds as MM:SS or HH:MM:SS. + + Args: + seconds: Time in seconds. + + Returns: + Formatted time string. 
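+
+    Example:
+        3725.0 -> "1:02:05"; 65.0 -> "1:05".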
+ """ + total_seconds = int(seconds) + hours, remainder = divmod(total_seconds, 3600) + minutes, secs = divmod(remainder, 60) + + if hours > 0: + return f"{hours:d}:{minutes:02d}:{secs:02d}" + return f"{minutes:d}:{secs:02d}" + + +def format_datetime(dt: datetime | None) -> str: + """Format datetime for display. + + Args: + dt: Datetime to format. + + Returns: + Formatted datetime string or empty string. + """ + return "" if dt is None else dt.strftime("%Y-%m-%d %H:%M:%S") +```` + +## File: src/noteflow/infrastructure/export/protocols.py +````python +"""Export protocols defining contracts for transcript exporters. + +Define Protocol interfaces for exporting meeting transcripts to various formats. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from collections.abc import Sequence + + from noteflow.domain.entities.meeting import Meeting + from noteflow.domain.entities.segment import Segment + + +class TranscriptExporter(Protocol): + """Protocol for exporting meeting transcripts to file formats. + + Implementations should produce formatted output for the target format + (e.g., Markdown, HTML) from meeting data. + """ + + def export( + self, + meeting: Meeting, + segments: Sequence[Segment], + ) -> str: + """Export meeting transcript to formatted string. + + Args: + meeting: Meeting entity with metadata. + segments: Ordered list of transcript segments. + + Returns: + Formatted transcript string in target format. + """ + ... + + @property + def format_name(self) -> str: + """Human-readable format name (e.g., 'Markdown', 'HTML').""" + ... + + @property + def file_extension(self) -> str: + """File extension for this format (e.g., '.md', '.html').""" + ... +```` + +## File: src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py +````python +"""initial_schema + +Revision ID: 6a9d9f408f40 +Revises: +Create Date: 2025-12-16 19:10:55.135444 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = "6a9d9f408f40" +down_revision: str | Sequence[str] | None = None +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +# Vector dimension for embeddings (OpenAI compatible) +EMBEDDING_DIM = 1536 + + +def upgrade() -> None: + """Create NoteFlow schema and tables.""" + # Create schema + op.execute("CREATE SCHEMA IF NOT EXISTS noteflow") + + # Enable pgvector extension + try: + op.execute("CREATE EXTENSION IF NOT EXISTS vector") + except sa.exc.ProgrammingError as e: + raise RuntimeError( + f"Failed to create pgvector extension: {e}. 
" + "Ensure the database user has CREATE EXTENSION privileges, or " + "install pgvector manually: CREATE EXTENSION vector;" + ) from e + + # Create meetings table + op.create_table( + "meetings", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("title", sa.String(255), nullable=False), + sa.Column("state", sa.Integer(), nullable=False, server_default="1"), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True), + sa.Column( + "metadata", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="{}", + ), + sa.Column("wrapped_dek", sa.LargeBinary(), nullable=True), + schema="noteflow", + ) + + # Create segments table + op.create_table( + "segments", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "meeting_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("segment_id", sa.Integer(), nullable=False), + sa.Column("text", sa.Text(), nullable=False), + sa.Column("start_time", sa.Float(), nullable=False), + sa.Column("end_time", sa.Float(), nullable=False), + sa.Column("language", sa.String(10), nullable=False, server_default="en"), + sa.Column("language_confidence", sa.Float(), nullable=False, server_default="0.0"), + sa.Column("avg_logprob", sa.Float(), nullable=False, server_default="0.0"), + sa.Column("no_speech_prob", sa.Float(), nullable=False, server_default="0.0"), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + schema="noteflow", + ) + + # Add vector column for embeddings (pgvector) + op.execute(f"ALTER TABLE noteflow.segments ADD COLUMN embedding vector({EMBEDDING_DIM})") + + # Create index for vector similarity search + op.execute( + "CREATE INDEX IF NOT EXISTS ix_segments_embedding " + "ON noteflow.segments USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)" + ) + + # Create index for meeting_id lookups + op.create_index( + "ix_segments_meeting_id", + "segments", + ["meeting_id"], + schema="noteflow", + ) + + # Create word_timings table + op.create_table( + "word_timings", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "segment_pk", + sa.Integer(), + sa.ForeignKey("noteflow.segments.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("word", sa.String(255), nullable=False), + sa.Column("start_time", sa.Float(), nullable=False), + sa.Column("end_time", sa.Float(), nullable=False), + sa.Column("probability", sa.Float(), nullable=False), + schema="noteflow", + ) + + # Create index for segment_pk lookups + op.create_index( + "ix_word_timings_segment_pk", + "word_timings", + ["segment_pk"], + schema="noteflow", + ) + + # Create summaries table + op.create_table( + "summaries", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "meeting_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + unique=True, + ), + sa.Column("executive_summary", sa.Text(), nullable=True), + sa.Column( + "generated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column("model_version", sa.String(50), nullable=True), + schema="noteflow", + ) + + # Create key_points table + 
op.create_table( + "key_points", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "summary_id", + sa.Integer(), + sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("text", sa.Text(), nullable=False), + sa.Column("start_time", sa.Float(), nullable=False, server_default="0.0"), + sa.Column("end_time", sa.Float(), nullable=False, server_default="0.0"), + sa.Column( + "segment_ids", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + schema="noteflow", + ) + + # Create index for summary_id lookups + op.create_index( + "ix_key_points_summary_id", + "key_points", + ["summary_id"], + schema="noteflow", + ) + + # Create action_items table + op.create_table( + "action_items", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "summary_id", + sa.Integer(), + sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("text", sa.Text(), nullable=False), + sa.Column("assignee", sa.String(255), nullable=False, server_default=""), + sa.Column("due_date", sa.DateTime(timezone=True), nullable=True), + sa.Column("priority", sa.Integer(), nullable=False, server_default="0"), + sa.Column( + "segment_ids", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + schema="noteflow", + ) + + # Create index for summary_id lookups + op.create_index( + "ix_action_items_summary_id", + "action_items", + ["summary_id"], + schema="noteflow", + ) + + +def downgrade() -> None: + """Drop all NoteFlow tables and schema.""" + # Drop tables in reverse order (respecting foreign keys) + op.drop_table("action_items", schema="noteflow") + op.drop_table("key_points", schema="noteflow") + op.drop_table("summaries", schema="noteflow") + op.drop_table("word_timings", schema="noteflow") + op.drop_table("segments", schema="noteflow") + op.drop_table("meetings", schema="noteflow") + + # Drop schema + op.execute("DROP SCHEMA IF EXISTS noteflow CASCADE") +```` + +## File: src/noteflow/infrastructure/persistence/migrations/versions/b5c3e8a2d1f0_add_annotations_table.py +````python +"""add_annotations_table + +Revision ID: b5c3e8a2d1f0 +Revises: 6a9d9f408f40 +Create Date: 2025-12-17 10:00:00.000000 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision: str = "b5c3e8a2d1f0" +down_revision: str | Sequence[str] | None = "6a9d9f408f40" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Create annotations table for user-created annotations during recording.""" + op.create_table( + "annotations", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "annotation_id", + postgresql.UUID(as_uuid=True), + nullable=False, + unique=True, + ), + sa.Column( + "meeting_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("annotation_type", sa.String(50), nullable=False), + sa.Column("text", sa.Text(), nullable=False), + sa.Column("start_time", sa.Float(), nullable=False), + sa.Column("end_time", sa.Float(), nullable=False), + sa.Column( + "segment_ids", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + schema="noteflow", + ) + + # Create index for meeting_id lookups + op.create_index( + "ix_annotations_meeting_id", + "annotations", + ["meeting_id"], + schema="noteflow", + ) + + # Create index for time-based queries + op.create_index( + "ix_annotations_time_range", + "annotations", + ["meeting_id", "start_time", "end_time"], + schema="noteflow", + ) + + +def downgrade() -> None: + """Drop annotations table.""" + op.drop_index("ix_annotations_time_range", table_name="annotations", schema="noteflow") + op.drop_index("ix_annotations_meeting_id", table_name="annotations", schema="noteflow") + op.drop_table("annotations", schema="noteflow") +```` + +## File: src/noteflow/infrastructure/persistence/migrations/versions/c7d4e9f3a2b1_add_speaker_fields_to_segments.py +````python +"""add_speaker_fields_to_segments + +Revision ID: c7d4e9f3a2b1 +Revises: b5c3e8a2d1f0 +Create Date: 2025-12-18 16:00:00.000000 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "c7d4e9f3a2b1" +down_revision: str | Sequence[str] | None = "b5c3e8a2d1f0" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add speaker_id and speaker_confidence columns to segments table.""" + op.add_column( + "segments", + sa.Column("speaker_id", sa.String(50), nullable=True), + schema="noteflow", + ) + op.add_column( + "segments", + sa.Column("speaker_confidence", sa.Float(), nullable=False, server_default="0.0"), + schema="noteflow", + ) + + +def downgrade() -> None: + """Remove speaker_id and speaker_confidence columns from segments table.""" + op.drop_column("segments", "speaker_confidence", schema="noteflow") + op.drop_column("segments", "speaker_id", schema="noteflow") +```` + +## File: src/noteflow/infrastructure/persistence/migrations/__init__.py +````python +"""Alembic database migrations for NoteFlow.""" +```` + +## File: src/noteflow/infrastructure/persistence/migrations/README +```` +Generic single-database configuration with an async dbapi. 
+```` + +## File: src/noteflow/infrastructure/persistence/migrations/script.py.mako +```` +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + ${downgrades if downgrades else "pass"} +```` + +## File: src/noteflow/infrastructure/persistence/repositories/__init__.py +````python +"""Repository implementations for NoteFlow.""" + +from .annotation_repo import SqlAlchemyAnnotationRepository +from .meeting_repo import SqlAlchemyMeetingRepository +from .segment_repo import SqlAlchemySegmentRepository +from .summary_repo import SqlAlchemySummaryRepository + +__all__ = [ + "SqlAlchemyAnnotationRepository", + "SqlAlchemyMeetingRepository", + "SqlAlchemySegmentRepository", + "SqlAlchemySummaryRepository", +] +```` + +## File: src/noteflow/infrastructure/persistence/repositories/_base.py +````python +"""Base repository providing common SQLAlchemy patterns.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, TypeVar + +from sqlalchemy.ext.asyncio import AsyncSession + +if TYPE_CHECKING: + from sqlalchemy.sql import Select + +TModel = TypeVar("TModel") + + +class BaseRepository: + """Base class for SQLAlchemy repositories. + + Provides common session management and helper methods for + executing queries and persisting models. + """ + + def __init__(self, session: AsyncSession) -> None: + """Initialize repository with database session. + + Args: + session: SQLAlchemy async session. + """ + self._session = session + + async def _execute_scalar( + self, + stmt: Select[tuple[TModel]], + ) -> TModel | None: + """Execute statement and return single scalar result. + + Args: + stmt: SQLAlchemy select statement. + + Returns: + Single model instance or None if not found. + """ + result = await self._session.execute(stmt) + return result.scalar_one_or_none() + + async def _execute_scalars( + self, + stmt: Select[tuple[TModel]], + ) -> list[TModel]: + """Execute statement and return all scalar results. + + Args: + stmt: SQLAlchemy select statement. + + Returns: + List of model instances. + """ + result = await self._session.execute(stmt) + return list(result.scalars().all()) + + async def _add_and_flush(self, model: TModel) -> TModel: + """Add model to session and flush. + + Args: + model: ORM model instance to persist. + + Returns: + The persisted model with generated fields populated. + """ + self._session.add(model) + await self._session.flush() + return model + + async def _delete_and_flush(self, model: object) -> None: + """Delete model from session and flush. + + Args: + model: ORM model instance to delete. 
+ """ + await self._session.delete(model) + await self._session.flush() +```` + +## File: src/noteflow/infrastructure/persistence/__init__.py +````python +"""Persistence infrastructure for NoteFlow.""" + +from .database import create_async_engine, get_async_session_factory +from .unit_of_work import SqlAlchemyUnitOfWork + +__all__ = [ + "SqlAlchemyUnitOfWork", + "create_async_engine", + "get_async_session_factory", +] +```` + +## File: src/noteflow/infrastructure/persistence/database.py +````python +"""Database connection and session management.""" + +from __future__ import annotations + +from collections.abc import AsyncGenerator +from typing import TYPE_CHECKING + +from sqlalchemy.ext.asyncio import ( + AsyncEngine, + AsyncSession, + async_sessionmaker, +) +from sqlalchemy.ext.asyncio import ( + create_async_engine as sa_create_async_engine, +) + +if TYPE_CHECKING: + from noteflow.config import Settings + + +def create_async_engine(settings: Settings) -> AsyncEngine: + """Create an async SQLAlchemy engine. + + Args: + settings: Application settings with database URL. + + Returns: + Configured async engine. + """ + return sa_create_async_engine( + settings.database_url_str, + pool_size=settings.db_pool_size, + echo=settings.db_echo, + pool_pre_ping=True, # Verify connections before use + ) + + +def get_async_session_factory( + engine: AsyncEngine, +) -> async_sessionmaker[AsyncSession]: + """Create an async session factory. + + Args: + engine: SQLAlchemy async engine. + + Returns: + Session factory for creating async sessions. + """ + return async_sessionmaker( + engine, + class_=AsyncSession, + expire_on_commit=False, + autocommit=False, + autoflush=False, + ) + + +async def get_async_session( + session_factory: async_sessionmaker[AsyncSession], +) -> AsyncGenerator[AsyncSession, None]: + """Yield an async database session. + + Args: + session_factory: Factory for creating sessions. + + Yields: + Async database session that is closed after use. + """ + async with session_factory() as session: + yield session + + +def create_async_session_factory( + database_url: str, + pool_size: int = 5, + echo: bool = False, +) -> async_sessionmaker[AsyncSession]: + """Create an async session factory from a database URL string. + + Convenience function for creating a session factory directly from a URL. + + Args: + database_url: PostgreSQL database URL. + pool_size: Connection pool size. + echo: Enable SQL echo logging. + + Returns: + Async session factory. + """ + engine = sa_create_async_engine( + database_url, + pool_size=pool_size, + echo=echo, + pool_pre_ping=True, + ) + return async_sessionmaker( + engine, + class_=AsyncSession, + expire_on_commit=False, + autocommit=False, + autoflush=False, + ) +```` + +## File: src/noteflow/infrastructure/security/__init__.py +````python +"""Security infrastructure module. + +Provides encryption and key management using OS credential stores. 
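+
+Example (envelope encryption round-trip; the in-memory keystore is used here
+purely for illustration):
+
+    crypto = AesGcmCryptoBox(InMemoryKeyStore())
+    dek = crypto.generate_dek()
+    wrapped = crypto.wrap_dek(dek)  # safe to persist alongside the data
+    chunk = crypto.encrypt_chunk(b"audio bytes", dek)
+    assert crypto.decrypt_chunk(chunk, crypto.unwrap_dek(wrapped)) == b"audio bytes"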
+""" + +from noteflow.infrastructure.security.crypto import ( + AesGcmCryptoBox, + ChunkedAssetReader, + ChunkedAssetWriter, +) +from noteflow.infrastructure.security.keystore import InMemoryKeyStore, KeyringKeyStore +from noteflow.infrastructure.security.protocols import ( + CryptoBox, + EncryptedAssetReader, + EncryptedAssetWriter, + EncryptedChunk, + KeyStore, +) + +__all__ = [ + "AesGcmCryptoBox", + "ChunkedAssetReader", + "ChunkedAssetWriter", + "CryptoBox", + "EncryptedAssetReader", + "EncryptedAssetWriter", + "EncryptedChunk", + "InMemoryKeyStore", + "KeyStore", + "KeyringKeyStore", +] +```` + +## File: src/noteflow/infrastructure/security/crypto.py +````python +"""Cryptographic operations implementation using cryptography library. + +Provides AES-GCM encryption for audio data with envelope encryption. +""" + +from __future__ import annotations + +import logging +import secrets +import struct +from collections.abc import Iterator +from pathlib import Path +from typing import TYPE_CHECKING, BinaryIO, Final + +from cryptography.hazmat.primitives.ciphers.aead import AESGCM + +from noteflow.infrastructure.security.protocols import EncryptedChunk + +if TYPE_CHECKING: + from noteflow.infrastructure.security.keystore import InMemoryKeyStore, KeyringKeyStore + +logger = logging.getLogger(__name__) + +# Constants +KEY_SIZE: Final[int] = 32 # 256-bit key +NONCE_SIZE: Final[int] = 12 # 96-bit nonce for AES-GCM +TAG_SIZE: Final[int] = 16 # 128-bit authentication tag + +# File format magic number and version +FILE_MAGIC: Final[bytes] = b"NFAE" # NoteFlow Audio Encrypted +FILE_VERSION: Final[int] = 1 + + +class AesGcmCryptoBox: + """AES-GCM based encryption with envelope encryption. + + Uses a master key to wrap/unwrap per-meeting Data Encryption Keys (DEKs). + Each audio chunk is encrypted with AES-256-GCM using the DEK. + """ + + def __init__(self, keystore: KeyringKeyStore | InMemoryKeyStore) -> None: + """Initialize the crypto box. + + Args: + keystore: KeyStore instance for master key access. + """ + self._keystore = keystore + self._master_cipher: AESGCM | None = None + + def _get_master_cipher(self) -> AESGCM: + """Get or create the master key cipher.""" + if self._master_cipher is None: + master_key = self._keystore.get_or_create_master_key() + self._master_cipher = AESGCM(master_key) + return self._master_cipher + + def generate_dek(self) -> bytes: + """Generate a new Data Encryption Key. + + Returns: + 32-byte random DEK. + """ + return secrets.token_bytes(KEY_SIZE) + + def wrap_dek(self, dek: bytes) -> bytes: + """Encrypt DEK with master key. + + Args: + dek: Data Encryption Key to wrap. + + Returns: + Encrypted DEK (nonce || ciphertext || tag). + """ + cipher = self._get_master_cipher() + nonce = secrets.token_bytes(NONCE_SIZE) + ciphertext = cipher.encrypt(nonce, dek, associated_data=None) + # Return nonce || ciphertext (tag is appended by AESGCM) + return nonce + ciphertext + + def unwrap_dek(self, wrapped_dek: bytes) -> bytes: + """Decrypt DEK with master key. + + Args: + wrapped_dek: Encrypted DEK from wrap_dek(). + + Returns: + Original DEK. + + Raises: + ValueError: If decryption fails. 
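+
+        Example:
+            For any box ``crypto`` and freshly generated ``dek``,
+            ``crypto.unwrap_dek(crypto.wrap_dek(dek)) == dek``.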
+ """ + if len(wrapped_dek) < NONCE_SIZE + KEY_SIZE + TAG_SIZE: + raise ValueError("Invalid wrapped DEK: too short") + + cipher = self._get_master_cipher() + nonce = wrapped_dek[:NONCE_SIZE] + ciphertext = wrapped_dek[NONCE_SIZE:] + + try: + return cipher.decrypt(nonce, ciphertext, associated_data=None) + except Exception as e: + raise ValueError(f"DEK unwrap failed: {e}") from e + + def encrypt_chunk(self, plaintext: bytes, dek: bytes) -> EncryptedChunk: + """Encrypt a chunk of data with AES-GCM. + + Args: + plaintext: Data to encrypt. + dek: Data Encryption Key. + + Returns: + EncryptedChunk with nonce, ciphertext, and tag. + """ + cipher = AESGCM(dek) + nonce = secrets.token_bytes(NONCE_SIZE) + + # AESGCM appends the tag to ciphertext + ciphertext_with_tag = cipher.encrypt(nonce, plaintext, associated_data=None) + + # Split ciphertext and tag + ciphertext = ciphertext_with_tag[:-TAG_SIZE] + tag = ciphertext_with_tag[-TAG_SIZE:] + + return EncryptedChunk(nonce=nonce, ciphertext=ciphertext, tag=tag) + + def decrypt_chunk(self, chunk: EncryptedChunk, dek: bytes) -> bytes: + """Decrypt a chunk of data. + + Args: + chunk: EncryptedChunk to decrypt. + dek: Data Encryption Key. + + Returns: + Original plaintext. + + Raises: + ValueError: If decryption fails. + """ + cipher = AESGCM(dek) + + # Reconstruct ciphertext with tag for AESGCM + ciphertext_with_tag = chunk.ciphertext + chunk.tag + + try: + return cipher.decrypt(chunk.nonce, ciphertext_with_tag, associated_data=None) + except Exception as e: + raise ValueError(f"Chunk decryption failed: {e}") from e + + +class ChunkedAssetWriter: + """Streaming encrypted asset writer. + + File format: + - 4 bytes: magic ("NFAE") + - 1 byte: version + - For each chunk: + - 4 bytes: chunk length (big-endian) + - 12 bytes: nonce + - N bytes: ciphertext + - 16 bytes: tag + """ + + def __init__(self, crypto: AesGcmCryptoBox) -> None: + """Initialize the writer. + + Args: + crypto: CryptoBox instance for encryption. + """ + self._crypto = crypto + self._file: Path | None = None + self._dek: bytes | None = None + self._handle: BinaryIO | None = None + self._bytes_written: int = 0 + + def open(self, path: Path, dek: bytes) -> None: + """Open file for writing. + + Args: + path: Path to the encrypted file. + dek: Data Encryption Key for this file. 
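+
+        Raises:
+            RuntimeError: If already open.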
+ """ + if self._handle is not None: + raise RuntimeError("Already open") + + self._file = path + self._dek = dek + self._handle = path.open("wb") + self._bytes_written = 0 + + # Write header + self._handle.write(FILE_MAGIC) + self._handle.write(struct.pack("B", FILE_VERSION)) + + logger.debug("Opened encrypted file for writing: %s", path) + + def write_chunk(self, audio_bytes: bytes) -> None: + """Write and encrypt an audio chunk.""" + if self._handle is None or self._dek is None: + raise RuntimeError("File not open") + + # Encrypt the chunk + chunk = self._crypto.encrypt_chunk(audio_bytes, self._dek) + + # Calculate total chunk size (nonce + ciphertext + tag) + chunk_data = chunk.nonce + chunk.ciphertext + chunk.tag + chunk_length = len(chunk_data) + + # Write length prefix and chunk data + self._handle.write(struct.pack(">I", chunk_length)) + self._handle.write(chunk_data) + self._handle.flush() + + self._bytes_written += 4 + chunk_length + + def close(self) -> None: + """Finalize and close the file.""" + if self._handle is not None: + self._handle.close() + self._handle = None + logger.debug("Closed encrypted file, wrote %d bytes", self._bytes_written) + + self._dek = None + + @property + def is_open(self) -> bool: + """Check if file is open for writing.""" + return self._handle is not None + + @property + def bytes_written(self) -> int: + """Total encrypted bytes written.""" + return self._bytes_written + + +class ChunkedAssetReader: + """Streaming encrypted asset reader.""" + + def __init__(self, crypto: AesGcmCryptoBox) -> None: + """Initialize the reader. + + Args: + crypto: CryptoBox instance for decryption. + """ + self._crypto = crypto + self._file: Path | None = None + self._dek: bytes | None = None + self._handle: BinaryIO | None = None + + def open(self, path: Path, dek: bytes) -> None: + """Open file for reading.""" + if self._handle is not None: + raise RuntimeError("Already open") + + self._file = path + self._dek = dek + self._handle = path.open("rb") + + # Read and validate header + magic = self._handle.read(4) + if magic != FILE_MAGIC: + self._handle.close() + self._handle = None + raise ValueError(f"Invalid file format: expected {FILE_MAGIC!r}, got {magic!r}") + + version = struct.unpack("B", self._handle.read(1))[0] + if version != FILE_VERSION: + self._handle.close() + self._handle = None + raise ValueError(f"Unsupported file version: {version}") + + logger.debug("Opened encrypted file for reading: %s", path) + + def read_chunks(self) -> Iterator[bytes]: + """Yield decrypted audio chunks.""" + if self._handle is None or self._dek is None: + raise RuntimeError("File not open") + + while True: + # Read chunk length + length_bytes = self._handle.read(4) + if len(length_bytes) < 4: + break # End of file + + chunk_length = struct.unpack(">I", length_bytes)[0] + + # Read chunk data + chunk_data = self._handle.read(chunk_length) + if len(chunk_data) < chunk_length: + raise ValueError("Truncated chunk") + + # Parse chunk (nonce + ciphertext + tag) + nonce = chunk_data[:NONCE_SIZE] + ciphertext = chunk_data[NONCE_SIZE:-TAG_SIZE] + tag = chunk_data[-TAG_SIZE:] + + chunk = EncryptedChunk(nonce=nonce, ciphertext=ciphertext, tag=tag) + + # Decrypt and yield + yield self._crypto.decrypt_chunk(chunk, self._dek) + + def close(self) -> None: + """Close the file.""" + if self._handle is not None: + self._handle.close() + self._handle = None + logger.debug("Closed encrypted file") + + self._dek = None + + @property + def is_open(self) -> bool: + """Check if file is open for reading.""" 
+ return self._handle is not None +```` + +## File: src/noteflow/infrastructure/security/keystore.py +````python +"""Keystore implementation using the keyring library. + +Provides secure master key storage using OS credential stores. +""" + +from __future__ import annotations + +import base64 +import logging +import os +import secrets +from typing import Final + +import keyring + +logger = logging.getLogger(__name__) + +# Constants +KEY_SIZE: Final[int] = 32 # 256-bit key +SERVICE_NAME: Final[str] = "noteflow" +KEY_NAME: Final[str] = "master_key" +ENV_VAR_NAME: Final[str] = "NOTEFLOW_MASTER_KEY" + + +class KeyringKeyStore: + """keyring-based key storage using OS credential store. + + Uses: + - macOS: Keychain + - Windows: Credential Manager + - Linux: SecretService (GNOME Keyring, KWallet) + """ + + def __init__( + self, + service_name: str = SERVICE_NAME, + key_name: str = KEY_NAME, + ) -> None: + """Initialize the keystore. + + Args: + service_name: Service identifier for keyring. + key_name: Key identifier within the service. + """ + self._service_name = service_name + self._key_name = key_name + + def get_or_create_master_key(self) -> bytes: + """Retrieve or generate the master encryption key. + + Checks for an environment variable first (for headless/container deployments), + then falls back to the OS keyring. + + Returns: + 32-byte master key. + + Raises: + RuntimeError: If keychain is unavailable and no env var is set. + """ + # Check environment variable first (for headless/container deployments) + if env_key := os.environ.get(ENV_VAR_NAME): + logger.debug("Using master key from environment variable") + return base64.b64decode(env_key) + + try: + # Try to retrieve existing key from keyring + stored = keyring.get_password(self._service_name, self._key_name) + if stored is not None: + logger.debug("Retrieved existing master key from keyring") + return base64.b64decode(stored) + + # Generate new key + new_key = secrets.token_bytes(KEY_SIZE) + encoded = base64.b64encode(new_key).decode("ascii") + + # Store in keyring + keyring.set_password(self._service_name, self._key_name, encoded) + logger.info("Generated and stored new master key in keyring") + return new_key + + except keyring.errors.KeyringError as e: + raise RuntimeError( + f"Keyring unavailable: {e}. " + f"Set {ENV_VAR_NAME} environment variable for headless mode." + ) from e + + def delete_master_key(self) -> None: + """Delete the master key from the keychain. + + Safe to call if key doesn't exist. + """ + try: + keyring.delete_password(self._service_name, self._key_name) + logger.info("Deleted master key") + except keyring.errors.PasswordDeleteError: + # Key doesn't exist, that's fine + logger.debug("Master key not found, nothing to delete") + except keyring.errors.KeyringError as e: + logger.warning("Failed to delete master key: %s", e) + + def has_master_key(self) -> bool: + """Check if master key exists in the keychain. + + Returns: + True if master key exists. + """ + try: + stored = keyring.get_password(self._service_name, self._key_name) + return stored is not None + except keyring.errors.KeyringError: + return False + + @property + def service_name(self) -> str: + """Get the service name used for keyring.""" + return self._service_name + + @property + def key_name(self) -> str: + """Get the key name used for keyring.""" + return self._key_name + + +class InMemoryKeyStore: + """In-memory key storage for testing. + + Keys are lost when the process exits. 
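+
+    Example:
+        AesGcmCryptoBox(InMemoryKeyStore())  # crypto in tests, no OS keychain needed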
+ """ + + def __init__(self) -> None: + """Initialize the in-memory keystore.""" + self._key: bytes | None = None + + def get_or_create_master_key(self) -> bytes: + """Retrieve or generate the master encryption key.""" + if self._key is None: + self._key = secrets.token_bytes(KEY_SIZE) + logger.debug("Generated in-memory master key") + return self._key + + def delete_master_key(self) -> None: + """Delete the master key.""" + self._key = None + logger.debug("Deleted in-memory master key") + + def has_master_key(self) -> bool: + """Check if master key exists.""" + return self._key is not None +```` + +## File: src/noteflow/infrastructure/security/protocols.py +````python +"""Security protocols and data types. + +These protocols define the contracts for key storage and encryption components. +""" + +from __future__ import annotations + +from collections.abc import Iterator +from dataclasses import dataclass +from pathlib import Path +from typing import Protocol + + +@dataclass(frozen=True) +class EncryptedChunk: + """An encrypted chunk of data with authentication tag.""" + + nonce: bytes # Unique nonce for this chunk + ciphertext: bytes # Encrypted data + tag: bytes # Authentication tag + + +class KeyStore(Protocol): + """Protocol for OS keychain access. + + Implementations should use the OS credential store (Keychain, Credential Manager) + to securely store the master encryption key. + """ + + def get_or_create_master_key(self) -> bytes: + """Retrieve or generate the master encryption key. + + If the master key doesn't exist, generates a new 32-byte key + and stores it in the OS keychain. + + Returns: + 32-byte master key. + + Raises: + RuntimeError: If keychain is unavailable or locked. + """ + ... + + def delete_master_key(self) -> None: + """Delete the master key from the keychain. + + This renders all encrypted data permanently unrecoverable. + + Safe to call if key doesn't exist. + """ + ... + + def has_master_key(self) -> bool: + """Check if master key exists in the keychain. + + Returns: + True if master key exists. + """ + ... + + +class CryptoBox(Protocol): + """Protocol for envelope encryption with per-meeting keys. + + Uses a master key to wrap/unwrap Data Encryption Keys (DEKs), + which are used to encrypt actual meeting data. + """ + + def generate_dek(self) -> bytes: + """Generate a new Data Encryption Key. + + Returns: + 32-byte random DEK. + """ + ... + + def wrap_dek(self, dek: bytes) -> bytes: + """Encrypt DEK with master key. + + Args: + dek: Data Encryption Key to wrap. + + Returns: + Encrypted DEK (can be stored in DB). + """ + ... + + def unwrap_dek(self, wrapped_dek: bytes) -> bytes: + """Decrypt DEK with master key. + + Args: + wrapped_dek: Encrypted DEK from wrap_dek(). + + Returns: + Original DEK. + + Raises: + ValueError: If decryption fails (invalid or tampered). + """ + ... + + def encrypt_chunk(self, plaintext: bytes, dek: bytes) -> EncryptedChunk: + """Encrypt a chunk of data with AES-GCM. + + Args: + plaintext: Data to encrypt. + dek: Data Encryption Key. + + Returns: + EncryptedChunk with nonce, ciphertext, and tag. + """ + ... + + def decrypt_chunk(self, chunk: EncryptedChunk, dek: bytes) -> bytes: + """Decrypt a chunk of data. + + Args: + chunk: EncryptedChunk to decrypt. + dek: Data Encryption Key. + + Returns: + Original plaintext. + + Raises: + ValueError: If decryption fails (invalid or tampered). + """ + ... + + +class EncryptedAssetWriter(Protocol): + """Protocol for streaming encrypted audio writer. 
+ + Writes audio chunks encrypted with a DEK to a file. + """ + + def open(self, path: Path, dek: bytes) -> None: + """Open file for writing. + + Args: + path: Path to the encrypted file. + dek: Data Encryption Key for this file. + + Raises: + RuntimeError: If already open. + OSError: If file cannot be created. + """ + ... + + def write_chunk(self, audio_bytes: bytes) -> None: + """Write and encrypt an audio chunk. + + Args: + audio_bytes: Raw audio data to encrypt and write. + + Raises: + RuntimeError: If not open. + """ + ... + + def close(self) -> None: + """Finalize and close the file. + + Safe to call if already closed. + """ + ... + + @property + def is_open(self) -> bool: + """Check if file is open for writing.""" + ... + + @property + def bytes_written(self) -> int: + """Total encrypted bytes written.""" + ... + + +class EncryptedAssetReader(Protocol): + """Protocol for streaming encrypted audio reader. + + Reads and decrypts audio chunks from a file. + """ + + def open(self, path: Path, dek: bytes) -> None: + """Open file for reading. + + Args: + path: Path to the encrypted file. + dek: Data Encryption Key for this file. + + Raises: + RuntimeError: If already open. + OSError: If file cannot be read. + ValueError: If file format is invalid. + """ + ... + + def read_chunks(self) -> Iterator[bytes]: + """Yield decrypted audio chunks. + + Yields: + Decrypted audio data chunks. + + Raises: + RuntimeError: If not open. + ValueError: If decryption fails. + """ + ... + + def close(self) -> None: + """Close the file. + + Safe to call if already closed. + """ + ... + + @property + def is_open(self) -> bool: + """Check if file is open for reading.""" + ... +```` + +## File: src/noteflow/infrastructure/summarization/citation_verifier.py +````python +"""Citation verification implementation.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from noteflow.domain.summarization import CitationVerificationResult + +if TYPE_CHECKING: + from collections.abc import Sequence + + from noteflow.domain.entities import Segment, Summary + + +class SegmentCitationVerifier: + """Verify that summary citations reference valid segments. + + Checks that all segment_ids in key points and action items + correspond to actual segments in the transcript. + """ + + def verify_citations( + self, + summary: Summary, + segments: Sequence[Segment], + ) -> CitationVerificationResult: + """Verify all segment_ids exist in the transcript. + + Args: + summary: Summary with key points and action items to verify. + segments: Available transcript segments. + + Returns: + CitationVerificationResult with validation status and details. 
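+
+        Example (illustrative):
+            With valid segment ids {0, 1}, a key point citing [1, 7] is
+            flagged by index and 7 is reported in missing_segment_ids.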
+ """ + # Build set of valid segment IDs + valid_segment_ids = {seg.segment_id for seg in segments} + + # Track invalid citations + invalid_key_point_indices: list[int] = [] + invalid_action_item_indices: list[int] = [] + missing_segment_ids: set[int] = set() + + # Verify key points + for idx, key_point in enumerate(summary.key_points): + for seg_id in key_point.segment_ids: + if seg_id not in valid_segment_ids: + if idx not in invalid_key_point_indices: + invalid_key_point_indices.append(idx) + missing_segment_ids.add(seg_id) + + # Verify action items + for idx, action_item in enumerate(summary.action_items): + for seg_id in action_item.segment_ids: + if seg_id not in valid_segment_ids: + if idx not in invalid_action_item_indices: + invalid_action_item_indices.append(idx) + missing_segment_ids.add(seg_id) + + is_valid = not invalid_key_point_indices and not invalid_action_item_indices + + return CitationVerificationResult( + is_valid=is_valid, + invalid_key_point_indices=tuple(invalid_key_point_indices), + invalid_action_item_indices=tuple(invalid_action_item_indices), + missing_segment_ids=tuple(sorted(missing_segment_ids)), + ) + + def filter_invalid_citations( + self, + summary: Summary, + segments: Sequence[Segment], + ) -> Summary: + """Return a copy of the summary with invalid citations removed. + + Invalid segment_ids are removed from key points and action items. + Items with no remaining citations keep empty segment_ids lists. + + Args: + summary: Summary to filter. + segments: Available transcript segments. + + Returns: + New Summary with invalid citations removed. + """ + valid_segment_ids = {seg.segment_id for seg in segments} + + # Filter key point citations + from noteflow.domain.entities import ActionItem, KeyPoint + from noteflow.domain.entities import Summary as SummaryEntity + + filtered_key_points = [ + KeyPoint( + text=kp.text, + segment_ids=[sid for sid in kp.segment_ids if sid in valid_segment_ids], + start_time=kp.start_time, + end_time=kp.end_time, + db_id=kp.db_id, + ) + for kp in summary.key_points + ] + + # Filter action item citations + filtered_action_items = [ + ActionItem( + text=ai.text, + assignee=ai.assignee, + due_date=ai.due_date, + priority=ai.priority, + segment_ids=[sid for sid in ai.segment_ids if sid in valid_segment_ids], + db_id=ai.db_id, + ) + for ai in summary.action_items + ] + + return SummaryEntity( + meeting_id=summary.meeting_id, + executive_summary=summary.executive_summary, + key_points=filtered_key_points, + action_items=filtered_action_items, + generated_at=summary.generated_at, + model_version=summary.model_version, + db_id=summary.db_id, + ) +```` + +## File: src/noteflow/infrastructure/summarization/factory.py +````python +"""Factory for creating configured SummarizationService instances.""" + +from __future__ import annotations + +import logging + +from noteflow.application.services.summarization_service import ( + SummarizationMode, + SummarizationService, + SummarizationServiceSettings, +) +from noteflow.infrastructure.summarization.citation_verifier import SegmentCitationVerifier +from noteflow.infrastructure.summarization.mock_provider import MockSummarizer +from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer + +logger = logging.getLogger(__name__) + + +def create_summarization_service( + default_mode: SummarizationMode = SummarizationMode.LOCAL, + include_local: bool = True, + include_mock: bool = True, + verify_citations: bool = True, + filter_invalid_citations: bool = True, +) -> 
SummarizationService: + """Create a fully-configured SummarizationService. + + Auto-detects provider availability. Falls back to MOCK if LOCAL unavailable. + + Args: + default_mode: Preferred summarization mode. + include_local: Register OllamaSummarizer (checked at runtime). + include_mock: Register MockSummarizer (always available). + verify_citations: Enable citation verification. + filter_invalid_citations: Remove invalid citations from output. + + Returns: + Configured SummarizationService ready for use. + """ + service = SummarizationService( + settings=SummarizationServiceSettings( + default_mode=default_mode, + fallback_to_local=True, # Enables LOCAL → MOCK fallback + verify_citations=verify_citations, + filter_invalid_citations=filter_invalid_citations, + ), + ) + + # Always register MOCK as fallback + if include_mock: + service.register_provider(SummarizationMode.MOCK, MockSummarizer()) + logger.debug("Registered MOCK summarization provider") + + # Register LOCAL (Ollama) - availability checked at runtime + if include_local: + ollama = OllamaSummarizer() + service.register_provider(SummarizationMode.LOCAL, ollama) + if ollama.is_available: + logger.info("Registered LOCAL (Ollama) summarization provider - available") + else: + logger.info( + "Registered LOCAL (Ollama) summarization provider - unavailable, will fallback" + ) + + # Set citation verifier + if verify_citations: + service.set_verifier(SegmentCitationVerifier()) + logger.debug("Citation verification enabled") + + return service +```` + +## File: src/noteflow/infrastructure/triggers/__init__.py +````python +"""Trigger infrastructure module. + +Provide signal providers for meeting detection triggers. +""" + +from noteflow.infrastructure.triggers.app_audio import AppAudioProvider, AppAudioSettings +from noteflow.infrastructure.triggers.audio_activity import ( + AudioActivityProvider, + AudioActivitySettings, +) +from noteflow.infrastructure.triggers.calendar import CalendarProvider, CalendarSettings +from noteflow.infrastructure.triggers.foreground_app import ( + ForegroundAppProvider, + ForegroundAppSettings, +) + +__all__ = [ + "AppAudioProvider", + "AppAudioSettings", + "AudioActivityProvider", + "AudioActivitySettings", + "CalendarProvider", + "CalendarSettings", + "ForegroundAppProvider", + "ForegroundAppSettings", +] +```` + +## File: src/noteflow/infrastructure/triggers/foreground_app.py +````python +"""Foreground app detection using PyWinCtl. + +Detect meeting applications in the foreground window. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field + +from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource + +logger = logging.getLogger(__name__) + + +@dataclass +class ForegroundAppSettings: + """Configuration for foreground app detection. + + Attributes: + enabled: Whether foreground app detection is enabled. + weight: Confidence weight contributed by this provider. + meeting_apps: Set of app name substrings to match (lowercase). + suppressed_apps: Apps to ignore even if they match meeting_apps. + """ + + enabled: bool + weight: float + meeting_apps: set[str] = field(default_factory=set) + suppressed_apps: set[str] = field(default_factory=set) + + def __post_init__(self) -> None: + self.meeting_apps = {app.lower() for app in self.meeting_apps} + self.suppressed_apps = {app.lower() for app in self.suppressed_apps} + + +class ForegroundAppProvider: + """Detect meeting apps in foreground using PyWinCtl. 
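+
+    Matching is case-insensitive and substring-based against the active
+    window title, so meeting_apps={"zoom", "teams"} would match a window
+    titled "Zoom Meeting" (illustrative values).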
+ + PyWinCtl provides cross-platform active window detection for + Linux (X11/Wayland), macOS, and Windows. + """ + + def __init__(self, settings: ForegroundAppSettings) -> None: + """Initialize foreground app provider. + + Args: + settings: Configuration settings for foreground app detection. + """ + self._settings = settings + self._available: bool | None = None + + @property + def source(self) -> TriggerSource: + """Get the source type for this provider.""" + return TriggerSource.FOREGROUND_APP + + @property + def max_weight(self) -> float: + """Get the maximum weight this provider can contribute.""" + return self._settings.weight + + def is_enabled(self) -> bool: + """Check if this provider is enabled and available.""" + return self._settings.enabled and self._is_available() + + def _is_available(self) -> bool: + """Check if PyWinCtl is available and working.""" + if self._available is not None: + return self._available + + try: + import pywinctl + + # Try to get active window to verify it works + _ = pywinctl.getActiveWindow() + self._available = True + logger.debug("PyWinCtl available for foreground detection") + except ImportError: + self._available = False + logger.warning("PyWinCtl not installed - foreground detection disabled") + except Exception as e: + self._available = False + logger.warning("PyWinCtl unavailable: %s - foreground detection disabled", e) + + return self._available + + def get_signal(self) -> TriggerSignal | None: + """Get current signal if meeting app is in foreground. + + Returns: + TriggerSignal if a meeting app is detected, None otherwise. + """ + if not self.is_enabled(): + return None + + try: + import pywinctl + + window = pywinctl.getActiveWindow() + if not window: + return None + + title = window.title + if not title: + return None + + title_lower = title.lower() + + # Check if app is suppressed + for suppressed in self._settings.suppressed_apps: + if suppressed in title_lower: + return None + + # Check if it's a meeting app + for app in self._settings.meeting_apps: + if app in title_lower: + return TriggerSignal( + source=self.source, + weight=self.max_weight, + app_name=title, + ) + + except Exception as e: + logger.debug("Foreground detection error: %s", e) + + return None + + def suppress_app(self, app_name: str) -> None: + """Add an app to the suppression list. + + Args: + app_name: App name substring to suppress (will be lowercased). + """ + self._settings.suppressed_apps.add(app_name.lower()) + logger.info("Suppressed app: %s", app_name) + + def unsuppress_app(self, app_name: str) -> None: + """Remove an app from the suppression list. + + Args: + app_name: App name substring to unsuppress. + """ + self._settings.suppressed_apps.discard(app_name.lower()) + + def add_meeting_app(self, app_name: str) -> None: + """Add an app to the meeting apps list. + + Args: + app_name: App name substring to add (will be lowercased). + """ + self._settings.meeting_apps.add(app_name.lower()) + + @property + def suppressed_apps(self) -> frozenset[str]: + """Get current suppressed apps.""" + return frozenset(self._settings.suppressed_apps) +```` + +## File: src/noteflow/__init__.py +````python +"""NoteFlow - Intelligent Meeting Notetaker.""" + +__version__ = "0.1.0" +```` + +## File: src/noteflow_pb2.py +````python +# Compatibility shim for generated gRPC stubs. +# The generated `noteflow_pb2_grpc.py` imports a top-level `noteflow_pb2` module. 
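+# (Without this shim, `import noteflow_pb2` would raise ModuleNotFoundError
+# unless the generated module were placed on sys.path directly.)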
+# Re-export the packaged definitions to satisfy that import while keeping the +# compiled protobufs under `noteflow.grpc.proto`. +from noteflow.grpc.proto.noteflow_pb2 import * # noqa: F401,F403 +```` + +## File: tests/application/__init__.py +````python +"""Application layer unit tests.""" +```` + +## File: tests/application/test_recovery_service.py +````python +"""Tests for RecoveryService application service.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from noteflow.application.services.recovery_service import RecoveryService +from noteflow.domain.entities import Meeting +from noteflow.domain.value_objects import MeetingState + + +@pytest.fixture +def mock_uow() -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.meetings = MagicMock() + return uow + + +class TestRecoveryServiceRecovery: + """Tests for crash recovery operations.""" + + async def test_recover_no_crashed_meetings(self, mock_uow: MagicMock) -> None: + """Test recovery with no crashed meetings.""" + mock_uow.meetings.list_all = AsyncMock(return_value=([], 0)) + + service = RecoveryService(mock_uow) + result = await service.recover_crashed_meetings() + + assert result == [] + mock_uow.commit.assert_not_called() + + async def test_recover_single_recording_meeting(self, mock_uow: MagicMock) -> None: + """Test recovery of a meeting left in RECORDING state.""" + meeting = Meeting.create(title="Crashed Recording") + meeting.start_recording() # Put in RECORDING state + assert meeting.state == MeetingState.RECORDING + + mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1)) + mock_uow.meetings.update = AsyncMock(return_value=meeting) + + service = RecoveryService(mock_uow) + result = await service.recover_crashed_meetings() + + assert len(result) == 1 + assert result[0].state == MeetingState.ERROR + assert result[0].metadata["crash_recovered"] == "true" + assert result[0].metadata["crash_previous_state"] == "RECORDING" + assert "crash_recovery_time" in result[0].metadata + mock_uow.meetings.update.assert_called_once() + mock_uow.commit.assert_called_once() + + async def test_recover_single_stopping_meeting(self, mock_uow: MagicMock) -> None: + """Test recovery of a meeting left in STOPPING state.""" + meeting = Meeting.create(title="Crashed Stopping") + meeting.start_recording() + meeting.begin_stopping() # Put in STOPPING state + assert meeting.state == MeetingState.STOPPING + + mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1)) + mock_uow.meetings.update = AsyncMock(return_value=meeting) + + service = RecoveryService(mock_uow) + result = await service.recover_crashed_meetings() + + assert len(result) == 1 + assert result[0].state == MeetingState.ERROR + assert result[0].metadata["crash_previous_state"] == "STOPPING" + mock_uow.commit.assert_called_once() + + async def test_recover_multiple_crashed_meetings(self, mock_uow: MagicMock) -> None: + """Test recovery of multiple crashed meetings.""" + meeting1 = Meeting.create(title="Crashed 1") + meeting1.start_recording() + + meeting2 = Meeting.create(title="Crashed 2") + meeting2.start_recording() + meeting2.begin_stopping() + + meeting3 = Meeting.create(title="Crashed 3") + meeting3.start_recording() + + meetings = [meeting1, meeting2, meeting3] + mock_uow.meetings.list_all = AsyncMock(return_value=(meetings, 3)) + mock_uow.meetings.update = 
AsyncMock(side_effect=meetings) + + service = RecoveryService(mock_uow) + result = await service.recover_crashed_meetings() + + assert len(result) == 3 + assert all(m.state == MeetingState.ERROR for m in result) + assert result[0].metadata["crash_previous_state"] == "RECORDING" + assert result[1].metadata["crash_previous_state"] == "STOPPING" + assert result[2].metadata["crash_previous_state"] == "RECORDING" + assert mock_uow.meetings.update.call_count == 3 + mock_uow.commit.assert_called_once() + + +class TestRecoveryServiceCounting: + """Tests for counting crashed meetings.""" + + async def test_count_no_crashed_meetings(self, mock_uow: MagicMock) -> None: + """Test counting with no crashed meetings.""" + mock_uow.meetings.count_by_state = AsyncMock(return_value=0) + + service = RecoveryService(mock_uow) + result = await service.count_crashed_meetings() + + assert result == 0 + assert mock_uow.meetings.count_by_state.call_count == 2 + + async def test_count_crashed_meetings_both_states(self, mock_uow: MagicMock) -> None: + """Test counting meetings in both active states.""" + + async def count_by_state(state: MeetingState) -> int: + state_counts = { + MeetingState.RECORDING: 3, + MeetingState.STOPPING: 2, + } + return state_counts.get(state, 0) + + mock_uow.meetings.count_by_state = AsyncMock(side_effect=count_by_state) + + service = RecoveryService(mock_uow) + result = await service.count_crashed_meetings() + + assert result == 5 # 3 RECORDING + 2 STOPPING + + +class TestRecoveryServiceMetadata: + """Tests for recovery metadata handling.""" + + async def test_recovery_preserves_existing_metadata(self, mock_uow: MagicMock) -> None: + """Test recovery preserves existing meeting metadata.""" + meeting = Meeting.create( + title="Has Metadata", + metadata={"project": "NoteFlow", "important": "yes"}, + ) + meeting.start_recording() + + mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1)) + mock_uow.meetings.update = AsyncMock(return_value=meeting) + + service = RecoveryService(mock_uow) + result = await service.recover_crashed_meetings() + + assert len(result) == 1 + # Verify original metadata preserved + assert result[0].metadata["project"] == "NoteFlow" + assert result[0].metadata["important"] == "yes" + # Verify recovery metadata added + assert result[0].metadata["crash_recovered"] == "true" + assert result[0].metadata["crash_previous_state"] == "RECORDING" +```` + +## File: tests/domain/__init__.py +````python +"""Domain unit tests.""" +```` + +## File: tests/domain/test_annotation.py +````python +"""Tests for Annotation entity.""" + +from __future__ import annotations + +from uuid import uuid4 + +import pytest + +from noteflow.domain.entities.annotation import Annotation +from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId + + +class TestAnnotation: + """Tests for Annotation entity.""" + + def test_annotation_valid(self) -> None: + """Annotation can be created with valid fields.""" + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=MeetingId(uuid4()), + annotation_type=AnnotationType.NOTE, + text="Important point", + start_time=1.0, + end_time=2.0, + ) + + assert annotation.text == "Important point" + assert annotation.duration == 1.0 + assert annotation.has_segments() is False + + def test_annotation_invalid_times_raises(self) -> None: + """Annotation raises when end_time < start_time.""" + with pytest.raises(ValueError, match=r"end_time .* must be >= start_time"): + Annotation( + id=AnnotationId(uuid4()), + 
meeting_id=MeetingId(uuid4()), + annotation_type=AnnotationType.DECISION, + text="Bad timing", + start_time=5.0, + end_time=2.0, + ) + + def test_annotation_has_segments(self) -> None: + """has_segments reflects segment_ids list.""" + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=MeetingId(uuid4()), + annotation_type=AnnotationType.ACTION_ITEM, + text="Follow up", + start_time=0.0, + end_time=1.0, + segment_ids=[1, 2], + ) + + assert annotation.has_segments() is True + assert annotation.duration == 1.0 +```` + +## File: tests/domain/test_segment.py +````python +"""Tests for Segment and WordTiming entities.""" + +from __future__ import annotations + +import pytest + +from noteflow.domain.entities.segment import Segment, WordTiming + + +class TestWordTiming: + """Tests for WordTiming entity.""" + + def test_word_timing_valid(self) -> None: + """Test creating valid WordTiming.""" + word = WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=0.95) + assert word.word == "hello" + assert word.start_time == 0.0 + assert word.end_time == 0.5 + assert word.probability == 0.95 + + def test_word_timing_invalid_times_raises(self) -> None: + """Test WordTiming raises on end_time < start_time.""" + with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"): + WordTiming(word="hello", start_time=1.0, end_time=0.5, probability=0.9) + + @pytest.mark.parametrize("prob", [-0.1, 1.1, 2.0]) + def test_word_timing_invalid_probability_raises(self, prob: float) -> None: + """Test WordTiming raises on invalid probability.""" + with pytest.raises(ValueError, match="probability must be between 0 and 1"): + WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=prob) + + @pytest.mark.parametrize("prob", [0.0, 0.5, 1.0]) + def test_word_timing_valid_probability_bounds(self, prob: float) -> None: + """Test WordTiming accepts probability at boundaries.""" + word = WordTiming(word="test", start_time=0.0, end_time=0.5, probability=prob) + assert word.probability == prob + + +class TestSegment: + """Tests for Segment entity.""" + + def test_segment_valid(self) -> None: + """Test creating valid Segment.""" + segment = Segment( + segment_id=0, + text="Hello world", + start_time=0.0, + end_time=2.5, + language="en", + ) + assert segment.segment_id == 0 + assert segment.text == "Hello world" + assert segment.start_time == 0.0 + assert segment.end_time == 2.5 + assert segment.language == "en" + + def test_segment_invalid_times_raises(self) -> None: + """Test Segment raises on end_time < start_time.""" + with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"): + Segment(segment_id=0, text="test", start_time=5.0, end_time=1.0) + + def test_segment_invalid_id_raises(self) -> None: + """Test Segment raises on negative segment_id.""" + with pytest.raises(ValueError, match="segment_id must be non-negative"): + Segment(segment_id=-1, text="test", start_time=0.0, end_time=1.0) + + def test_segment_duration(self) -> None: + """Test duration property calculation.""" + segment = Segment(segment_id=0, text="test", start_time=1.5, end_time=4.0) + assert segment.duration == 2.5 + + def test_segment_word_count_from_text(self) -> None: + """Test word_count from text when no words list.""" + segment = Segment(segment_id=0, text="Hello beautiful world", start_time=0.0, end_time=1.0) + assert segment.word_count == 3 + + def test_segment_word_count_from_words(self) -> None: + """Test word_count from words list when provided.""" + words = [ + WordTiming(word="Hello", 
start_time=0.0, end_time=0.3, probability=0.9), + WordTiming(word="world", start_time=0.3, end_time=0.5, probability=0.95), + ] + segment = Segment( + segment_id=0, + text="Hello world", + start_time=0.0, + end_time=0.5, + words=words, + ) + assert segment.word_count == 2 + + def test_segment_has_embedding_false(self) -> None: + """Test has_embedding returns False when no embedding.""" + segment = Segment(segment_id=0, text="test", start_time=0.0, end_time=1.0) + assert segment.has_embedding() is False + + def test_segment_has_embedding_empty_list(self) -> None: + """Test has_embedding returns False for empty embedding list.""" + segment = Segment(segment_id=0, text="test", start_time=0.0, end_time=1.0, embedding=[]) + assert segment.has_embedding() is False + + def test_segment_has_embedding_true(self) -> None: + """Test has_embedding returns True when embedding exists.""" + segment = Segment( + segment_id=0, + text="test", + start_time=0.0, + end_time=1.0, + embedding=[0.1, 0.2, 0.3], + ) + assert segment.has_embedding() is True +```` + +## File: tests/domain/test_summary.py +````python +"""Tests for Summary, KeyPoint, and ActionItem entities.""" + +from __future__ import annotations + +from datetime import datetime +from uuid import uuid4 + +import pytest + +from noteflow.domain.entities.summary import ActionItem, KeyPoint, Summary +from noteflow.domain.value_objects import MeetingId + + +class TestKeyPoint: + """Tests for KeyPoint entity.""" + + def test_key_point_basic(self) -> None: + """Test creating basic KeyPoint.""" + kp = KeyPoint(text="Important discussion about architecture") + assert kp.text == "Important discussion about architecture" + assert kp.segment_ids == [] + assert kp.start_time == 0.0 + assert kp.end_time == 0.0 + + def test_key_point_has_evidence_false(self) -> None: + """Test has_evidence returns False when no segment_ids.""" + kp = KeyPoint(text="No evidence") + assert kp.has_evidence() is False + + def test_key_point_has_evidence_true(self) -> None: + """Test has_evidence returns True with segment_ids.""" + kp = KeyPoint(text="With evidence", segment_ids=[1, 2, 3]) + assert kp.has_evidence() is True + + def test_key_point_with_timing(self) -> None: + """Test KeyPoint with timing information.""" + kp = KeyPoint( + text="Timed point", + segment_ids=[0, 1], + start_time=10.5, + end_time=25.0, + ) + assert kp.start_time == 10.5 + assert kp.end_time == 25.0 + + +class TestActionItem: + """Tests for ActionItem entity.""" + + def test_action_item_basic(self) -> None: + """Test creating basic ActionItem.""" + ai = ActionItem(text="Review PR #123") + assert ai.text == "Review PR #123" + assert ai.assignee == "" + assert ai.due_date is None + assert ai.priority == 0 + assert ai.segment_ids == [] + + def test_action_item_has_evidence_false(self) -> None: + """Test has_evidence returns False when no segment_ids.""" + ai = ActionItem(text="Task without evidence") + assert ai.has_evidence() is False + + def test_action_item_has_evidence_true(self) -> None: + """Test has_evidence returns True with segment_ids.""" + ai = ActionItem(text="Task with evidence", segment_ids=[5]) + assert ai.has_evidence() is True + + def test_action_item_is_assigned_false(self) -> None: + """Test is_assigned returns False when no assignee.""" + ai = ActionItem(text="Unassigned task") + assert ai.is_assigned() is False + + def test_action_item_is_assigned_true(self) -> None: + """Test is_assigned returns True with assignee.""" + ai = ActionItem(text="Assigned task", assignee="Alice") + assert 
ai.is_assigned() is True + + def test_action_item_has_due_date_false(self) -> None: + """Test has_due_date returns False when no due_date.""" + ai = ActionItem(text="No deadline") + assert ai.has_due_date() is False + + def test_action_item_has_due_date_true(self) -> None: + """Test has_due_date returns True with due_date.""" + ai = ActionItem(text="With deadline", due_date=datetime(2024, 12, 31)) + assert ai.has_due_date() is True + + +class TestSummary: + """Tests for Summary entity.""" + + @pytest.fixture + def meeting_id(self) -> MeetingId: + """Provide a meeting ID for tests.""" + return MeetingId(uuid4()) + + def test_summary_basic(self, meeting_id: MeetingId) -> None: + """Test creating basic Summary.""" + summary = Summary(meeting_id=meeting_id) + assert summary.meeting_id == meeting_id + assert summary.executive_summary == "" + assert summary.key_points == [] + assert summary.action_items == [] + assert summary.generated_at is None + assert summary.model_version == "" + + def test_summary_key_point_count(self, meeting_id: MeetingId) -> None: + """Test key_point_count property.""" + summary = Summary( + meeting_id=meeting_id, + key_points=[ + KeyPoint(text="Point 1"), + KeyPoint(text="Point 2"), + KeyPoint(text="Point 3"), + ], + ) + assert summary.key_point_count == 3 + + def test_summary_action_item_count(self, meeting_id: MeetingId) -> None: + """Test action_item_count property.""" + summary = Summary( + meeting_id=meeting_id, + action_items=[ + ActionItem(text="Task 1"), + ActionItem(text="Task 2"), + ], + ) + assert summary.action_item_count == 2 + + def test_all_points_have_evidence_true(self, meeting_id: MeetingId) -> None: + """Test all_points_have_evidence returns True when all evidenced.""" + summary = Summary( + meeting_id=meeting_id, + key_points=[ + KeyPoint(text="Point 1", segment_ids=[0]), + KeyPoint(text="Point 2", segment_ids=[1, 2]), + ], + ) + assert summary.all_points_have_evidence() is True + + def test_all_points_have_evidence_false(self, meeting_id: MeetingId) -> None: + """Test all_points_have_evidence returns False when some unevidenced.""" + summary = Summary( + meeting_id=meeting_id, + key_points=[ + KeyPoint(text="Point 1", segment_ids=[0]), + KeyPoint(text="Point 2"), # No evidence + ], + ) + assert summary.all_points_have_evidence() is False + + def test_all_actions_have_evidence_true(self, meeting_id: MeetingId) -> None: + """Test all_actions_have_evidence returns True when all evidenced.""" + summary = Summary( + meeting_id=meeting_id, + action_items=[ + ActionItem(text="Task 1", segment_ids=[0]), + ], + ) + assert summary.all_actions_have_evidence() is True + + def test_all_actions_have_evidence_false(self, meeting_id: MeetingId) -> None: + """Test all_actions_have_evidence returns False when some unevidenced.""" + summary = Summary( + meeting_id=meeting_id, + action_items=[ + ActionItem(text="Task 1"), # No evidence + ], + ) + assert summary.all_actions_have_evidence() is False + + def test_is_fully_evidenced_true(self, meeting_id: MeetingId) -> None: + """Test is_fully_evidenced returns True when all items evidenced.""" + summary = Summary( + meeting_id=meeting_id, + key_points=[KeyPoint(text="KP", segment_ids=[0])], + action_items=[ActionItem(text="AI", segment_ids=[1])], + ) + assert summary.is_fully_evidenced() is True + + def test_is_fully_evidenced_false_points(self, meeting_id: MeetingId) -> None: + """Test is_fully_evidenced returns False with unevidenced points.""" + summary = Summary( + meeting_id=meeting_id, + 
key_points=[KeyPoint(text="KP")], # No evidence + action_items=[ActionItem(text="AI", segment_ids=[1])], + ) + assert summary.is_fully_evidenced() is False + + def test_unevidenced_points(self, meeting_id: MeetingId) -> None: + """Test unevidenced_points property filters correctly.""" + kp_no_evidence = KeyPoint(text="No evidence") + kp_with_evidence = KeyPoint(text="With evidence", segment_ids=[0]) + summary = Summary( + meeting_id=meeting_id, + key_points=[kp_no_evidence, kp_with_evidence], + ) + unevidenced = summary.unevidenced_points + assert len(unevidenced) == 1 + assert unevidenced[0] == kp_no_evidence + + def test_unevidenced_actions(self, meeting_id: MeetingId) -> None: + """Test unevidenced_actions property filters correctly.""" + ai_no_evidence = ActionItem(text="No evidence") + ai_with_evidence = ActionItem(text="With evidence", segment_ids=[0]) + summary = Summary( + meeting_id=meeting_id, + action_items=[ai_no_evidence, ai_with_evidence], + ) + unevidenced = summary.unevidenced_actions + assert len(unevidenced) == 1 + assert unevidenced[0] == ai_no_evidence +```` + +## File: tests/domain/test_value_objects.py +````python +"""Tests for domain value objects.""" + +from __future__ import annotations + +from uuid import UUID + +import pytest + +from noteflow.domain.value_objects import MeetingId, MeetingState + + +class TestMeetingState: + """Tests for MeetingState enum.""" + + @pytest.mark.parametrize( + ("current", "target", "expected"), + [ + # UNSPECIFIED transitions + (MeetingState.UNSPECIFIED, MeetingState.CREATED, True), + (MeetingState.UNSPECIFIED, MeetingState.RECORDING, False), + # CREATED transitions + (MeetingState.CREATED, MeetingState.RECORDING, True), + (MeetingState.CREATED, MeetingState.ERROR, True), + (MeetingState.CREATED, MeetingState.STOPPED, False), + # RECORDING transitions (must go through STOPPING) + (MeetingState.RECORDING, MeetingState.STOPPING, True), + (MeetingState.RECORDING, MeetingState.STOPPED, False), + (MeetingState.RECORDING, MeetingState.ERROR, True), + (MeetingState.RECORDING, MeetingState.CREATED, False), + # STOPPING transitions + (MeetingState.STOPPING, MeetingState.STOPPED, True), + (MeetingState.STOPPING, MeetingState.ERROR, True), + (MeetingState.STOPPING, MeetingState.RECORDING, False), + (MeetingState.STOPPING, MeetingState.CREATED, False), + # STOPPED transitions + (MeetingState.STOPPED, MeetingState.COMPLETED, True), + (MeetingState.STOPPED, MeetingState.ERROR, True), + (MeetingState.STOPPED, MeetingState.RECORDING, False), + # COMPLETED transitions + (MeetingState.COMPLETED, MeetingState.ERROR, True), + (MeetingState.COMPLETED, MeetingState.RECORDING, False), + # ERROR is terminal + (MeetingState.ERROR, MeetingState.CREATED, False), + (MeetingState.ERROR, MeetingState.RECORDING, False), + ], + ) + def test_can_transition_to( + self, + current: MeetingState, + target: MeetingState, + expected: bool, + ) -> None: + """Test state transition validation.""" + assert current.can_transition_to(target) == expected + + @pytest.mark.parametrize( + ("value", "expected"), + [ + (0, MeetingState.UNSPECIFIED), + (1, MeetingState.CREATED), + (2, MeetingState.RECORDING), + (3, MeetingState.STOPPED), + (4, MeetingState.COMPLETED), + (5, MeetingState.ERROR), + (6, MeetingState.STOPPING), + ], + ) + def test_from_int_valid(self, value: int, expected: MeetingState) -> None: + """Test conversion from valid integers.""" + assert MeetingState.from_int(value) == expected + + def test_from_int_invalid_raises(self) -> None: + """Test conversion from 
invalid integer raises ValueError.""" + with pytest.raises(ValueError, match="Invalid meeting state"): + MeetingState.from_int(99) + + +class TestMeetingId: + """Tests for MeetingId NewType.""" + + def test_meeting_id_is_uuid(self) -> None: + """Test MeetingId wraps UUID.""" + uuid = UUID("12345678-1234-5678-1234-567812345678") + meeting_id = MeetingId(uuid) + assert meeting_id == uuid + + def test_meeting_id_string_conversion(self) -> None: + """Test MeetingId can be converted to string.""" + uuid = UUID("12345678-1234-5678-1234-567812345678") + meeting_id = MeetingId(uuid) + assert str(meeting_id) == "12345678-1234-5678-1234-567812345678" +```` + +## File: tests/grpc/__init__.py +````python +"""gRPC service tests.""" +```` + +## File: tests/grpc/test_partial_transcription.py +````python +"""Tests for partial transcription in the gRPC service.""" + +from __future__ import annotations + +import time +from dataclasses import dataclass +from unittest.mock import MagicMock + +import numpy as np +import pytest +from numpy.typing import NDArray + +from noteflow.grpc.service import NoteFlowServicer + + +@dataclass +class MockAsrResult: + """Mock ASR transcription result.""" + + text: str + start: float = 0.0 + end: float = 1.0 + language: str = "en" + language_probability: float = 0.99 + avg_logprob: float = -0.5 + no_speech_prob: float = 0.01 + + +def _create_mock_asr_engine(transcribe_results: list[str] | None = None) -> MagicMock: + """Create mock ASR engine with configurable transcription results.""" + engine = MagicMock() + engine.is_loaded = True + engine.model_size = "base" + + results = transcribe_results or ["Test transcription"] + + def _transcribe(_audio: NDArray[np.float32]) -> list[MockAsrResult]: + return [MockAsrResult(text=text) for text in results] + + async def _transcribe_async( + _audio: NDArray[np.float32], + _language: str | None = None, + ) -> list[MockAsrResult]: + return [MockAsrResult(text=text) for text in results] + + engine.transcribe = _transcribe + engine.transcribe_async = _transcribe_async + return engine + + +class TestPartialTranscriptionState: + """Tests for partial transcription state initialization.""" + + def test_init_streaming_state_creates_partial_buffer(self) -> None: + """Initialize streaming state should create empty partial buffer.""" + servicer = NoteFlowServicer() + + servicer._init_streaming_state("meeting-123", next_segment_id=0) + + assert "meeting-123" in servicer._partial_buffers + assert servicer._partial_buffers["meeting-123"] == [] + + def test_init_streaming_state_creates_last_partial_time(self) -> None: + """Initialize streaming state should set last partial time to now.""" + servicer = NoteFlowServicer() + before = time.time() + + servicer._init_streaming_state("meeting-123", next_segment_id=0) + + assert "meeting-123" in servicer._last_partial_time + assert servicer._last_partial_time["meeting-123"] >= before + + def test_init_streaming_state_creates_empty_last_text(self) -> None: + """Initialize streaming state should set last partial text to empty.""" + servicer = NoteFlowServicer() + + servicer._init_streaming_state("meeting-123", next_segment_id=0) + + assert "meeting-123" in servicer._last_partial_text + assert servicer._last_partial_text["meeting-123"] == "" + + def test_cleanup_streaming_state_removes_partial_state(self) -> None: + """Cleanup streaming state should remove all partial-related state.""" + servicer = NoteFlowServicer() + servicer._init_streaming_state("meeting-123", next_segment_id=0) + + 
servicer._cleanup_streaming_state("meeting-123") + + assert "meeting-123" not in servicer._partial_buffers + assert "meeting-123" not in servicer._last_partial_time + assert "meeting-123" not in servicer._last_partial_text + + +class TestClearPartialBuffer: + """Tests for _clear_partial_buffer method.""" + + def test_clear_partial_buffer_empties_buffer(self) -> None: + """Clear partial buffer should empty the audio buffer.""" + servicer = NoteFlowServicer() + servicer._partial_buffers["meeting-123"] = [np.zeros(1600, dtype=np.float32)] + + servicer._clear_partial_buffer("meeting-123") + + assert servicer._partial_buffers["meeting-123"] == [] + + def test_clear_partial_buffer_resets_last_text(self) -> None: + """Clear partial buffer should reset last partial text.""" + servicer = NoteFlowServicer() + servicer._last_partial_text["meeting-123"] = "Previous partial" + + servicer._clear_partial_buffer("meeting-123") + + assert servicer._last_partial_text["meeting-123"] == "" + + def test_clear_partial_buffer_updates_time(self) -> None: + """Clear partial buffer should update last partial time.""" + servicer = NoteFlowServicer() + servicer._last_partial_time["meeting-123"] = 0.0 + before = time.time() + + servicer._clear_partial_buffer("meeting-123") + + assert servicer._last_partial_time["meeting-123"] >= before + + def test_clear_partial_buffer_handles_missing_meeting(self) -> None: + """Clear partial buffer should handle missing meeting gracefully.""" + servicer = NoteFlowServicer() + + servicer._clear_partial_buffer("nonexistent") # Should not raise + + +class TestMaybeEmitPartial: + """Tests for _maybe_emit_partial method.""" + + @pytest.mark.asyncio + async def test_returns_none_when_asr_not_loaded(self) -> None: + """Return None when ASR engine is not loaded.""" + servicer = NoteFlowServicer() + servicer._init_streaming_state("meeting-123", next_segment_id=0) + + result = await servicer._maybe_emit_partial("meeting-123") + + assert result is None + + @pytest.mark.asyncio + async def test_returns_none_when_cadence_not_reached(self) -> None: + """Return None when not enough time has passed since last partial.""" + engine = _create_mock_asr_engine(["Test"]) + servicer = NoteFlowServicer(asr_engine=engine) + servicer._init_streaming_state("meeting-123", next_segment_id=0) + # Set last time to now (cadence not reached) + servicer._last_partial_time["meeting-123"] = time.time() + # Add some audio + audio = np.ones(16000, dtype=np.float32) * 0.1 # 1 second of audio + servicer._partial_buffers["meeting-123"].append(audio) + + result = await servicer._maybe_emit_partial("meeting-123") + + assert result is None + + @pytest.mark.asyncio + async def test_returns_none_when_buffer_empty(self) -> None: + """Return None when partial buffer is empty.""" + engine = _create_mock_asr_engine(["Test"]) + servicer = NoteFlowServicer(asr_engine=engine) + servicer._init_streaming_state("meeting-123", next_segment_id=0) + # Set last time to past (cadence reached) + servicer._last_partial_time["meeting-123"] = time.time() - 10.0 + + result = await servicer._maybe_emit_partial("meeting-123") + + assert result is None + + @pytest.mark.asyncio + async def test_returns_none_when_audio_too_short(self) -> None: + """Return None when buffered audio is less than minimum.""" + engine = _create_mock_asr_engine(["Test"]) + servicer = NoteFlowServicer(asr_engine=engine) + servicer._init_streaming_state("meeting-123", next_segment_id=0) + servicer._last_partial_time["meeting-123"] = time.time() - 10.0 + # Add only 0.1 seconds 
of audio (minimum is 0.5s) + audio = np.ones(1600, dtype=np.float32) * 0.1 # 0.1 second + servicer._partial_buffers["meeting-123"].append(audio) + + result = await servicer._maybe_emit_partial("meeting-123") + + assert result is None + + @pytest.mark.asyncio + async def test_emits_partial_when_conditions_met(self) -> None: + """Emit partial when cadence reached and sufficient audio buffered.""" + engine = _create_mock_asr_engine(["Hello world"]) + servicer = NoteFlowServicer(asr_engine=engine) + servicer._init_streaming_state("meeting-123", next_segment_id=0) + servicer._last_partial_time["meeting-123"] = time.time() - 10.0 + # Add 1 second of audio (above minimum of 0.5s) + audio = np.ones(16000, dtype=np.float32) * 0.1 + servicer._partial_buffers["meeting-123"].append(audio) + + result = await servicer._maybe_emit_partial("meeting-123") + + assert result is not None + assert result.update_type == 1 # UPDATE_TYPE_PARTIAL + assert result.partial_text == "Hello world" + assert result.meeting_id == "meeting-123" + + @pytest.mark.asyncio + async def test_debounces_duplicate_text(self) -> None: + """Return None when text is same as last partial (debounce).""" + engine = _create_mock_asr_engine(["Same text"]) + servicer = NoteFlowServicer(asr_engine=engine) + servicer._init_streaming_state("meeting-123", next_segment_id=0) + servicer._last_partial_time["meeting-123"] = time.time() - 10.0 + servicer._last_partial_text["meeting-123"] = "Same text" # Same as transcription + audio = np.ones(16000, dtype=np.float32) * 0.1 + servicer._partial_buffers["meeting-123"].append(audio) + + result = await servicer._maybe_emit_partial("meeting-123") + + assert result is None + + @pytest.mark.asyncio + async def test_updates_last_partial_state(self) -> None: + """Emitting partial should update last text and time.""" + engine = _create_mock_asr_engine(["New text"]) + servicer = NoteFlowServicer(asr_engine=engine) + servicer._init_streaming_state("meeting-123", next_segment_id=0) + servicer._last_partial_time["meeting-123"] = time.time() - 10.0 + audio = np.ones(16000, dtype=np.float32) * 0.1 + servicer._partial_buffers["meeting-123"].append(audio) + before = time.time() + + await servicer._maybe_emit_partial("meeting-123") + + assert servicer._last_partial_text["meeting-123"] == "New text" + assert servicer._last_partial_time["meeting-123"] >= before + + +class TestPartialCadence: + """Tests for partial transcription cadence constants.""" + + def test_partial_cadence_is_2_seconds(self) -> None: + """Partial cadence should be 2 seconds per spec.""" + assert NoteFlowServicer.PARTIAL_CADENCE_SECONDS == 2.0 + + def test_min_partial_audio_is_half_second(self) -> None: + """Minimum partial audio should be 0.5 seconds.""" + assert NoteFlowServicer.MIN_PARTIAL_AUDIO_SECONDS == 0.5 + + +class TestPartialBufferAccumulation: + """Tests for audio buffer accumulation during speech.""" + + @pytest.mark.asyncio + async def test_speech_audio_added_to_buffer(self) -> None: + """Speech audio should be accumulated in partial buffer.""" + engine = _create_mock_asr_engine() + servicer = NoteFlowServicer(asr_engine=engine) + servicer._init_streaming_state("meeting-123", next_segment_id=0) + + # Simulate speech detection by processing audio + audio = np.ones(1600, dtype=np.float32) * 0.1 + + # Mock VAD to return True (is_speech) + servicer._vad_instances["meeting-123"].process_chunk = MagicMock(return_value=True) + + updates = [] + async for update in servicer._process_audio_with_vad("meeting-123", audio): + updates.append(update) + 
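+        # Note: the stream may yield no updates yet, since the partial cadence
+        # timer was just initialized; only buffering is asserted below.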
+ # Buffer should have audio added + assert len(servicer._partial_buffers["meeting-123"]) >= 1 + + @pytest.mark.asyncio + async def test_silence_does_not_add_to_buffer(self) -> None: + """Silent audio should not be added to partial buffer.""" + engine = _create_mock_asr_engine() + servicer = NoteFlowServicer(asr_engine=engine) + servicer._init_streaming_state("meeting-123", next_segment_id=0) + + audio = np.zeros(1600, dtype=np.float32) # Silence + + # Mock VAD to return False (is_silence) + servicer._vad_instances["meeting-123"].process_chunk = MagicMock(return_value=False) + + updates = [] + async for update in servicer._process_audio_with_vad("meeting-123", audio): + updates.append(update) + + # Buffer should still be empty + assert servicer._partial_buffers["meeting-123"] == [] + + +class TestPartialIntegrationWithFinal: + """Tests for partial buffer clearing when final segment emitted.""" + + @pytest.mark.asyncio + async def test_buffer_cleared_on_final_segment(self) -> None: + """Partial buffer should be cleared when a final segment is produced.""" + servicer = NoteFlowServicer() + servicer._init_streaming_state("meeting-123", next_segment_id=0) + + # Add some audio to buffer + audio = np.ones(16000, dtype=np.float32) * 0.1 + servicer._partial_buffers["meeting-123"].append(audio) + servicer._last_partial_text["meeting-123"] = "Some partial" + + # Clear buffer (simulates final segment emission) + servicer._clear_partial_buffer("meeting-123") + + assert servicer._partial_buffers["meeting-123"] == [] + assert servicer._last_partial_text["meeting-123"] == "" +```` + +## File: tests/infrastructure/asr/__init__.py +````python +"""ASR infrastructure tests.""" +```` + +## File: tests/infrastructure/audio/__init__.py +````python +"""Audio infrastructure tests package.""" +```` + +## File: tests/infrastructure/audio/conftest.py +````python +"""Test fixtures for audio infrastructure tests.""" + +from __future__ import annotations + +import numpy as np +import pytest +from numpy.typing import NDArray + +from noteflow.infrastructure.audio import TimestampedAudio + + +@pytest.fixture +def silence_audio() -> NDArray[np.float32]: + """Return silent audio (all zeros).""" + return np.zeros(1600, dtype=np.float32) # 100ms at 16kHz + + +@pytest.fixture +def full_scale_audio() -> NDArray[np.float32]: + """Return full-scale audio (all ones).""" + return np.ones(1600, dtype=np.float32) + + +@pytest.fixture +def half_scale_audio() -> NDArray[np.float32]: + """Return half-scale audio (all 0.5).""" + return np.full(1600, 0.5, dtype=np.float32) + + +@pytest.fixture +def sample_timestamped_audio() -> TimestampedAudio: + """Return sample timestamped audio chunk.""" + return TimestampedAudio( + frames=np.zeros(1600, dtype=np.float32), + timestamp=0.0, + duration=0.1, + ) + + +@pytest.fixture +def timestamped_audio_sequence() -> list[TimestampedAudio]: + """Return sequence of timestamped audio chunks for buffer tests.""" + return [ + TimestampedAudio( + frames=np.zeros(1600, dtype=np.float32), + timestamp=float(i) * 0.1, + duration=0.1, + ) + for i in range(10) + ] +```` + +## File: tests/infrastructure/audio/test_dto.py +````python +"""Tests for audio DTOs.""" + +from __future__ import annotations + +from dataclasses import FrozenInstanceError + +import numpy as np +import pytest + +from noteflow.infrastructure.audio import AudioDeviceInfo, TimestampedAudio + + +class TestAudioDeviceInfo: + """Tests for AudioDeviceInfo dataclass.""" + + def test_audio_device_info_creation(self) -> None: + """Test AudioDeviceInfo 
can be created with all fields.""" + device = AudioDeviceInfo( + device_id=0, + name="Test Microphone", + channels=2, + sample_rate=48000, + is_default=True, + ) + assert device.device_id == 0 + assert device.name == "Test Microphone" + assert device.channels == 2 + assert device.sample_rate == 48000 + assert device.is_default is True + + def test_audio_device_info_frozen(self) -> None: + """Test AudioDeviceInfo is immutable (frozen).""" + device = AudioDeviceInfo( + device_id=0, + name="Test", + channels=1, + sample_rate=16000, + is_default=False, + ) + with pytest.raises(FrozenInstanceError): + # Intentionally assign to frozen field to verify immutability + device.name = "Modified" # type: ignore[misc] + + +class TestTimestampedAudio: + """Tests for TimestampedAudio dataclass.""" + + def test_timestamped_audio_creation(self) -> None: + """Test TimestampedAudio can be created with valid values.""" + frames = np.zeros(1600, dtype=np.float32) + audio = TimestampedAudio( + frames=frames, + timestamp=1.0, + duration=0.1, + ) + assert len(audio.frames) == 1600 + assert audio.timestamp == 1.0 + assert audio.duration == 0.1 + + def test_timestamped_audio_negative_duration_raises(self) -> None: + """Test TimestampedAudio raises on negative duration.""" + frames = np.zeros(1600, dtype=np.float32) + with pytest.raises(ValueError, match="Duration must be non-negative"): + TimestampedAudio( + frames=frames, + timestamp=0.0, + duration=-0.1, + ) + + def test_timestamped_audio_negative_timestamp_raises(self) -> None: + """Test TimestampedAudio raises on negative timestamp.""" + frames = np.zeros(1600, dtype=np.float32) + with pytest.raises(ValueError, match="Timestamp must be non-negative"): + TimestampedAudio( + frames=frames, + timestamp=-1.0, + duration=0.1, + ) + + def test_timestamped_audio_zero_duration_valid(self) -> None: + """Test TimestampedAudio accepts zero duration.""" + frames = np.zeros(0, dtype=np.float32) + audio = TimestampedAudio( + frames=frames, + timestamp=0.0, + duration=0.0, + ) + assert audio.duration == 0.0 + + def test_timestamped_audio_zero_timestamp_valid(self) -> None: + """Test TimestampedAudio accepts zero timestamp.""" + frames = np.zeros(1600, dtype=np.float32) + audio = TimestampedAudio( + frames=frames, + timestamp=0.0, + duration=0.1, + ) + assert audio.timestamp == 0.0 +```` + +## File: tests/infrastructure/audio/test_reader.py +````python +"""Tests for MeetingAudioReader.""" + +from __future__ import annotations + +import json +from pathlib import Path +from uuid import uuid4 + +import numpy as np +import pytest + +from noteflow.infrastructure.audio.reader import MeetingAudioReader +from noteflow.infrastructure.audio.writer import MeetingAudioWriter +from noteflow.infrastructure.security.crypto import AesGcmCryptoBox +from noteflow.infrastructure.security.keystore import InMemoryKeyStore + + +@pytest.fixture +def crypto() -> AesGcmCryptoBox: + """Create crypto instance with in-memory keystore.""" + keystore = InMemoryKeyStore() + return AesGcmCryptoBox(keystore) + + +@pytest.fixture +def meetings_dir(tmp_path: Path) -> Path: + """Create temporary meetings directory.""" + return tmp_path / "meetings" + + +def test_audio_exists_requires_manifest( + crypto: AesGcmCryptoBox, + meetings_dir: Path, +) -> None: + """audio_exists should require both audio.enc and manifest.json.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True, exist_ok=True) + + # Only audio.enc present -> False + (meeting_dir / 
"audio.enc").write_bytes(b"") + reader = MeetingAudioReader(crypto, meetings_dir) + assert reader.audio_exists(meeting_id) is False + + # Add manifest.json -> True + (meeting_dir / "manifest.json").write_text(json.dumps({"sample_rate": 16000})) + assert reader.audio_exists(meeting_id) is True + + +def test_reader_uses_manifest_sample_rate( + crypto: AesGcmCryptoBox, + meetings_dir: Path, +) -> None: + """Reader should expose sample_rate from manifest and use it for durations.""" + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer = MeetingAudioWriter(crypto, meetings_dir) + writer.open(meeting_id, dek, wrapped_dek, sample_rate=48000) + writer.write_chunk(np.zeros(1600, dtype=np.float32)) # 1600 samples @ 48kHz + writer.close() + + reader = MeetingAudioReader(crypto, meetings_dir) + chunks = reader.load_meeting_audio(meeting_id) + + assert reader.sample_rate == 48000 + assert len(chunks) == 1 + assert chunks[0].duration == pytest.approx(1600 / 48000, rel=1e-6) +```` + +## File: tests/infrastructure/audio/test_ring_buffer.py +````python +"""Tests for TimestampedRingBuffer.""" + +from __future__ import annotations + +import numpy as np +import pytest + +from noteflow.infrastructure.audio import TimestampedAudio, TimestampedRingBuffer + + +class TestTimestampedRingBuffer: + """Tests for TimestampedRingBuffer class.""" + + def test_init_with_valid_duration(self) -> None: + """Test buffer initialization with valid max_duration.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + assert buffer.max_duration == 10.0 + assert buffer.duration == 0.0 + assert buffer.chunk_count == 0 + + def test_init_with_default_duration(self) -> None: + """Test buffer uses default max_duration of 30 seconds.""" + buffer = TimestampedRingBuffer() + assert buffer.max_duration == 30.0 + + def test_init_with_invalid_duration_raises(self) -> None: + """Test buffer raises on non-positive max_duration.""" + with pytest.raises(ValueError, match="max_duration must be positive"): + TimestampedRingBuffer(max_duration=0.0) + + with pytest.raises(ValueError, match="max_duration must be positive"): + TimestampedRingBuffer(max_duration=-1.0) + + def test_push_single_chunk(self, sample_timestamped_audio: TimestampedAudio) -> None: + """Test pushing single audio chunk.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + buffer.push(sample_timestamped_audio) + + assert buffer.chunk_count == 1 + assert buffer.duration == sample_timestamped_audio.duration + + def test_push_multiple_chunks(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None: + """Test pushing multiple audio chunks.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + for audio in timestamped_audio_sequence: + buffer.push(audio) + + assert buffer.chunk_count == 10 + assert buffer.duration == pytest.approx(1.0, rel=1e-9) # 10 chunks * 0.1s + + def test_push_evicts_old_at_capacity(self) -> None: + """Test old chunks are evicted when buffer exceeds max_duration.""" + buffer = TimestampedRingBuffer(max_duration=0.5) # 500ms max + + # Push 10 chunks of 0.1s each (1.0s total) + for i in range(10): + audio = TimestampedAudio( + frames=np.zeros(1600, dtype=np.float32), + timestamp=float(i) * 0.1, + duration=0.1, + ) + buffer.push(audio) + + # Should only keep ~5 chunks (0.5s worth) + assert buffer.duration <= 0.5 + assert buffer.chunk_count <= 6 # May keep one extra during eviction + + def test_get_window_returns_requested_duration( + self, timestamped_audio_sequence: list[TimestampedAudio] + ) -> 
None: + """Test get_window returns chunks for requested duration.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + for audio in timestamped_audio_sequence: + buffer.push(audio) + + # Request 0.3 seconds (should get ~3 chunks) + window = buffer.get_window(0.3) + total_duration = sum(a.duration for a in window) + + assert total_duration >= 0.3 + assert len(window) >= 3 + + def test_get_window_empty_returns_empty(self) -> None: + """Test get_window on empty buffer returns empty list.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + window = buffer.get_window(1.0) + + assert window == [] + + def test_get_window_negative_returns_empty( + self, sample_timestamped_audio: TimestampedAudio + ) -> None: + """Test get_window with negative duration returns empty list.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + buffer.push(sample_timestamped_audio) + + window = buffer.get_window(-1.0) + assert window == [] + + def test_get_window_zero_returns_empty( + self, sample_timestamped_audio: TimestampedAudio + ) -> None: + """Test get_window with zero duration returns empty list.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + buffer.push(sample_timestamped_audio) + + window = buffer.get_window(0.0) + assert window == [] + + def test_get_window_exceeds_buffer_returns_all( + self, timestamped_audio_sequence: list[TimestampedAudio] + ) -> None: + """Test get_window with duration > buffer returns all chunks.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + for audio in timestamped_audio_sequence: + buffer.push(audio) + + window = buffer.get_window(100.0) # Request more than available + assert len(window) == 10 + + def test_get_window_chronological_order( + self, timestamped_audio_sequence: list[TimestampedAudio] + ) -> None: + """Test get_window returns chunks in chronological order.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + for audio in timestamped_audio_sequence: + buffer.push(audio) + + window = buffer.get_window(1.0) + + # Verify timestamps are increasing + for i in range(1, len(window)): + assert window[i].timestamp >= window[i - 1].timestamp + + def test_get_all_returns_all_chunks( + self, timestamped_audio_sequence: list[TimestampedAudio] + ) -> None: + """Test get_all returns all buffered chunks.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + for audio in timestamped_audio_sequence: + buffer.push(audio) + + all_chunks = buffer.get_all() + assert len(all_chunks) == 10 + + def test_clear_removes_all(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None: + """Test clear removes all chunks and resets duration.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + for audio in timestamped_audio_sequence: + buffer.push(audio) + + buffer.clear() + + assert buffer.chunk_count == 0 + assert buffer.duration == 0.0 + assert len(buffer) == 0 + + def test_duration_property(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None: + """Test duration property tracks total buffered duration.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + + assert buffer.duration == 0.0 + + for i, audio in enumerate(timestamped_audio_sequence): + buffer.push(audio) + expected = (i + 1) * 0.1 + assert buffer.duration == pytest.approx(expected, rel=1e-9) + + def test_chunk_count_property(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None: + """Test chunk_count property tracks number of chunks.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + + for i, audio in enumerate(timestamped_audio_sequence): + 
buffer.push(audio) + assert buffer.chunk_count == i + 1 + + def test_max_duration_property(self) -> None: + """Test max_duration property returns configured value.""" + buffer = TimestampedRingBuffer(max_duration=15.0) + assert buffer.max_duration == 15.0 + + def test_len_returns_chunk_count( + self, timestamped_audio_sequence: list[TimestampedAudio] + ) -> None: + """Test __len__ returns chunk count.""" + buffer = TimestampedRingBuffer(max_duration=10.0) + for audio in timestamped_audio_sequence: + buffer.push(audio) + + assert len(buffer) == buffer.chunk_count +```` + +## File: tests/infrastructure/audio/test_writer.py +````python +"""Tests for MeetingAudioWriter.""" + +from __future__ import annotations + +import json +from pathlib import Path +from uuid import uuid4 + +import numpy as np +import pytest + +from noteflow.infrastructure.audio.writer import MeetingAudioWriter +from noteflow.infrastructure.security.crypto import AesGcmCryptoBox, ChunkedAssetReader +from noteflow.infrastructure.security.keystore import InMemoryKeyStore + + +@pytest.fixture +def crypto() -> AesGcmCryptoBox: + """Create crypto instance with in-memory keystore.""" + keystore = InMemoryKeyStore() + return AesGcmCryptoBox(keystore) + + +@pytest.fixture +def meetings_dir(tmp_path: Path) -> Path: + """Create temporary meetings directory.""" + return tmp_path / "meetings" + + +class TestMeetingAudioWriterBasics: + """Tests for MeetingAudioWriter basic operations.""" + + def test_writer_creates_meeting_directory( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test writer creates meeting directory structure.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + + meeting_dir = meetings_dir / meeting_id + assert meeting_dir.exists() + assert (meeting_dir / "manifest.json").exists() + assert (meeting_dir / "audio.enc").exists() + + writer.close() + + def test_manifest_contains_correct_metadata( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test manifest.json has required fields.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek, sample_rate=16000) + writer.close() + + manifest_path = meetings_dir / meeting_id / "manifest.json" + manifest = json.loads(manifest_path.read_text()) + + assert manifest["meeting_id"] == meeting_id + assert manifest["sample_rate"] == 16000 + assert manifest["channels"] == 1 + assert manifest["format"] == "pcm16" + assert "wrapped_dek" in manifest + assert "created_at" in manifest + + def test_write_chunk_converts_float32_to_pcm16( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test audio conversion from float32 to PCM16.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + + # Create test audio: 1600 samples = 0.1 seconds at 16kHz + test_audio = np.linspace(-1.0, 1.0, 1600, dtype=np.float32) + writer.write_chunk(test_audio) + + assert writer.bytes_written > 0 + # PCM16 = 2 bytes/sample = 3200 bytes raw, but encrypted with overhead + assert writer.bytes_written > 3200 + assert writer.chunk_count == 1 + + writer.close() + + def test_multiple_chunks_written( + self, + crypto: 
AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test writing multiple audio chunks.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + + # Write 100 chunks + for _ in range(100): + audio = np.random.uniform(-0.5, 0.5, 1600).astype(np.float32) + writer.write_chunk(audio) + + # Should have written significant data + assert writer.bytes_written > 100 * 3200 # At least raw PCM16 size + assert writer.chunk_count == 100 + + writer.close() + + def test_write_chunk_clamps_audio_range( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test audio values outside [-1, 1] are clamped before encoding.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + writer.write_chunk(np.array([-2.0, 0.0, 2.0], dtype=np.float32)) + writer.close() + + audio_path = meetings_dir / meeting_id / "audio.enc" + reader = ChunkedAssetReader(crypto) + reader.open(audio_path, dek) + + chunk_bytes = next(reader.read_chunks()) + pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16) + audio_float = pcm16.astype(np.float32) / 32767.0 + + assert audio_float.min() >= -1.0 + assert audio_float.max() <= 1.0 + + reader.close() + + +class TestMeetingAudioWriterErrors: + """Tests for MeetingAudioWriter error handling.""" + + def test_writer_raises_if_already_open( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test writer raises RuntimeError if opened twice.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(str(uuid4()), dek, wrapped_dek) + + with pytest.raises(RuntimeError, match="already open"): + writer.open(str(uuid4()), dek, wrapped_dek) + + writer.close() + + def test_writer_raises_if_write_when_not_open( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test writer raises RuntimeError if write called before open.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + audio = np.zeros(1600, dtype=np.float32) + + with pytest.raises(RuntimeError, match="not open"): + writer.write_chunk(audio) + + def test_close_is_idempotent( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test close can be called multiple times safely.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(str(uuid4()), dek, wrapped_dek) + writer.close() + writer.close() # Should not raise + writer.close() # Should not raise + + +class TestMeetingAudioWriterProperties: + """Tests for MeetingAudioWriter properties.""" + + def test_is_open_property( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test is_open property reflects writer state.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + assert writer.is_open is False + + writer.open(str(uuid4()), dek, wrapped_dek) + assert writer.is_open is True + + writer.close() + assert writer.is_open is False + + def test_meeting_dir_property( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test meeting_dir property returns correct path.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + dek = crypto.generate_dek() + wrapped_dek 
= crypto.wrap_dek(dek) + meeting_id = str(uuid4()) + + assert writer.meeting_dir is None + + writer.open(meeting_id, dek, wrapped_dek) + assert writer.meeting_dir == meetings_dir / meeting_id + + writer.close() + assert writer.meeting_dir is None + + def test_bytes_written_when_closed( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test bytes_written returns 0 when not open.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + assert writer.bytes_written == 0 + + +class TestMeetingAudioWriterIntegration: + """Integration tests for audio roundtrip.""" + + def test_audio_roundtrip_encryption_decryption( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test writing audio, then reading it back encrypted.""" + # Write audio + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + + # Write 10 chunks of known audio + original_chunks: list[np.ndarray] = [] + for i in range(10): + audio = np.sin(2 * np.pi * 440 * np.linspace(i, i + 0.1, 1600)).astype(np.float32) + original_chunks.append(audio) + writer.write_chunk(audio) + + writer.close() + + # Read audio back + audio_path = meetings_dir / meeting_id / "audio.enc" + assert audio_path.exists() + + reader = ChunkedAssetReader(crypto) + reader.open(audio_path, dek) + + read_chunks: list[np.ndarray] = [] + for chunk_bytes in reader.read_chunks(): + # Convert bytes back to PCM16 then to float32 + pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16) + audio_float = pcm16.astype(np.float32) / 32767.0 + read_chunks.append(audio_float) + + reader.close() + + # Verify we read same number of chunks + assert len(read_chunks) == len(original_chunks) + + # Verify audio content matches (within quantization error) + for orig, read in zip(original_chunks, read_chunks, strict=True): + # PCM16 quantization adds ~0.00003 max error + assert np.allclose(orig, read, atol=0.0001) + + def test_manifest_wrapped_dek_can_decrypt_audio( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test that wrapped_dek from manifest can decrypt audio file.""" + # Write audio + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + writer.write_chunk(np.zeros(1600, dtype=np.float32)) + writer.close() + + # Read manifest + manifest_path = meetings_dir / meeting_id / "manifest.json" + manifest = json.loads(manifest_path.read_text()) + wrapped_dek_hex = manifest["wrapped_dek"] + + # Unwrap DEK from manifest + unwrapped_dek = crypto.unwrap_dek(bytes.fromhex(wrapped_dek_hex)) + + # Use unwrapped DEK to read audio + audio_path = meetings_dir / meeting_id / "audio.enc" + reader = ChunkedAssetReader(crypto) + reader.open(audio_path, unwrapped_dek) + + chunks = list(reader.read_chunks()) + assert len(chunks) == 1 # Should read the one chunk we wrote + + reader.close() +```` + +## File: tests/infrastructure/export/test_formatting.py +````python +"""Tests for export formatting helpers.""" + +from __future__ import annotations + +from datetime import datetime + +from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp + + +class TestFormatTimestamp: + """Tests for format_timestamp.""" + + def test_format_timestamp_under_hour(self) -> None: + assert format_timestamp(0) == "0:00" + assert format_timestamp(59) == "0:59" 
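+        # m:ss formatting below one hour; the hour field only appears from 3600s (next test).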
+        assert format_timestamp(60) == "1:00"
+        assert format_timestamp(125) == "2:05"
+
+    def test_format_timestamp_over_hour(self) -> None:
+        assert format_timestamp(3600) == "1:00:00"
+        assert format_timestamp(3661) == "1:01:01"
+
+
+class TestFormatDatetime:
+    """Tests for format_datetime."""
+
+    def test_format_datetime_none(self) -> None:
+        assert format_datetime(None) == ""
+
+    def test_format_datetime_value(self) -> None:
+        dt = datetime(2024, 1, 1, 12, 30, 15)
+        assert format_datetime(dt) == "2024-01-01 12:30:15"
+````
+
+## File: tests/infrastructure/export/test_html.py
+````python
+"""Tests for HTML exporter."""
+
+from __future__ import annotations
+
+from noteflow.domain.entities import ActionItem, KeyPoint, Meeting, Segment, Summary
+from noteflow.infrastructure.export.html import HtmlExporter
+
+
+class TestHtmlExporter:
+    """Tests for HtmlExporter output."""
+
+    def test_export_escapes_html(self) -> None:
+        meeting = Meeting.create(title="<Weekly & Sync>")
+        segments = [
+            Segment(segment_id=0, text="Hello <team>", start_time=0.0, end_time=1.0),
+        ]
+        summary = Summary(
+            meeting_id=meeting.id,
+            executive_summary="Summary with <b>bold</b>",
+            key_points=[KeyPoint(text="Key <point>")],
+            action_items=[ActionItem(text="Do <thing>", assignee="bob<")],
+        )
+        meeting.summary = summary
+
+        exporter = HtmlExporter()
+        output = exporter.export(meeting, segments)
+
+        assert "&lt;Weekly &amp; Sync&gt;" in output
+        assert "Hello &lt;team&gt;" in output
+        assert "Summary with &lt;b&gt;bold&lt;/b&gt;" in output
+        assert "Key &lt;point&gt;" in output
+        assert "@bob&lt;" in output
+````
+
+## File: tests/infrastructure/export/test_markdown.py
+````python
+"""Tests for Markdown exporter."""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from noteflow.domain.entities import ActionItem, KeyPoint, Meeting, Segment, Summary
+from noteflow.infrastructure.export.markdown import MarkdownExporter
+
+
+class TestMarkdownExporter:
+    """Tests for MarkdownExporter output."""
+
+    def test_export_includes_sections(self) -> None:
+        meeting = Meeting.create(title="Weekly Sync")
+        meeting.started_at = datetime(2024, 1, 1, 9, 0, 0)
+        meeting.ended_at = datetime(2024, 1, 1, 9, 30, 0)
+
+        segments = [
+            Segment(segment_id=0, text="Hello team", start_time=0.0, end_time=1.0),
+            Segment(segment_id=1, text="Next steps", start_time=1.0, end_time=2.0),
+        ]
+
+        summary = Summary(
+            meeting_id=meeting.id,
+            executive_summary="Great meeting.",
+            key_points=[KeyPoint(text="KP1")],
+            action_items=[ActionItem(text="Do thing", assignee="alice")],
+        )
+        meeting.summary = summary
+
+        exporter = MarkdownExporter()
+        output = exporter.export(meeting, segments)
+
+        assert "# Weekly Sync" in output
+        assert "## Meeting Info" in output
+        assert "## Transcript" in output
+        assert "**[0:00]** Hello team" in output
+        assert "## Summary" in output
+        assert "### Key Points" in output
+        assert "- KP1" in output
+        assert "### Action Items" in output
+        assert "- [ ] Do thing (@alice)" in output
+        assert "Exported from NoteFlow" in output
+````
+
+## File: tests/infrastructure/security/test_crypto.py
+````python
+"""Tests for crypto error paths and asset reader behavior."""
+
+from __future__ import annotations
+
+import struct
+from pathlib import Path
+
+import pytest
+
+from noteflow.infrastructure.security.crypto import (
+    FILE_MAGIC,
+    FILE_VERSION,
+    AesGcmCryptoBox,
+    ChunkedAssetReader,
+    ChunkedAssetWriter,
+)
+from noteflow.infrastructure.security.keystore import InMemoryKeyStore
+
+
+@pytest.fixture
+def crypto() -> AesGcmCryptoBox:
+    """Crypto box with in-memory key store."""
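+    # InMemoryKeyStore holds key material in process memory, keeping these tests self-contained.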
return AesGcmCryptoBox(InMemoryKeyStore()) + + +class TestAesGcmCryptoBox: + """Tests for AesGcmCryptoBox edge cases.""" + + def test_unwrap_dek_too_short_raises(self, crypto: AesGcmCryptoBox) -> None: + """unwrap_dek rejects payloads shorter than nonce+ciphertext+tag.""" + with pytest.raises(ValueError, match="Invalid wrapped DEK"): + crypto.unwrap_dek(b"short") + + +class TestChunkedAssetReader: + """Tests for ChunkedAssetReader validation.""" + + def test_open_invalid_magic_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None: + """Reader rejects files with invalid magic.""" + path = tmp_path / "bad_magic.enc" + path.write_bytes(b"BAD!" + bytes([FILE_VERSION])) + + reader = ChunkedAssetReader(crypto) + with pytest.raises(ValueError, match="Invalid file format"): + reader.open(path, crypto.generate_dek()) + + def test_open_invalid_version_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None: + """Reader rejects unsupported file versions.""" + path = tmp_path / "bad_version.enc" + path.write_bytes(FILE_MAGIC + bytes([FILE_VERSION + 1])) + + reader = ChunkedAssetReader(crypto) + with pytest.raises(ValueError, match="Unsupported file version"): + reader.open(path, crypto.generate_dek()) + + def test_read_truncated_chunk_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None: + """Reader errors on truncated chunk data.""" + path = tmp_path / "truncated.enc" + with path.open("wb") as handle: + handle.write(FILE_MAGIC) + handle.write(struct.pack("B", FILE_VERSION)) + handle.write(struct.pack(">I", 10)) # claim 10 bytes + handle.write(b"12345") # only 5 bytes provided + + reader = ChunkedAssetReader(crypto) + reader.open(path, crypto.generate_dek()) + with pytest.raises(ValueError, match="Truncated chunk"): + list(reader.read_chunks()) + + reader.close() + + def test_read_with_wrong_dek_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None: + """Decrypting with the wrong key fails.""" + path = tmp_path / "wrong_key.enc" + dek = crypto.generate_dek() + other_dek = crypto.generate_dek() + + writer = ChunkedAssetWriter(crypto) + writer.open(path, dek) + writer.write_chunk(b"hello") + writer.close() + + reader = ChunkedAssetReader(crypto) + reader.open(path, other_dek) + with pytest.raises(ValueError, match="Chunk decryption failed"): + list(reader.read_chunks()) + reader.close() +```` + +## File: tests/infrastructure/summarization/test_citation_verifier.py +````python +"""Tests for citation verification.""" + +from __future__ import annotations + +from uuid import uuid4 + +import pytest + +from noteflow.domain.entities import ActionItem, KeyPoint, Segment, Summary +from noteflow.domain.value_objects import MeetingId +from noteflow.infrastructure.summarization import SegmentCitationVerifier + + +def _segment(segment_id: int, text: str = "Test") -> Segment: + """Create a test segment.""" + return Segment( + segment_id=segment_id, + text=text, + start_time=segment_id * 5.0, + end_time=(segment_id + 1) * 5.0, + ) + + +def _key_point(text: str, segment_ids: list[int]) -> KeyPoint: + """Create a test key point.""" + return KeyPoint(text=text, segment_ids=segment_ids) + + +def _action_item(text: str, segment_ids: list[int]) -> ActionItem: + """Create a test action item.""" + return ActionItem(text=text, segment_ids=segment_ids) + + +def _summary( + key_points: list[KeyPoint] | None = None, + action_items: list[ActionItem] | None = None, +) -> Summary: + """Create a test summary.""" + return Summary( + meeting_id=MeetingId(uuid4()), + executive_summary="Test summary", + 
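+        # Omitted collections default to empty lists so each test supplies only what it asserts.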
key_points=key_points or [], + action_items=action_items or [], + ) + + +class TestSegmentCitationVerifier: + """Tests for SegmentCitationVerifier.""" + + @pytest.fixture + def verifier(self) -> SegmentCitationVerifier: + """Create verifier instance.""" + return SegmentCitationVerifier() + + def test_verify_valid_citations(self, verifier: SegmentCitationVerifier) -> None: + """All citations valid should return is_valid=True.""" + segments = [_segment(0), _segment(1), _segment(2)] + summary = _summary( + key_points=[_key_point("Point 1", [0, 1])], + action_items=[_action_item("Action 1", [2])], + ) + + result = verifier.verify_citations(summary, segments) + + assert result.is_valid is True + assert result.invalid_key_point_indices == () + assert result.invalid_action_item_indices == () + assert result.missing_segment_ids == () + + def test_verify_invalid_key_point_citation(self, verifier: SegmentCitationVerifier) -> None: + """Invalid segment_id in key point should be detected.""" + segments = [_segment(0), _segment(1)] + summary = _summary( + key_points=[_key_point("Point 1", [0, 99])], # 99 doesn't exist + ) + + result = verifier.verify_citations(summary, segments) + + assert result.is_valid is False + assert result.invalid_key_point_indices == (0,) + assert result.invalid_action_item_indices == () + assert result.missing_segment_ids == (99,) + + def test_verify_invalid_action_item_citation(self, verifier: SegmentCitationVerifier) -> None: + """Invalid segment_id in action item should be detected.""" + segments = [_segment(0), _segment(1)] + summary = _summary( + action_items=[_action_item("Action 1", [50])], # 50 doesn't exist + ) + + result = verifier.verify_citations(summary, segments) + + assert result.is_valid is False + assert result.invalid_key_point_indices == () + assert result.invalid_action_item_indices == (0,) + assert result.missing_segment_ids == (50,) + + def test_verify_multiple_invalid_citations(self, verifier: SegmentCitationVerifier) -> None: + """Multiple invalid citations should all be detected.""" + segments = [_segment(0)] + summary = _summary( + key_points=[ + _key_point("Point 1", [0]), + _key_point("Point 2", [1]), # Invalid + _key_point("Point 3", [2]), # Invalid + ], + action_items=[ + _action_item("Action 1", [3]), # Invalid + ], + ) + + result = verifier.verify_citations(summary, segments) + + assert result.is_valid is False + assert result.invalid_key_point_indices == (1, 2) + assert result.invalid_action_item_indices == (0,) + assert result.missing_segment_ids == (1, 2, 3) + + def test_verify_empty_summary(self, verifier: SegmentCitationVerifier) -> None: + """Empty summary should be valid.""" + segments = [_segment(0)] + summary = _summary() + + result = verifier.verify_citations(summary, segments) + + assert result.is_valid is True + + def test_verify_empty_segments(self, verifier: SegmentCitationVerifier) -> None: + """Summary with citations but no segments should be invalid.""" + segments: list[Segment] = [] + summary = _summary(key_points=[_key_point("Point 1", [0])]) + + result = verifier.verify_citations(summary, segments) + + assert result.is_valid is False + assert result.missing_segment_ids == (0,) + + def test_verify_empty_citations(self, verifier: SegmentCitationVerifier) -> None: + """Key points/actions with empty segment_ids should be valid.""" + segments = [_segment(0)] + summary = _summary( + key_points=[_key_point("Point 1", [])], # No citations + action_items=[_action_item("Action 1", [])], # No citations + ) + + result = 
verifier.verify_citations(summary, segments) + + assert result.is_valid is True + + def test_invalid_count_property(self, verifier: SegmentCitationVerifier) -> None: + """invalid_count should sum key point and action item invalid counts.""" + segments = [_segment(0)] + summary = _summary( + key_points=[ + _key_point("Point 1", [1]), # Invalid + _key_point("Point 2", [2]), # Invalid + ], + action_items=[ + _action_item("Action 1", [3]), # Invalid + ], + ) + + result = verifier.verify_citations(summary, segments) + + assert result.invalid_count == 3 + + +class TestFilterInvalidCitations: + """Tests for filter_invalid_citations method.""" + + @pytest.fixture + def verifier(self) -> SegmentCitationVerifier: + """Create verifier instance.""" + return SegmentCitationVerifier() + + def test_filter_removes_invalid_segment_ids(self, verifier: SegmentCitationVerifier) -> None: + """Invalid segment_ids should be removed from citations.""" + segments = [_segment(0), _segment(1)] + summary = _summary( + key_points=[_key_point("Point 1", [0, 1, 99])], # 99 invalid + action_items=[_action_item("Action 1", [1, 50])], # 50 invalid + ) + + filtered = verifier.filter_invalid_citations(summary, segments) + + assert filtered.key_points[0].segment_ids == [0, 1] + assert filtered.action_items[0].segment_ids == [1] + + def test_filter_preserves_valid_citations(self, verifier: SegmentCitationVerifier) -> None: + """Valid citations should be preserved.""" + segments = [_segment(0), _segment(1), _segment(2)] + summary = _summary( + key_points=[_key_point("Point 1", [0, 1])], + action_items=[_action_item("Action 1", [2])], + ) + + filtered = verifier.filter_invalid_citations(summary, segments) + + assert filtered.key_points[0].segment_ids == [0, 1] + assert filtered.action_items[0].segment_ids == [2] + + def test_filter_preserves_other_fields(self, verifier: SegmentCitationVerifier) -> None: + """Non-citation fields should be preserved.""" + segments = [_segment(0)] + summary = Summary( + meeting_id=MeetingId(uuid4()), + executive_summary="Important meeting", + key_points=[KeyPoint(text="Key point", segment_ids=[0], start_time=1.0, end_time=2.0)], + action_items=[ActionItem(text="Action", segment_ids=[0], assignee="Alice", priority=2)], + model_version="test-1.0", + ) + + filtered = verifier.filter_invalid_citations(summary, segments) + + assert filtered.executive_summary == "Important meeting" + assert filtered.key_points[0].text == "Key point" + assert filtered.key_points[0].start_time == 1.0 + assert filtered.action_items[0].assignee == "Alice" + assert filtered.action_items[0].priority == 2 + assert filtered.model_version == "test-1.0" +```` + +## File: tests/infrastructure/summarization/test_mock_provider.py +````python +"""Tests for mock summarization provider.""" + +from __future__ import annotations + +from uuid import uuid4 + +import pytest + +from noteflow.domain.entities import Segment +from noteflow.domain.summarization import SummarizationRequest +from noteflow.domain.value_objects import MeetingId +from noteflow.infrastructure.summarization import MockSummarizer + + +def _segment( + segment_id: int, + text: str, + start: float = 0.0, + end: float = 5.0, +) -> Segment: + """Create a test segment.""" + return Segment( + segment_id=segment_id, + text=text, + start_time=start, + end_time=end, + ) + + +class TestMockSummarizer: + """Tests for MockSummarizer.""" + + @pytest.fixture + def summarizer(self) -> MockSummarizer: + """Create MockSummarizer instance.""" + return MockSummarizer(latency_ms=0.0) + + 
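+    # The tests below pin MockSummarizer's contract: one key point per segment
+    # (capped by max_key_points) and keyword-based action item extraction.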
@pytest.fixture + def meeting_id(self) -> MeetingId: + """Create a test meeting ID.""" + return MeetingId(uuid4()) + + def test_provider_name(self, summarizer: MockSummarizer) -> None: + """Provider name should be 'mock'.""" + assert summarizer.provider_name == "mock" + + def test_is_available(self, summarizer: MockSummarizer) -> None: + """Mock provider should always be available.""" + assert summarizer.is_available is True + + def test_requires_cloud_consent(self, summarizer: MockSummarizer) -> None: + """Mock provider should not require cloud consent.""" + assert summarizer.requires_cloud_consent is False + + @pytest.mark.asyncio + async def test_summarize_returns_result( + self, + summarizer: MockSummarizer, + meeting_id: MeetingId, + ) -> None: + """Summarize should return a SummarizationResult.""" + segments = [ + _segment(0, "First segment text.", 0.0, 5.0), + _segment(1, "Second segment text.", 5.0, 10.0), + ] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert result.provider_name == "mock" + assert result.model_name == "mock-1.0" + assert result.summary.meeting_id == meeting_id + + @pytest.mark.asyncio + async def test_summarize_generates_executive_summary( + self, + summarizer: MockSummarizer, + meeting_id: MeetingId, + ) -> None: + """Summarize should generate executive summary with segment count.""" + segments = [ + _segment(0, "Hello", 0.0, 5.0), + _segment(1, "World", 5.0, 10.0), + _segment(2, "Test", 10.0, 15.0), + ] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert "3 segments" in result.summary.executive_summary + assert "15.0 seconds" in result.summary.executive_summary + + @pytest.mark.asyncio + async def test_summarize_generates_key_points_with_citations( + self, + summarizer: MockSummarizer, + meeting_id: MeetingId, + ) -> None: + """Key points should have valid segment_id citations.""" + segments = [ + _segment(0, "First point", 0.0, 5.0), + _segment(1, "Second point", 5.0, 10.0), + ] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert len(result.summary.key_points) == 2 + assert result.summary.key_points[0].segment_ids == [0] + assert result.summary.key_points[1].segment_ids == [1] + + @pytest.mark.asyncio + async def test_summarize_respects_max_key_points( + self, + summarizer: MockSummarizer, + meeting_id: MeetingId, + ) -> None: + """Key points should be limited to max_key_points.""" + segments = [_segment(i, f"Segment {i}", i * 5.0, (i + 1) * 5.0) for i in range(10)] + request = SummarizationRequest( + meeting_id=meeting_id, + segments=segments, + max_key_points=3, + ) + + result = await summarizer.summarize(request) + + assert len(result.summary.key_points) == 3 + + @pytest.mark.asyncio + async def test_summarize_extracts_action_items( + self, + summarizer: MockSummarizer, + meeting_id: MeetingId, + ) -> None: + """Action items should be extracted from segments with action keywords.""" + segments = [ + _segment(0, "General discussion", 0.0, 5.0), + _segment(1, "We need to fix the bug", 5.0, 10.0), + _segment(2, "TODO: Review the code", 10.0, 15.0), + _segment(3, "The meeting went well", 15.0, 20.0), + ] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert len(result.summary.action_items) == 2 + assert 
result.summary.action_items[0].segment_ids == [1] + assert result.summary.action_items[1].segment_ids == [2] + + @pytest.mark.asyncio + async def test_summarize_respects_max_action_items( + self, + summarizer: MockSummarizer, + meeting_id: MeetingId, + ) -> None: + """Action items should be limited to max_action_items.""" + segments = [_segment(i, f"TODO: task {i}", i * 5.0, (i + 1) * 5.0) for i in range(10)] + request = SummarizationRequest( + meeting_id=meeting_id, + segments=segments, + max_action_items=2, + ) + + result = await summarizer.summarize(request) + + assert len(result.summary.action_items) == 2 + + @pytest.mark.asyncio + async def test_summarize_sets_generated_at( + self, + summarizer: MockSummarizer, + meeting_id: MeetingId, + ) -> None: + """Summary should have generated_at timestamp.""" + segments = [_segment(0, "Test", 0.0, 5.0)] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert result.summary.generated_at is not None + + @pytest.mark.asyncio + async def test_summarize_empty_segments( + self, + summarizer: MockSummarizer, + meeting_id: MeetingId, + ) -> None: + """Summarize should handle empty segments list.""" + request = SummarizationRequest(meeting_id=meeting_id, segments=[]) + + result = await summarizer.summarize(request) + + assert result.summary.key_points == [] + assert result.summary.action_items == [] + assert "0 segments" in result.summary.executive_summary +```` + +## File: tests/infrastructure/summarization/test_ollama_provider.py +````python +"""Tests for Ollama summarization provider.""" + +from __future__ import annotations + +import json +import sys +import types +from typing import Any +from uuid import uuid4 + +import pytest + +from noteflow.domain.entities import Segment +from noteflow.domain.summarization import ( + InvalidResponseError, + ProviderUnavailableError, + SummarizationRequest, +) +from noteflow.domain.value_objects import MeetingId + + +def _segment( + segment_id: int, + text: str, + start: float = 0.0, + end: float = 5.0, +) -> Segment: + """Create a test segment.""" + return Segment( + segment_id=segment_id, + text=text, + start_time=start, + end_time=end, + ) + + +def _valid_json_response( + summary: str = "Test summary.", + key_points: list[dict[str, Any]] | None = None, + action_items: list[dict[str, Any]] | None = None, +) -> str: + """Build a valid JSON response string.""" + return json.dumps( + { + "executive_summary": summary, + "key_points": key_points or [], + "action_items": action_items or [], + } + ) + + +class TestOllamaSummarizerProperties: + """Tests for OllamaSummarizer properties.""" + + @pytest.fixture + def mock_ollama_module(self, monkeypatch: pytest.MonkeyPatch) -> types.ModuleType: + """Mock ollama module.""" + mock_client = types.SimpleNamespace( + list=lambda: {"models": []}, + chat=lambda **_: {"message": {"content": _valid_json_response()}}, + ) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + return mock_module + + def test_provider_name(self, mock_ollama_module: types.ModuleType) -> None: + """Provider name should be 'ollama'.""" + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + assert summarizer.provider_name == "ollama" + + def test_requires_cloud_consent_false(self, mock_ollama_module: types.ModuleType) -> None: + """Ollama should not require cloud consent (local 
processing).""" + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + assert summarizer.requires_cloud_consent is False + + def test_is_available_when_server_responds(self, mock_ollama_module: types.ModuleType) -> None: + """is_available should be True when server responds.""" + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + assert summarizer.is_available is True + + def test_is_available_false_when_connection_fails( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """is_available should be False when server unreachable.""" + + def raise_error() -> None: + raise ConnectionError("Connection refused") + + mock_client = types.SimpleNamespace(list=raise_error) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + assert summarizer.is_available is False + + +class TestOllamaSummarizerSummarize: + """Tests for OllamaSummarizer.summarize method.""" + + @pytest.fixture + def meeting_id(self) -> MeetingId: + """Create test meeting ID.""" + return MeetingId(uuid4()) + + @pytest.mark.asyncio + async def test_summarize_empty_segments( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Empty segments should return empty summary without calling LLM.""" + call_count = 0 + + def mock_chat(**_: Any) -> dict[str, Any]: + nonlocal call_count + call_count += 1 + return {"message": {"content": _valid_json_response()}} + + mock_client = types.SimpleNamespace(list=lambda: {}, chat=mock_chat) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + request = SummarizationRequest(meeting_id=meeting_id, segments=[]) + + result = await summarizer.summarize(request) + + assert result.summary.key_points == [] + assert result.summary.action_items == [] + assert call_count == 0 + + @pytest.mark.asyncio + async def test_summarize_returns_result( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Summarize should return SummarizationResult.""" + response = _valid_json_response( + summary="Meeting discussed project updates.", + key_points=[{"text": "Project on track", "segment_ids": [0]}], + action_items=[ + {"text": "Review code", "assignee": "Alice", "priority": 2, "segment_ids": [1]} + ], + ) + + mock_client = types.SimpleNamespace( + list=lambda: {}, + chat=lambda **_: {"message": {"content": response}}, + ) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + segments = [ + _segment(0, "Project is on track.", 0.0, 5.0), + _segment(1, "Alice needs to review the code.", 5.0, 10.0), + ] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert result.provider_name == "ollama" + assert result.summary.meeting_id == meeting_id + assert result.summary.executive_summary == "Meeting discussed project updates." 
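+        # The assertions below mirror the mocked JSON payload built above.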
+ assert len(result.summary.key_points) == 1 + assert result.summary.key_points[0].segment_ids == [0] + assert len(result.summary.action_items) == 1 + assert result.summary.action_items[0].assignee == "Alice" + assert result.summary.action_items[0].priority == 2 + + @pytest.mark.asyncio + async def test_summarize_filters_invalid_segment_ids( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Invalid segment_ids in response should be filtered out.""" + response = _valid_json_response( + summary="Test", + key_points=[{"text": "Point", "segment_ids": [0, 99, 100]}], # 99, 100 invalid + ) + + mock_client = types.SimpleNamespace( + list=lambda: {}, + chat=lambda **_: {"message": {"content": response}}, + ) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + segments = [_segment(0, "Only segment")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert result.summary.key_points[0].segment_ids == [0] + + @pytest.mark.asyncio + async def test_summarize_respects_max_limits( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Response items exceeding max limits should be truncated.""" + response = _valid_json_response( + summary="Test", + key_points=[{"text": f"Point {i}", "segment_ids": [0]} for i in range(10)], + action_items=[{"text": f"Action {i}", "segment_ids": [0]} for i in range(10)], + ) + + mock_client = types.SimpleNamespace( + list=lambda: {}, + chat=lambda **_: {"message": {"content": response}}, + ) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + segments = [_segment(0, "Test segment")] + request = SummarizationRequest( + meeting_id=meeting_id, + segments=segments, + max_key_points=3, + max_action_items=2, + ) + + result = await summarizer.summarize(request) + + assert len(result.summary.key_points) == 3 + assert len(result.summary.action_items) == 2 + + @pytest.mark.asyncio + async def test_summarize_handles_markdown_fenced_json( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Markdown code fences around JSON should be stripped.""" + json_content = _valid_json_response(summary="Fenced response") + response = f"```json\n{json_content}\n```" + + mock_client = types.SimpleNamespace( + list=lambda: {}, + chat=lambda **_: {"message": {"content": response}}, + ) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + segments = [_segment(0, "Test")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert result.summary.executive_summary == "Fenced response" + + +class TestOllamaSummarizerErrors: + """Tests for OllamaSummarizer error handling.""" + + @pytest.fixture + def meeting_id(self) -> MeetingId: + """Create test meeting ID.""" + return MeetingId(uuid4()) + + @pytest.mark.asyncio + async def 
test_raises_unavailable_when_package_missing( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Should raise ProviderUnavailableError when ollama not installed.""" + # Remove ollama from sys.modules if present + monkeypatch.delitem(sys.modules, "ollama", raising=False) + + # Make import fail + import builtins + + original_import = builtins.__import__ + + def mock_import(name: str, *args: Any, **kwargs: Any) -> Any: + if name == "ollama": + raise ImportError("No module named 'ollama'") + return original_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", mock_import) + + # Need to reload the module to trigger fresh import + from noteflow.infrastructure.summarization import ollama_provider + + # Create fresh instance that will try to import + summarizer = ollama_provider.OllamaSummarizer() + summarizer._client = None # Force re-import attempt + + segments = [_segment(0, "Test")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + with pytest.raises(ProviderUnavailableError, match="ollama package not installed"): + await summarizer.summarize(request) + + @pytest.mark.asyncio + async def test_raises_unavailable_on_connection_error( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Should raise ProviderUnavailableError on connection failure.""" + + def raise_connection_error(**_: Any) -> None: + raise ConnectionRefusedError("Connection refused") + + mock_client = types.SimpleNamespace( + list=lambda: {}, + chat=raise_connection_error, + ) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + segments = [_segment(0, "Test")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + with pytest.raises(ProviderUnavailableError, match="Cannot connect"): + await summarizer.summarize(request) + + @pytest.mark.asyncio + async def test_raises_invalid_response_on_bad_json( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Should raise InvalidResponseError on malformed JSON.""" + mock_client = types.SimpleNamespace( + list=lambda: {}, + chat=lambda **_: {"message": {"content": "not valid json {{{"}}, + ) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + segments = [_segment(0, "Test")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + with pytest.raises(InvalidResponseError, match="Invalid JSON"): + await summarizer.summarize(request) + + @pytest.mark.asyncio + async def test_raises_invalid_response_on_empty_content( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Should raise InvalidResponseError on empty response.""" + mock_client = types.SimpleNamespace( + list=lambda: {}, + chat=lambda **_: {"message": {"content": ""}}, + ) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer() + segments = [_segment(0, "Test")] + request = SummarizationRequest(meeting_id=meeting_id, 
segments=segments) + + with pytest.raises(InvalidResponseError, match="Empty response"): + await summarizer.summarize(request) + + +class TestOllamaSummarizerConfiguration: + """Tests for OllamaSummarizer configuration.""" + + @pytest.mark.asyncio + async def test_custom_model_name(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Custom model name should be used.""" + captured_model = None + + def capture_chat(**kwargs: Any) -> dict[str, Any]: + nonlocal captured_model + captured_model = kwargs.get("model") + return {"message": {"content": _valid_json_response()}} + + mock_client = types.SimpleNamespace(list=lambda: {}, chat=capture_chat) + mock_module = types.ModuleType("ollama") + mock_module.Client = lambda host: mock_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer(model="mistral") + meeting_id = MeetingId(uuid4()) + segments = [_segment(0, "Test")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + await summarizer.summarize(request) + + assert captured_model == "mistral" + + def test_custom_host(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Custom host should be passed to client.""" + captured_host = None + + def capture_client(host: str) -> types.SimpleNamespace: + nonlocal captured_host + captured_host = host + return types.SimpleNamespace( + list=lambda: {}, + chat=lambda **_: {"message": {"content": _valid_json_response()}}, + ) + + mock_module = types.ModuleType("ollama") + mock_module.Client = capture_client + monkeypatch.setitem(sys.modules, "ollama", mock_module) + + from noteflow.infrastructure.summarization import OllamaSummarizer + + summarizer = OllamaSummarizer(host="http://custom:8080") + _ = summarizer.is_available + + assert captured_host == "http://custom:8080" +```` + +## File: tests/infrastructure/triggers/conftest.py +````python +"""Test fixtures for trigger infrastructure tests.""" + +from __future__ import annotations + +import sys +import types +from collections.abc import Callable +from dataclasses import dataclass + +import pytest + + +@dataclass +class DummyWindow: + """Mock window object for pywinctl tests.""" + + title: str | None + + +@pytest.fixture +def mock_pywinctl(monkeypatch: pytest.MonkeyPatch) -> Callable[[str | None], None]: + """Factory fixture to install mocked pywinctl module. 
+
+    Usage:
+        mock_pywinctl("Zoom Meeting")  # Window with title
+        mock_pywinctl(None)  # No active window
+    """
+
+    def _install(title: str | None) -> None:
+        window = DummyWindow(title) if title is not None else None
+        module = types.SimpleNamespace(getActiveWindow=lambda: window)
+        monkeypatch.setitem(sys.modules, "pywinctl", module)
+
+    return _install
+
+
+@pytest.fixture
+def mock_pywinctl_unavailable(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Make the pywinctl import raise ImportError."""
+    # A None entry in sys.modules makes "import pywinctl" raise ImportError,
+    # so no mock module object is needed here.
+    monkeypatch.setitem(sys.modules, "pywinctl", None)
+
+
+@pytest.fixture
+def mock_pywinctl_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Install pywinctl mock that raises RuntimeError on getActiveWindow."""
+
+    def raise_runtime_error() -> None:
+        msg = "No display available"
+        raise RuntimeError(msg)
+
+    module = types.SimpleNamespace(getActiveWindow=raise_runtime_error)
+    monkeypatch.setitem(sys.modules, "pywinctl", module)
+````
+
+## File: tests/infrastructure/triggers/test_audio_activity.py
+````python
+"""Tests for audio activity trigger provider."""
+
+from __future__ import annotations
+
+import time
+
+import numpy as np
+import pytest
+
+from noteflow.infrastructure.audio import RmsLevelProvider
+from noteflow.infrastructure.triggers.audio_activity import (
+    AudioActivityProvider,
+    AudioActivitySettings,
+)
+
+
+def _settings(**overrides: object) -> AudioActivitySettings:
+    defaults: dict[str, object] = {
+        "enabled": True,
+        "threshold_db": -20.0,
+        "window_seconds": 10.0,
+        "min_active_ratio": 0.6,
+        "min_samples": 3,
+        "max_history": 10,
+        "weight": 0.3,
+    } | overrides
+    return AudioActivitySettings(**defaults)
+
+
+def test_audio_activity_settings_validation() -> None:
+    """Settings should reject min_samples greater than max_history."""
+    with pytest.raises(ValueError, match="min_samples"):
+        AudioActivitySettings(
+            enabled=True,
+            threshold_db=-20.0,
+            window_seconds=5.0,
+            min_active_ratio=0.5,
+            min_samples=11,
+            max_history=10,
+            weight=0.3,
+        )
+
+
+def test_audio_activity_provider_disabled_ignores_updates() -> None:
+    """Disabled provider should not emit signals."""
+    provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=False))
+    frames = np.ones(10, dtype=np.float32)
+
+    provider.update(frames, timestamp=1.0)
+
+    assert provider.get_signal() is None
+
+
+def test_audio_activity_provider_emits_signal(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Provider emits a signal when sustained activity passes ratio threshold."""
+    provider = AudioActivityProvider(RmsLevelProvider(), _settings())
+    active = np.ones(10, dtype=np.float32)
+    inactive = np.zeros(10, dtype=np.float32)
+
+    # Two of three samples are active (ratio 0.67 >= min_active_ratio 0.6).
+    provider.update(active, timestamp=1.0)
+    provider.update(active, timestamp=2.0)
+    provider.update(inactive, timestamp=3.0)
+
+    monkeypatch.setattr(time, "monotonic", lambda: 4.0)
+    signal = provider.get_signal()
+
+    assert signal is not None
+    assert signal.weight == pytest.approx(0.3)
+
+
+def test_audio_activity_provider_window_excludes_old_samples(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Samples outside the window should not contribute to activity ratio."""
+    provider = AudioActivityProvider(RmsLevelProvider(), _settings(window_seconds=2.0))
+    active = np.ones(10, dtype=np.float32)
+
+    provider.update(active, timestamp=1.0)
+    provider.update(active, timestamp=2.0)
+    provider.update(active, timestamp=3.0)
+
+    monkeypatch.setattr(time, "monotonic", lambda:
10.0) + assert provider.get_signal() is None + + +def test_audio_activity_provider_source_property() -> None: + """Provider source should be AUDIO_ACTIVITY.""" + from noteflow.domain.triggers.entities import TriggerSource + + provider = AudioActivityProvider(RmsLevelProvider(), _settings()) + assert provider.source == TriggerSource.AUDIO_ACTIVITY + + +def test_audio_activity_provider_max_weight_property() -> None: + """Provider max_weight should reflect configured weight.""" + provider = AudioActivityProvider(RmsLevelProvider(), _settings(weight=0.5)) + assert provider.max_weight == pytest.approx(0.5) + + +def test_audio_activity_provider_is_enabled_reflects_settings() -> None: + """is_enabled should reflect settings.enabled.""" + enabled_provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=True)) + disabled_provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=False)) + + assert enabled_provider.is_enabled() is True + assert disabled_provider.is_enabled() is False + + +def test_audio_activity_provider_clear_history() -> None: + """clear_history should reset the activity history.""" + provider = AudioActivityProvider(RmsLevelProvider(), _settings()) + active = np.ones(10, dtype=np.float32) + + provider.update(active, timestamp=1.0) + provider.update(active, timestamp=2.0) + provider.update(active, timestamp=3.0) + + provider.clear_history() + + # After clearing, signal should be None due to insufficient samples + assert provider.get_signal() is None + + +def test_audio_activity_provider_insufficient_samples() -> None: + """Provider should return None when history has fewer than min_samples.""" + provider = AudioActivityProvider(RmsLevelProvider(), _settings(min_samples=5)) + active = np.ones(10, dtype=np.float32) + + # Add only 3 samples (less than min_samples=5) + provider.update(active, timestamp=1.0) + provider.update(active, timestamp=2.0) + provider.update(active, timestamp=3.0) + + assert provider.get_signal() is None + + +def test_audio_activity_provider_below_activity_ratio() -> None: + """Provider should return None when active ratio < min_active_ratio.""" + provider = AudioActivityProvider(RmsLevelProvider(), _settings(min_active_ratio=0.7)) + active = np.ones(10, dtype=np.float32) + inactive = np.zeros(10, dtype=np.float32) + + # Add 3 active, 7 inactive = 30% active ratio (below 70% threshold) + provider.update(active, timestamp=1.0) + provider.update(active, timestamp=2.0) + provider.update(active, timestamp=3.0) + provider.update(inactive, timestamp=4.0) + provider.update(inactive, timestamp=5.0) + provider.update(inactive, timestamp=6.0) + provider.update(inactive, timestamp=7.0) + provider.update(inactive, timestamp=8.0) + provider.update(inactive, timestamp=9.0) + provider.update(inactive, timestamp=10.0) + + assert provider.get_signal() is None + + +def test_audio_activity_provider_boundary_activity_ratio( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Provider should emit signal when ratio exactly equals min_active_ratio.""" + provider = AudioActivityProvider( + RmsLevelProvider(), + _settings(min_active_ratio=0.6, min_samples=5, max_history=10), + ) + active = np.ones(10, dtype=np.float32) + inactive = np.zeros(10, dtype=np.float32) + + # Add 6 active, 4 inactive = 60% active ratio (exactly at threshold) + provider.update(active, timestamp=1.0) + provider.update(active, timestamp=2.0) + provider.update(active, timestamp=3.0) + provider.update(active, timestamp=4.0) + provider.update(active, timestamp=5.0) + provider.update(active, 
timestamp=6.0) + provider.update(inactive, timestamp=7.0) + provider.update(inactive, timestamp=8.0) + provider.update(inactive, timestamp=9.0) + provider.update(inactive, timestamp=10.0) + + monkeypatch.setattr(time, "monotonic", lambda: 11.0) + signal = provider.get_signal() + + assert signal is not None + assert signal.weight == pytest.approx(0.3) +```` + +## File: tests/infrastructure/triggers/test_foreground_app.py +````python +"""Tests for foreground app trigger provider.""" + +from __future__ import annotations + +import sys +import types + +import pytest + +from noteflow.domain.triggers.entities import TriggerSource +from noteflow.infrastructure.triggers.foreground_app import ( + ForegroundAppProvider, + ForegroundAppSettings, +) + + +class DummyWindow: + """Mock window object for pywinctl tests.""" + + def __init__(self, title: str | None) -> None: + self.title = title + + +def _install_pywinctl(monkeypatch: pytest.MonkeyPatch, title: str | None) -> None: + """Install mocked pywinctl with specified window title.""" + window = DummyWindow(title) if title is not None else None + module = types.SimpleNamespace(getActiveWindow=lambda: window) + monkeypatch.setitem(sys.modules, "pywinctl", module) + + +def _settings(**overrides: object) -> ForegroundAppSettings: + """Create ForegroundAppSettings with defaults and overrides.""" + defaults: dict[str, object] = { + "enabled": True, + "weight": 0.4, + "meeting_apps": {"zoom"}, + "suppressed_apps": set(), + } | overrides + return ForegroundAppSettings(**defaults) + + +# --- Existing Tests --- + + +def test_foreground_app_provider_emits_signal(monkeypatch: pytest.MonkeyPatch) -> None: + """Provider emits signal when a meeting app is in foreground.""" + _install_pywinctl(monkeypatch, "Zoom Meeting") + provider = ForegroundAppProvider(_settings()) + + signal = provider.get_signal() + + assert signal is not None + assert signal.weight == pytest.approx(0.4) + assert signal.app_name == "Zoom Meeting" + + +def test_foreground_app_provider_suppressed(monkeypatch: pytest.MonkeyPatch) -> None: + """Suppressed apps should not emit signals.""" + _install_pywinctl(monkeypatch, "Zoom Meeting") + provider = ForegroundAppProvider(_settings(suppressed_apps={"zoom"})) + + assert provider.get_signal() is None + + +def test_foreground_app_provider_unavailable(monkeypatch: pytest.MonkeyPatch) -> None: + """Unavailable provider should report disabled.""" + provider = ForegroundAppProvider(_settings()) + monkeypatch.setattr(provider, "_is_available", lambda: False) + + assert provider.is_enabled() is False + + +# --- New Tests --- + + +def test_foreground_app_provider_source_property() -> None: + """Provider source should be FOREGROUND_APP.""" + provider = ForegroundAppProvider(_settings()) + assert provider.source == TriggerSource.FOREGROUND_APP + + +def test_foreground_app_provider_max_weight_property() -> None: + """Provider max_weight should reflect configured weight.""" + provider = ForegroundAppProvider(_settings(weight=0.5)) + assert provider.max_weight == pytest.approx(0.5) + + +def test_foreground_app_settings_lowercases_apps() -> None: + """Settings __post_init__ should lowercase meeting_apps and suppressed_apps.""" + settings = ForegroundAppSettings( + enabled=True, + weight=0.4, + meeting_apps={"ZOOM", "Teams", "GoToMeeting"}, + suppressed_apps={"SLACK", "Discord"}, + ) + + assert "zoom" in settings.meeting_apps + assert "teams" in settings.meeting_apps + assert "gotomeeting" in settings.meeting_apps + assert "slack" in settings.suppressed_apps + 
assert "discord" in settings.suppressed_apps + # Original case should not be present + assert "ZOOM" not in settings.meeting_apps + assert "SLACK" not in settings.suppressed_apps + + +def test_foreground_app_provider_disabled_returns_none( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Provider should return None when enabled=False.""" + _install_pywinctl(monkeypatch, "Zoom Meeting") + provider = ForegroundAppProvider(_settings(enabled=False)) + + assert provider.get_signal() is None + + +def test_foreground_app_provider_no_window_returns_none( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Provider should return None when getActiveWindow() returns None.""" + _install_pywinctl(monkeypatch, None) + provider = ForegroundAppProvider(_settings()) + # Force availability check to succeed + provider._available = True + + assert provider.get_signal() is None + + +def test_foreground_app_provider_empty_title_returns_none( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Provider should return None when window title is empty string.""" + _install_pywinctl(monkeypatch, "") + provider = ForegroundAppProvider(_settings()) + provider._available = True + + assert provider.get_signal() is None + + +def test_foreground_app_provider_non_meeting_app_returns_none( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Provider should return None when foreground app is not a meeting app.""" + _install_pywinctl(monkeypatch, "Firefox Browser") + provider = ForegroundAppProvider(_settings(meeting_apps={"zoom", "teams"})) + provider._available = True + + assert provider.get_signal() is None + + +def test_foreground_app_provider_suppress_app() -> None: + """suppress_app should add lowercased app to suppressed_apps.""" + provider = ForegroundAppProvider(_settings(suppressed_apps=set())) + + provider.suppress_app("ZOOM") + provider.suppress_app("Teams") + + assert "zoom" in provider.suppressed_apps + assert "teams" in provider.suppressed_apps + + +def test_foreground_app_provider_unsuppress_app() -> None: + """unsuppress_app should remove app from suppressed_apps.""" + provider = ForegroundAppProvider(_settings(suppressed_apps={"zoom", "teams"})) + + provider.unsuppress_app("zoom") + + assert "zoom" not in provider.suppressed_apps + assert "teams" in provider.suppressed_apps + + +def test_foreground_app_provider_add_meeting_app() -> None: + """add_meeting_app should add lowercased app to meeting_apps.""" + provider = ForegroundAppProvider(_settings(meeting_apps={"zoom"})) + + provider.add_meeting_app("WEBEX") + provider.add_meeting_app("RingCentral") + + assert "webex" in provider._settings.meeting_apps + assert "ringcentral" in provider._settings.meeting_apps + + +def test_foreground_app_provider_suppressed_apps_property() -> None: + """suppressed_apps property should return frozenset.""" + provider = ForegroundAppProvider(_settings(suppressed_apps={"zoom", "teams"})) + + result = provider.suppressed_apps + + assert isinstance(result, frozenset) + assert "zoom" in result + assert "teams" in result + + +def test_foreground_app_provider_case_insensitive_matching( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Provider should match meeting apps case-insensitively.""" + _install_pywinctl(monkeypatch, "ZOOM MEETING - Conference Room") + provider = ForegroundAppProvider(_settings(meeting_apps={"zoom"})) + provider._available = True + + signal = provider.get_signal() + + assert signal is not None + assert signal.app_name == "ZOOM MEETING - Conference Room" + + +def 
test_foreground_app_provider_is_enabled_when_enabled_and_available( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """is_enabled should return True when both enabled and available.""" + _install_pywinctl(monkeypatch, "Some Window") + provider = ForegroundAppProvider(_settings(enabled=True)) + + assert provider.is_enabled() is True +```` + +## File: tests/infrastructure/__init__.py +````python +"""Infrastructure tests package.""" +```` + +## File: tests/infrastructure/test_converters.py +````python +"""Tests for infrastructure converters.""" + +from __future__ import annotations + +from noteflow.domain import entities +from noteflow.infrastructure.asr import dto +from noteflow.infrastructure.converters import AsrConverter, OrmConverter + + +class TestAsrConverter: + """Tests for AsrConverter.""" + + def test_word_timing_to_domain_maps_field_names(self) -> None: + """Test ASR start/end maps to domain start_time/end_time.""" + asr_word = dto.WordTiming(word="hello", start=1.5, end=2.0, probability=0.95) + + result = AsrConverter.word_timing_to_domain(asr_word) + + assert result.word == "hello" + assert result.start_time == 1.5 + assert result.end_time == 2.0 + assert result.probability == 0.95 + + def test_word_timing_to_domain_preserves_precision(self) -> None: + """Test timing values preserve floating point precision.""" + asr_word = dto.WordTiming( + word="test", + start=0.123456789, + end=0.987654321, + probability=0.999999, + ) + + result = AsrConverter.word_timing_to_domain(asr_word) + + assert result.start_time == 0.123456789 + assert result.end_time == 0.987654321 + assert result.probability == 0.999999 + + def test_word_timing_to_domain_returns_domain_type(self) -> None: + """Test converter returns domain WordTiming type.""" + asr_word = dto.WordTiming(word="test", start=1.0, end=2.0, probability=0.9) + + result = AsrConverter.word_timing_to_domain(asr_word) + + assert isinstance(result, entities.WordTiming) + + def test_result_to_domain_words_converts_all(self) -> None: + """Test batch conversion of ASR result words.""" + asr_result = dto.AsrResult( + text="hello world", + start=0.0, + end=2.0, + words=( + dto.WordTiming(word="hello", start=0.0, end=1.0, probability=0.9), + dto.WordTiming(word="world", start=1.0, end=2.0, probability=0.95), + ), + ) + + words = AsrConverter.result_to_domain_words(asr_result) + + assert len(words) == 2 + assert words[0].word == "hello" + assert words[0].start_time == 0.0 + assert words[1].word == "world" + assert words[1].start_time == 1.0 + + def test_result_to_domain_words_empty(self) -> None: + """Test conversion with empty words tuple.""" + asr_result = dto.AsrResult(text="", start=0.0, end=0.0, words=()) + + words = AsrConverter.result_to_domain_words(asr_result) + + assert words == [] + + +class TestOrmConverterToOrmKwargs: + """Tests for OrmConverter.word_timing_to_orm_kwargs.""" + + def test_converts_to_dict(self) -> None: + """Test domain to ORM kwargs conversion.""" + word = entities.WordTiming( + word="test", + start_time=1.5, + end_time=2.0, + probability=0.9, + ) + + result = OrmConverter.word_timing_to_orm_kwargs(word) + + assert result == { + "word": "test", + "start_time": 1.5, + "end_time": 2.0, + "probability": 0.9, + } + + def test_preserves_precision(self) -> None: + """Test floating point precision in kwargs.""" + word = entities.WordTiming( + word="precise", + start_time=0.123456789, + end_time=0.987654321, + probability=0.111111, + ) + + result = OrmConverter.word_timing_to_orm_kwargs(word) + + assert result["start_time"] == 
0.123456789
+        assert result["end_time"] == 0.987654321
+        assert result["probability"] == 0.111111
+````
+
+## File: tests/integration/__init__.py
+````python
+"""Integration tests using testcontainers."""
+````
+
+## File: tests/integration/test_unit_of_work.py
+````python
+"""Integration tests for SqlAlchemyUnitOfWork."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING
+
+import pytest
+
+from noteflow.domain.entities import Meeting, Segment, Summary
+from noteflow.domain.value_objects import MeetingState
+from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork
+
+if TYPE_CHECKING:
+    from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+
+@pytest.mark.integration
+class TestUnitOfWork:
+    """Integration tests for SqlAlchemyUnitOfWork."""
+
+    async def test_uow_context_manager(
+        self, session_factory: async_sessionmaker[AsyncSession]
+    ) -> None:
+        """Test UoW works as async context manager."""
+        async with SqlAlchemyUnitOfWork(session_factory) as uow:
+            assert uow.meetings is not None
+            assert uow.segments is not None
+            assert uow.summaries is not None
+
+    async def test_uow_commit(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
+        """Test UoW commit persists changes."""
+        meeting = Meeting.create(title="Commit Test")
+
+        async with SqlAlchemyUnitOfWork(session_factory) as uow:
+            await uow.meetings.create(meeting)
+            await uow.commit()
+
+        # Verify in new UoW
+        async with SqlAlchemyUnitOfWork(session_factory) as uow:
+            retrieved = await uow.meetings.get(meeting.id)
+            assert retrieved is not None
+            assert retrieved.title == "Commit Test"
+
+    async def test_uow_rollback(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
+        """Test UoW rollback discards changes."""
+        meeting = Meeting.create(title="Rollback Test")
+
+        async with SqlAlchemyUnitOfWork(session_factory) as uow:
+            await uow.meetings.create(meeting)
+            await uow.rollback()
+
+        # Verify not persisted
+        async with SqlAlchemyUnitOfWork(session_factory) as uow:
+            retrieved = await uow.meetings.get(meeting.id)
+            assert retrieved is None
+
+    async def test_uow_auto_rollback_on_exception(
+        self, session_factory: async_sessionmaker[AsyncSession]
+    ) -> None:
+        """Test UoW automatically rolls back on exception."""
+        meeting = Meeting.create(title="Exception Test")
+
+        with pytest.raises(ValueError, match="Test exception"):
+            async with SqlAlchemyUnitOfWork(session_factory) as uow:
+                await uow.meetings.create(meeting)
+                raise ValueError("Test exception")
+
+        # Verify not persisted
+        async with SqlAlchemyUnitOfWork(session_factory) as uow:
+            retrieved = await uow.meetings.get(meeting.id)
+            assert retrieved is None
+
+    async def test_uow_transactional_consistency(
+        self, session_factory: async_sessionmaker[AsyncSession]
+    ) -> None:
+        """Test UoW provides transactional consistency across repos."""
+        meeting = Meeting.create(title="Transactional Test")
+        segment = Segment(
+            segment_id=0,
+            text="Hello",
+            start_time=0.0,
+            end_time=1.0,
+            meeting_id=meeting.id,
+        )
+        summary = Summary(
+            meeting_id=meeting.id,
+            executive_summary="Test summary",
+            generated_at=datetime.now(UTC),
+        )
+
+        # Create meeting, segment, and summary in same transaction
+        async with SqlAlchemyUnitOfWork(session_factory) as uow:
+            await uow.meetings.create(meeting)
+            await uow.segments.add(meeting.id, segment)
+            await uow.summaries.save(summary)
+            await uow.commit()
+
+        # Verify all persisted
+        async with SqlAlchemyUnitOfWork(session_factory) as
uow: + m = await uow.meetings.get(meeting.id) + segs = await uow.segments.get_by_meeting(meeting.id) + s = await uow.summaries.get_by_meeting(meeting.id) + + assert m is not None + assert len(segs) == 1 + assert s is not None + + async def test_uow_repository_caching( + self, session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Test UoW caches repository instances.""" + async with SqlAlchemyUnitOfWork(session_factory) as uow: + meetings1 = uow.meetings + meetings2 = uow.meetings + assert meetings1 is meetings2 + + segments1 = uow.segments + segments2 = uow.segments + assert segments1 is segments2 + + async def test_uow_multiple_operations( + self, session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Test UoW handles multiple operations in sequence.""" + meeting = Meeting.create(title="Multi-op Test") + + async with SqlAlchemyUnitOfWork(session_factory) as uow: + # Create + await uow.meetings.create(meeting) + await uow.commit() + + # Update + meeting.start_recording() + await uow.meetings.update(meeting) + await uow.commit() + + # Add segment + segment = Segment(segment_id=0, text="Test", start_time=0.0, end_time=1.0) + await uow.segments.add(meeting.id, segment) + await uow.commit() + + # Verify final state + async with SqlAlchemyUnitOfWork(session_factory) as uow: + m = await uow.meetings.get(meeting.id) + segs = await uow.segments.get_by_meeting(meeting.id) + + assert m is not None + assert m.state == MeetingState.RECORDING + assert len(segs) == 1 +```` + +## File: tests/__init__.py +````python +"""NoteFlow test suite.""" +```` + +## File: tests/conftest.py +````python +"""Global test fixtures to mock optional extra dependencies. + +These stubs allow running the suite without installing heavy/optional packages +like openai/anthropic/ollama/pywinctl, while individual tests can still +override with more specific monkeypatches when needed. 
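+
+Example (illustrative sketch): a single test can shadow one of these stubs with
+its own fake, using the same ``sys.modules`` pattern the trigger tests use for
+pywinctl; ``my_fake_client`` here is a hypothetical test double:
+
+    fake = types.ModuleType("openai")
+    fake.OpenAI = lambda **kwargs: my_fake_client  # hypothetical fake
+    monkeypatch.setitem(sys.modules, "openai", fake)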
+""" + +from __future__ import annotations + +import sys +import types +from types import SimpleNamespace + +import pytest + + +@pytest.fixture(autouse=True, scope="session") +def mock_optional_extras() -> None: + """Install lightweight stubs for optional extra deps if absent.""" + + if "openai" not in sys.modules: + try: + import openai as _openai # noqa: F401 + except ImportError: + + def _default_create(**_: object) -> SimpleNamespace: + return SimpleNamespace( + choices=[SimpleNamespace(message=SimpleNamespace(content="{}"))], + usage=SimpleNamespace(total_tokens=0), + ) + + openai_module = types.ModuleType("openai") + openai_module.OpenAI = lambda **kwargs: SimpleNamespace( + chat=SimpleNamespace(completions=SimpleNamespace(create=_default_create)) + ) + sys.modules["openai"] = openai_module + + if "anthropic" not in sys.modules: + try: + import anthropic as _anthropic # noqa: F401 + except ImportError: + + def _default_messages_create(**_: object) -> SimpleNamespace: + return SimpleNamespace( + content=[SimpleNamespace(text="{}")], + usage=SimpleNamespace(input_tokens=0, output_tokens=0), + ) + + anthropic_module = types.ModuleType("anthropic") + anthropic_module.Anthropic = lambda **kwargs: SimpleNamespace( + messages=SimpleNamespace(create=_default_messages_create) + ) + sys.modules["anthropic"] = anthropic_module + + if "ollama" not in sys.modules: + try: + import ollama as _ollama # noqa: F401 + except ImportError: + + def _default_chat(**_: object) -> dict[str, object]: + return { + "message": { + "content": '{"executive_summary": "", "key_points": [], "action_items": []}' + }, + "eval_count": 0, + "prompt_eval_count": 0, + } + + ollama_module = types.ModuleType("ollama") + ollama_module.Client = lambda **kwargs: SimpleNamespace( + list=lambda: {}, chat=_default_chat + ) + sys.modules["ollama"] = ollama_module + + # pywinctl depends on pymonctl, which may fail in headless environments + # Mock both if not already present + if "pymonctl" not in sys.modules: + try: + import pymonctl as _pymonctl # noqa: F401 + except Exception: + # Mock pymonctl for headless environments (Xlib.error.DisplayNameError, etc.) + pymonctl_module = types.ModuleType("pymonctl") + pymonctl_module.getAllMonitors = lambda: [] + sys.modules["pymonctl"] = pymonctl_module + + if "pywinctl" not in sys.modules: + try: + import pywinctl as _pywinctl # noqa: F401 + except Exception: + # ImportError: package not installed + # OSError/Xlib errors: pywinctl may fail in headless environments + pywinctl_module = types.ModuleType("pywinctl") + pywinctl_module.getActiveWindow = lambda: None + pywinctl_module.getAllWindows = lambda: [] + pywinctl_module.getAllTitles = lambda: [] + sys.modules["pywinctl"] = pywinctl_module +```` + +## File: src/noteflow/application/services/meeting_service.py +````python +"""Meeting application service. + +Orchestrates meeting-related use cases with persistence. 
+""" + +from __future__ import annotations + +import logging +import shutil +from collections.abc import Sequence +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING + +from noteflow.domain.entities import ( + ActionItem, + Annotation, + KeyPoint, + Meeting, + Segment, + Summary, + WordTiming, +) +from noteflow.domain.value_objects import AnnotationId, AnnotationType + +if TYPE_CHECKING: + from collections.abc import Sequence as SequenceType + + from noteflow.domain.ports.unit_of_work import UnitOfWork + from noteflow.domain.value_objects import MeetingId, MeetingState + +logger = logging.getLogger(__name__) + + +class MeetingService: + """Application service for meeting operations. + + Provides use cases for managing meetings, segments, and summaries. + All methods are async and expect a UnitOfWork to be provided. + """ + + def __init__(self, uow: UnitOfWork) -> None: + """Initialize the meeting service. + + Args: + uow: Unit of work for persistence. + """ + self._uow = uow + + async def create_meeting( + self, + title: str, + metadata: dict[str, str] | None = None, + ) -> Meeting: + """Create a new meeting. + + Args: + title: Meeting title. + metadata: Optional metadata. + + Returns: + Created meeting. + """ + meeting = Meeting.create(title=title, metadata=metadata or {}) + + async with self._uow: + saved = await self._uow.meetings.create(meeting) + await self._uow.commit() + return saved + + async def get_meeting(self, meeting_id: MeetingId) -> Meeting | None: + """Get a meeting by ID. + + Args: + meeting_id: Meeting identifier. + + Returns: + Meeting if found, None otherwise. + """ + async with self._uow: + return await self._uow.meetings.get(meeting_id) + + async def list_meetings( + self, + states: list[MeetingState] | None = None, + limit: int = 100, + offset: int = 0, + sort_desc: bool = True, + ) -> tuple[Sequence[Meeting], int]: + """List meetings with optional filtering. + + Args: + states: Optional list of states to filter by. + limit: Maximum number of meetings to return. + offset: Number of meetings to skip. + sort_desc: Sort by created_at descending if True. + + Returns: + Tuple of (meetings list, total count). + """ + async with self._uow: + return await self._uow.meetings.list_all( + states=states, + limit=limit, + offset=offset, + sort_desc=sort_desc, + ) + + async def start_recording(self, meeting_id: MeetingId) -> Meeting | None: + """Start recording a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + Updated meeting, or None if not found. + """ + async with self._uow: + meeting = await self._uow.meetings.get(meeting_id) + if meeting is None: + return None + + meeting.start_recording() + await self._uow.meetings.update(meeting) + await self._uow.commit() + return meeting + + async def stop_meeting(self, meeting_id: MeetingId) -> Meeting | None: + """Stop a meeting through graceful STOPPING state. + + Transitions: RECORDING -> STOPPING -> STOPPED + + Args: + meeting_id: Meeting identifier. + + Returns: + Updated meeting, or None if not found. + """ + async with self._uow: + meeting = await self._uow.meetings.get(meeting_id) + if meeting is None: + return None + + # Graceful shutdown: RECORDING -> STOPPING -> STOPPED + meeting.begin_stopping() + meeting.stop_recording() + await self._uow.meetings.update(meeting) + await self._uow.commit() + return meeting + + async def complete_meeting(self, meeting_id: MeetingId) -> Meeting | None: + """Mark a meeting as completed. + + Args: + meeting_id: Meeting identifier. 
+ + Returns: + Updated meeting, or None if not found. + """ + async with self._uow: + meeting = await self._uow.meetings.get(meeting_id) + if meeting is None: + return None + + meeting.complete() + await self._uow.meetings.update(meeting) + await self._uow.commit() + return meeting + + async def delete_meeting( + self, + meeting_id: MeetingId, + meetings_dir: Path | None = None, + ) -> bool: + """Delete meeting with complete cleanup. + + Removes: + 1. Filesystem assets (audio, manifest) if meetings_dir provided + 2. Database records (cascade deletes children) + + Args: + meeting_id: Meeting identifier. + meetings_dir: Base directory for meeting assets. + + Returns: + True if deleted, False if not found. + """ + async with self._uow: + meeting = await self._uow.meetings.get(meeting_id) + if meeting is None: + return False + + # Delete filesystem assets first (if directory provided) + if meetings_dir is not None: + meeting_dir = meetings_dir / str(meeting_id) + if meeting_dir.exists(): + shutil.rmtree(meeting_dir) + logger.info( + "Deleted meeting assets at %s", + meeting_dir, + ) + + # Delete DB record (cascade handles children) + success = await self._uow.meetings.delete(meeting_id) + if success: + await self._uow.commit() + logger.info("Deleted meeting %s", meeting_id) + + return success + + async def add_segment( + self, + meeting_id: MeetingId, + segment_id: int, + text: str, + start_time: float, + end_time: float, + words: list[WordTiming] | None = None, + language: str = "en", + language_confidence: float = 0.0, + avg_logprob: float = 0.0, + no_speech_prob: float = 0.0, + ) -> Segment: + """Add a transcript segment to a meeting. + + Args: + meeting_id: Meeting identifier. + segment_id: Segment sequence number. + text: Transcript text. + start_time: Start time in seconds. + end_time: End time in seconds. + words: Optional word-level timing. + language: Detected language code. + language_confidence: Language detection confidence. + avg_logprob: Average log probability. + no_speech_prob: No-speech probability. + + Returns: + Added segment. + """ + segment = Segment( + segment_id=segment_id, + text=text, + start_time=start_time, + end_time=end_time, + meeting_id=meeting_id, + words=words or [], + language=language, + language_confidence=language_confidence, + avg_logprob=avg_logprob, + no_speech_prob=no_speech_prob, + ) + + async with self._uow: + saved = await self._uow.segments.add(meeting_id, segment) + await self._uow.commit() + return saved + + async def add_segments_batch( + self, + meeting_id: MeetingId, + segments: Sequence[Segment], + ) -> Sequence[Segment]: + """Add multiple segments in batch. + + Args: + meeting_id: Meeting identifier. + segments: Segments to add. + + Returns: + Added segments. + """ + async with self._uow: + saved = await self._uow.segments.add_batch(meeting_id, segments) + await self._uow.commit() + return saved + + async def get_segments( + self, + meeting_id: MeetingId, + include_words: bool = True, + ) -> Sequence[Segment]: + """Get all segments for a meeting. + + Args: + meeting_id: Meeting identifier. + include_words: Include word-level timing. + + Returns: + List of segments ordered by segment_id. + """ + async with self._uow: + return await self._uow.segments.get_by_meeting( + meeting_id, + include_words=include_words, + ) + + async def search_segments( + self, + query_embedding: list[float], + limit: int = 10, + meeting_id: MeetingId | None = None, + ) -> Sequence[tuple[Segment, float]]: + """Search segments by semantic similarity. 
+ + Args: + query_embedding: Query embedding vector. + limit: Maximum number of results. + meeting_id: Optional meeting to restrict search to. + + Returns: + List of (segment, similarity_score) tuples. + """ + async with self._uow: + return await self._uow.segments.search_semantic( + query_embedding=query_embedding, + limit=limit, + meeting_id=meeting_id, + ) + + async def save_summary( + self, + meeting_id: MeetingId, + executive_summary: str, + key_points: list[KeyPoint] | None = None, + action_items: list[ActionItem] | None = None, + model_version: str = "", + ) -> Summary: + """Save or update a meeting summary. + + Args: + meeting_id: Meeting identifier. + executive_summary: Executive summary text. + key_points: List of key points. + action_items: List of action items. + model_version: Model version that generated the summary. + + Returns: + Saved summary. + """ + summary = Summary( + meeting_id=meeting_id, + executive_summary=executive_summary, + key_points=key_points or [], + action_items=action_items or [], + generated_at=datetime.now(UTC), + model_version=model_version, + ) + + async with self._uow: + saved = await self._uow.summaries.save(summary) + await self._uow.commit() + return saved + + async def get_summary(self, meeting_id: MeetingId) -> Summary | None: + """Get summary for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + Summary if exists, None otherwise. + """ + async with self._uow: + return await self._uow.summaries.get_by_meeting(meeting_id) + + # Annotation methods + + async def add_annotation( + self, + meeting_id: MeetingId, + annotation_type: AnnotationType, + text: str, + start_time: float, + end_time: float, + segment_ids: list[int] | None = None, + ) -> Annotation: + """Add an annotation to a meeting. + + Args: + meeting_id: Meeting identifier. + annotation_type: Type of annotation. + text: Annotation text. + start_time: Start time in seconds. + end_time: End time in seconds. + segment_ids: Optional list of linked segment IDs. + + Returns: + Added annotation. + """ + from uuid import uuid4 + + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=meeting_id, + annotation_type=annotation_type, + text=text, + start_time=start_time, + end_time=end_time, + segment_ids=segment_ids or [], + ) + + async with self._uow: + saved = await self._uow.annotations.add(annotation) + await self._uow.commit() + return saved + + async def get_annotation(self, annotation_id: AnnotationId) -> Annotation | None: + """Get an annotation by ID. + + Args: + annotation_id: Annotation identifier. + + Returns: + Annotation if found, None otherwise. + """ + async with self._uow: + return await self._uow.annotations.get(annotation_id) + + async def get_annotations( + self, + meeting_id: MeetingId, + ) -> SequenceType[Annotation]: + """Get all annotations for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + List of annotations ordered by start_time. + """ + async with self._uow: + return await self._uow.annotations.get_by_meeting(meeting_id) + + async def get_annotations_in_range( + self, + meeting_id: MeetingId, + start_time: float, + end_time: float, + ) -> SequenceType[Annotation]: + """Get annotations within a time range. + + Args: + meeting_id: Meeting identifier. + start_time: Start of time range in seconds. + end_time: End of time range in seconds. + + Returns: + List of annotations overlapping the time range. 
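+
+        Example (sketch; annotations overlapping the 30s-90s window):
+            notes = await service.get_annotations_in_range(meeting_id, 30.0, 90.0)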
+ """ + async with self._uow: + return await self._uow.annotations.get_by_time_range(meeting_id, start_time, end_time) + + async def update_annotation(self, annotation: Annotation) -> Annotation: + """Update an existing annotation. + + Args: + annotation: Annotation with updated fields. + + Returns: + Updated annotation. + + Raises: + ValueError: If annotation does not exist. + """ + async with self._uow: + updated = await self._uow.annotations.update(annotation) + await self._uow.commit() + return updated + + async def delete_annotation(self, annotation_id: AnnotationId) -> bool: + """Delete an annotation. + + Args: + annotation_id: Annotation identifier. + + Returns: + True if deleted, False if not found. + """ + async with self._uow: + success = await self._uow.annotations.delete(annotation_id) + if success: + await self._uow.commit() + return success +```` + +## File: src/noteflow/application/services/retention_service.py +````python +"""Service for automatic meeting retention and cleanup.""" + +from __future__ import annotations + +import logging +from collections.abc import Callable +from dataclasses import dataclass +from datetime import UTC, datetime, timedelta +from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from noteflow.domain.entities import Meeting + from noteflow.domain.ports.unit_of_work import UnitOfWork + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class RetentionReport: + """Result of retention cleanup run. + + Attributes: + meetings_checked: Number of meetings that matched cutoff criteria. + meetings_deleted: Number of meetings successfully deleted. + errors: List of error messages for failed deletions. + """ + + meetings_checked: int + meetings_deleted: int + errors: tuple[str, ...] + + +class RetentionService: + """Manage automatic deletion of expired meetings. + + Find and delete meetings that have been completed longer than + the configured retention period. + """ + + def __init__( + self, + uow_factory: Callable[[], UnitOfWork], + retention_days: int, + meetings_dir: Path | None = None, + enabled: bool = False, + ) -> None: + """Initialize retention service. + + Args: + uow_factory: Factory that returns a fresh UnitOfWork instance per call. + retention_days: Days to retain completed meetings. + meetings_dir: Base directory for meeting assets. + enabled: Whether retention is enabled. + """ + self._uow_factory = uow_factory + self._retention_days = retention_days + self._meetings_dir = meetings_dir + self._enabled = enabled + + @property + def is_enabled(self) -> bool: + """Check if retention is enabled.""" + return self._enabled + + @property + def retention_days(self) -> int: + """Get configured retention days.""" + return self._retention_days + + @property + def cutoff_date(self) -> datetime: + """Calculate cutoff date for retention.""" + return datetime.now(UTC) - timedelta(days=self._retention_days) + + async def find_expired_meetings(self) -> list[Meeting]: + """Find meetings older than retention period. + + Returns: + List of meetings eligible for deletion. + """ + uow = self._uow_factory() + async with uow: + return list(await uow.meetings.find_older_than(self.cutoff_date)) + + async def run_cleanup(self, dry_run: bool = False) -> RetentionReport: + """Execute retention cleanup. + + Args: + dry_run: If True, report but don't delete. + + Returns: + Report of cleanup results. 
+ """ + if not self._enabled and not dry_run: + logger.info("Retention disabled, skipping cleanup") + return RetentionReport( + meetings_checked=0, + meetings_deleted=0, + errors=(), + ) + + cutoff = self.cutoff_date + logger.info( + "Running retention cleanup (dry_run=%s, cutoff=%s)", + dry_run, + cutoff.isoformat(), + ) + + expired = await self.find_expired_meetings() + deleted = 0 + errors: list[str] = [] + + for meeting in expired: + if dry_run: + logger.info( + "Would delete expired meeting: id=%s, ended_at=%s", + meeting.id, + meeting.ended_at, + ) + continue + + try: + # Import here to avoid circular imports + from noteflow.application.services import MeetingService + + # Use a fresh UnitOfWork instance for each deletion + meeting_svc = MeetingService(self._uow_factory()) + success = await meeting_svc.delete_meeting( + meeting.id, + meetings_dir=self._meetings_dir, + ) + if success: + deleted += 1 + logger.info( + "Deleted expired meeting: id=%s", + meeting.id, + ) + except Exception as e: + error_msg = f"{meeting.id}: {e}" + errors.append(error_msg) + logger.warning("Failed to delete meeting %s: %s", meeting.id, e) + + logger.info( + "Retention cleanup complete: checked=%d, deleted=%d, errors=%d", + len(expired), + deleted, + len(errors), + ) + + return RetentionReport( + meetings_checked=len(expired), + meetings_deleted=deleted, + errors=tuple(errors), + ) +```` + +## File: src/noteflow/application/services/summarization_service.py +````python +"""Summarization orchestration service. + +Coordinate provider selection, consent handling, and citation verification. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from enum import Enum +from typing import TYPE_CHECKING + +from noteflow.domain.summarization import ( + CitationVerificationResult, + ProviderUnavailableError, + SummarizationRequest, + SummarizationResult, +) + +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable, Sequence + + from noteflow.domain.entities import Segment, Summary + from noteflow.domain.summarization import CitationVerifier, SummarizerProvider + from noteflow.domain.value_objects import MeetingId + + # Type alias for persistence callback + PersistCallback = Callable[[Summary], Awaitable[None]] + +logger = logging.getLogger(__name__) + + +class SummarizationMode(Enum): + """Available summarization modes.""" + + MOCK = "mock" + LOCAL = "local" # Ollama + CLOUD = "cloud" # OpenAI/Anthropic + + +@dataclass +class SummarizationServiceSettings: + """Configuration for summarization service. + + Attributes: + default_mode: Default summarization mode. + cloud_consent_granted: Whether user has consented to cloud processing. + fallback_to_local: Fall back to local if cloud unavailable. + verify_citations: Whether to verify citations after summarization. + filter_invalid_citations: Remove invalid citations from result. + max_key_points: Default maximum key points. + max_action_items: Default maximum action items. + """ + + default_mode: SummarizationMode = SummarizationMode.LOCAL + cloud_consent_granted: bool = False + fallback_to_local: bool = True + verify_citations: bool = True + filter_invalid_citations: bool = True + max_key_points: int = 5 + max_action_items: int = 10 + + +@dataclass +class SummarizationServiceResult: + """Result from summarization service. + + Attributes: + result: The raw summarization result from the provider. + verification: Citation verification result (if verification enabled). 
+ filtered_summary: Summary with invalid citations removed (if filtering enabled). + provider_used: Which provider was actually used. + fallback_used: Whether a fallback provider was used. + """ + + result: SummarizationResult + verification: CitationVerificationResult | None = None + filtered_summary: Summary | None = None + provider_used: str = "" + fallback_used: bool = False + + @property + def summary(self) -> Summary: + """Get the best available summary (filtered if available).""" + return self.filtered_summary or self.result.summary + + @property + def has_invalid_citations(self) -> bool: + """Check if summary has invalid citations.""" + return self.verification is not None and not self.verification.is_valid + + +@dataclass +class SummarizationService: + """Orchestrate summarization with provider selection and citation verification. + + Manages provider selection based on mode and availability, handles + cloud consent requirements, and verifies/filters citation integrity. + """ + + providers: dict[SummarizationMode, SummarizerProvider] = field(default_factory=dict) + verifier: CitationVerifier | None = None + settings: SummarizationServiceSettings = field(default_factory=SummarizationServiceSettings) + on_persist: PersistCallback | None = None + + def register_provider(self, mode: SummarizationMode, provider: SummarizerProvider) -> None: + """Register a provider for a specific mode. + + Args: + mode: The mode this provider handles. + provider: The provider implementation. + """ + self.providers[mode] = provider + logger.debug("Registered %s provider: %s", mode.value, provider.provider_name) + + def set_verifier(self, verifier: CitationVerifier) -> None: + """Set the citation verifier. + + Args: + verifier: Citation verifier implementation. + """ + self.verifier = verifier + + def get_available_modes(self) -> list[SummarizationMode]: + """Get list of currently available summarization modes. + + Returns: + List of available modes based on registered providers. + """ + available = [] + for mode, provider in self.providers.items(): + if mode == SummarizationMode.CLOUD: + if provider.is_available and self.settings.cloud_consent_granted: + available.append(mode) + elif provider.is_available: + available.append(mode) + return available + + def is_mode_available(self, mode: SummarizationMode) -> bool: + """Check if a specific mode is available. + + Args: + mode: The mode to check. + + Returns: + True if mode is available. + """ + return mode in self.get_available_modes() + + def grant_cloud_consent(self) -> None: + """Grant consent for cloud processing.""" + self.settings.cloud_consent_granted = True + logger.info("Cloud consent granted") + + def revoke_cloud_consent(self) -> None: + """Revoke consent for cloud processing.""" + self.settings.cloud_consent_granted = False + logger.info("Cloud consent revoked") + + async def summarize( + self, + meeting_id: MeetingId, + segments: Sequence[Segment], + mode: SummarizationMode | None = None, + max_key_points: int | None = None, + max_action_items: int | None = None, + ) -> SummarizationServiceResult: + """Generate evidence-linked summary for meeting transcript. + + Args: + meeting_id: The meeting ID. + segments: Transcript segments to summarize. + mode: Override default mode (None uses settings default). + max_key_points: Override default max key points. + max_action_items: Override default max action items. + + Returns: + SummarizationServiceResult with summary and verification. 
+ + Raises: + SummarizationError: If summarization fails and no fallback available. + ProviderUnavailableError: If no provider is available for the mode. + """ + target_mode = mode or self.settings.default_mode + fallback_used = False + + # Get provider, potentially with fallback + provider, actual_mode = self._get_provider_with_fallback(target_mode) + if actual_mode != target_mode: + fallback_used = True + logger.info( + "Falling back from %s to %s mode", + target_mode.value, + actual_mode.value, + ) + + # Build request + request = SummarizationRequest( + meeting_id=meeting_id, + segments=segments, + max_key_points=max_key_points or self.settings.max_key_points, + max_action_items=max_action_items or self.settings.max_action_items, + ) + + # Execute summarization + logger.info( + "Summarizing %d segments with %s provider", + len(segments), + provider.provider_name, + ) + result = await provider.summarize(request) + + # Build service result + service_result = SummarizationServiceResult( + result=result, + provider_used=provider.provider_name, + fallback_used=fallback_used, + ) + + # Verify citations if enabled + if self.settings.verify_citations and self.verifier is not None: + verification = self.verifier.verify_citations(result.summary, list(segments)) + service_result.verification = verification + + if not verification.is_valid: + logger.warning( + "Summary has %d invalid citations", + verification.invalid_count, + ) + + # Filter if enabled + if self.settings.filter_invalid_citations: + service_result.filtered_summary = self._filter_citations( + result.summary, list(segments) + ) + + # Persist summary if callback provided + if self.on_persist is not None: + await self.on_persist(service_result.summary) + logger.debug("Summary persisted for meeting %s", meeting_id) + + return service_result + + def _get_provider_with_fallback( + self, mode: SummarizationMode + ) -> tuple[SummarizerProvider, SummarizationMode]: + """Get provider for mode, with fallback if unavailable. + + Args: + mode: Requested mode. + + Returns: + Tuple of (provider, actual_mode). + + Raises: + ProviderUnavailableError: If no provider available. + """ + # Check requested mode + if mode in self.providers: + provider = self.providers[mode] + + # Check cloud consent + if mode == SummarizationMode.CLOUD and not self.settings.cloud_consent_granted: + logger.warning("Cloud mode requested but consent not granted") + if self.settings.fallback_to_local: + return self._get_fallback_provider(mode) + raise ProviderUnavailableError("Cloud consent not granted") + + if provider.is_available: + return provider, mode + + # Provider exists but unavailable + if self.settings.fallback_to_local and mode != SummarizationMode.MOCK: + return self._get_fallback_provider(mode) + + raise ProviderUnavailableError(f"No provider available for mode: {mode.value}") + + def _get_fallback_provider( + self, original_mode: SummarizationMode + ) -> tuple[SummarizerProvider, SummarizationMode]: + """Get fallback provider when primary unavailable. + + Fallback order: LOCAL -> MOCK + + Args: + original_mode: The mode that was unavailable. + + Returns: + Tuple of (provider, mode). + + Raises: + ProviderUnavailableError: If no fallback available. 
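+
+        Example (sketch; CLOUD falling back through the chain):
+            provider, mode = self._get_fallback_provider(SummarizationMode.CLOUD)
+            # mode is LOCAL when that provider is available, otherwise MOCK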
+ """ + fallback_order = [SummarizationMode.LOCAL, SummarizationMode.MOCK] + + for fallback_mode in fallback_order: + if fallback_mode == original_mode: + continue + if fallback_mode in self.providers: + provider = self.providers[fallback_mode] + if provider.is_available: + return provider, fallback_mode + + raise ProviderUnavailableError("No fallback provider available") + + def _filter_citations(self, summary: Summary, segments: list[Segment]) -> Summary: + """Filter invalid citations from summary. + + Args: + summary: Summary to filter. + segments: Available segments. + + Returns: + Summary with invalid citations removed. + """ + if self.verifier is None: + return summary + + # Use verifier's filter method if available + if hasattr(self.verifier, "filter_invalid_citations"): + return self.verifier.filter_invalid_citations(summary, segments) + + return summary + + def set_default_mode(self, mode: SummarizationMode) -> None: + """Set the default summarization mode. + + Args: + mode: New default mode. + """ + self.settings.default_mode = mode + logger.info("Default summarization mode set to %s", mode.value) + + def set_persist_callback(self, callback: PersistCallback | None) -> None: + """Set callback for persisting summaries after generation. + + Args: + callback: Async function that persists a Summary, or None to disable. + """ + self.on_persist = callback +```` + +## File: src/noteflow/cli/retention.py +````python +"""CLI command for retention cleanup. + +Usage: + python -m noteflow.cli.retention cleanup [--dry-run] + python -m noteflow.cli.retention status +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import sys + +from noteflow.application.services import RetentionService +from noteflow.config.settings import get_settings +from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +async def _run_cleanup(dry_run: bool) -> int: + """Execute retention cleanup. + + Args: + dry_run: If True, report but don't delete. + + Returns: + Exit code (0 for success, 1 for errors). + """ + settings = get_settings() + + if not settings.retention_enabled and not dry_run: + logger.warning( + "Retention is disabled. Set NOTEFLOW_RETENTION_ENABLED=true or use --dry-run" + ) + return 1 + + uow_factory = SqlAlchemyUnitOfWork.factory_from_settings(settings) + service = RetentionService( + uow_factory=uow_factory, + retention_days=settings.retention_days, + meetings_dir=settings.meetings_dir, + enabled=settings.retention_enabled, + ) + + logger.info( + "Running retention cleanup (dry_run=%s, retention_days=%d, cutoff=%s)", + dry_run, + service.retention_days, + service.cutoff_date.isoformat(), + ) + + report = await service.run_cleanup(dry_run=dry_run) + + print("\nRetention Cleanup Report:") + print(f" Meetings checked: {report.meetings_checked}") + print(f" Meetings deleted: {report.meetings_deleted}") + + if report.errors: + print(f" Errors: {len(report.errors)}") + for err in report.errors: + print(f" - {err}") + return 1 + + return 0 + + +async def _show_status() -> int: + """Show retention status and pending deletions. + + Returns: + Exit code (always 0). 
+ """ + settings = get_settings() + + uow_factory = SqlAlchemyUnitOfWork.factory_from_settings(settings) + service = RetentionService( + uow_factory=uow_factory, + retention_days=settings.retention_days, + meetings_dir=settings.meetings_dir, + enabled=settings.retention_enabled, + ) + + expired = await service.find_expired_meetings() + + print("\nRetention Status:") + print(f" Enabled: {settings.retention_enabled}") + print(f" Retention days: {settings.retention_days}") + print(f" Check interval: {settings.retention_check_interval_hours} hours") + print(f" Cutoff date: {service.cutoff_date.isoformat()}") + print(f" Meetings pending deletion: {len(expired)}") + + if expired: + print("\n Pending deletions:") + for meeting in expired[:10]: # Show first 10 + print(f" - {meeting.id}: {meeting.title} (ended: {meeting.ended_at})") + if len(expired) > 10: + print(f" ... and {len(expired) - 10} more") + + return 0 + + +def main() -> None: + """Entry point for retention CLI.""" + parser = argparse.ArgumentParser( + description="NoteFlow meeting retention management", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # cleanup command + cleanup_parser = subparsers.add_parser("cleanup", help="Run retention cleanup") + cleanup_parser.add_argument( + "--dry-run", + action="store_true", + help="Report what would be deleted without deleting", + ) + + # status command + subparsers.add_parser("status", help="Show retention status") + + args = parser.parse_args() + + if not args.command: + parser.print_help() + sys.exit(1) + + if args.command == "cleanup": + exit_code = asyncio.run(_run_cleanup(dry_run=args.dry_run)) + elif args.command == "status": + exit_code = asyncio.run(_show_status()) + else: + parser.print_help() + exit_code = 1 + + sys.exit(exit_code) + + +if __name__ == "__main__": + main() +```` + +## File: src/noteflow/client/components/_async_mixin.py +````python +"""Mixin for async operations with loading/error state management. + +Provides standardized handling for UI components that perform async operations, +including loading state, error handling, and UI thread dispatch. +""" + +from __future__ import annotations + +from collections.abc import Awaitable, Callable +from typing import TYPE_CHECKING, TypeVar + +if TYPE_CHECKING: + import flet as ft + + +T = TypeVar("T") + + +class AsyncOperationMixin[T]: + """Mixin providing standardized async operation handling. + + Manages loading state, error handling, and UI thread dispatch for + Flet components that perform async operations. + + Components using this mixin must have: + - `_page: ft.Page | None` attribute for UI updates + """ + + _page: ft.Page | None + + async def run_async_operation( + self, + operation: Callable[[], Awaitable[T]], + on_success: Callable[[T], None], + on_error: Callable[[str], None], + set_loading: Callable[[bool], None], + ) -> T | None: + """Run async operation with standardized state management. + + Handles loading state, error catching, and UI thread dispatch. + All callbacks are dispatched to the UI thread. + + Args: + operation: Async callable to execute. + on_success: Callback with result on success (called on UI thread). + on_error: Callback with error message on failure (called on UI thread). + set_loading: Callback to set loading state (called on UI thread). + + Returns: + Result of operation on success, None on failure. 
+ """ + self._dispatch_ui(lambda: set_loading(True)) + try: + result = await operation() + # Capture result for closure + self._dispatch_ui(lambda r=result: on_success(r)) # type: ignore[misc] + return result + except Exception as e: + error_msg = str(e) + self._dispatch_ui(lambda msg=error_msg: on_error(msg)) # type: ignore[misc] + return None + finally: + self._dispatch_ui(lambda: set_loading(False)) + + def _dispatch_ui(self, callback: Callable[[], None]) -> None: + """Dispatch callback to UI thread. + + Safe to call even if page is None (no-op in that case). + + Args: + callback: Function to execute on UI thread. + """ + if not self._page: + return + + async def _runner() -> None: + callback() + + # Flet expects a coroutine function here; schedule it. + self._page.run_task(_runner) +```` + +## File: src/noteflow/client/components/annotation_display.py +````python +"""Annotation display component for meeting review. + +Display existing annotations during meeting review with type badges and clickable timestamps. +Reuses patterns from MeetingLibraryComponent (ListView) and SummaryPanelComponent (type badges). +""" + +from __future__ import annotations + +import logging +from collections.abc import Callable +from typing import TYPE_CHECKING + +import flet as ft + +# REUSE existing formatting utility +from noteflow.infrastructure.export._formatting import format_timestamp + +if TYPE_CHECKING: + from noteflow.client.state import AppState + from noteflow.grpc.client import AnnotationInfo + +logger = logging.getLogger(__name__) + +# Annotation type colors (reused pattern from summary_panel.py) +ANNOTATION_TYPE_COLORS: dict[str, str] = { + "action_item": ft.Colors.GREEN_400, + "decision": ft.Colors.BLUE_400, + "note": ft.Colors.GREY_400, + "risk": ft.Colors.ORANGE_400, +} + +ANNOTATION_TYPE_ICONS: dict[str, str] = { + "action_item": ft.Icons.CHECK_CIRCLE_OUTLINE, + "decision": ft.Icons.GAVEL, + "note": ft.Icons.NOTE, + "risk": ft.Icons.WARNING, +} + +ANNOTATION_TYPE_LABELS: dict[str, str] = { + "action_item": "Action", + "decision": "Decision", + "note": "Note", + "risk": "Risk", +} + + +class AnnotationDisplayComponent: + """Display existing annotations during meeting review. + + Shows annotations sorted by start_time with type badges and clickable timestamps. + Reuses ListView pattern from MeetingLibraryComponent. + """ + + def __init__( + self, + state: AppState, + on_annotation_seek: Callable[[float], None] | None = None, + ) -> None: + """Initialize annotation display. + + Args: + state: Centralized application state. + on_annotation_seek: Callback when annotation is clicked (seek to timestamp). + """ + self._state = state + self._on_annotation_seek = on_annotation_seek + + # UI elements + self._list_view: ft.ListView | None = None + self._header_text: ft.Text | None = None + self._container: ft.Container | None = None + + # State + self._annotations: list[AnnotationInfo] = [] + + def build(self) -> ft.Container: + """Build annotation display UI. + + Returns: + Container with annotation list. 
+ """ + self._header_text = ft.Text( + "Annotations (0)", + size=14, + weight=ft.FontWeight.BOLD, + ) + + self._list_view = ft.ListView( + spacing=5, + padding=10, + height=150, + ) + + self._container = ft.Container( + content=ft.Column( + [ + self._header_text, + ft.Container( + content=self._list_view, + border=ft.border.all(1, ft.Colors.GREY_400), + border_radius=8, + ), + ], + spacing=5, + ), + visible=False, # Hidden until annotations loaded + ) + return self._container + + def load_annotations(self, annotations: list[AnnotationInfo]) -> None: + """Load and display annotations. + + Args: + annotations: List of annotations to display. + """ + # Sort by start_time + self._annotations = sorted(annotations, key=lambda a: a.start_time) + self._state.run_on_ui_thread(self._render_annotations) + + def clear(self) -> None: + """Clear all annotations.""" + self._annotations = [] + if self._list_view: + self._list_view.controls.clear() + if self._header_text: + self._header_text.value = "Annotations (0)" + if self._container: + self._container.visible = False + self._state.request_update() + + def _render_annotations(self) -> None: + """Render annotation list (UI thread only).""" + if not self._list_view or not self._header_text or not self._container: + return + + self._list_view.controls.clear() + + for annotation in self._annotations: + self._list_view.controls.append(self._create_annotation_row(annotation)) + + # Update header and visibility + count = len(self._annotations) + self._header_text.value = f"Annotations ({count})" + self._container.visible = count > 0 + + self._state.request_update() + + def _create_annotation_row(self, annotation: AnnotationInfo) -> ft.Container: + """Create a row for an annotation. + + Args: + annotation: Annotation to display. + + Returns: + Container with annotation details. + """ + # Get type styling + atype = annotation.annotation_type + color = ANNOTATION_TYPE_COLORS.get(atype, ft.Colors.GREY_400) + icon = ANNOTATION_TYPE_ICONS.get(atype, ft.Icons.NOTE) + label = ANNOTATION_TYPE_LABELS.get(atype, atype.title()) + + # Format timestamp + time_str = format_timestamp(annotation.start_time) + + # Type badge + badge = ft.Container( + content=ft.Row( + [ + ft.Icon(icon, size=12, color=color), + ft.Text(label, size=10, color=color, weight=ft.FontWeight.BOLD), + ], + spacing=2, + ), + bgcolor=f"{color}20", # 20% opacity background + padding=ft.padding.symmetric(horizontal=6, vertical=2), + border_radius=4, + ) + + # Annotation text (truncated if long) + text = annotation.text + display_text = f"{text[:80]}..." if len(text) > 80 else text + + row = ft.Row( + [ + badge, + ft.Text(time_str, size=11, color=ft.Colors.GREY_600, width=50), + ft.Text(display_text, size=12, expand=True), + ], + spacing=10, + ) + + return ft.Container( + content=row, + padding=8, + border_radius=4, + on_click=lambda e, a=annotation: self._on_annotation_click(a), + ink=True, + ) + + def _on_annotation_click(self, annotation: AnnotationInfo) -> None: + """Handle annotation row click. + + Args: + annotation: Clicked annotation. + """ + if self._on_annotation_seek: + self._on_annotation_seek(annotation.start_time) + logger.debug( + "Annotation seek: type=%s, time=%.2f", + annotation.annotation_type, + annotation.start_time, + ) +```` + +## File: src/noteflow/client/components/annotation_toolbar.py +````python +"""Annotation toolbar component for adding action items, decisions, and notes. + +Uses AnnotationInfo from grpc.client and NoteFlowClient.add_annotation(). 
+Does not recreate any types - imports and uses existing ones.
+"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import Callable
+from typing import TYPE_CHECKING
+
+import flet as ft
+
+if TYPE_CHECKING:
+    from noteflow.client.state import AppState
+    from noteflow.grpc.client import NoteFlowClient
+
+logger = logging.getLogger(__name__)
+
+
+class AnnotationToolbarComponent:
+    """Toolbar for adding annotations during recording or playback.
+
+    Uses NoteFlowClient.add_annotation() to persist annotations.
+    """
+
+    def __init__(
+        self,
+        state: AppState,
+        get_client: Callable[[], NoteFlowClient | None],
+    ) -> None:
+        """Initialize annotation toolbar.
+
+        Args:
+            state: Centralized application state.
+            get_client: Callable that returns current gRPC client or None.
+        """
+        self._state = state
+        self._get_client = get_client
+
+        # UI elements
+        self._action_btn: ft.ElevatedButton | None = None
+        self._decision_btn: ft.ElevatedButton | None = None
+        self._note_btn: ft.ElevatedButton | None = None
+        self._risk_btn: ft.ElevatedButton | None = None
+        self._row: ft.Row | None = None
+
+        # Dialog elements
+        self._dialog: ft.AlertDialog | None = None
+        self._text_field: ft.TextField | None = None
+        self._current_annotation_type: str = ""
+
+    def build(self) -> ft.Row:
+        """Build annotation toolbar UI.
+
+        Returns:
+            Row containing annotation buttons.
+        """
+        self._action_btn = ft.ElevatedButton(
+            "Action Item",
+            icon=ft.Icons.CHECK_CIRCLE_OUTLINE,
+            on_click=lambda e: self._show_annotation_dialog("action_item"),
+            disabled=True,
+        )
+        self._decision_btn = ft.ElevatedButton(
+            "Decision",
+            icon=ft.Icons.GAVEL,
+            on_click=lambda e: self._show_annotation_dialog("decision"),
+            disabled=True,
+        )
+        self._note_btn = ft.ElevatedButton(
+            "Note",
+            icon=ft.Icons.NOTE_ADD,
+            on_click=lambda e: self._show_annotation_dialog("note"),
+            disabled=True,
+        )
+        self._risk_btn = ft.ElevatedButton(
+            "Risk",
+            icon=ft.Icons.WARNING_AMBER,
+            on_click=lambda e: self._show_annotation_dialog("risk"),
+            disabled=True,
+        )
+
+        self._row = ft.Row(
+            [self._action_btn, self._decision_btn, self._note_btn, self._risk_btn],
+            visible=False,
+        )
+        return self._row
+
+    def set_enabled(self, enabled: bool) -> None:
+        """Enable or disable annotation buttons.
+
+        Args:
+            enabled: Whether buttons should be enabled.
+        """
+        if self._action_btn:
+            self._action_btn.disabled = not enabled
+        if self._decision_btn:
+            self._decision_btn.disabled = not enabled
+        if self._note_btn:
+            self._note_btn.disabled = not enabled
+        if self._risk_btn:
+            self._risk_btn.disabled = not enabled
+        self._state.request_update()
+
+    def set_visible(self, visible: bool) -> None:
+        """Set visibility of annotation toolbar.
+
+        Args:
+            visible: Whether toolbar should be visible.
+        """
+        if self._row:
+            self._row.visible = visible
+        self._state.request_update()
+
+    def _show_annotation_dialog(self, annotation_type: str) -> None:
+        """Show dialog for entering annotation text.
+
+        Args:
+            annotation_type: Type of annotation (action_item, decision, note, risk).
+ """ + self._current_annotation_type = annotation_type + + # Format type for display + type_display = annotation_type.replace("_", " ").title() + + self._text_field = ft.TextField( + label=f"{type_display} Text", + multiline=True, + min_lines=2, + max_lines=4, + width=400, + autofocus=True, + ) + + self._dialog = ft.AlertDialog( + title=ft.Text(f"Add {type_display}"), + content=self._text_field, + actions=[ + ft.TextButton("Cancel", on_click=self._close_dialog), + ft.ElevatedButton("Add", on_click=self._submit_annotation), + ], + actions_alignment=ft.MainAxisAlignment.END, + ) + + # Show dialog + if self._state._page: + self._state._page.dialog = self._dialog + self._dialog.open = True + self._state.request_update() + + def _close_dialog(self, e: ft.ControlEvent | None = None) -> None: + """Close the annotation dialog.""" + if self._dialog: + self._dialog.open = False + self._state.request_update() + + def _submit_annotation(self, e: ft.ControlEvent) -> None: + """Submit the annotation to the server.""" + if not self._text_field: + return + + text = self._text_field.value or "" + if not text.strip(): + return + + self._close_dialog() + + # Get current timestamp + timestamp = self._get_current_timestamp() + + # Submit to server + client = self._get_client() + if not client: + logger.warning("No gRPC client available for annotation") + return + + meeting = self._state.current_meeting + if not meeting: + logger.warning("No current meeting for annotation") + return + + try: + if annotation := client.add_annotation( + meeting_id=meeting.id, + annotation_type=self._current_annotation_type, + text=text.strip(), + start_time=timestamp, + end_time=timestamp, # Point annotation + ): + self._state.annotations.append(annotation) + logger.info( + "Added annotation: %s at %.2f", self._current_annotation_type, timestamp + ) + else: + logger.error("Failed to add annotation") + except Exception as exc: + logger.error("Error adding annotation: %s", exc) + + def _get_current_timestamp(self) -> float: + """Get current timestamp for annotation. + + Returns timestamp from playback position (during playback) or + recording elapsed time (during recording). + + Returns: + Current timestamp in seconds. + """ + # During playback, use playback position + if self._state.playback_position > 0: + return self._state.playback_position + + # During recording, use elapsed seconds + return float(self._state.elapsed_seconds) +```` + +## File: src/noteflow/client/components/playback_controls.py +````python +"""Playback controls component with play/pause/stop and timeline. + +Uses SoundDevicePlayback from infrastructure.audio and format_timestamp from _formatting. +Does not recreate any types - imports and uses existing ones. +""" + +from __future__ import annotations + +import logging +from collections.abc import Callable +from typing import TYPE_CHECKING, Final + +import flet as ft + +from noteflow.client.components._thread_mixin import BackgroundWorkerMixin + +# REUSE existing types - do not recreate +from noteflow.infrastructure.audio import PlaybackState +from noteflow.infrastructure.export._formatting import format_timestamp + +if TYPE_CHECKING: + from noteflow.client.state import AppState + +logger = logging.getLogger(__name__) + +POSITION_POLL_INTERVAL: Final[float] = 0.1 # 100ms for smooth timeline updates + + +class PlaybackControlsComponent(BackgroundWorkerMixin): + """Audio playback controls with play/pause/stop and timeline. + + Uses SoundDevicePlayback from state and format_timestamp from _formatting. 
+ """ + + def __init__( + self, + state: AppState, + on_position_change: Callable[[float], None] | None = None, + ) -> None: + """Initialize playback controls component. + + Args: + state: Centralized application state. + on_position_change: Callback when playback position changes. + """ + self._state = state + self._on_position_change = on_position_change + self._init_worker() + + # UI elements + self._play_btn: ft.IconButton | None = None + self._stop_btn: ft.IconButton | None = None + self._position_label: ft.Text | None = None + self._duration_label: ft.Text | None = None + self._timeline_slider: ft.Slider | None = None + self._row: ft.Row | None = None + + def build(self) -> ft.Row: + """Build playback controls UI. + + Returns: + Row containing playback buttons and timeline. + """ + self._play_btn = ft.IconButton( + icon=ft.Icons.PLAY_ARROW, + icon_color=ft.Colors.GREEN, + tooltip="Play", + on_click=self._on_play_click, + disabled=True, + ) + self._stop_btn = ft.IconButton( + icon=ft.Icons.STOP, + icon_color=ft.Colors.RED, + tooltip="Stop", + on_click=self._on_stop_click, + disabled=True, + ) + self._position_label = ft.Text("00:00", size=12, width=50) + self._duration_label = ft.Text("00:00", size=12, width=50) + self._timeline_slider = ft.Slider( + min=0, + max=100, + value=0, + expand=True, + on_change=self._on_slider_change, + disabled=True, + ) + + self._row = ft.Row( + [ + self._play_btn, + self._stop_btn, + self._position_label, + self._timeline_slider, + self._duration_label, + ], + visible=False, + ) + return self._row + + def set_visible(self, visible: bool) -> None: + """Set visibility of playback controls. + + Args: + visible: Whether controls should be visible. + """ + if self._row: + self._row.visible = visible + self._state.request_update() + + def load_audio(self) -> None: + """Load session audio buffer for playback.""" + buffer = self._state.session_audio_buffer + if not buffer: + logger.warning("No audio in session buffer") + return + + # Play through SoundDevicePlayback + self._state.playback.play(buffer) + self._state.playback.pause() # Load but don't start + + # Update UI state + duration = self._state.playback.total_duration + self._state.playback_position = 0.0 + + self._state.run_on_ui_thread(lambda: self._update_loaded_state(duration)) + + def _update_loaded_state(self, duration: float) -> None: + """Update UI after audio is loaded (UI thread only).""" + if self._play_btn: + self._play_btn.disabled = False + if self._stop_btn: + self._stop_btn.disabled = False + if self._timeline_slider: + self._timeline_slider.disabled = False + self._timeline_slider.max = max(duration, 0.1) + self._timeline_slider.value = 0 + if self._duration_label: + self._duration_label.value = format_timestamp(duration) + if self._position_label: + self._position_label.value = "00:00" + + self.set_visible(True) + self._state.request_update() + + def seek(self, position: float) -> None: + """Seek to a specific position. + + Args: + position: Position in seconds. 
+        """
+        if self._state.playback.seek(position):
+            self._state.playback_position = position
+            self._state.run_on_ui_thread(self._update_position_display)
+
+    def _on_play_click(self, e: ft.ControlEvent) -> None:
+        """Handle play/pause button click."""
+        playback = self._state.playback
+
+        if playback.state == PlaybackState.PLAYING:
+            playback.pause()
+            self._stop_polling()
+            self._update_play_button(playing=False)
+        elif playback.state == PlaybackState.PAUSED:
+            playback.resume()
+            self._start_polling()
+            self._update_play_button(playing=True)
+        elif buffer := self._state.session_audio_buffer:
+            playback.play(buffer)
+            self._start_polling()
+            self._update_play_button(playing=True)
+
+    def _on_stop_click(self, e: ft.ControlEvent) -> None:
+        """Handle stop button click."""
+        self._stop_polling()
+        self._state.playback.stop()
+        self._state.playback_position = 0.0
+        self._update_play_button(playing=False)
+        self._state.run_on_ui_thread(self._update_position_display)
+
+    def _on_slider_change(self, e: ft.ControlEvent) -> None:
+        """Handle timeline slider change."""
+        if self._timeline_slider:
+            position = float(self._timeline_slider.value or 0)
+            self.seek(position)
+
+    def _update_play_button(self, *, playing: bool) -> None:
+        """Update play button icon based on state."""
+        if self._play_btn:
+            if playing:
+                self._play_btn.icon = ft.Icons.PAUSE
+                self._play_btn.tooltip = "Pause"
+            else:
+                self._play_btn.icon = ft.Icons.PLAY_ARROW
+                self._play_btn.tooltip = "Play"
+        self._state.request_update()
+
+    def _start_polling(self) -> None:
+        """Start position polling thread."""
+        self._start_worker(self._poll_loop, "PlaybackPositionPoll")
+
+    def _stop_polling(self) -> None:
+        """Stop position polling thread."""
+        self._stop_worker()
+
+    def _poll_loop(self) -> None:
+        """Background polling loop for position updates."""
+        while self._should_run():
+            playback = self._state.playback
+
+            if playback.state == PlaybackState.PLAYING:
+                position = playback.current_position
+                self._state.playback_position = position
+                self._state.run_on_ui_thread(self._update_position_display)
+
+                # Notify callback
+                if self._on_position_change:
+                    try:
+                        self._on_position_change(position)
+                    except Exception as e:
+                        logger.error("Position change callback error: %s", e)
+
+            elif playback.state == PlaybackState.STOPPED:
+                # Playback finished - update UI and stop polling
+                self._state.run_on_ui_thread(self._on_playback_finished)
+                break
+
+            self._wait_interval(POSITION_POLL_INTERVAL)
+
+    def _update_position_display(self) -> None:
+        """Update position display elements (UI thread only)."""
+        position = self._state.playback_position
+
+        if self._position_label:
+            self._position_label.value = format_timestamp(position)
+
+        if self._timeline_slider and not self._timeline_slider.disabled:
+            # Follow playback whenever the slider is enabled (drag state is not tracked here)
+            self._timeline_slider.value = position
+
+        self._state.request_update()
+
+    def _on_playback_finished(self) -> None:
+        """Handle playback completion (UI thread only)."""
+        self._update_play_button(playing=False)
+        self._state.playback_position = 0.0
+        self._update_position_display()
+````
+
+## File: src/noteflow/client/components/recording_timer.py
+````python
+"""Recording timer component with background thread.
+
+Uses format_timestamp() from infrastructure/export/_formatting.py (not local implementation).
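+
+Usage sketch (illustrative; assumes a caller-managed ``AppState`` named ``state``):
+
+    timer = RecordingTimerComponent(state)
+    page.add(timer.build())  # row stays hidden until start()
+    timer.start()            # spawns the background tick thread
+    timer.stop()             # joins the worker and hides the row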
+""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Final + +import flet as ft + +from noteflow.client.components._thread_mixin import BackgroundWorkerMixin + +# REUSE existing formatting utility - do not recreate +from noteflow.infrastructure.export._formatting import format_timestamp + +if TYPE_CHECKING: + from noteflow.client.state import AppState + +TIMER_UPDATE_INTERVAL: Final[float] = 1.0 + + +class RecordingTimerComponent(BackgroundWorkerMixin): + """Recording duration timer with background thread. + + Uses format_timestamp() from export._formatting (not local implementation). + """ + + def __init__(self, state: AppState) -> None: + """Initialize timer component. + + Args: + state: Centralized application state. + """ + self._state = state + self._init_worker() + + self._dot: ft.Icon | None = None + self._label: ft.Text | None = None + self._row: ft.Row | None = None + + def build(self) -> ft.Row: + """Build timer UI elements. + + Returns: + Row containing recording dot and time label. + """ + self._dot = ft.Icon( + ft.Icons.FIBER_MANUAL_RECORD, + color=ft.Colors.RED, + size=16, + ) + self._label = ft.Text( + "00:00", + size=20, + weight=ft.FontWeight.BOLD, + color=ft.Colors.RED, + ) + self._row = ft.Row( + controls=[self._dot, self._label], + visible=False, + ) + return self._row + + def start(self) -> None: + """Start the recording timer.""" + self._state.recording_start_time = time.time() + self._state.elapsed_seconds = 0 + + if self._row: + self._row.visible = True + if self._label: + self._label.value = "00:00" + + self._start_worker(self._timer_loop, "RecordingTimer") + self._state.request_update() + + def stop(self) -> None: + """Stop the recording timer.""" + self._stop_worker(timeout=2.0) + + if self._row: + self._row.visible = False + + self._state.recording_start_time = None + self._state.request_update() + + def _timer_loop(self) -> None: + """Background timer loop.""" + while self._should_run(): + if self._state.recording_start_time is not None: + self._state.elapsed_seconds = int(time.time() - self._state.recording_start_time) + self._state.run_on_ui_thread(self._update_display) + self._wait_interval(TIMER_UPDATE_INTERVAL) + + def _update_display(self) -> None: + """Update timer display (UI thread only).""" + if not self._label: + return + + # REUSE existing format_timestamp from _formatting.py + self._label.value = format_timestamp(float(self._state.elapsed_seconds)) + self._state.request_update() +```` + +## File: src/noteflow/client/components/summary_panel.py +````python +"""Summary panel component for evidence-linked meeting summaries. + +Uses existing patterns from MeetingLibraryComponent and TranscriptComponent. +Does not recreate any types - imports and uses existing domain entities. 
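+
+Wiring sketch (illustrative; ``state``, ``get_summarizer``, and ``jump_to_segment``
+are caller-provided):
+
+    panel = SummaryPanelComponent(
+        state,
+        get_service=get_summarizer,         # () -> SummarizationService | None
+        on_citation_click=jump_to_segment,  # receives a segment_id (int)
+    )
+    page.add(panel.build())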
+""" + +from __future__ import annotations + +import logging +from collections.abc import Callable +from typing import TYPE_CHECKING +from uuid import UUID + +import flet as ft + +if TYPE_CHECKING: + from noteflow.application.services import SummarizationService + from noteflow.client.state import AppState + from noteflow.domain.entities import ActionItem, KeyPoint, Summary + +from noteflow.domain.value_objects import MeetingId + +logger = logging.getLogger(__name__) + +# Priority color mapping +PRIORITY_COLORS: dict[int, str] = { + 0: ft.Colors.GREY_400, # Unspecified + 1: ft.Colors.BLUE_400, # Low + 2: ft.Colors.ORANGE_400, # Medium + 3: ft.Colors.RED_400, # High +} + +PRIORITY_LABELS: dict[int, str] = { + 0: "—", + 1: "Low", + 2: "Med", + 3: "High", +} + + +class SummaryPanelComponent: + """Summary panel with evidence-linked key points and action items. + + Displays executive summary, key points with citations, and action items + with priority badges. Citation chips link back to transcript segments. + """ + + def __init__( + self, + state: AppState, + get_service: Callable[[], SummarizationService | None], + on_citation_click: Callable[[int], None] | None = None, + ) -> None: + """Initialize summary panel. + + Args: + state: Centralized application state. + get_service: Callable to get summarization service. + on_citation_click: Callback when citation chip is clicked (segment_id). + """ + self._state = state + self._get_service = get_service + self._on_citation_click = on_citation_click + + # Uncited drafts tracking + self._show_uncited: bool = False + self._original_summary: Summary | None = None + self._filtered_summary: Summary | None = None + self._uncited_key_points: int = 0 + self._uncited_action_items: int = 0 + + # UI references (set in build) + self._container: ft.Container | None = None + self._summary_text: ft.Text | None = None + self._key_points_list: ft.ListView | None = None + self._action_items_list: ft.ListView | None = None + self._generate_btn: ft.ElevatedButton | None = None + self._loading_indicator: ft.ProgressRing | None = None + self._error_text: ft.Text | None = None + self._uncited_toggle: ft.Switch | None = None + self._uncited_count_text: ft.Text | None = None + + def build(self) -> ft.Container: + """Build the summary panel UI. + + Returns: + Container with summary panel content. 
+ """ + # Executive summary section + self._summary_text = ft.Text( + "", + size=14, + selectable=True, + ) + + # Key points list with citation chips + self._key_points_list = ft.ListView( + spacing=5, + height=150, + padding=5, + ) + + # Action items list with priority badges + self._action_items_list = ft.ListView( + spacing=5, + height=150, + padding=5, + ) + + # Generate button + self._generate_btn = ft.ElevatedButton( + "Generate Summary", + icon=ft.Icons.AUTO_AWESOME, + on_click=self._on_generate_click, + disabled=True, + ) + + # Loading/error states + self._loading_indicator = ft.ProgressRing( + visible=False, + width=20, + height=20, + ) + self._error_text = ft.Text( + "", + color=ft.Colors.RED_400, + visible=False, + size=12, + ) + + # Uncited drafts toggle + self._uncited_count_text = ft.Text( + "", + size=11, + color=ft.Colors.GREY_600, + visible=False, + ) + self._uncited_toggle = ft.Switch( + label="Show uncited", + value=False, + on_change=self._on_uncited_toggle, + visible=False, + scale=0.8, + ) + + summary_container = ft.Container( + content=self._summary_text, + padding=10, + bgcolor=ft.Colors.GREY_100, + border_radius=4, + ) + + self._container = ft.Container( + content=ft.Column( + [ + ft.Row( + [ + ft.Text("Summary", size=16, weight=ft.FontWeight.BOLD), + self._generate_btn, + self._loading_indicator, + ft.Container(expand=True), # Spacer + self._uncited_count_text, + self._uncited_toggle, + ], + alignment=ft.MainAxisAlignment.START, + spacing=10, + ), + self._error_text, + summary_container, + ft.Text("Key Points:", size=14, weight=ft.FontWeight.BOLD), + ft.Container( + content=self._key_points_list, + border=ft.border.all(1, ft.Colors.GREY_300), + border_radius=4, + ), + ft.Text("Action Items:", size=14, weight=ft.FontWeight.BOLD), + ft.Container( + content=self._action_items_list, + border=ft.border.all(1, ft.Colors.GREY_300), + border_radius=4, + ), + ], + spacing=10, + ), + visible=False, + ) + return self._container + + def set_visible(self, visible: bool) -> None: + """Set panel visibility. + + Args: + visible: Whether panel should be visible. + """ + if self._container: + self._container.visible = visible + self._state.request_update() + + def set_enabled(self, enabled: bool) -> None: + """Set generate button enabled state. + + Args: + enabled: Whether generate button should be enabled. 
+        """
+        if self._generate_btn:
+            self._generate_btn.disabled = not enabled
+        self._state.request_update()
+
+    def _on_generate_click(self, e: ft.ControlEvent) -> None:
+        """Handle generate button click."""
+        if self._state._page:
+            self._state._page.run_task(self._generate_summary)
+
+    async def _generate_summary(self) -> None:
+        """Generate summary asynchronously."""
+        service = self._get_service()
+        if not service:
+            self._show_error("Summarization service not available")
+            return
+
+        if not self._state.current_meeting:
+            self._show_error("No meeting selected")
+            return
+
+        if not self._state.transcript_segments:
+            self._show_error("No transcript segments to summarize")
+            return
+
+        # Convert TranscriptSegment to domain Segment
+        segments = self._convert_segments()
+
+        self._state.summary_loading = True
+        self._state.summary_error = None
+        self._update_loading_state()
+
+        # Convert meeting id string to MeetingId
+        try:
+            meeting_uuid = UUID(str(self._state.current_meeting.id))
+        except (AttributeError, ValueError) as exc:
+            self._show_error("Invalid meeting id")
+            logger.error("Invalid meeting id for summary: %s", exc)
+            self._state.summary_loading = False
+            self._state.run_on_ui_thread(self._update_loading_state)
+            return
+
+        meeting_id = MeetingId(meeting_uuid)
+
+        try:
+            result = await service.summarize(
+                meeting_id=meeting_id,
+                segments=segments,
+            )
+            # Track original and filtered summaries for toggle
+            self._original_summary = result.result.summary
+            self._filtered_summary = result.filtered_summary
+            self._state.current_summary = result.summary
+
+            # Calculate uncited counts
+            self._calculate_uncited_counts()
+
+            self._state.run_on_ui_thread(self._render_summary)
+
+            # Log provider info
+            logger.info(
+                "Summary generated by %s (fallback=%s)",
+                result.provider_used,
+                result.fallback_used,
+            )
+        except Exception as exc:
+            logger.exception("Summarization failed")
+            error_msg = str(exc)
+            self._state.summary_error = error_msg
+            self._state.run_on_ui_thread(lambda msg=error_msg: self._show_error(msg))
+        finally:
+            self._state.summary_loading = False
+            self._state.run_on_ui_thread(self._update_loading_state)
+
+    def _convert_segments(self) -> list[Segment]:
+        """Convert TranscriptSegment to domain Segment for service call.
+
+        Returns:
+            List of domain Segment entities.
+        """
+        # Runtime import; the module-level import above is TYPE_CHECKING-only.
+        from noteflow.domain.entities import Segment
+
+        segments: list[Segment] = []
+        for ts in self._state.transcript_segments:
+            seg = Segment(
+                segment_id=ts.segment_id,
+                text=ts.text,
+                start_time=ts.start_time,
+                end_time=ts.end_time,
+                language=ts.language,
+            )
+            segments.append(seg)
+        return segments
+
+    def _update_loading_state(self) -> None:
+        """Update loading indicator visibility."""
+        if self._loading_indicator:
+            self._loading_indicator.visible = self._state.summary_loading
+        if self._generate_btn:
+            self._generate_btn.disabled = self._state.summary_loading
+        self._state.request_update()
+
+    def _show_error(self, message: str) -> None:
+        """Show error message.
+
+        Args:
+            message: Error message to display.
+ """ + if self._error_text: + self._error_text.value = message + self._error_text.visible = True + self._state.request_update() + + def _clear_error(self) -> None: + """Clear error message.""" + if self._error_text: + self._error_text.value = "" + self._error_text.visible = False + self._state.request_update() + + def _render_summary(self) -> None: + """Render summary content (UI thread only).""" + summary = self._get_display_summary() + if not summary: + return + + self._clear_error() + + # Update uncited toggle visibility + self._update_uncited_ui() + + # Executive summary + if self._summary_text: + self._summary_text.value = summary.executive_summary or "No summary generated." + + # Key points + if self._key_points_list: + self._key_points_list.controls.clear() + for idx, kp in enumerate(summary.key_points): + self._key_points_list.controls.append(self._create_key_point_row(kp, idx)) + + # Action items + if self._action_items_list: + self._action_items_list.controls.clear() + for idx, ai in enumerate(summary.action_items): + self._action_items_list.controls.append(self._create_action_item_row(ai, idx)) + + self._state.request_update() + + def _create_key_point_row(self, kp: KeyPoint, index: int) -> ft.Container: + """Create a row for a key point. + + Args: + kp: Key point to display. + index: Index in the list. + + Returns: + Container with key point content. + """ + # Citation chips + citation_chips = ft.Row( + [self._create_citation_chip(sid) for sid in kp.segment_ids], + spacing=4, + ) + + # Evidence indicator + evidence_icon = ft.Icon( + ft.Icons.CHECK_CIRCLE if kp.has_evidence() else ft.Icons.HELP_OUTLINE, + size=16, + color=ft.Colors.GREEN_400 if kp.has_evidence() else ft.Colors.GREY_400, + ) + + row = ft.Row( + [ + ft.Text(f"{index + 1}.", size=12, color=ft.Colors.GREY_600, width=20), + evidence_icon, + ft.Text(kp.text, size=13, expand=True), + citation_chips, + ], + spacing=8, + vertical_alignment=ft.CrossAxisAlignment.START, + ) + + return ft.Container( + content=row, + padding=ft.padding.symmetric(horizontal=8, vertical=4), + border_radius=4, + ) + + def _create_action_item_row(self, ai: ActionItem, index: int) -> ft.Container: + """Create a row for an action item. + + Args: + ai: Action item to display. + index: Index in the list. + + Returns: + Container with action item content. + """ + # Priority badge + priority_badge = self._create_priority_badge(ai.priority) + + # Assignee + assignee_text = ft.Text( + ai.assignee if ai.is_assigned() else "Unassigned", + size=11, + color=ft.Colors.BLUE_700 if ai.is_assigned() else ft.Colors.GREY_500, + italic=not ai.is_assigned(), + ) + + # Citation chips + citation_chips = ft.Row( + [self._create_citation_chip(sid) for sid in ai.segment_ids], + spacing=4, + ) + + # Evidence indicator + evidence_icon = ft.Icon( + ft.Icons.CHECK_CIRCLE if ai.has_evidence() else ft.Icons.HELP_OUTLINE, + size=16, + color=ft.Colors.GREEN_400 if ai.has_evidence() else ft.Colors.GREY_400, + ) + + row = ft.Row( + [ + ft.Text(f"{index + 1}.", size=12, color=ft.Colors.GREY_600, width=20), + priority_badge, + evidence_icon, + ft.Column( + [ + ft.Text(ai.text, size=13), + assignee_text, + ], + spacing=2, + expand=True, + ), + citation_chips, + ], + spacing=8, + vertical_alignment=ft.CrossAxisAlignment.START, + ) + + return ft.Container( + content=row, + padding=ft.padding.symmetric(horizontal=8, vertical=4), + border_radius=4, + ) + + def _create_priority_badge(self, priority: int) -> ft.Container: + """Create priority indicator badge. 
+ + Args: + priority: Priority level (0-3). + + Returns: + Container with priority badge. + """ + return ft.Container( + content=ft.Text( + PRIORITY_LABELS.get(priority, "—"), + size=10, + color=ft.Colors.WHITE, + ), + bgcolor=PRIORITY_COLORS.get(priority, ft.Colors.GREY_400), + border_radius=4, + padding=ft.padding.symmetric(horizontal=6, vertical=2), + width=35, + alignment=ft.alignment.center, + ) + + def _create_citation_chip(self, segment_id: int) -> ft.Container: + """Create clickable citation chip. + + Args: + segment_id: Segment ID to link to. + + Returns: + Container with citation chip. + """ + return ft.Container( + content=ft.Text( + f"[#{segment_id}]", + size=11, + color=ft.Colors.BLUE_700, + ), + bgcolor=ft.Colors.BLUE_50, + border_radius=4, + padding=ft.padding.symmetric(horizontal=6, vertical=2), + on_click=lambda _: self._handle_citation_click(segment_id), + ink=True, + ) + + def _handle_citation_click(self, segment_id: int) -> None: + """Handle citation chip click. + + Args: + segment_id: Segment ID that was clicked. + """ + if self._on_citation_click: + self._on_citation_click(segment_id) + + def _calculate_uncited_counts(self) -> None: + """Calculate number of uncited items filtered out.""" + if not self._original_summary or not self._filtered_summary: + self._uncited_key_points = 0 + self._uncited_action_items = 0 + return + + original_kp = len(self._original_summary.key_points) + filtered_kp = len(self._filtered_summary.key_points) + self._uncited_key_points = original_kp - filtered_kp + + original_ai = len(self._original_summary.action_items) + filtered_ai = len(self._filtered_summary.action_items) + self._uncited_action_items = original_ai - filtered_ai + + def _has_uncited_items(self) -> bool: + """Check if any uncited items exist.""" + return self._uncited_key_points > 0 or self._uncited_action_items > 0 + + def _on_uncited_toggle(self, e: ft.ControlEvent) -> None: + """Handle uncited drafts toggle change.""" + self._show_uncited = e.control.value + self._render_summary() + + def _update_uncited_ui(self) -> None: + """Update uncited toggle visibility and count text.""" + has_uncited = self._has_uncited_items() + + if self._uncited_toggle: + self._uncited_toggle.visible = has_uncited + + if self._uncited_count_text: + if has_uncited: + total_uncited = self._uncited_key_points + self._uncited_action_items + self._uncited_count_text.value = f"({total_uncited} hidden)" + self._uncited_count_text.visible = not self._show_uncited + else: + self._uncited_count_text.visible = False + + def _get_display_summary(self) -> Summary | None: + """Get summary to display based on toggle state. + + Returns: + Original summary if showing uncited, filtered otherwise. 
+ """ + if self._show_uncited and self._original_summary: + return self._original_summary + return self._state.current_summary +```` + +## File: src/noteflow/config/__init__.py +````python +"""NoteFlow configuration module.""" + +from .constants import DEFAULT_GRPC_PORT, DEFAULT_SAMPLE_RATE, MAX_GRPC_MESSAGE_SIZE +from .settings import Settings, TriggerSettings, get_settings, get_trigger_settings + +__all__ = [ + "DEFAULT_GRPC_PORT", + "DEFAULT_SAMPLE_RATE", + "MAX_GRPC_MESSAGE_SIZE", + "Settings", + "TriggerSettings", + "get_settings", + "get_trigger_settings", +] +```` + +## File: src/noteflow/domain/entities/segment.py +````python +"""Segment entity for transcript segments.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from noteflow.domain.value_objects import MeetingId + + +@dataclass +class WordTiming: + """Word-level timing information within a segment.""" + + word: str + start_time: float + end_time: float + probability: float + + def __post_init__(self) -> None: + """Validate word timing.""" + if self.end_time < self.start_time: + raise ValueError( + f"end_time ({self.end_time}) must be >= start_time ({self.start_time})" + ) + if not 0.0 <= self.probability <= 1.0: + raise ValueError(f"probability must be between 0 and 1, got {self.probability}") + + +@dataclass +class Segment: + """Transcript segment entity. + + Represents a finalized segment of transcribed speech with optional + word-level timing information and language detection. + """ + + segment_id: int + text: str + start_time: float + end_time: float + meeting_id: MeetingId | None = None + words: list[WordTiming] = field(default_factory=list) + language: str = "en" + language_confidence: float = 0.0 + avg_logprob: float = 0.0 + no_speech_prob: float = 0.0 + embedding: list[float] | None = None + + # Speaker diarization (populated by diarization engine) + speaker_id: str | None = None + speaker_confidence: float = 0.0 + + # Database primary key (set after persistence) + db_id: int | None = None + + def __post_init__(self) -> None: + """Validate segment data.""" + if self.end_time < self.start_time: + raise ValueError( + f"end_time ({self.end_time}) must be >= start_time ({self.start_time})" + ) + if self.segment_id < 0: + raise ValueError(f"segment_id must be non-negative, got {self.segment_id}") + + @property + def duration(self) -> float: + """Segment duration in seconds.""" + return self.end_time - self.start_time + + @property + def word_count(self) -> int: + """Number of words in segment.""" + return len(self.words) if self.words else len(self.text.split()) + + def has_embedding(self) -> bool: + """Check if segment has a computed embedding.""" + return self.embedding is not None and len(self.embedding) > 0 +```` + +## File: src/noteflow/domain/ports/repositories.py +````python +"""Repository protocol interfaces for persistence.""" + +from __future__ import annotations + +from collections.abc import Sequence +from datetime import datetime +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from noteflow.domain.entities import Annotation, Meeting, Segment, Summary + from noteflow.domain.value_objects import AnnotationId, MeetingId, MeetingState + + +class MeetingRepository(Protocol): + """Repository protocol for Meeting aggregate operations.""" + + async def create(self, meeting: Meeting) -> Meeting: + """Persist a new meeting. + + Args: + meeting: Meeting to create. 
+ + Returns: + Created meeting with any generated fields populated. + """ + ... + + async def get(self, meeting_id: MeetingId) -> Meeting | None: + """Retrieve a meeting by ID. + + Args: + meeting_id: Meeting identifier. + + Returns: + Meeting if found, None otherwise. + """ + ... + + async def update(self, meeting: Meeting) -> Meeting: + """Update an existing meeting. + + Args: + meeting: Meeting with updated fields. + + Returns: + Updated meeting. + + Raises: + ValueError: If meeting does not exist. + """ + ... + + async def delete(self, meeting_id: MeetingId) -> bool: + """Delete a meeting and all associated data. + + Args: + meeting_id: Meeting identifier. + + Returns: + True if deleted, False if not found. + """ + ... + + async def list_all( + self, + states: list[MeetingState] | None = None, + limit: int = 100, + offset: int = 0, + sort_desc: bool = True, + ) -> tuple[Sequence[Meeting], int]: + """List meetings with optional filtering. + + Args: + states: Optional list of states to filter by. + limit: Maximum number of meetings to return. + offset: Number of meetings to skip. + sort_desc: Sort by created_at descending if True. + + Returns: + Tuple of (meetings list, total count matching filter). + """ + ... + + async def count_by_state(self, state: MeetingState) -> int: + """Count meetings in a specific state. + + Args: + state: Meeting state to count. + + Returns: + Number of meetings in the specified state. + """ + ... + + async def find_older_than(self, cutoff: datetime) -> Sequence[Meeting]: + """Find completed meetings older than cutoff date. + + Args: + cutoff: Cutoff datetime; meetings ended before this are returned. + + Returns: + Sequence of meetings with ended_at before cutoff. + """ + ... + + +class SegmentRepository(Protocol): + """Repository protocol for Segment operations.""" + + async def add(self, meeting_id: MeetingId, segment: Segment) -> Segment: + """Add a segment to a meeting. + + Args: + meeting_id: Meeting identifier. + segment: Segment to add. + + Returns: + Added segment with db_id populated. + + Raises: + ValueError: If meeting does not exist. + """ + ... + + async def add_batch( + self, + meeting_id: MeetingId, + segments: Sequence[Segment], + ) -> Sequence[Segment]: + """Add multiple segments to a meeting in batch. + + Args: + meeting_id: Meeting identifier. + segments: Segments to add. + + Returns: + Added segments with db_ids populated. + + Raises: + ValueError: If meeting does not exist. + """ + ... + + async def get_by_meeting( + self, + meeting_id: MeetingId, + include_words: bool = True, + ) -> Sequence[Segment]: + """Get all segments for a meeting. + + Args: + meeting_id: Meeting identifier. + include_words: Include word-level timing. + + Returns: + List of segments ordered by segment_id. + """ + ... + + async def search_semantic( + self, + query_embedding: list[float], + limit: int = 10, + meeting_id: MeetingId | None = None, + ) -> Sequence[tuple[Segment, float]]: + """Search segments by semantic similarity. + + Args: + query_embedding: Query embedding vector. + limit: Maximum number of results. + meeting_id: Optional meeting to restrict search to. + + Returns: + List of (segment, similarity_score) tuples. + """ + ... + + async def update_embedding( + self, + segment_db_id: int, + embedding: list[float], + ) -> None: + """Update the embedding for a segment. + + Args: + segment_db_id: Segment database primary key. + embedding: New embedding vector. + """ + ... 
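+
+
+# Illustrative sketch (not part of the port definitions): these are structural
+# ``typing.Protocol`` interfaces, so any class with matching method signatures
+# satisfies them -- no inheritance or registration needed. For example, a
+# dict-backed stand-in for the SummaryRepository defined below, assuming the
+# Summary entity carries its meeting_id (as the proto message does):
+#
+#     class InMemorySummaryRepository:
+#         def __init__(self) -> None:
+#             self._by_meeting: dict[MeetingId, Summary] = {}
+#
+#         async def save(self, summary: Summary) -> Summary:
+#             self._by_meeting[summary.meeting_id] = summary
+#             return summary
+#
+#         async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None:
+#             return self._by_meeting.get(meeting_id)
+#
+#         async def delete_by_meeting(self, meeting_id: MeetingId) -> bool:
+#             return self._by_meeting.pop(meeting_id, None) is not None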
+ + +class SummaryRepository(Protocol): + """Repository protocol for Summary operations.""" + + async def save(self, summary: Summary) -> Summary: + """Save or update a meeting summary. + + Args: + summary: Summary to save. + + Returns: + Saved summary with db_id populated. + """ + ... + + async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None: + """Get summary for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + Summary if exists, None otherwise. + """ + ... + + async def delete_by_meeting(self, meeting_id: MeetingId) -> bool: + """Delete summary for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + True if deleted, False if not found. + """ + ... + + +class AnnotationRepository(Protocol): + """Repository protocol for Annotation operations.""" + + async def add(self, annotation: Annotation) -> Annotation: + """Add an annotation to a meeting. + + Args: + annotation: Annotation to add. + + Returns: + Added annotation with db_id populated. + + Raises: + ValueError: If meeting does not exist. + """ + ... + + async def get(self, annotation_id: AnnotationId) -> Annotation | None: + """Retrieve an annotation by ID. + + Args: + annotation_id: Annotation identifier. + + Returns: + Annotation if found, None otherwise. + """ + ... + + async def get_by_meeting( + self, + meeting_id: MeetingId, + ) -> Sequence[Annotation]: + """Get all annotations for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + List of annotations ordered by start_time. + """ + ... + + async def get_by_time_range( + self, + meeting_id: MeetingId, + start_time: float, + end_time: float, + ) -> Sequence[Annotation]: + """Get annotations within a time range. + + Args: + meeting_id: Meeting identifier. + start_time: Start of time range in seconds. + end_time: End of time range in seconds. + + Returns: + List of annotations overlapping the time range. + """ + ... + + async def update(self, annotation: Annotation) -> Annotation: + """Update an existing annotation. + + Args: + annotation: Annotation with updated fields. + + Returns: + Updated annotation. + + Raises: + ValueError: If annotation does not exist. + """ + ... + + async def delete(self, annotation_id: AnnotationId) -> bool: + """Delete an annotation. + + Args: + annotation_id: Annotation identifier. + + Returns: + True if deleted, False if not found. + """ + ... +```` + +## File: src/noteflow/domain/triggers/entities.py +````python +"""Trigger domain entities and value objects. + +Define trigger signals, decisions, and actions for meeting detection. +""" + +from __future__ import annotations + +import time +from dataclasses import dataclass, field +from enum import Enum + + +class TriggerSource(Enum): + """Source of a trigger signal.""" + + AUDIO_ACTIVITY = "audio_activity" + FOREGROUND_APP = "foreground_app" + CALENDAR = "calendar" # Deferred - optional connector + + +class TriggerAction(Enum): + """Action determined by trigger evaluation.""" + + IGNORE = "ignore" # Confidence < 0.40 + NOTIFY = "notify" # Confidence 0.40-0.79 + AUTO_START = "auto_start" # Confidence >= 0.80 (if enabled) + + +@dataclass(frozen=True) +class TriggerSignal: + """A signal from a single trigger source. + + Attributes: + source: The source that generated this signal. + weight: Confidence contribution (0.0-1.0). + app_name: For foreground app signals, the detected app name. + timestamp: When the signal was generated (monotonic time). 
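+
+    Example (weights outside [0.0, 1.0] are rejected in ``__post_init__``):
+        >>> sig = TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=0.45)
+        >>> sig.weight
+        0.45
+        >>> TriggerSignal(source=TriggerSource.CALENDAR, weight=1.5)
+        Traceback (most recent call last):
+            ...
+        ValueError: Weight must be 0.0-1.0, got 1.5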
+ """ + + source: TriggerSource + weight: float + app_name: str | None = None + timestamp: float = field(default_factory=time.monotonic) + + def __post_init__(self) -> None: + """Validate weight is in valid range.""" + if not 0.0 <= self.weight <= 1.0: + msg = f"Weight must be 0.0-1.0, got {self.weight}" + raise ValueError(msg) + + +@dataclass(frozen=True) +class TriggerDecision: + """Result of trigger evaluation. + + Attributes: + action: The determined action (ignore, notify, auto_start). + confidence: Total confidence score from all signals. + signals: The signals that contributed to this decision. + timestamp: When the decision was made (monotonic time). + """ + + action: TriggerAction + confidence: float + signals: tuple[TriggerSignal, ...] + timestamp: float = field(default_factory=time.monotonic) + + @property + def primary_signal(self) -> TriggerSignal | None: + """Get the signal with highest weight contribution.""" + return max(self.signals, key=lambda s: s.weight) if self.signals else None + + @property + def detected_app(self) -> str | None: + """Get the detected app name from any signal if present.""" + return next((signal.app_name for signal in self.signals if signal.app_name), None) +```` + +## File: src/noteflow/domain/value_objects.py +````python +"""Domain value objects for NoteFlow.""" + +from __future__ import annotations + +from enum import Enum, IntEnum +from typing import NewType +from uuid import UUID + +# Type-safe identifiers +MeetingId = NewType("MeetingId", UUID) +AnnotationId = NewType("AnnotationId", UUID) + + +class AnnotationType(Enum): + """User annotation type. + + Used to categorize user-created annotations during recording. + Distinct from LLM-extracted ActionItem/KeyPoint in summaries. + """ + + ACTION_ITEM = "action_item" + DECISION = "decision" + NOTE = "note" + RISK = "risk" + + +class MeetingState(IntEnum): + """Meeting lifecycle state. + + State transitions: + CREATED -> RECORDING -> STOPPING -> STOPPED -> COMPLETED + Any state -> ERROR (on failure) + + The STOPPING state allows graceful shutdown with audio flush operations. + """ + + UNSPECIFIED = 0 + CREATED = 1 + RECORDING = 2 + STOPPED = 3 + COMPLETED = 4 + ERROR = 5 + STOPPING = 6 # Intermediate state for graceful shutdown + + @classmethod + def from_int(cls, value: int) -> MeetingState: + """Convert integer to MeetingState. + + Args: + value: Integer value. + + Returns: + Corresponding MeetingState. + + Raises: + ValueError: If value is not a valid state. + """ + try: + return cls(value) + except ValueError as e: + raise ValueError(f"Invalid meeting state: {value}") from e + + def can_transition_to(self, target: MeetingState) -> bool: + """Check if transition to target state is valid. + + Args: + target: Target state. + + Returns: + True if transition is valid. 
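+
+        Example (ERROR is terminal, per the transition table below):
+            >>> MeetingState.RECORDING.can_transition_to(MeetingState.STOPPING)
+            True
+            >>> MeetingState.ERROR.can_transition_to(MeetingState.CREATED)
+            False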
+ """ + valid_transitions: dict[MeetingState, set[MeetingState]] = { + MeetingState.UNSPECIFIED: {MeetingState.CREATED}, + MeetingState.CREATED: {MeetingState.RECORDING, MeetingState.ERROR}, + MeetingState.RECORDING: {MeetingState.STOPPING, MeetingState.ERROR}, + MeetingState.STOPPING: {MeetingState.STOPPED, MeetingState.ERROR}, + MeetingState.STOPPED: {MeetingState.COMPLETED, MeetingState.ERROR}, + MeetingState.COMPLETED: {MeetingState.ERROR}, + MeetingState.ERROR: set(), # Terminal state + } + return target in valid_transitions.get(self, set()) +```` + +## File: src/noteflow/grpc/proto/noteflow_pb2.pyi +```` +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from collections.abc import Iterable as _Iterable, Mapping as _Mapping +from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class UpdateType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + UPDATE_TYPE_UNSPECIFIED: _ClassVar[UpdateType] + UPDATE_TYPE_PARTIAL: _ClassVar[UpdateType] + UPDATE_TYPE_FINAL: _ClassVar[UpdateType] + UPDATE_TYPE_VAD_START: _ClassVar[UpdateType] + UPDATE_TYPE_VAD_END: _ClassVar[UpdateType] + +class MeetingState(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + MEETING_STATE_UNSPECIFIED: _ClassVar[MeetingState] + MEETING_STATE_CREATED: _ClassVar[MeetingState] + MEETING_STATE_RECORDING: _ClassVar[MeetingState] + MEETING_STATE_STOPPED: _ClassVar[MeetingState] + MEETING_STATE_COMPLETED: _ClassVar[MeetingState] + MEETING_STATE_ERROR: _ClassVar[MeetingState] + +class SortOrder(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + SORT_ORDER_UNSPECIFIED: _ClassVar[SortOrder] + SORT_ORDER_CREATED_DESC: _ClassVar[SortOrder] + SORT_ORDER_CREATED_ASC: _ClassVar[SortOrder] + +class Priority(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + PRIORITY_UNSPECIFIED: _ClassVar[Priority] + PRIORITY_LOW: _ClassVar[Priority] + PRIORITY_MEDIUM: _ClassVar[Priority] + PRIORITY_HIGH: _ClassVar[Priority] + +class AnnotationType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + ANNOTATION_TYPE_UNSPECIFIED: _ClassVar[AnnotationType] + ANNOTATION_TYPE_ACTION_ITEM: _ClassVar[AnnotationType] + ANNOTATION_TYPE_DECISION: _ClassVar[AnnotationType] + ANNOTATION_TYPE_NOTE: _ClassVar[AnnotationType] + ANNOTATION_TYPE_RISK: _ClassVar[AnnotationType] + +class ExportFormat(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + EXPORT_FORMAT_UNSPECIFIED: _ClassVar[ExportFormat] + EXPORT_FORMAT_MARKDOWN: _ClassVar[ExportFormat] + EXPORT_FORMAT_HTML: _ClassVar[ExportFormat] + +class JobStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + JOB_STATUS_UNSPECIFIED: _ClassVar[JobStatus] + JOB_STATUS_QUEUED: _ClassVar[JobStatus] + JOB_STATUS_RUNNING: _ClassVar[JobStatus] + JOB_STATUS_COMPLETED: _ClassVar[JobStatus] + JOB_STATUS_FAILED: _ClassVar[JobStatus] +UPDATE_TYPE_UNSPECIFIED: UpdateType +UPDATE_TYPE_PARTIAL: UpdateType +UPDATE_TYPE_FINAL: UpdateType +UPDATE_TYPE_VAD_START: UpdateType +UPDATE_TYPE_VAD_END: UpdateType +MEETING_STATE_UNSPECIFIED: MeetingState +MEETING_STATE_CREATED: MeetingState +MEETING_STATE_RECORDING: MeetingState +MEETING_STATE_STOPPED: MeetingState +MEETING_STATE_COMPLETED: MeetingState +MEETING_STATE_ERROR: MeetingState 
+SORT_ORDER_UNSPECIFIED: SortOrder +SORT_ORDER_CREATED_DESC: SortOrder +SORT_ORDER_CREATED_ASC: SortOrder +PRIORITY_UNSPECIFIED: Priority +PRIORITY_LOW: Priority +PRIORITY_MEDIUM: Priority +PRIORITY_HIGH: Priority +ANNOTATION_TYPE_UNSPECIFIED: AnnotationType +ANNOTATION_TYPE_ACTION_ITEM: AnnotationType +ANNOTATION_TYPE_DECISION: AnnotationType +ANNOTATION_TYPE_NOTE: AnnotationType +ANNOTATION_TYPE_RISK: AnnotationType +EXPORT_FORMAT_UNSPECIFIED: ExportFormat +EXPORT_FORMAT_MARKDOWN: ExportFormat +EXPORT_FORMAT_HTML: ExportFormat +JOB_STATUS_UNSPECIFIED: JobStatus +JOB_STATUS_QUEUED: JobStatus +JOB_STATUS_RUNNING: JobStatus +JOB_STATUS_COMPLETED: JobStatus +JOB_STATUS_FAILED: JobStatus + +class AudioChunk(_message.Message): + __slots__ = ("meeting_id", "audio_data", "timestamp", "sample_rate", "channels") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + AUDIO_DATA_FIELD_NUMBER: _ClassVar[int] + TIMESTAMP_FIELD_NUMBER: _ClassVar[int] + SAMPLE_RATE_FIELD_NUMBER: _ClassVar[int] + CHANNELS_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + audio_data: bytes + timestamp: float + sample_rate: int + channels: int + def __init__(self, meeting_id: _Optional[str] = ..., audio_data: _Optional[bytes] = ..., timestamp: _Optional[float] = ..., sample_rate: _Optional[int] = ..., channels: _Optional[int] = ...) -> None: ... + +class TranscriptUpdate(_message.Message): + __slots__ = ("meeting_id", "update_type", "partial_text", "segment", "server_timestamp") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + UPDATE_TYPE_FIELD_NUMBER: _ClassVar[int] + PARTIAL_TEXT_FIELD_NUMBER: _ClassVar[int] + SEGMENT_FIELD_NUMBER: _ClassVar[int] + SERVER_TIMESTAMP_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + update_type: UpdateType + partial_text: str + segment: FinalSegment + server_timestamp: float + def __init__(self, meeting_id: _Optional[str] = ..., update_type: _Optional[_Union[UpdateType, str]] = ..., partial_text: _Optional[str] = ..., segment: _Optional[_Union[FinalSegment, _Mapping]] = ..., server_timestamp: _Optional[float] = ...) -> None: ... + +class FinalSegment(_message.Message): + __slots__ = ("segment_id", "text", "start_time", "end_time", "words", "language", "language_confidence", "avg_logprob", "no_speech_prob", "speaker_id", "speaker_confidence") + SEGMENT_ID_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + WORDS_FIELD_NUMBER: _ClassVar[int] + LANGUAGE_FIELD_NUMBER: _ClassVar[int] + LANGUAGE_CONFIDENCE_FIELD_NUMBER: _ClassVar[int] + AVG_LOGPROB_FIELD_NUMBER: _ClassVar[int] + NO_SPEECH_PROB_FIELD_NUMBER: _ClassVar[int] + SPEAKER_ID_FIELD_NUMBER: _ClassVar[int] + SPEAKER_CONFIDENCE_FIELD_NUMBER: _ClassVar[int] + segment_id: int + text: str + start_time: float + end_time: float + words: _containers.RepeatedCompositeFieldContainer[WordTiming] + language: str + language_confidence: float + avg_logprob: float + no_speech_prob: float + speaker_id: str + speaker_confidence: float + def __init__(self, segment_id: _Optional[int] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., words: _Optional[_Iterable[_Union[WordTiming, _Mapping]]] = ..., language: _Optional[str] = ..., language_confidence: _Optional[float] = ..., avg_logprob: _Optional[float] = ..., no_speech_prob: _Optional[float] = ..., speaker_id: _Optional[str] = ..., speaker_confidence: _Optional[float] = ...) -> None: ... 
+ +class WordTiming(_message.Message): + __slots__ = ("word", "start_time", "end_time", "probability") + WORD_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + PROBABILITY_FIELD_NUMBER: _ClassVar[int] + word: str + start_time: float + end_time: float + probability: float + def __init__(self, word: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., probability: _Optional[float] = ...) -> None: ... + +class Meeting(_message.Message): + __slots__ = ("id", "title", "state", "created_at", "started_at", "ended_at", "duration_seconds", "segments", "summary", "metadata") + class MetadataEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + ID_FIELD_NUMBER: _ClassVar[int] + TITLE_FIELD_NUMBER: _ClassVar[int] + STATE_FIELD_NUMBER: _ClassVar[int] + CREATED_AT_FIELD_NUMBER: _ClassVar[int] + STARTED_AT_FIELD_NUMBER: _ClassVar[int] + ENDED_AT_FIELD_NUMBER: _ClassVar[int] + DURATION_SECONDS_FIELD_NUMBER: _ClassVar[int] + SEGMENTS_FIELD_NUMBER: _ClassVar[int] + SUMMARY_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] + id: str + title: str + state: MeetingState + created_at: float + started_at: float + ended_at: float + duration_seconds: float + segments: _containers.RepeatedCompositeFieldContainer[FinalSegment] + summary: Summary + metadata: _containers.ScalarMap[str, str] + def __init__(self, id: _Optional[str] = ..., title: _Optional[str] = ..., state: _Optional[_Union[MeetingState, str]] = ..., created_at: _Optional[float] = ..., started_at: _Optional[float] = ..., ended_at: _Optional[float] = ..., duration_seconds: _Optional[float] = ..., segments: _Optional[_Iterable[_Union[FinalSegment, _Mapping]]] = ..., summary: _Optional[_Union[Summary, _Mapping]] = ..., metadata: _Optional[_Mapping[str, str]] = ...) -> None: ... + +class CreateMeetingRequest(_message.Message): + __slots__ = ("title", "metadata") + class MetadataEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + TITLE_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] + title: str + metadata: _containers.ScalarMap[str, str] + def __init__(self, title: _Optional[str] = ..., metadata: _Optional[_Mapping[str, str]] = ...) -> None: ... + +class StopMeetingRequest(_message.Message): + __slots__ = ("meeting_id",) + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + def __init__(self, meeting_id: _Optional[str] = ...) -> None: ... + +class ListMeetingsRequest(_message.Message): + __slots__ = ("states", "limit", "offset", "sort_order") + STATES_FIELD_NUMBER: _ClassVar[int] + LIMIT_FIELD_NUMBER: _ClassVar[int] + OFFSET_FIELD_NUMBER: _ClassVar[int] + SORT_ORDER_FIELD_NUMBER: _ClassVar[int] + states: _containers.RepeatedScalarFieldContainer[MeetingState] + limit: int + offset: int + sort_order: SortOrder + def __init__(self, states: _Optional[_Iterable[_Union[MeetingState, str]]] = ..., limit: _Optional[int] = ..., offset: _Optional[int] = ..., sort_order: _Optional[_Union[SortOrder, str]] = ...) -> None: ... 
+ +class ListMeetingsResponse(_message.Message): + __slots__ = ("meetings", "total_count") + MEETINGS_FIELD_NUMBER: _ClassVar[int] + TOTAL_COUNT_FIELD_NUMBER: _ClassVar[int] + meetings: _containers.RepeatedCompositeFieldContainer[Meeting] + total_count: int + def __init__(self, meetings: _Optional[_Iterable[_Union[Meeting, _Mapping]]] = ..., total_count: _Optional[int] = ...) -> None: ... + +class GetMeetingRequest(_message.Message): + __slots__ = ("meeting_id", "include_segments", "include_summary") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + INCLUDE_SEGMENTS_FIELD_NUMBER: _ClassVar[int] + INCLUDE_SUMMARY_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + include_segments: bool + include_summary: bool + def __init__(self, meeting_id: _Optional[str] = ..., include_segments: bool = ..., include_summary: bool = ...) -> None: ... + +class DeleteMeetingRequest(_message.Message): + __slots__ = ("meeting_id",) + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + def __init__(self, meeting_id: _Optional[str] = ...) -> None: ... + +class DeleteMeetingResponse(_message.Message): + __slots__ = ("success",) + SUCCESS_FIELD_NUMBER: _ClassVar[int] + success: bool + def __init__(self, success: bool = ...) -> None: ... + +class Summary(_message.Message): + __slots__ = ("meeting_id", "executive_summary", "key_points", "action_items", "generated_at", "model_version") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + EXECUTIVE_SUMMARY_FIELD_NUMBER: _ClassVar[int] + KEY_POINTS_FIELD_NUMBER: _ClassVar[int] + ACTION_ITEMS_FIELD_NUMBER: _ClassVar[int] + GENERATED_AT_FIELD_NUMBER: _ClassVar[int] + MODEL_VERSION_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + executive_summary: str + key_points: _containers.RepeatedCompositeFieldContainer[KeyPoint] + action_items: _containers.RepeatedCompositeFieldContainer[ActionItem] + generated_at: float + model_version: str + def __init__(self, meeting_id: _Optional[str] = ..., executive_summary: _Optional[str] = ..., key_points: _Optional[_Iterable[_Union[KeyPoint, _Mapping]]] = ..., action_items: _Optional[_Iterable[_Union[ActionItem, _Mapping]]] = ..., generated_at: _Optional[float] = ..., model_version: _Optional[str] = ...) -> None: ... + +class KeyPoint(_message.Message): + __slots__ = ("text", "segment_ids", "start_time", "end_time") + TEXT_FIELD_NUMBER: _ClassVar[int] + SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + text: str + segment_ids: _containers.RepeatedScalarFieldContainer[int] + start_time: float + end_time: float + def __init__(self, text: _Optional[str] = ..., segment_ids: _Optional[_Iterable[int]] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ...) -> None: ... + +class ActionItem(_message.Message): + __slots__ = ("text", "assignee", "due_date", "priority", "segment_ids") + TEXT_FIELD_NUMBER: _ClassVar[int] + ASSIGNEE_FIELD_NUMBER: _ClassVar[int] + DUE_DATE_FIELD_NUMBER: _ClassVar[int] + PRIORITY_FIELD_NUMBER: _ClassVar[int] + SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int] + text: str + assignee: str + due_date: float + priority: Priority + segment_ids: _containers.RepeatedScalarFieldContainer[int] + def __init__(self, text: _Optional[str] = ..., assignee: _Optional[str] = ..., due_date: _Optional[float] = ..., priority: _Optional[_Union[Priority, str]] = ..., segment_ids: _Optional[_Iterable[int]] = ...) -> None: ... 
+ +class GenerateSummaryRequest(_message.Message): + __slots__ = ("meeting_id", "force_regenerate") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + FORCE_REGENERATE_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + force_regenerate: bool + def __init__(self, meeting_id: _Optional[str] = ..., force_regenerate: bool = ...) -> None: ... + +class ServerInfoRequest(_message.Message): + __slots__ = () + def __init__(self) -> None: ... + +class ServerInfo(_message.Message): + __slots__ = ("version", "asr_model", "asr_ready", "supported_sample_rates", "max_chunk_size", "uptime_seconds", "active_meetings", "diarization_enabled", "diarization_ready") + VERSION_FIELD_NUMBER: _ClassVar[int] + ASR_MODEL_FIELD_NUMBER: _ClassVar[int] + ASR_READY_FIELD_NUMBER: _ClassVar[int] + SUPPORTED_SAMPLE_RATES_FIELD_NUMBER: _ClassVar[int] + MAX_CHUNK_SIZE_FIELD_NUMBER: _ClassVar[int] + UPTIME_SECONDS_FIELD_NUMBER: _ClassVar[int] + ACTIVE_MEETINGS_FIELD_NUMBER: _ClassVar[int] + DIARIZATION_ENABLED_FIELD_NUMBER: _ClassVar[int] + DIARIZATION_READY_FIELD_NUMBER: _ClassVar[int] + version: str + asr_model: str + asr_ready: bool + supported_sample_rates: _containers.RepeatedScalarFieldContainer[int] + max_chunk_size: int + uptime_seconds: float + active_meetings: int + diarization_enabled: bool + diarization_ready: bool + def __init__(self, version: _Optional[str] = ..., asr_model: _Optional[str] = ..., asr_ready: bool = ..., supported_sample_rates: _Optional[_Iterable[int]] = ..., max_chunk_size: _Optional[int] = ..., uptime_seconds: _Optional[float] = ..., active_meetings: _Optional[int] = ..., diarization_enabled: bool = ..., diarization_ready: bool = ...) -> None: ... + +class Annotation(_message.Message): + __slots__ = ("id", "meeting_id", "annotation_type", "text", "start_time", "end_time", "segment_ids", "created_at") + ID_FIELD_NUMBER: _ClassVar[int] + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int] + CREATED_AT_FIELD_NUMBER: _ClassVar[int] + id: str + meeting_id: str + annotation_type: AnnotationType + text: str + start_time: float + end_time: float + segment_ids: _containers.RepeatedScalarFieldContainer[int] + created_at: float + def __init__(self, id: _Optional[str] = ..., meeting_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ..., created_at: _Optional[float] = ...) -> None: ... + +class AddAnnotationRequest(_message.Message): + __slots__ = ("meeting_id", "annotation_type", "text", "start_time", "end_time", "segment_ids") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + annotation_type: AnnotationType + text: str + start_time: float + end_time: float + segment_ids: _containers.RepeatedScalarFieldContainer[int] + def __init__(self, meeting_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ...) -> None: ... 
+ +class GetAnnotationRequest(_message.Message): + __slots__ = ("annotation_id",) + ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int] + annotation_id: str + def __init__(self, annotation_id: _Optional[str] = ...) -> None: ... + +class ListAnnotationsRequest(_message.Message): + __slots__ = ("meeting_id", "start_time", "end_time") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + start_time: float + end_time: float + def __init__(self, meeting_id: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ...) -> None: ... + +class ListAnnotationsResponse(_message.Message): + __slots__ = ("annotations",) + ANNOTATIONS_FIELD_NUMBER: _ClassVar[int] + annotations: _containers.RepeatedCompositeFieldContainer[Annotation] + def __init__(self, annotations: _Optional[_Iterable[_Union[Annotation, _Mapping]]] = ...) -> None: ... + +class UpdateAnnotationRequest(_message.Message): + __slots__ = ("annotation_id", "annotation_type", "text", "start_time", "end_time", "segment_ids") + ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int] + ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int] + annotation_id: str + annotation_type: AnnotationType + text: str + start_time: float + end_time: float + segment_ids: _containers.RepeatedScalarFieldContainer[int] + def __init__(self, annotation_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ...) -> None: ... + +class DeleteAnnotationRequest(_message.Message): + __slots__ = ("annotation_id",) + ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int] + annotation_id: str + def __init__(self, annotation_id: _Optional[str] = ...) -> None: ... + +class DeleteAnnotationResponse(_message.Message): + __slots__ = ("success",) + SUCCESS_FIELD_NUMBER: _ClassVar[int] + success: bool + def __init__(self, success: bool = ...) -> None: ... + +class ExportTranscriptRequest(_message.Message): + __slots__ = ("meeting_id", "format") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + FORMAT_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + format: ExportFormat + def __init__(self, meeting_id: _Optional[str] = ..., format: _Optional[_Union[ExportFormat, str]] = ...) -> None: ... + +class ExportTranscriptResponse(_message.Message): + __slots__ = ("content", "format_name", "file_extension") + CONTENT_FIELD_NUMBER: _ClassVar[int] + FORMAT_NAME_FIELD_NUMBER: _ClassVar[int] + FILE_EXTENSION_FIELD_NUMBER: _ClassVar[int] + content: str + format_name: str + file_extension: str + def __init__(self, content: _Optional[str] = ..., format_name: _Optional[str] = ..., file_extension: _Optional[str] = ...) -> None: ... + +class RefineSpeakerDiarizationRequest(_message.Message): + __slots__ = ("meeting_id", "num_speakers") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + NUM_SPEAKERS_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + num_speakers: int + def __init__(self, meeting_id: _Optional[str] = ..., num_speakers: _Optional[int] = ...) -> None: ... 
+ +class RefineSpeakerDiarizationResponse(_message.Message): + __slots__ = ("segments_updated", "speaker_ids", "error_message", "job_id", "status") + SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int] + SPEAKER_IDS_FIELD_NUMBER: _ClassVar[int] + ERROR_MESSAGE_FIELD_NUMBER: _ClassVar[int] + JOB_ID_FIELD_NUMBER: _ClassVar[int] + STATUS_FIELD_NUMBER: _ClassVar[int] + segments_updated: int + speaker_ids: _containers.RepeatedScalarFieldContainer[str] + error_message: str + job_id: str + status: JobStatus + def __init__(self, segments_updated: _Optional[int] = ..., speaker_ids: _Optional[_Iterable[str]] = ..., error_message: _Optional[str] = ..., job_id: _Optional[str] = ..., status: _Optional[JobStatus] = ...) -> None: ... + +class RenameSpeakerRequest(_message.Message): + __slots__ = ("meeting_id", "old_speaker_id", "new_speaker_name") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + OLD_SPEAKER_ID_FIELD_NUMBER: _ClassVar[int] + NEW_SPEAKER_NAME_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + old_speaker_id: str + new_speaker_name: str + def __init__(self, meeting_id: _Optional[str] = ..., old_speaker_id: _Optional[str] = ..., new_speaker_name: _Optional[str] = ...) -> None: ... + +class RenameSpeakerResponse(_message.Message): + __slots__ = ("segments_updated", "success") + SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int] + SUCCESS_FIELD_NUMBER: _ClassVar[int] + segments_updated: int + success: bool + def __init__(self, segments_updated: _Optional[int] = ..., success: bool = ...) -> None: ... + +class GetDiarizationJobStatusRequest(_message.Message): + __slots__ = ("job_id",) + JOB_ID_FIELD_NUMBER: _ClassVar[int] + job_id: str + def __init__(self, job_id: _Optional[str] = ...) -> None: ... + +class DiarizationJobStatus(_message.Message): + __slots__ = ("job_id", "status", "segments_updated", "speaker_ids", "error_message") + JOB_ID_FIELD_NUMBER: _ClassVar[int] + STATUS_FIELD_NUMBER: _ClassVar[int] + SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int] + SPEAKER_IDS_FIELD_NUMBER: _ClassVar[int] + ERROR_MESSAGE_FIELD_NUMBER: _ClassVar[int] + job_id: str + status: JobStatus + segments_updated: int + speaker_ids: _containers.RepeatedScalarFieldContainer[str] + error_message: str + def __init__(self, job_id: _Optional[str] = ..., status: _Optional[JobStatus] = ..., segments_updated: _Optional[int] = ..., speaker_ids: _Optional[_Iterable[str]] = ..., error_message: _Optional[str] = ...) -> None: ... +```` + +## File: src/noteflow/grpc/meeting_store.py +````python +"""In-memory meeting storage for the NoteFlow gRPC server. + +Provides thread-safe in-memory storage using domain entities directly. +Used as fallback when no database is configured. +""" + +from __future__ import annotations + +import threading +from typing import TYPE_CHECKING + +from noteflow.domain.entities import Meeting, Segment, Summary +from noteflow.domain.value_objects import MeetingState + +if TYPE_CHECKING: + from collections.abc import Sequence + from datetime import datetime + + +class MeetingStore: + """Thread-safe in-memory meeting storage using domain entities.""" + + def __init__(self) -> None: + """Initialize the store.""" + self._meetings: dict[str, Meeting] = {} + self._lock = threading.RLock() + + def create( + self, + title: str = "", + metadata: dict[str, str] | None = None, + ) -> Meeting: + """Create a new meeting. + + Args: + title: Optional meeting title. + metadata: Optional metadata. + + Returns: + Created meeting. 
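+
+        Example:
+            >>> store = MeetingStore()
+            >>> meeting = store.create(title="Standup")
+            >>> store.get(str(meeting.id)) is meeting
+            True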
+ """ + meeting = Meeting.create(title=title or "Untitled Meeting", metadata=metadata or {}) + + with self._lock: + self._meetings[str(meeting.id)] = meeting + + return meeting + + def get(self, meeting_id: str) -> Meeting | None: + """Get a meeting by ID. + + Args: + meeting_id: Meeting ID string. + + Returns: + Meeting or None if not found. + """ + with self._lock: + return self._meetings.get(meeting_id) + + def list_all( + self, + states: Sequence[MeetingState] | None = None, + limit: int = 100, + offset: int = 0, + sort_desc: bool = True, + ) -> tuple[list[Meeting], int]: + """List meetings with optional filtering. + + Args: + states: Optional list of states to filter by. + limit: Maximum number of meetings to return. + offset: Number of meetings to skip. + sort_desc: Sort by created_at descending if True. + + Returns: + Tuple of (meetings list, total count). + """ + with self._lock: + meetings = list(self._meetings.values()) + + # Filter by state + if states: + state_set = set(states) + meetings = [m for m in meetings if m.state in state_set] + + total = len(meetings) + + # Sort + meetings.sort(key=lambda m: m.created_at, reverse=sort_desc) + + # Paginate + meetings = meetings[offset : offset + limit] + + return meetings, total + + def update(self, meeting: Meeting) -> Meeting: + """Update a meeting in the store. + + Args: + meeting: Meeting with updated fields. + + Returns: + Updated meeting. + """ + with self._lock: + self._meetings[str(meeting.id)] = meeting + return meeting + + def add_segment(self, meeting_id: str, segment: Segment) -> Meeting | None: + """Add a segment to a meeting. + + Args: + meeting_id: Meeting ID. + segment: Segment to add. + + Returns: + Updated meeting or None if not found. + """ + with self._lock: + meeting = self._meetings.get(meeting_id) + if meeting is None: + return None + + meeting.add_segment(segment) + return meeting + + def set_summary(self, meeting_id: str, summary: Summary) -> Meeting | None: + """Set meeting summary. + + Args: + meeting_id: Meeting ID. + summary: Summary to set. + + Returns: + Updated meeting or None if not found. + """ + with self._lock: + meeting = self._meetings.get(meeting_id) + if meeting is None: + return None + + meeting.summary = summary + return meeting + + def update_state(self, meeting_id: str, state: MeetingState) -> bool: + """Atomically update meeting state. + + Args: + meeting_id: Meeting ID. + state: New state. + + Returns: + True if updated, False if meeting not found. + """ + with self._lock: + meeting = self._meetings.get(meeting_id) + if meeting is None: + return False + meeting.state = state + return True + + def update_title(self, meeting_id: str, title: str) -> bool: + """Atomically update meeting title. + + Args: + meeting_id: Meeting ID. + title: New title. + + Returns: + True if updated, False if meeting not found. + """ + with self._lock: + meeting = self._meetings.get(meeting_id) + if meeting is None: + return False + meeting.title = title + return True + + def update_end_time(self, meeting_id: str, end_time: datetime) -> bool: + """Atomically update meeting end time. + + Args: + meeting_id: Meeting ID. + end_time: New end time. + + Returns: + True if updated, False if meeting not found. + """ + with self._lock: + meeting = self._meetings.get(meeting_id) + if meeting is None: + return False + meeting.end_time = end_time + return True + + def delete(self, meeting_id: str) -> bool: + """Delete a meeting. + + Args: + meeting_id: Meeting ID. + + Returns: + True if deleted, False if not found. 
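+
+        Example (illustrative sketch):
+            removed = store.delete(str(meeting.id))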
+ """ + with self._lock: + if meeting_id in self._meetings: + del self._meetings[meeting_id] + return True + return False + + @property + def active_count(self) -> int: + """Count of meetings in RECORDING or STOPPING state.""" + with self._lock: + return sum( + m.state in (MeetingState.RECORDING, MeetingState.STOPPING) + for m in self._meetings.values() + ) +```` + +## File: src/noteflow/grpc/server.py +````python +"""NoteFlow gRPC server entry point (async).""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import signal +import time +from typing import TYPE_CHECKING, Final + +import grpc.aio +from pydantic import ValidationError + +from noteflow.application.services import RecoveryService +from noteflow.application.services.summarization_service import SummarizationService +from noteflow.config.settings import get_settings +from noteflow.infrastructure.asr import FasterWhisperEngine +from noteflow.infrastructure.asr.engine import VALID_MODEL_SIZES +from noteflow.infrastructure.diarization import DiarizationEngine +from noteflow.infrastructure.persistence.database import create_async_session_factory +from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork +from noteflow.infrastructure.summarization import create_summarization_service + +from .proto import noteflow_pb2_grpc +from .service import NoteFlowServicer + +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + +logger = logging.getLogger(__name__) + +DEFAULT_PORT: Final[int] = 50051 +DEFAULT_MODEL: Final[str] = "base" + + +class NoteFlowServer: + """Async gRPC server for NoteFlow.""" + + def __init__( + self, + port: int = DEFAULT_PORT, + asr_model: str = DEFAULT_MODEL, + asr_device: str = "cpu", + asr_compute_type: str = "int8", + session_factory: async_sessionmaker[AsyncSession] | None = None, + summarization_service: SummarizationService | None = None, + diarization_engine: DiarizationEngine | None = None, + ) -> None: + """Initialize the server. + + Args: + port: Port to listen on. + asr_model: ASR model size. + asr_device: Device for ASR ("cpu" or "cuda"). + asr_compute_type: ASR compute type. + session_factory: Optional async session factory for database. + summarization_service: Optional summarization service for generating summaries. + diarization_engine: Optional diarization engine for speaker identification. 
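+
+        Example (minimal sketch; run inside an asyncio event loop):
+            server = NoteFlowServer(port=50051, asr_model="base")
+            await server.start()
+            await server.wait_for_termination()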
+ """ + self._port = port + self._asr_model = asr_model + self._asr_device = asr_device + self._asr_compute_type = asr_compute_type + self._session_factory = session_factory + self._summarization_service = summarization_service + self._diarization_engine = diarization_engine + self._server: grpc.aio.Server | None = None + self._servicer: NoteFlowServicer | None = None + + async def start(self) -> None: + """Start the async gRPC server.""" + logger.info("Starting NoteFlow gRPC server (async)...") + + # Create ASR engine + logger.info( + "Loading ASR model '%s' on %s (%s)...", + self._asr_model, + self._asr_device, + self._asr_compute_type, + ) + start_time = time.perf_counter() + + asr_engine = FasterWhisperEngine( + compute_type=self._asr_compute_type, + device=self._asr_device, + ) + asr_engine.load_model(self._asr_model) + + load_time = time.perf_counter() - start_time + logger.info("ASR model loaded in %.2f seconds", load_time) + + # Lazy-create summarization service if not provided + if self._summarization_service is None: + self._summarization_service = create_summarization_service() + logger.info("Summarization service initialized (default factory)") + + # Create servicer with session factory, summarization, and diarization + self._servicer = NoteFlowServicer( + asr_engine=asr_engine, + session_factory=self._session_factory, + summarization_service=self._summarization_service, + diarization_engine=self._diarization_engine, + ) + + # Create async gRPC server + self._server = grpc.aio.server( + options=[ + ("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB + ("grpc.max_receive_message_length", 100 * 1024 * 1024), + ], + ) + + # Register service + noteflow_pb2_grpc.add_NoteFlowServiceServicer_to_server( + self._servicer, + self._server, + ) + + # Bind to port + address = f"[::]:{self._port}" + self._server.add_insecure_port(address) + + # Start server + await self._server.start() + logger.info("Server listening on %s", address) + + async def stop(self, grace_period: float = 5.0) -> None: + """Stop the server gracefully. + + Args: + grace_period: Time to wait for in-flight RPCs. + """ + if self._server: + logger.info("Stopping server (grace period: %.1fs)...", grace_period) + await self._server.stop(grace_period) + logger.info("Server stopped") + + async def wait_for_termination(self) -> None: + """Block until server is terminated.""" + if self._server: + await self._server.wait_for_termination() + + +async def run_server( + port: int, + asr_model: str, + asr_device: str, + asr_compute_type: str, + database_url: str | None = None, + diarization_enabled: bool = False, + diarization_hf_token: str | None = None, + diarization_device: str = "auto", +) -> None: + """Run the async gRPC server. + + Args: + port: Port to listen on. + asr_model: ASR model size. + asr_device: Device for ASR. + asr_compute_type: ASR compute type. + database_url: Optional database URL for persistence. + diarization_enabled: Whether to enable speaker diarization. + diarization_hf_token: HuggingFace token for pyannote models. + diarization_device: Device for diarization ("auto", "cpu", "cuda", "mps"). 
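+
+    Example (sketch, in-memory mode; blocks until SIGINT/SIGTERM):
+        asyncio.run(run_server(50051, "base", "cpu", "int8"))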
+ """ + # Create session factory if database URL provided + session_factory = None + if database_url: + logger.info("Connecting to database...") + session_factory = create_async_session_factory(database_url) + logger.info("Database connection pool ready") + + # Run crash recovery on startup + uow = SqlAlchemyUnitOfWork(session_factory) + recovery_service = RecoveryService(uow) + recovered = await recovery_service.recover_crashed_meetings() + if recovered: + logger.warning( + "Recovered %d crashed meetings on startup", + len(recovered), + ) + + # Create summarization service - auto-detects LOCAL/MOCK providers + summarization_service = create_summarization_service() + logger.info("Summarization service initialized") + + # Create diarization engine if enabled + diarization_engine: DiarizationEngine | None = None + if diarization_enabled: + if not diarization_hf_token: + logger.warning( + "Diarization enabled but no HuggingFace token provided. " + "Set NOTEFLOW_DIARIZATION_HF_TOKEN or --diarization-hf-token." + ) + else: + logger.info("Initializing diarization engine on %s...", diarization_device) + diarization_engine = DiarizationEngine( + device=diarization_device, + hf_token=diarization_hf_token, + ) + logger.info("Diarization engine initialized (models loaded on demand)") + + server = NoteFlowServer( + port=port, + asr_model=asr_model, + asr_device=asr_device, + asr_compute_type=asr_compute_type, + session_factory=session_factory, + summarization_service=summarization_service, + diarization_engine=diarization_engine, + ) + + # Set up graceful shutdown + loop = asyncio.get_running_loop() + shutdown_event = asyncio.Event() + + def signal_handler() -> None: + logger.info("Received shutdown signal...") + shutdown_event.set() + + for sig in (signal.SIGINT, signal.SIGTERM): + loop.add_signal_handler(sig, signal_handler) + + try: + await server.start() + print(f"\nNoteFlow server running on port {port}") + print(f"ASR model: {asr_model} ({asr_device}/{asr_compute_type})") + if database_url: + print("Database: Connected") + else: + print("Database: Not configured (in-memory mode)") + if diarization_engine: + print(f"Diarization: Enabled ({diarization_device})") + else: + print("Diarization: Disabled") + print("Press Ctrl+C to stop\n") + + # Wait for shutdown signal or server termination + await shutdown_event.wait() + finally: + await server.stop() + + +def main() -> None: + """Entry point for NoteFlow gRPC server.""" + parser = argparse.ArgumentParser(description="NoteFlow gRPC Server") + parser.add_argument( + "-p", + "--port", + type=int, + default=DEFAULT_PORT, + help=f"Port to listen on (default: {DEFAULT_PORT})", + ) + parser.add_argument( + "-m", + "--model", + type=str, + default=DEFAULT_MODEL, + choices=list(VALID_MODEL_SIZES), + help=f"ASR model size (default: {DEFAULT_MODEL})", + ) + parser.add_argument( + "-d", + "--device", + type=str, + default="cpu", + choices=["cpu", "cuda"], + help="ASR device (default: cpu)", + ) + parser.add_argument( + "-c", + "--compute-type", + type=str, + default="int8", + choices=["int8", "float16", "float32"], + help="ASR compute type (default: int8)", + ) + parser.add_argument( + "--database-url", + type=str, + default=None, + help="PostgreSQL database URL (overrides NOTEFLOW_DATABASE_URL)", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Enable verbose logging", + ) + parser.add_argument( + "--diarization", + action="store_true", + help="Enable speaker diarization (requires pyannote.audio)", + ) + parser.add_argument( + 
"--diarization-hf-token", + type=str, + default=None, + help="HuggingFace token for pyannote models (overrides NOTEFLOW_DIARIZATION_HF_TOKEN)", + ) + parser.add_argument( + "--diarization-device", + type=str, + default="auto", + choices=["auto", "cpu", "cuda", "mps"], + help="Device for diarization (default: auto)", + ) + args = parser.parse_args() + + # Configure logging + log_level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig( + level=log_level, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", + ) + + # Get settings + try: + settings = get_settings() + except (OSError, ValueError, ValidationError) as exc: + logger.warning("Failed to load settings: %s", exc) + settings = None + + # Get database URL from args or settings + database_url = args.database_url + if not database_url and settings: + database_url = str(settings.database_url) + if not database_url: + logger.warning("No database URL configured, running in-memory mode") + + # Get diarization config from args or settings + diarization_enabled = args.diarization + diarization_hf_token = args.diarization_hf_token + diarization_device = args.diarization_device + if settings and not diarization_enabled: + diarization_enabled = settings.diarization_enabled + if settings and not diarization_hf_token: + diarization_hf_token = settings.diarization_hf_token + if settings and diarization_device == "auto": + diarization_device = settings.diarization_device + + # Run server + asyncio.run( + run_server( + port=args.port, + asr_model=args.model, + asr_device=args.device, + asr_compute_type=args.compute_type, + database_url=database_url, + diarization_enabled=diarization_enabled, + diarization_hf_token=diarization_hf_token, + diarization_device=diarization_device, + ) + ) + + +if __name__ == "__main__": + main() +```` + +## File: src/noteflow/infrastructure/asr/segmenter.py +````python +"""Audio segmenter with VAD-driven state machine. + +Manages speech segment boundaries using Voice Activity Detection. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum, auto +from typing import TYPE_CHECKING + +import numpy as np +from numpy.typing import NDArray + +if TYPE_CHECKING: + from collections.abc import Iterator + + +class SegmenterState(Enum): + """Segmenter state machine states.""" + + IDLE = auto() # Waiting for speech + SPEECH = auto() # Speech detected, accumulating audio + TRAILING = auto() # Speech ended, collecting trailing audio + + +@dataclass +class SegmenterConfig: + """Configuration for segmenter behavior.""" + + # Minimum speech duration to consider valid (seconds) + min_speech_duration: float = 0.3 + # Maximum segment duration before forced split (seconds) + max_segment_duration: float = 30.0 + # Trailing silence to include after speech ends (seconds) + trailing_silence: float = 0.5 + # Leading audio to include before speech starts (seconds) + leading_buffer: float = 0.2 + # Sample rate for audio processing + sample_rate: int = 16000 + + +@dataclass +class AudioSegment: + """A completed audio segment ready for transcription.""" + + audio: NDArray[np.float32] + start_time: float + end_time: float + + @property + def duration(self) -> float: + """Segment duration in seconds.""" + return self.end_time - self.start_time + + +@dataclass +class Segmenter: + """VAD-driven audio segmenter with state machine. + + Accumulates audio during speech and emits complete segments + when speech ends or max duration is reached. 
+ """ + + config: SegmenterConfig = field(default_factory=SegmenterConfig) + + # State machine + _state: SegmenterState = field(default=SegmenterState.IDLE, init=False) + + # Timing tracking + _stream_time: float = field(default=0.0, init=False) + _speech_start_time: float = field(default=0.0, init=False) + _leading_duration: float = field(default=0.0, init=False) + + # Audio buffers + _leading_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False) + _speech_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False) + _trailing_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False) + _trailing_duration: float = field(default=0.0, init=False) + + @property + def state(self) -> SegmenterState: + """Get current segmenter state.""" + return self._state + + def reset(self) -> None: + """Reset segmenter to initial state.""" + self._state = SegmenterState.IDLE + self._stream_time = 0.0 + self._speech_start_time = 0.0 + self._leading_duration = 0.0 + self._leading_buffer.clear() + self._speech_buffer.clear() + self._trailing_buffer.clear() + self._trailing_duration = 0.0 + + def process_audio( + self, + audio: NDArray[np.float32], + is_speech: bool, + ) -> Iterator[AudioSegment]: + """Process audio chunk with VAD decision. + + Args: + audio: Audio samples (float32, mono). + is_speech: VAD decision for this chunk. + + Yields: + Complete AudioSegment when speech ends or max duration reached. + """ + chunk_duration = len(audio) / self.config.sample_rate + chunk_start = self._stream_time + self._stream_time += chunk_duration + + if self._state == SegmenterState.IDLE: + yield from self._handle_idle(audio, is_speech, chunk_start) + elif self._state == SegmenterState.SPEECH: + yield from self._handle_speech(audio, is_speech, chunk_start, chunk_duration) + elif self._state == SegmenterState.TRAILING: + yield from self._handle_trailing(audio, is_speech, chunk_start, chunk_duration) + + def flush(self) -> AudioSegment | None: + """Flush any pending audio as a segment. + + Call when stream ends to get final segment. + + Returns: + Remaining audio segment if valid, None otherwise. + """ + if self._state in (SegmenterState.SPEECH, SegmenterState.TRAILING): + segment = self._emit_segment() + self._state = SegmenterState.IDLE + return segment + return None + + def _handle_idle( + self, + audio: NDArray[np.float32], + is_speech: bool, + chunk_start: float, + ) -> Iterator[AudioSegment]: + """Handle audio in IDLE state.""" + if is_speech: + # Speech started - transition to SPEECH state + self._state = SegmenterState.SPEECH + self._speech_start_time = chunk_start + + # Capture how much pre-speech audio we are including. 
+ leading_samples = sum(len(chunk) for chunk in self._leading_buffer) + self._leading_duration = leading_samples / self.config.sample_rate + + # Include leading buffer (pre-speech audio) + self._speech_buffer = list(self._leading_buffer) + self._speech_buffer.append(audio) + self._leading_buffer.clear() + else: + # Still idle - maintain leading buffer + self._update_leading_buffer(audio) + + yield from () # No segments emitted in IDLE + + def _handle_speech( + self, + audio: NDArray[np.float32], + is_speech: bool, + chunk_start: float, + chunk_duration: float, + ) -> Iterator[AudioSegment]: + """Handle audio in SPEECH state.""" + if is_speech: + self._speech_buffer.append(audio) + current_duration = self._stream_time - self._speech_start_time + + # Check max duration limit + if current_duration >= self.config.max_segment_duration: + segment = self._emit_segment() + if segment is not None: + yield segment + # Start a fresh segment at the end of this chunk + self._speech_start_time = self._stream_time + self._leading_duration = 0.0 + self._speech_buffer = [] + else: + # Speech ended - transition to TRAILING + # Start trailing buffer with this silent chunk + self._state = SegmenterState.TRAILING + self._trailing_buffer = [audio] + self._trailing_duration = chunk_duration + + # Check if already past trailing threshold + if self._trailing_duration >= self.config.trailing_silence: + segment = self._emit_segment() + if segment is not None: + yield segment + self._state = SegmenterState.IDLE + + def _handle_trailing( + self, + audio: NDArray[np.float32], + is_speech: bool, + chunk_start: float, + chunk_duration: float, + ) -> Iterator[AudioSegment]: + """Handle audio in TRAILING state.""" + if is_speech: + # Speech resumed - merge trailing back and continue + self._speech_buffer.extend(self._trailing_buffer) + self._speech_buffer.append(audio) + self._trailing_buffer.clear() + self._trailing_duration = 0.0 + self._state = SegmenterState.SPEECH + else: + # Still silence - accumulate trailing + self._trailing_buffer.append(audio) + self._trailing_duration += chunk_duration + + if self._trailing_duration >= self.config.trailing_silence: + # Enough trailing silence - emit segment + segment = self._emit_segment() + if segment is not None: + yield segment + self._state = SegmenterState.IDLE + + def _update_leading_buffer(self, audio: NDArray[np.float32]) -> None: + """Maintain rolling leading buffer.""" + self._leading_buffer.append(audio) + + # Calculate total buffer duration + total_samples = sum(len(chunk) for chunk in self._leading_buffer) + total_duration = total_samples / self.config.sample_rate + + # Trim to configured leading buffer size + while total_duration > self.config.leading_buffer and self._leading_buffer: + removed = self._leading_buffer.pop(0) + total_samples -= len(removed) + total_duration = total_samples / self.config.sample_rate + + def _emit_segment(self) -> AudioSegment | None: + """Create and emit completed segment.""" + # Combine speech + trailing audio + all_audio = self._speech_buffer + self._trailing_buffer + + # Calculate actual start time (account for leading buffer) + actual_start = max(0.0, self._speech_start_time - self._leading_duration) + + # Concatenate audio + audio = np.concatenate(all_audio) if all_audio else np.array([], dtype=np.float32) + + # If we only have silence/trailing audio, don't emit a segment. 
+ if not self._speech_buffer: + self._trailing_buffer.clear() + self._trailing_duration = 0.0 + self._leading_duration = 0.0 + return None + + # Check minimum speech duration (excluding leading buffer) + speech_samples = sum(len(chunk) for chunk in self._speech_buffer) + speech_duration = speech_samples / self.config.sample_rate + if speech_duration < self.config.min_speech_duration: + self._speech_buffer.clear() + self._trailing_buffer.clear() + self._trailing_duration = 0.0 + self._leading_duration = 0.0 + return None + + segment = AudioSegment( + audio=audio, + start_time=actual_start, + end_time=self._stream_time, + ) + + # Clear buffers + self._speech_buffer.clear() + self._trailing_buffer.clear() + self._trailing_duration = 0.0 + self._leading_duration = 0.0 + + return segment +```` + +## File: src/noteflow/infrastructure/asr/streaming_vad.py +````python +"""Streaming Voice Activity Detection. + +Provides real-time speech detection for audio streams. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Protocol + +from noteflow.infrastructure.audio import compute_rms + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + + +class VadEngine(Protocol): + """Protocol for VAD engine implementations.""" + + def process(self, audio: NDArray[np.float32]) -> bool: + """Process audio chunk and return speech detection result. + + Args: + audio: Audio samples (float32, mono). + + Returns: + True if speech detected, False otherwise. + """ + ... + + def reset(self) -> None: + """Reset VAD state.""" + ... + + +@dataclass +class EnergyVadConfig: + """Configuration for energy-based VAD.""" + + # Speech detection threshold (RMS energy) + speech_threshold: float = 0.01 + # Silence threshold (lower than speech for hysteresis) + silence_threshold: float = 0.005 + # Minimum consecutive speech frames to confirm speech + min_speech_frames: int = 2 + # Minimum consecutive silence frames to confirm silence + min_silence_frames: int = 3 + + +@dataclass +class EnergyVad: + """Simple energy-based Voice Activity Detection. + + Uses RMS energy with hysteresis for robust detection. + Suitable for clean audio; use silero-vad for noisy environments. + """ + + config: EnergyVadConfig = field(default_factory=EnergyVadConfig) + + # Internal state + _is_speech: bool = field(default=False, init=False) + _speech_frame_count: int = field(default=0, init=False) + _silence_frame_count: int = field(default=0, init=False) + + def process(self, audio: NDArray[np.float32]) -> bool: + """Process audio chunk and detect speech. + + Uses RMS energy with hysteresis to detect speech. + State transitions require consecutive frames above/below threshold. + + Args: + audio: Audio samples (float32, mono, normalized to [-1, 1]). + + Returns: + True if speech detected, False for silence. 
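+
+        Example (sketch; ``chunk`` is a float32 mono array):
+            vad = EnergyVad()
+            speaking = vad.process(chunk)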
+ """ + energy = compute_rms(audio) + + if self._is_speech: + # Currently in speech - check for silence + if energy < self.config.silence_threshold: + self._silence_frame_count += 1 + self._speech_frame_count = 0 + if self._silence_frame_count >= self.config.min_silence_frames: + self._is_speech = False + else: + self._silence_frame_count = 0 + elif energy > self.config.speech_threshold: + self._speech_frame_count += 1 + self._silence_frame_count = 0 + if self._speech_frame_count >= self.config.min_speech_frames: + self._is_speech = True + else: + self._speech_frame_count = 0 + + return self._is_speech + + def reset(self) -> None: + """Reset VAD state to initial values.""" + self._is_speech = False + self._speech_frame_count = 0 + self._silence_frame_count = 0 + + +@dataclass +class StreamingVad: + """Streaming VAD wrapper with configurable backend. + + Wraps VAD engines to provide a unified streaming interface. + """ + + engine: VadEngine = field(default_factory=EnergyVad) + sample_rate: int = 16000 + + def process_chunk(self, audio: NDArray[np.float32]) -> bool: + """Process audio chunk through VAD engine. + + Args: + audio: Audio samples (float32, mono). + + Returns: + True if speech detected, False otherwise. + """ + return self.engine.process(audio) + + def reset(self) -> None: + """Reset VAD state.""" + self.engine.reset() +```` + +## File: src/noteflow/infrastructure/audio/levels.py +````python +"""Audio level computation implementation. + +Provide RMS and dB level calculation for VU meter display. +""" + +from __future__ import annotations + +import math +from typing import Final + +import numpy as np +from numpy.typing import NDArray + + +def compute_rms(frames: NDArray[np.float32]) -> float: + """Calculate Root Mean Square of audio samples. + + Args: + frames: Audio samples as float32 array. + + Returns: + RMS level as float (0.0 for empty array). + """ + if len(frames) == 0: + return 0.0 + # Use float64 for precision during squaring to avoid overflow + return float(np.sqrt(np.mean(frames.astype(np.float64) ** 2))) + + +class RmsLevelProvider: + """RMS-based audio level provider. + + Compute RMS (Root Mean Square) level from audio frames for VU meter display. + """ + + # Minimum dB value to report (silence threshold) + MIN_DB: Final[float] = -60.0 + + def get_rms(self, frames: NDArray[np.float32]) -> float: + """Calculate RMS level from audio frames. + + Args: + frames: Audio samples as float32 array (normalized -1.0 to 1.0). + + Returns: + RMS level normalized to 0.0-1.0 range. + """ + rms = compute_rms(frames) + # Clamp to 0.0-1.0 range for VU meter display + return min(1.0, max(0.0, rms)) + + def get_db(self, frames: NDArray[np.float32]) -> float: + """Calculate dB level from audio frames. + + Args: + frames: Audio samples as float32 array (normalized -1.0 to 1.0). + + Returns: + Level in dB (MIN_DB to 0 range). + """ + rms = self.get_rms(frames) + + if rms <= 0: + return self.MIN_DB + + # Convert to dB: 20 * log10(rms) + db = 20.0 * math.log10(rms) + + # Clamp to MIN_DB to 0 range + return max(self.MIN_DB, min(0.0, db)) + + def rms_to_db(self, rms: float) -> float: + """Convert RMS value to dB. + + Args: + rms: RMS level (0.0-1.0). + + Returns: + Level in dB (MIN_DB to 0 range). + """ + if rms <= 0: + return self.MIN_DB + + db = 20.0 * math.log10(rms) + return max(self.MIN_DB, min(0.0, db)) + + def db_to_rms(self, db: float) -> float: + """Convert dB value to RMS. + + Args: + db: Level in dB. + + Returns: + RMS level (0.0-1.0). 
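+
+        Example (round trip with rms_to_db):
+            provider = RmsLevelProvider()
+            provider.db_to_rms(-20.0)  # -> 0.1
+            provider.rms_to_db(0.1)    # -> -20.0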
+        """
+        return 0.0 if db <= self.MIN_DB else 10.0 ** (db / 20.0)
+````

## File: src/noteflow/infrastructure/audio/reader.py
````python
+"""Read encrypted audio from archived meetings.
+
+Mirror of MeetingAudioWriter - reads encrypted PCM16 chunks and converts to float32.
+Reuses ChunkedAssetReader from security/crypto.py for decryption.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from noteflow.infrastructure.audio.dto import TimestampedAudio
+from noteflow.infrastructure.security.crypto import ChunkedAssetReader
+
+if TYPE_CHECKING:
+    from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
+
+logger = logging.getLogger(__name__)
+
+
+class MeetingAudioReader:
+    """Read audio chunks from encrypted meeting file.
+
+    Mirror of MeetingAudioWriter - handles manifest parsing, DEK unwrapping,
+    and encrypted audio decryption.
+
+    Directory structure (as created by MeetingAudioWriter):
+    ~/.noteflow/meetings/<meeting_id>/
+    ├── manifest.json           # Meeting metadata + wrapped DEK
+    └── audio.enc               # Encrypted PCM16 chunks (NFAE format)
+    """
+
+    def __init__(
+        self,
+        crypto: AesGcmCryptoBox,
+        meetings_dir: Path,
+    ) -> None:
+        """Initialize audio reader.
+
+        Args:
+            crypto: CryptoBox instance for decryption and DEK unwrapping.
+            meetings_dir: Root directory for all meetings (e.g., ~/.noteflow/meetings).
+        """
+        self._crypto = crypto
+        self._meetings_dir = meetings_dir
+        self._meeting_dir: Path | None = None
+        self._sample_rate: int = 16000
+
+    def load_meeting_audio(
+        self,
+        meeting_id: str,
+    ) -> list[TimestampedAudio]:
+        """Load all audio from an archived meeting.
+
+        Reads manifest, unwraps DEK, decrypts audio chunks, converts to float32.
+
+        Args:
+            meeting_id: Meeting UUID string.
+
+        Returns:
+            List of TimestampedAudio chunks.
+
+        Raises:
+            FileNotFoundError: If meeting directory or audio file not found.
+            ValueError: If manifest is invalid or audio format unsupported.
+        """
+        meeting_dir = self._meetings_dir / meeting_id
+        self._meeting_dir = meeting_dir
+
+        # Load and parse manifest
+        manifest_path = meeting_dir / "manifest.json"
+        if not manifest_path.exists():
+            raise FileNotFoundError(f"Manifest not found: {manifest_path}")
+
+        manifest = json.loads(manifest_path.read_text())
+        self._sample_rate = manifest.get("sample_rate", 16000)
+        wrapped_dek_hex = manifest.get("wrapped_dek")
+
+        if not wrapped_dek_hex:
+            raise ValueError("Manifest missing wrapped_dek")
+
+        # Unwrap DEK
+        wrapped_dek = bytes.fromhex(wrapped_dek_hex)
+        dek = self._crypto.unwrap_dek(wrapped_dek)
+
+        # Open encrypted audio file
+        audio_path = meeting_dir / "audio.enc"
+        if not audio_path.exists():
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+
+        reader = ChunkedAssetReader(self._crypto)
+        reader.open(audio_path, dek)
+
+        try:
+            return self._read_all_chunks(reader)
+        finally:
+            reader.close()
+
+    def _read_all_chunks(
+        self,
+        reader: ChunkedAssetReader,
+    ) -> list[TimestampedAudio]:
+        """Read and convert all audio chunks.
+
+        Args:
+            reader: Open ChunkedAssetReader.
+
+        Returns:
+            List of TimestampedAudio chunks.
+ """ + chunks: list[TimestampedAudio] = [] + current_time = 0.0 + + for chunk_bytes in reader.read_chunks(): + # Convert PCM16 bytes back to int16 array + pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16) + + # Convert int16 [-32768, 32767] to float32 [-1.0, 1.0] + audio_float = pcm16.astype(np.float32) / 32767.0 + + # Calculate duration based on sample rate + duration = len(audio_float) / self._sample_rate + + chunks.append( + TimestampedAudio( + frames=audio_float, + timestamp=current_time, + duration=duration, + ) + ) + + current_time += duration + + logger.info( + "Loaded audio: meeting_dir=%s, chunks=%d, total_duration=%.2fs", + self._meeting_dir, + len(chunks), + current_time, + ) + + return chunks + + def get_manifest(self, meeting_id: str) -> dict[str, object] | None: + """Get manifest metadata for a meeting. + + Args: + meeting_id: Meeting UUID string. + + Returns: + Manifest dict or None if not found. + """ + manifest_path = self._meetings_dir / meeting_id / "manifest.json" + if not manifest_path.exists(): + return None + + return dict(json.loads(manifest_path.read_text())) + + def audio_exists(self, meeting_id: str) -> bool: + """Check if audio file exists for a meeting. + + Args: + meeting_id: Meeting UUID string. + + Returns: + True if audio.enc exists. + """ + meeting_dir = self._meetings_dir / meeting_id + audio_path = meeting_dir / "audio.enc" + manifest_path = meeting_dir / "manifest.json" + return audio_path.exists() and manifest_path.exists() + + @property + def sample_rate(self) -> int: + """Return the sample rate from the last loaded manifest.""" + return self._sample_rate +```` + +## File: src/noteflow/infrastructure/export/markdown.py +````python +"""Markdown exporter implementation. + +Export meeting transcripts to Markdown format. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import TYPE_CHECKING + +from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp + +if TYPE_CHECKING: + from collections.abc import Sequence + + from noteflow.domain.entities.meeting import Meeting + from noteflow.domain.entities.segment import Segment + + +class MarkdownExporter: + """Export meeting transcripts to Markdown format. + + Produces clean, readable Markdown with meeting metadata header, + transcript sections with timestamps, and optional summary section. + """ + + @property + def format_name(self) -> str: + """Human-readable format name.""" + return "Markdown" + + @property + def file_extension(self) -> str: + """File extension for Markdown.""" + return ".md" + + def export( + self, + meeting: Meeting, + segments: Sequence[Segment], + ) -> str: + """Export meeting transcript to Markdown. + + Args: + meeting: Meeting entity with metadata. + segments: Ordered list of transcript segments. + + Returns: + Markdown-formatted transcript string. 
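+
+        Example (sketch; ``meeting`` and ``segments`` come from the persistence layer):
+            exporter = MarkdownExporter()
+            markdown = exporter.export(meeting, segments)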
+ """ + lines: list[str] = [ + f"# {meeting.title}", + "", + "## Meeting Info", + "", + f"- **Date:** {format_datetime(meeting.created_at)}", + ] + + if meeting.started_at: + lines.append(f"- **Started:** {format_datetime(meeting.started_at)}") + if meeting.ended_at: + lines.append(f"- **Ended:** {format_datetime(meeting.ended_at)}") + lines.append(f"- **Duration:** {format_timestamp(meeting.duration_seconds)}") + lines.extend((f"- **Segments:** {len(segments)}", "", "## Transcript", "")) + for segment in segments: + timestamp = format_timestamp(segment.start_time) + lines.extend((f"**[{timestamp}]** {segment.text}", "")) + # Summary section (if available) + if meeting.summary: + lines.extend(("## Summary", "")) + if meeting.summary.executive_summary: + lines.extend((meeting.summary.executive_summary, "")) + if meeting.summary.key_points: + lines.extend(("### Key Points", "")) + lines.extend(f"- {point.text}" for point in meeting.summary.key_points) + lines.append("") + + if meeting.summary.action_items: + lines.extend(("### Action Items", "")) + for item in meeting.summary.action_items: + assignee = f" (@{item.assignee})" if item.assignee else "" + lines.append(f"- [ ] {item.text}{assignee}") + lines.append("") + + # Footer + lines.append("---") + lines.append(f"*Exported from NoteFlow on {format_datetime(datetime.now())}*") + + return "\n".join(lines) +```` + +## File: src/noteflow/infrastructure/persistence/migrations/env.py +````python +"""Alembic migration environment configuration.""" + +from __future__ import annotations + +import asyncio +import os +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import pool +from sqlalchemy.engine import Connection +from sqlalchemy.ext.asyncio import async_engine_from_config + +from noteflow.infrastructure.persistence.models import Base + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# Import all models to ensure they're registered with Base.metadata +target_metadata = Base.metadata + +if database_url := os.environ.get("NOTEFLOW_DATABASE_URL"): + # Convert postgres:// to postgresql+asyncpg:// + if database_url.startswith("postgres://"): + database_url = database_url.replace("postgres://", "postgresql+asyncpg://", 1) + elif database_url.startswith("postgresql://"): + database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1) + config.set_main_option("sqlalchemy.url", database_url) + + +def include_object( + obj: object, + name: str | None, + type_: str, + reflected: bool, + compare_to: object | None, +) -> bool: + """Filter objects for autogenerate.""" + # Only include objects in the noteflow schema + if type_ == "table": + schema = getattr(obj, "schema", None) + return schema == "noteflow" + return True + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. 
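+
+    Example (generate SQL without a live database; placeholder URL):
+        NOTEFLOW_DATABASE_URL=postgresql://user:pass@host/db alembic upgrade head --sql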
+ """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + include_schemas=True, + include_object=include_object, + version_table_schema="noteflow", + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection: Connection) -> None: + """Execute migrations with the provided connection.""" + context.configure( + connection=connection, + target_metadata=target_metadata, + include_schemas=True, + include_object=include_object, + version_table_schema="noteflow", + ) + + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations() -> None: + """Run migrations in async mode. + + Create an Engine and associate a connection with the context. + """ + connectable = async_engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + asyncio.run(run_async_migrations()) + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() +```` + +## File: src/noteflow/infrastructure/persistence/repositories/annotation_repo.py +````python +"""SQLAlchemy implementation of AnnotationRepository.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import TYPE_CHECKING +from uuid import UUID + +from sqlalchemy import and_, delete, or_, select + +from noteflow.domain.entities import Annotation +from noteflow.domain.value_objects import AnnotationId +from noteflow.infrastructure.converters import OrmConverter +from noteflow.infrastructure.persistence.models import AnnotationModel +from noteflow.infrastructure.persistence.repositories._base import BaseRepository + +if TYPE_CHECKING: + from noteflow.domain.value_objects import MeetingId + + +class SqlAlchemyAnnotationRepository(BaseRepository): + """SQLAlchemy implementation of AnnotationRepository.""" + + async def add(self, annotation: Annotation) -> Annotation: + """Add an annotation to a meeting. + + Args: + annotation: Annotation to add. + + Returns: + Added annotation with db_id populated. + + Raises: + ValueError: If meeting does not exist. + """ + model = AnnotationModel( + annotation_id=UUID(str(annotation.id)), + meeting_id=UUID(str(annotation.meeting_id)), + annotation_type=annotation.annotation_type.value, + text=annotation.text, + start_time=annotation.start_time, + end_time=annotation.end_time, + segment_ids=annotation.segment_ids, + created_at=annotation.created_at, + ) + self._session.add(model) + await self._session.flush() + annotation.db_id = model.id + return annotation + + async def get(self, annotation_id: AnnotationId) -> Annotation | None: + """Retrieve an annotation by ID. + + Args: + annotation_id: Annotation identifier. + + Returns: + Annotation if found, None otherwise. + """ + stmt = select(AnnotationModel).where( + AnnotationModel.annotation_id == UUID(str(annotation_id)) + ) + model = await self._execute_scalar(stmt) + + return None if model is None else OrmConverter.annotation_to_domain(model) + + async def get_by_meeting( + self, + meeting_id: MeetingId, + ) -> Sequence[Annotation]: + """Get all annotations for a meeting. + + Args: + meeting_id: Meeting identifier. 
+ + Returns: + List of annotations ordered by start_time. + """ + stmt = ( + select(AnnotationModel) + .where(AnnotationModel.meeting_id == UUID(str(meeting_id))) + .order_by(AnnotationModel.start_time) + ) + models = await self._execute_scalars(stmt) + + return [OrmConverter.annotation_to_domain(model) for model in models] + + async def get_by_time_range( + self, + meeting_id: MeetingId, + start_time: float, + end_time: float, + ) -> Sequence[Annotation]: + """Get annotations within a time range. + + Args: + meeting_id: Meeting identifier. + start_time: Start of time range in seconds. + end_time: End of time range in seconds. + + Returns: + List of annotations overlapping the time range. + """ + # Find annotations that overlap with the given time range + stmt = ( + select(AnnotationModel) + .where( + and_( + AnnotationModel.meeting_id == UUID(str(meeting_id)), + or_( + # Annotation starts within range + and_( + AnnotationModel.start_time >= start_time, + AnnotationModel.start_time <= end_time, + ), + # Annotation ends within range + and_( + AnnotationModel.end_time >= start_time, + AnnotationModel.end_time <= end_time, + ), + # Annotation spans entire range + and_( + AnnotationModel.start_time <= start_time, + AnnotationModel.end_time >= end_time, + ), + ), + ) + ) + .order_by(AnnotationModel.start_time) + ) + models = await self._execute_scalars(stmt) + + return [OrmConverter.annotation_to_domain(model) for model in models] + + async def update(self, annotation: Annotation) -> Annotation: + """Update an existing annotation. + + Args: + annotation: Annotation with updated fields. + + Returns: + Updated annotation. + + Raises: + ValueError: If annotation does not exist. + """ + stmt = select(AnnotationModel).where( + AnnotationModel.annotation_id == UUID(str(annotation.id)) + ) + model = await self._execute_scalar(stmt) + + if model is None: + raise ValueError(f"Annotation {annotation.id} not found") + + model.annotation_type = annotation.annotation_type.value + model.text = annotation.text + model.start_time = annotation.start_time + model.end_time = annotation.end_time + model.segment_ids = annotation.segment_ids + + await self._session.flush() + return annotation + + async def delete(self, annotation_id: AnnotationId) -> bool: + """Delete an annotation. + + Args: + annotation_id: Annotation identifier. + + Returns: + True if deleted, False if not found. 
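+
+        Example (sketch; ``repo`` is bound to an active async session):
+            removed = await repo.delete(annotation.id)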
+ """ + stmt = select(AnnotationModel).where( + AnnotationModel.annotation_id == UUID(str(annotation_id)) + ) + model = await self._execute_scalar(stmt) + + if model is None: + return False + + await self._session.execute(delete(AnnotationModel).where(AnnotationModel.id == model.id)) + await self._session.flush() + return True +```` + +## File: src/noteflow/infrastructure/persistence/repositories/summary_repo.py +````python +"""SQLAlchemy implementation of SummaryRepository.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING +from uuid import UUID + +from sqlalchemy import delete, select + +from noteflow.domain.entities import ActionItem, KeyPoint, Summary +from noteflow.infrastructure.converters import OrmConverter +from noteflow.infrastructure.persistence.models import ( + ActionItemModel, + KeyPointModel, + SummaryModel, +) +from noteflow.infrastructure.persistence.repositories._base import BaseRepository + +if TYPE_CHECKING: + from noteflow.domain.value_objects import MeetingId + + +class SqlAlchemySummaryRepository(BaseRepository): + """SQLAlchemy implementation of SummaryRepository.""" + + async def save(self, summary: Summary) -> Summary: + """Save or update a meeting summary. + + Args: + summary: Summary to save. + + Returns: + Saved summary with db_id populated. + """ + # Check if summary exists for this meeting + stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(summary.meeting_id))) + result = await self._session.execute(stmt) + if existing := result.scalar_one_or_none(): + # Update existing summary + existing.executive_summary = summary.executive_summary + if summary.generated_at is not None: + existing.generated_at = summary.generated_at + existing.model_version = summary.model_version + + # Delete old key points and action items + await self._session.execute( + delete(KeyPointModel).where(KeyPointModel.summary_id == existing.id) + ) + await self._session.execute( + delete(ActionItemModel).where(ActionItemModel.summary_id == existing.id) + ) + + # Add new key points + kp_models: list[tuple[KeyPointModel, KeyPoint]] = [] + for kp in summary.key_points: + kp_model = KeyPointModel( + summary_id=existing.id, + text=kp.text, + start_time=kp.start_time, + end_time=kp.end_time, + segment_ids=kp.segment_ids, + ) + self._session.add(kp_model) + kp_models.append((kp_model, kp)) + + # Add new action items + ai_models: list[tuple[ActionItemModel, ActionItem]] = [] + for ai in summary.action_items: + ai_model = ActionItemModel( + summary_id=existing.id, + text=ai.text, + assignee=ai.assignee, + due_date=ai.due_date, + priority=ai.priority, + segment_ids=ai.segment_ids, + ) + self._session.add(ai_model) + ai_models.append((ai_model, ai)) + + await self._session.flush() + for kp_model, kp in kp_models: + kp.db_id = kp_model.id + for ai_model, ai in ai_models: + ai.db_id = ai_model.id + summary.db_id = existing.id + else: + # Create new summary + model = SummaryModel( + meeting_id=UUID(str(summary.meeting_id)), + executive_summary=summary.executive_summary, + generated_at=summary.generated_at, + model_version=summary.model_version, + ) + self._session.add(model) + await self._session.flush() + + # Add key points + for kp in summary.key_points: + kp_model = KeyPointModel( + summary_id=model.id, + text=kp.text, + start_time=kp.start_time, + end_time=kp.end_time, + segment_ids=kp.segment_ids, + ) + self._session.add(kp_model) + await self._session.flush() + kp.db_id = kp_model.id + + # Add action items + for ai in summary.action_items: + ai_model = 
ActionItemModel( + summary_id=model.id, + text=ai.text, + assignee=ai.assignee, + due_date=ai.due_date, + priority=ai.priority, + segment_ids=ai.segment_ids, + ) + self._session.add(ai_model) + await self._session.flush() + ai.db_id = ai_model.id + + summary.db_id = model.id + + return summary + + async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None: + """Get summary for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + Summary if exists, None otherwise. + """ + stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(meeting_id))) + model = await self._execute_scalar(stmt) + + return None if model is None else OrmConverter.summary_to_domain(model, meeting_id) + + async def delete_by_meeting(self, meeting_id: MeetingId) -> bool: + """Delete summary for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + True if deleted, False if not found. + """ + stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(meeting_id))) + model = await self._execute_scalar(stmt) + + if model is None: + return False + + await self._delete_and_flush(model) + return True +```` + +## File: src/noteflow/infrastructure/persistence/models.py +````python +"""SQLAlchemy ORM models for NoteFlow.""" + +from __future__ import annotations + +from datetime import datetime +from typing import ClassVar +from uuid import uuid4 + +from pgvector.sqlalchemy import Vector +from sqlalchemy import ( + DateTime, + Float, + ForeignKey, + Integer, + LargeBinary, + String, + Text, +) +from sqlalchemy.dialects.postgresql import JSONB, UUID +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship + +# Vector dimension for embeddings (OpenAI compatible) +EMBEDDING_DIM = 1536 + + +class Base(DeclarativeBase): + """Base class for all ORM models.""" + + pass + + +class MeetingModel(Base): + """SQLAlchemy model for meetings table.""" + + __tablename__ = "meetings" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + primary_key=True, + default=uuid4, + ) + title: Mapped[str] = mapped_column(String(255), nullable=False) + state: Mapped[int] = mapped_column(Integer, nullable=False, default=1) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + started_at: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), + nullable=True, + ) + ended_at: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), + nullable=True, + ) + metadata_: Mapped[dict[str, str]] = mapped_column( + "metadata", + JSONB, + nullable=False, + default=dict, + ) + wrapped_dek: Mapped[bytes | None] = mapped_column( + LargeBinary, + nullable=True, + ) + + # Relationships + segments: Mapped[list[SegmentModel]] = relationship( + "SegmentModel", + back_populates="meeting", + cascade="all, delete-orphan", + lazy="selectin", + ) + summary: Mapped[SummaryModel | None] = relationship( + "SummaryModel", + back_populates="meeting", + cascade="all, delete-orphan", + uselist=False, + lazy="selectin", + ) + annotations: Mapped[list[AnnotationModel]] = relationship( + "AnnotationModel", + back_populates="meeting", + cascade="all, delete-orphan", + lazy="selectin", + ) + + +class SegmentModel(Base): + """SQLAlchemy model for segments table.""" + + __tablename__ = "segments" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[int] = mapped_column(Integer, primary_key=True, 
autoincrement=True) + meeting_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ) + segment_id: Mapped[int] = mapped_column(Integer, nullable=False) + text: Mapped[str] = mapped_column(Text, nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False) + end_time: Mapped[float] = mapped_column(Float, nullable=False) + language: Mapped[str] = mapped_column(String(10), nullable=False, default="en") + language_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + avg_logprob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + no_speech_prob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + embedding: Mapped[list[float] | None] = mapped_column( + Vector(EMBEDDING_DIM), + nullable=True, + ) + speaker_id: Mapped[str | None] = mapped_column(String(50), nullable=True) + speaker_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + + # Relationships + meeting: Mapped[MeetingModel] = relationship( + "MeetingModel", + back_populates="segments", + ) + words: Mapped[list[WordTimingModel]] = relationship( + "WordTimingModel", + back_populates="segment", + cascade="all, delete-orphan", + lazy="selectin", + ) + + +class WordTimingModel(Base): + """SQLAlchemy model for word_timings table.""" + + __tablename__ = "word_timings" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + segment_pk: Mapped[int] = mapped_column( + Integer, + ForeignKey("noteflow.segments.id", ondelete="CASCADE"), + nullable=False, + ) + word: Mapped[str] = mapped_column(String(255), nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False) + end_time: Mapped[float] = mapped_column(Float, nullable=False) + probability: Mapped[float] = mapped_column(Float, nullable=False) + + # Relationships + segment: Mapped[SegmentModel] = relationship( + "SegmentModel", + back_populates="words", + ) + + +class SummaryModel(Base): + """SQLAlchemy model for summaries table.""" + + __tablename__ = "summaries" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + meeting_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + unique=True, + ) + executive_summary: Mapped[str | None] = mapped_column(Text, nullable=True) + generated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + model_version: Mapped[str | None] = mapped_column(String(50), nullable=True) + + # Relationships + meeting: Mapped[MeetingModel] = relationship( + "MeetingModel", + back_populates="summary", + ) + key_points: Mapped[list[KeyPointModel]] = relationship( + "KeyPointModel", + back_populates="summary", + cascade="all, delete-orphan", + lazy="selectin", + ) + action_items: Mapped[list[ActionItemModel]] = relationship( + "ActionItemModel", + back_populates="summary", + cascade="all, delete-orphan", + lazy="selectin", + ) + + +class KeyPointModel(Base): + """SQLAlchemy model for key_points table.""" + + __tablename__ = "key_points" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[int] = 
mapped_column(Integer, primary_key=True, autoincrement=True) + summary_id: Mapped[int] = mapped_column( + Integer, + ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), + nullable=False, + ) + text: Mapped[str] = mapped_column(Text, nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + segment_ids: Mapped[list[int]] = mapped_column( + JSONB, + nullable=False, + default=list, + ) + + # Relationships + summary: Mapped[SummaryModel] = relationship( + "SummaryModel", + back_populates="key_points", + ) + + +class ActionItemModel(Base): + """SQLAlchemy model for action_items table.""" + + __tablename__ = "action_items" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + summary_id: Mapped[int] = mapped_column( + Integer, + ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), + nullable=False, + ) + text: Mapped[str] = mapped_column(Text, nullable=False) + assignee: Mapped[str] = mapped_column(String(255), nullable=False, default="") + due_date: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), + nullable=True, + ) + priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + segment_ids: Mapped[list[int]] = mapped_column( + JSONB, + nullable=False, + default=list, + ) + + # Relationships + summary: Mapped[SummaryModel] = relationship( + "SummaryModel", + back_populates="action_items", + ) + + +class AnnotationModel(Base): + """SQLAlchemy model for annotations table. + + User-created annotations during recording. Distinct from LLM-extracted + ActionItem/KeyPoint which belong to Summary. Annotations belong directly + to Meeting and are created in real-time. + """ + + __tablename__ = "annotations" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + annotation_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + nullable=False, + unique=True, + default=uuid4, + ) + meeting_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ) + annotation_type: Mapped[str] = mapped_column(String(50), nullable=False) + text: Mapped[str] = mapped_column(Text, nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False) + end_time: Mapped[float] = mapped_column(Float, nullable=False) + segment_ids: Mapped[list[int]] = mapped_column( + JSONB, + nullable=False, + default=list, + ) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + + # Relationships + meeting: Mapped[MeetingModel] = relationship( + "MeetingModel", + back_populates="annotations", + ) +```` + +## File: src/noteflow/infrastructure/summarization/__init__.py +````python +"""Summarization infrastructure module. + +Provides summarization provider implementations and citation verification. 
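+
+Typical usage (sketch; the factory picks an available provider automatically):
+
+    from noteflow.infrastructure.summarization import create_summarization_service
+
+    service = create_summarization_service()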
+""" + +from noteflow.infrastructure.summarization.citation_verifier import ( + SegmentCitationVerifier, +) +from noteflow.infrastructure.summarization.cloud_provider import ( + CloudBackend, + CloudSummarizer, +) +from noteflow.infrastructure.summarization.factory import create_summarization_service +from noteflow.infrastructure.summarization.mock_provider import MockSummarizer +from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer + +__all__ = [ + "CloudBackend", + "CloudSummarizer", + "MockSummarizer", + "OllamaSummarizer", + "SegmentCitationVerifier", + "create_summarization_service", +] +```` + +## File: src/noteflow/infrastructure/summarization/_parsing.py +````python +"""Shared parsing utilities for summarization providers.""" + +from __future__ import annotations + +import json +from datetime import UTC, datetime +from typing import TYPE_CHECKING + +from noteflow.domain.entities import ActionItem, KeyPoint, Summary +from noteflow.domain.summarization import InvalidResponseError + +if TYPE_CHECKING: + from noteflow.domain.summarization import SummarizationRequest + + +# System prompt for structured summarization +SYSTEM_PROMPT = """You are a meeting summarization assistant. Analyze the transcript and produce structured output. + +OUTPUT FORMAT (JSON): +{ + "executive_summary": "2-3 sentence high-level overview", + "key_points": [ + {"text": "Key insight or decision", "segment_ids": [0, 1]} + ], + "action_items": [ + {"text": "Action to take", "assignee": "Person name or empty string", "priority": 0, "segment_ids": [2]} + ] +} + +RULES: +1. Each key_point and action_item MUST have at least one segment_id referencing the source +2. segment_ids are integers matching the [N] markers in the transcript +3. priority: 0=unspecified, 1=low, 2=medium, 3=high +4. Only extract action items that clearly indicate tasks to be done +5. Output ONLY valid JSON, no markdown or explanation""" + + +def build_transcript_prompt(request: SummarizationRequest) -> str: + """Build transcript prompt with segment markers. + + Args: + request: Summarization request with segments. + + Returns: + Formatted prompt string with transcript and constraints. + """ + lines = [f"[{seg.segment_id}] {seg.text}" for seg in request.segments] + constraints = "" + if request.segments: + valid_ids = ", ".join(str(seg.segment_id) for seg in request.segments) + constraints = ( + "\n\nCONSTRAINTS:\n" + f"- Maximum {request.max_key_points} key points\n" + f"- Maximum {request.max_action_items} action items\n" + f"- Valid segment_ids: {valid_ids}" + ) + + return f"TRANSCRIPT:\n{chr(10).join(lines)}{constraints}" + + +def parse_llm_response(response_text: str, request: SummarizationRequest) -> Summary: + """Parse JSON response into Summary entity. + + Args: + response_text: Raw JSON response from LLM. + request: Original request for validation context. + + Returns: + Summary entity with parsed data. + + Raises: + InvalidResponseError: If JSON is malformed. 
+ """ + # Strip markdown code fences if present + text = response_text.strip() + if text.startswith("```"): + lines = text.split("\n") + if lines[0].startswith("```"): + lines = lines[1:] + if lines and lines[-1].strip() == "```": + lines = lines[:-1] + text = "\n".join(lines) + + try: + data = json.loads(text) + except json.JSONDecodeError as e: + raise InvalidResponseError(f"Invalid JSON response: {e}") from e + + valid_ids = {seg.segment_id for seg in request.segments} + + # Parse key points + key_points: list[KeyPoint] = [] + for kp_data in data.get("key_points", [])[: request.max_key_points]: + seg_ids = [sid for sid in kp_data.get("segment_ids", []) if sid in valid_ids] + start_time = 0.0 + end_time = 0.0 + if seg_ids and (refs := [s for s in request.segments if s.segment_id in seg_ids]): + start_time = min(s.start_time for s in refs) + end_time = max(s.end_time for s in refs) + key_points.append( + KeyPoint( + text=str(kp_data.get("text", "")), + segment_ids=seg_ids, + start_time=start_time, + end_time=end_time, + ) + ) + + # Parse action items + action_items: list[ActionItem] = [] + for ai_data in data.get("action_items", [])[: request.max_action_items]: + seg_ids = [sid for sid in ai_data.get("segment_ids", []) if sid in valid_ids] + priority = ai_data.get("priority", 0) + if not isinstance(priority, int) or priority not in range(4): + priority = 0 + action_items.append( + ActionItem( + text=str(ai_data.get("text", "")), + assignee=str(ai_data.get("assignee", "")), + priority=priority, + segment_ids=seg_ids, + ) + ) + + return Summary( + meeting_id=request.meeting_id, + executive_summary=str(data.get("executive_summary", "")), + key_points=key_points, + action_items=action_items, + generated_at=datetime.now(UTC), + ) +```` + +## File: src/noteflow/infrastructure/summarization/cloud_provider.py +````python +"""Cloud summarization provider for OpenAI/Anthropic APIs.""" + +from __future__ import annotations + +import asyncio +import os +import time +from datetime import UTC, datetime +from enum import Enum +from typing import TYPE_CHECKING, cast + +from noteflow.domain.entities import Summary +from noteflow.domain.summarization import ( + InvalidResponseError, + ProviderUnavailableError, + SummarizationRequest, + SummarizationResult, + SummarizationTimeoutError, +) +from noteflow.infrastructure.summarization._parsing import ( + SYSTEM_PROMPT, + build_transcript_prompt, + parse_llm_response, +) + +if TYPE_CHECKING: + import anthropic + import openai + + +class CloudBackend(Enum): + """Supported cloud LLM backends.""" + + OPENAI = "openai" + ANTHROPIC = "anthropic" + + +class CloudSummarizer: + """Cloud-based LLM summarizer using OpenAI or Anthropic. + + Requires explicit user consent as data is sent to external services. + """ + + def __init__( + self, + backend: CloudBackend = CloudBackend.OPENAI, + api_key: str | None = None, + model: str | None = None, + timeout_seconds: float = 60.0, + base_url: str | None = None, + ) -> None: + """Initialize cloud summarizer. + + Args: + backend: Cloud provider backend (OpenAI or Anthropic). + api_key: API key (defaults to env var if not provided). + model: Model name (defaults per backend if not provided). + timeout_seconds: Request timeout in seconds. + base_url: Optional base URL (OpenAI only; defaults to OpenAI API). 
+ """ + self._backend = backend + self._api_key = api_key + self._timeout = timeout_seconds + self._client: openai.OpenAI | anthropic.Anthropic | None = None + # Only used for OpenAI + self._openai_base_url = ( + base_url + if base_url is not None + else os.environ.get("OPENAI_BASE_URL") + if backend == CloudBackend.OPENAI + else None + ) + + # Set default models per backend + if model is None: + self._model = ( + "gpt-4o-mini" if backend == CloudBackend.OPENAI else "claude-3-haiku-20240307" + ) + else: + self._model = model + + def _get_openai_client(self) -> openai.OpenAI: + """Get or create OpenAI client.""" + if self._client is None: + try: + import openai + + self._client = openai.OpenAI( + api_key=self._api_key, + timeout=self._timeout, + base_url=self._openai_base_url, + ) + except ImportError as e: + raise ProviderUnavailableError( + "openai package not installed. Install with: pip install openai" + ) from e + return cast(openai.OpenAI, self._client) + + def _get_anthropic_client(self) -> anthropic.Anthropic: + """Get or create Anthropic client.""" + if self._client is None: + try: + import anthropic + + self._client = anthropic.Anthropic(api_key=self._api_key, timeout=self._timeout) + except ImportError as e: + raise ProviderUnavailableError( + "anthropic package not installed. Install with: pip install anthropic" + ) from e + return cast(anthropic.Anthropic, self._client) + + @property + def provider_name(self) -> str: + """Provider identifier.""" + return self._backend.value + + @property + def is_available(self) -> bool: + """Check if cloud provider is configured with an API key.""" + import os + + if self._api_key: + return True + + # Check environment variables + if self._backend == CloudBackend.OPENAI: + return bool(os.environ.get("OPENAI_API_KEY")) + return bool(os.environ.get("ANTHROPIC_API_KEY")) + + @property + def requires_cloud_consent(self) -> bool: + """Cloud providers require explicit user consent.""" + return True + + async def summarize(self, request: SummarizationRequest) -> SummarizationResult: + """Generate evidence-linked summary using cloud LLM. + + Args: + request: Summarization request with segments. + + Returns: + SummarizationResult with generated summary. + + Raises: + ProviderUnavailableError: If provider not configured. + SummarizationTimeoutError: If request times out. + InvalidResponseError: If response cannot be parsed. 
+ """ + start = time.monotonic() + + # Handle empty segments + if not request.segments: + return SummarizationResult( + summary=Summary( + meeting_id=request.meeting_id, + executive_summary="No transcript segments to summarize.", + key_points=[], + action_items=[], + generated_at=datetime.now(UTC), + model_version=self._model, + ), + model_name=self._model, + provider_name=self.provider_name, + tokens_used=None, + latency_ms=0.0, + ) + + user_prompt = build_transcript_prompt(request) + + if self._backend == CloudBackend.OPENAI: + content, tokens_used = await asyncio.to_thread(self._call_openai, user_prompt) + else: + content, tokens_used = await asyncio.to_thread(self._call_anthropic, user_prompt) + + # Parse into Summary + summary = parse_llm_response(content, request) + summary = Summary( + meeting_id=summary.meeting_id, + executive_summary=summary.executive_summary, + key_points=summary.key_points, + action_items=summary.action_items, + generated_at=summary.generated_at, + model_version=self._model, + ) + + elapsed_ms = (time.monotonic() - start) * 1000 + + return SummarizationResult( + summary=summary, + model_name=self._model, + provider_name=self.provider_name, + tokens_used=tokens_used, + latency_ms=elapsed_ms, + ) + + def _call_openai(self, user_prompt: str) -> tuple[str, int | None]: + """Call OpenAI API and return (content, tokens_used).""" + try: + client = self._get_openai_client() + except ProviderUnavailableError: + raise + + try: + response = client.chat.completions.create( + model=self._model, + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.3, + response_format={"type": "json_object"}, + ) + except TimeoutError as e: + raise SummarizationTimeoutError(f"OpenAI request timed out: {e}") from e + except Exception as e: + err_str = str(e).lower() + if "api key" in err_str or "authentication" in err_str: + raise ProviderUnavailableError(f"OpenAI authentication failed: {e}") from e + if "rate limit" in err_str: + raise SummarizationTimeoutError(f"OpenAI rate limited: {e}") from e + raise InvalidResponseError(f"OpenAI error: {e}") from e + + content = response.choices[0].message.content or "" + if not content: + raise InvalidResponseError("Empty response from OpenAI") + + tokens_used = response.usage.total_tokens if response.usage else None + return content, tokens_used + + def _call_anthropic(self, user_prompt: str) -> tuple[str, int | None]: + """Call Anthropic API and return (content, tokens_used).""" + try: + client = self._get_anthropic_client() + except ProviderUnavailableError: + raise + + try: + response = client.messages.create( + model=self._model, + max_tokens=4096, + system=SYSTEM_PROMPT, + messages=[{"role": "user", "content": user_prompt}], + ) + except TimeoutError as e: + raise SummarizationTimeoutError(f"Anthropic request timed out: {e}") from e + except Exception as e: + err_str = str(e).lower() + if "api key" in err_str or "authentication" in err_str: + raise ProviderUnavailableError(f"Anthropic authentication failed: {e}") from e + if "rate limit" in err_str: + raise SummarizationTimeoutError(f"Anthropic rate limited: {e}") from e + raise InvalidResponseError(f"Anthropic error: {e}") from e + + content = "".join(block.text for block in response.content if hasattr(block, "text")) + if not content: + raise InvalidResponseError("Empty response from Anthropic") + + tokens_used = None + if hasattr(response, "usage"): + tokens_used = response.usage.input_tokens + response.usage.output_tokens 
+
+        return content, tokens_used
+````
+
+## File: src/noteflow/infrastructure/summarization/mock_provider.py
+````python
+"""Mock summarization provider for testing."""
+
+from __future__ import annotations
+
+import time
+from datetime import UTC, datetime
+
+from noteflow.domain.entities import ActionItem, KeyPoint, Summary
+from noteflow.domain.summarization import (
+    SummarizationRequest,
+    SummarizationResult,
+)
+
+
+class MockSummarizer:
+    """Deterministic mock summarizer for testing.
+
+    Generates predictable summaries based on input segments without
+    requiring an actual LLM. Useful for unit tests and development.
+    """
+
+    def __init__(self, latency_ms: float = 10.0) -> None:
+        """Initialize mock summarizer.
+
+        Args:
+            latency_ms: Simulated latency in milliseconds.
+        """
+        self._latency_ms = latency_ms
+
+    @property
+    def provider_name(self) -> str:
+        """Provider identifier."""
+        return "mock"
+
+    @property
+    def is_available(self) -> bool:
+        """Mock provider is always available."""
+        return True
+
+    @property
+    def requires_cloud_consent(self) -> bool:
+        """Mock provider does not send data externally."""
+        return False
+
+    async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
+        """Generate deterministic mock summary.
+
+        Creates key points and action items based on segment content,
+        with proper evidence linking to segment_ids.
+
+        Args:
+            request: Summarization request with segments.
+
+        Returns:
+            SummarizationResult with mock summary.
+        """
+        start = time.monotonic()
+
+        # Generate executive summary
+        segment_count = request.segment_count
+        total_duration = request.total_duration
+        executive_summary = (
+            f"Meeting with {segment_count} segments spanning {total_duration:.1f} seconds."
+        )
+
+        # Generate key points from segments (up to max_key_points)
+        key_points: list[KeyPoint] = []
+        for i, segment in enumerate(request.segments[: request.max_key_points]):
+            # Truncate text for key point
+            text = f"{segment.text[:100]}..." if len(segment.text) > 100 else segment.text
+            key_points.append(
+                KeyPoint(
+                    text=f"Point {i + 1}: {text}",
+                    segment_ids=[segment.segment_id],
+                    start_time=segment.start_time,
+                    end_time=segment.end_time,
+                )
+            )
+
+        # Generate action items from segments containing action words
+        action_items: list[ActionItem] = []
+        action_keywords = {"todo", "action", "will", "should", "must", "need to"}
+        for segment in request.segments:
+            text_lower = segment.text.lower()
+            if any(kw in text_lower for kw in action_keywords):
+                if len(action_items) >= request.max_action_items:
+                    break
+                action_items.append(
+                    ActionItem(
+                        text=f"Action: {segment.text[:80]}",
+                        assignee="",  # Mock doesn't extract assignees
+                        segment_ids=[segment.segment_id],
+                    )
+                )
+
+        summary = Summary(
+            meeting_id=request.meeting_id,
+            executive_summary=executive_summary,
+            key_points=key_points,
+            action_items=action_items,
+            generated_at=datetime.now(UTC),
+            model_version="mock-1.0",
+        )
+
+        elapsed = (time.monotonic() - start) * 1000 + self._latency_ms
+
+        return SummarizationResult(
+            summary=summary,
+            model_name="mock-1.0",
+            provider_name=self.provider_name,
+            tokens_used=None,
+            latency_ms=elapsed,
+        )
+````
+
+## File: src/noteflow/infrastructure/summarization/ollama_provider.py
+````python
+"""Ollama summarization provider for local LLM inference."""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import time
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING
+
+from noteflow.domain.entities import Summary
+from noteflow.domain.summarization import (
+    InvalidResponseError,
+    ProviderUnavailableError,
+    SummarizationRequest,
+    SummarizationResult,
+    SummarizationTimeoutError,
+)
+from noteflow.infrastructure.summarization._parsing import (
+    SYSTEM_PROMPT,
+    build_transcript_prompt,
+    parse_llm_response,
+)
+
+if TYPE_CHECKING:
+    import ollama
+
+
+class OllamaSummarizer:
+    """Ollama-based local LLM summarizer.
+
+    Uses a local Ollama server for privacy-preserving summarization.
+    No data is sent to external cloud services.
+    """
+
+    def __init__(
+        self,
+        model: str | None = None,
+        host: str | None = None,
+        timeout_seconds: float = 120.0,
+    ) -> None:
+        """Initialize Ollama summarizer.
+
+        Args:
+            model: Ollama model name (e.g., 'llama3.2', 'mistral').
+            host: Ollama server URL.
+            timeout_seconds: Request timeout in seconds.
+        """
+        self._model = model or os.environ.get("OLLAMA_MODEL", "llama3.2")
+        self._host = host or os.environ.get("OLLAMA_HOST", "http://localhost:11434")
+        self._timeout = timeout_seconds
+        self._client: ollama.Client | None = None
+
+    def _get_client(self) -> ollama.Client:
+        """Lazy-load Ollama client."""
+        if self._client is None:
+            try:
+                import ollama
+
+                self._client = ollama.Client(host=self._host)
+            except ImportError as e:
+                raise ProviderUnavailableError(
+                    "ollama package not installed. Install with: pip install ollama"
+                ) from e
+        return self._client
+
+    @property
+    def provider_name(self) -> str:
+        """Provider identifier."""
+        return "ollama"
+
+    @property
+    def is_available(self) -> bool:
+        """Check if Ollama server is reachable."""
+        try:
+            client = self._get_client()
+            # Try to list models to verify connectivity
+            client.list()
+            return True
+        except (ConnectionError, TimeoutError, RuntimeError, OSError):
+            return False
+
+    @property
+    def requires_cloud_consent(self) -> bool:
+        """Ollama runs locally, no cloud consent required."""
+        return False
+
+    async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
+        """Generate evidence-linked summary using Ollama.
+
+        Args:
+            request: Summarization request with segments.
+
+        Returns:
+            SummarizationResult with generated summary.
+
+        Raises:
+            ProviderUnavailableError: If Ollama is not accessible.
+            SummarizationTimeoutError: If request times out.
+            InvalidResponseError: If response cannot be parsed.
+        """
+        start = time.monotonic()
+
+        # Handle empty segments
+        if not request.segments:
+            return SummarizationResult(
+                summary=Summary(
+                    meeting_id=request.meeting_id,
+                    executive_summary="No transcript segments to summarize.",
+                    key_points=[],
+                    action_items=[],
+                    generated_at=datetime.now(UTC),
+                    model_version=self._model,
+                ),
+                model_name=self._model,
+                provider_name=self.provider_name,
+                tokens_used=None,
+                latency_ms=0.0,
+            )
+
+        try:
+            client = self._get_client()
+        except ProviderUnavailableError:
+            raise
+
+        user_prompt = build_transcript_prompt(request)
+
+        try:
+            # Offload blocking call to a worker thread to avoid blocking the event loop
+            response = await asyncio.to_thread(
+                client.chat,
+                model=self._model,
+                messages=[
+                    {"role": "system", "content": SYSTEM_PROMPT},
+                    {"role": "user", "content": user_prompt},
+                ],
+                options={"temperature": 0.3},
+                format="json",
+            )
+        except TimeoutError as e:
+            raise SummarizationTimeoutError(f"Ollama request timed out: {e}") from e
+        except Exception as e:
+            err_str = str(e).lower()
+            if "connection" in err_str or "refused" in err_str:
+                raise ProviderUnavailableError(f"Cannot connect to Ollama: {e}") from e
+            raise InvalidResponseError(f"Ollama error: {e}") from e
+
+        # Extract response text
+        content = response.get("message", {}).get("content", "")
+        if not content:
+            raise InvalidResponseError("Empty response from Ollama")
+
+        # Parse into Summary
+        summary = parse_llm_response(content, request)
+        summary = Summary(
+            meeting_id=summary.meeting_id,
+            executive_summary=summary.executive_summary,
+            key_points=summary.key_points,
+            action_items=summary.action_items,
+            generated_at=summary.generated_at,
+            model_version=self._model,
+        )
+
+        elapsed_ms = (time.monotonic() - start) * 1000
+
+        # Extract token usage if available
+        tokens_used = None
+        if "eval_count" in response:
+            tokens_used = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
+
+        return SummarizationResult(
+            summary=summary,
+            model_name=self._model,
+            provider_name=self.provider_name,
+            tokens_used=tokens_used,
+            latency_ms=elapsed_ms,
+        )
+````
+
+## File: src/noteflow/infrastructure/triggers/audio_activity.py
+````python
+"""Audio activity signal provider.
+
+Detect sustained audio activity using existing RmsLevelProvider.
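+
+Typical wiring (a sketch; level_provider, settings, and frames come from the
+caller):
+
+    provider = AudioActivityProvider(level_provider, settings)
+    provider.update(frames, time.monotonic())  # feed from the capture callback
+    signal = provider.get_signal()  # None until activity is sustained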
+""" + +from __future__ import annotations + +import threading +import time +from collections import deque +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + + from noteflow.infrastructure.audio import RmsLevelProvider + + +@dataclass +class AudioActivitySettings: + """Configuration for audio activity detection. + + Attributes: + enabled: Whether audio activity detection is enabled. + threshold_db: Minimum dB level to consider as activity (default -40 dB). + window_seconds: Time window for sustained activity detection. + min_active_ratio: Minimum ratio of active samples in window (0.0-1.0). + min_samples: Minimum samples required before evaluation. + max_history: Maximum samples retained in history. + weight: Confidence weight contributed by this provider. + """ + + enabled: bool + threshold_db: float + window_seconds: float + min_active_ratio: float + min_samples: int + max_history: int + weight: float + + def __post_init__(self) -> None: + if self.min_samples > self.max_history: + msg = "min_samples must be <= max_history" + raise ValueError(msg) + + +class AudioActivityProvider: + """Detect sustained audio activity using existing RmsLevelProvider. + + Reuses RmsLevelProvider from infrastructure/audio for dB calculation. + Tracks activity history over a sliding window and generates signals + when sustained speech activity is detected. + """ + + def __init__( + self, + level_provider: RmsLevelProvider, + settings: AudioActivitySettings, + ) -> None: + """Initialize audio activity provider. + + Args: + level_provider: Existing RmsLevelProvider instance to reuse. + settings: Configuration settings for audio activity detection. + """ + self._level_provider = level_provider + self._settings = settings + self._history: deque[tuple[float, bool]] = deque(maxlen=self._settings.max_history) + self._lock = threading.Lock() + + @property + def source(self) -> TriggerSource: + """Get the source type for this provider.""" + return TriggerSource.AUDIO_ACTIVITY + + @property + def max_weight(self) -> float: + """Get the maximum weight this provider can contribute.""" + return self._settings.weight + + def update(self, frames: NDArray[np.float32], timestamp: float) -> None: + """Update activity history with new audio frames. + + Call this from the audio capture callback to feed new samples. + + Args: + frames: Audio samples as float32 array. + timestamp: Monotonic timestamp of the audio chunk. + """ + if not self._settings.enabled: + return + + db = self._level_provider.get_db(frames) + is_active = db >= self._settings.threshold_db + with self._lock: + self._history.append((timestamp, is_active)) + + def get_signal(self) -> TriggerSignal | None: + """Get current signal if sustained activity detected. + + Returns: + TriggerSignal if activity ratio exceeds threshold, None otherwise. 
+ """ + if not self._settings.enabled: + return None + + # Need minimum samples before we can evaluate + with self._lock: + history = list(self._history) + + if len(history) < self._settings.min_samples: + return None + + # Prune old samples outside window + now = time.monotonic() + cutoff = now - self._settings.window_seconds + recent = [(ts, active) for ts, active in history if ts >= cutoff] + + if len(recent) < self._settings.min_samples: + return None + + # Calculate activity ratio + active_count = sum(bool(active) for _, active in recent) + ratio = active_count / len(recent) + + if ratio < self._settings.min_active_ratio: + return None + + return TriggerSignal(source=self.source, weight=self.max_weight) + + def is_enabled(self) -> bool: + """Check if this provider is enabled.""" + return self._settings.enabled + + def clear_history(self) -> None: + """Clear activity history. Useful when recording starts.""" + with self._lock: + self._history.clear() +```` + +## File: src/noteflow/infrastructure/__init__.py +````python +"""NoteFlow infrastructure layer. + +Contains implementations of ports and adapters for external systems: +- asr: Speech-to-text transcription (faster-whisper) +- diarization: Speaker diarization (pyannote.audio + diart) +- persistence: Database access (SQLAlchemy + PostgreSQL) +- security: Encryption and key management (AES-GCM + OS keychain) +""" +```` + +## File: tests/application/test_export_service.py +````python +"""Tests for ExportService application service.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock +from uuid import uuid4 + +import pytest + +from noteflow.application.services.export_service import ExportService +from noteflow.domain.entities import Meeting, Segment +from noteflow.domain.value_objects import MeetingId + + +def _uow_with_meeting(meeting: Meeting | None, segments: list[Segment] | None = None) -> MagicMock: + """Build a minimal async UnitOfWork double.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.meetings = MagicMock(get=AsyncMock(return_value=meeting)) + uow.segments = MagicMock(get_by_meeting=AsyncMock(return_value=segments or [])) + return uow + + +@pytest.mark.asyncio +async def test_export_transcript_meeting_not_found() -> None: + """export_transcript should raise when meeting is missing.""" + meeting_id = MeetingId(uuid4()) + service = ExportService(_uow_with_meeting(meeting=None)) + + with pytest.raises(ValueError, match="not found"): + await service.export_transcript(meeting_id) + + +@pytest.mark.asyncio +async def test_export_to_file_infers_format_and_writes(tmp_path: Path) -> None: + """export_to_file infers markdown from extension and writes content.""" + meeting = Meeting.create(title="Demo") + segments = [ + Segment( + segment_id=0, + text="Hello world", + start_time=0.0, + end_time=1.0, + meeting_id=meeting.id, + ) + ] + uow = _uow_with_meeting(meeting, segments) + service = ExportService(uow) + + output = await service.export_to_file(meeting.id, tmp_path / "export.markdown") + + assert output.suffix == ".md" + assert output.exists() + content = output.read_text(encoding="utf-8") + assert "Hello world" in content + + +def test_infer_format_rejects_unknown_extension() -> None: + """_infer_format_from_extension should raise for unknown suffix.""" + service = ExportService(_uow_with_meeting(None)) + + with pytest.raises(ValueError, match="Cannot infer format"): 
+ service._infer_format_from_extension(".txt") # type: ignore[arg-type] + + +def test_get_exporter_raises_for_unknown_format() -> None: + """_get_exporter should guard against unsupported enums.""" + service = ExportService(_uow_with_meeting(None)) + + class FakeFormat: + HTML = "html" + + with pytest.raises(ValueError, match="Unsupported"): + service._get_exporter(FakeFormat.HTML) # type: ignore[arg-type] + + +def test_get_supported_formats_returns_names_and_extensions() -> None: + """get_supported_formats should expose format metadata.""" + service = ExportService(_uow_with_meeting(None)) + + formats = {name.lower(): ext for name, ext in service.get_supported_formats()} + + assert formats["markdown"] == ".md" + assert formats["html"] == ".html" +```` + +## File: tests/application/test_retention_service.py +````python +"""Tests for RetentionService.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from noteflow.application.services.retention_service import RetentionReport, RetentionService +from noteflow.domain.entities import Meeting + + +def _create_meeting(ended_at: datetime | None = None) -> Meeting: + """Create a test meeting with optional ended_at.""" + meeting = Meeting.create(title="Test Meeting") + if ended_at: + meeting._ended_at = ended_at + return meeting + + +class TestRetentionServiceProperties: + """Tests for RetentionService properties.""" + + def test_is_enabled_reflects_init(self) -> None: + """is_enabled should reflect constructor parameter.""" + uow = MagicMock() + + def factory() -> MagicMock: + return uow + + enabled_service = RetentionService(factory, retention_days=30, enabled=True) + disabled_service = RetentionService(factory, retention_days=30, enabled=False) + + assert enabled_service.is_enabled is True + assert disabled_service.is_enabled is False + + def test_retention_days_property(self) -> None: + """retention_days should return configured value.""" + uow = MagicMock() + service = RetentionService(lambda: uow, retention_days=45) + + assert service.retention_days == 45 + + def test_cutoff_date_calculation(self) -> None: + """cutoff_date should be retention_days in the past.""" + uow = MagicMock() + service = RetentionService(lambda: uow, retention_days=30) + + cutoff = service.cutoff_date + expected = datetime.now(UTC) - timedelta(days=30) + + # Allow 1 second tolerance + assert abs((cutoff - expected).total_seconds()) < 1 + + +class TestRetentionServiceFindExpired: + """Tests for find_expired_meetings method.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.meetings = MagicMock() + return uow + + @pytest.mark.asyncio + async def test_find_expired_returns_meetings(self, mock_uow: MagicMock) -> None: + """find_expired_meetings should return meetings from repository.""" + old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100)) + mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting]) + + service = RetentionService(lambda: mock_uow, retention_days=30) + result = await service.find_expired_meetings() + + assert len(result) == 1 + mock_uow.meetings.find_older_than.assert_awaited_once() + + @pytest.mark.asyncio + async def test_find_expired_returns_empty_list(self, mock_uow: MagicMock) -> None: + """find_expired_meetings should 
return empty list when none found.""" + mock_uow.meetings.find_older_than = AsyncMock(return_value=[]) + + service = RetentionService(lambda: mock_uow, retention_days=30) + result = await service.find_expired_meetings() + + assert result == [] + + +class TestRetentionServiceRunCleanup: + """Tests for run_cleanup method.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.meetings = MagicMock() + uow.commit = AsyncMock() + return uow + + @pytest.mark.asyncio + async def test_run_cleanup_disabled_returns_empty_report(self, mock_uow: MagicMock) -> None: + """run_cleanup should return empty report when disabled.""" + service = RetentionService(lambda: mock_uow, retention_days=30, enabled=False) + + report = await service.run_cleanup() + + assert report.meetings_checked == 0 + assert report.meetings_deleted == 0 + assert report.errors == () + + @pytest.mark.asyncio + async def test_run_cleanup_dry_run_does_not_delete(self, mock_uow: MagicMock) -> None: + """run_cleanup with dry_run should not delete meetings.""" + old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100)) + mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting]) + + service = RetentionService(lambda: mock_uow, retention_days=30, enabled=False) + report = await service.run_cleanup(dry_run=True) + + # Should report meeting was checked but not deleted + assert report.meetings_checked == 1 + assert report.meetings_deleted == 0 + assert report.errors == () + + @pytest.mark.asyncio + async def test_run_cleanup_deletes_expired_meetings( + self, mock_uow: MagicMock, tmp_path: Path + ) -> None: + """run_cleanup should delete expired meetings when enabled.""" + old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100)) + mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting]) + mock_uow.meetings.get = AsyncMock(return_value=old_meeting) + mock_uow.meetings.delete = AsyncMock(return_value=True) + + service = RetentionService( + lambda: mock_uow, + retention_days=30, + meetings_dir=tmp_path, + enabled=True, + ) + report = await service.run_cleanup() + + assert report.meetings_checked == 1 + assert report.meetings_deleted == 1 + assert report.errors == () + + @pytest.mark.asyncio + async def test_run_cleanup_handles_errors_gracefully(self, mock_uow: MagicMock) -> None: + """run_cleanup should capture errors without failing.""" + old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100)) + mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting]) + mock_uow.meetings.get = AsyncMock(side_effect=RuntimeError("DB error")) + + service = RetentionService(lambda: mock_uow, retention_days=30, enabled=True) + report = await service.run_cleanup() + + assert report.meetings_checked == 1 + assert report.meetings_deleted == 0 + assert len(report.errors) == 1 + assert "DB error" in report.errors[0] + + +class TestRetentionReport: + """Tests for RetentionReport dataclass.""" + + def test_retention_report_is_immutable(self) -> None: + """RetentionReport should be frozen.""" + report = RetentionReport( + meetings_checked=5, + meetings_deleted=3, + errors=("error1",), + ) + + with pytest.raises(AttributeError): + report.meetings_checked = 10 # type: ignore[misc] + + def test_retention_report_stores_values(self) -> None: + """RetentionReport should store all values correctly.""" + report = 
RetentionReport( + meetings_checked=10, + meetings_deleted=8, + errors=("err1", "err2"), + ) + + assert report.meetings_checked == 10 + assert report.meetings_deleted == 8 + assert report.errors == ("err1", "err2") +```` + +## File: tests/domain/test_meeting.py +````python +"""Tests for Meeting entity.""" + +from __future__ import annotations + +from datetime import datetime, timedelta + +import pytest + +from noteflow.domain.entities.meeting import Meeting +from noteflow.domain.entities.segment import Segment +from noteflow.domain.entities.summary import Summary +from noteflow.domain.value_objects import MeetingState + + +class TestMeetingCreation: + """Tests for Meeting creation methods.""" + + def test_create_with_default_title(self) -> None: + """Test factory method generates default title.""" + meeting = Meeting.create() + assert meeting.title.startswith("Meeting ") + assert meeting.state == MeetingState.CREATED + assert meeting.started_at is None + assert meeting.ended_at is None + assert meeting.segments == [] + assert meeting.summary is None + + def test_create_with_custom_title(self) -> None: + """Test factory method accepts custom title.""" + meeting = Meeting.create(title="Team Standup") + assert meeting.title == "Team Standup" + + def test_create_with_metadata(self) -> None: + """Test factory method accepts metadata.""" + metadata = {"project": "NoteFlow", "team": "Engineering"} + meeting = Meeting.create(title="Sprint Planning", metadata=metadata) + assert meeting.metadata == metadata + + def test_from_uuid_str(self) -> None: + """Test creation from existing UUID string.""" + uuid_str = "12345678-1234-5678-1234-567812345678" + meeting = Meeting.from_uuid_str( + uuid_str=uuid_str, + title="Restored Meeting", + state=MeetingState.STOPPED, + ) + assert str(meeting.id) == uuid_str + assert meeting.title == "Restored Meeting" + assert meeting.state == MeetingState.STOPPED + + +class TestMeetingStateTransitions: + """Tests for Meeting state machine transitions.""" + + def test_start_recording_from_created(self) -> None: + """Test starting recording from CREATED state.""" + meeting = Meeting.create() + meeting.start_recording() + assert meeting.state == MeetingState.RECORDING + assert meeting.started_at is not None + + def test_start_recording_invalid_state_raises(self) -> None: + """Test starting recording from invalid state raises.""" + meeting = Meeting.create() + meeting.start_recording() + meeting.begin_stopping() + meeting.stop_recording() + with pytest.raises(ValueError, match="Cannot start recording"): + meeting.start_recording() + + def test_begin_stopping_from_recording(self) -> None: + """Test transitioning to STOPPING from RECORDING state.""" + meeting = Meeting.create() + meeting.start_recording() + meeting.begin_stopping() + assert meeting.state == MeetingState.STOPPING + + def test_begin_stopping_invalid_state_raises(self) -> None: + """Test begin_stopping from invalid state raises.""" + meeting = Meeting.create() + with pytest.raises(ValueError, match="Cannot begin stopping"): + meeting.begin_stopping() + + def test_stop_recording_from_stopping(self) -> None: + """Test stopping recording from STOPPING state.""" + meeting = Meeting.create() + meeting.start_recording() + meeting.begin_stopping() + meeting.stop_recording() + assert meeting.state == MeetingState.STOPPED + assert meeting.ended_at is not None + + def test_stop_recording_from_recording_raises(self) -> None: + """Test stopping recording directly from RECORDING raises. 
+ + Must go through STOPPING state for graceful shutdown. + """ + meeting = Meeting.create() + meeting.start_recording() + with pytest.raises(ValueError, match="Cannot stop recording"): + meeting.stop_recording() + + def test_stop_recording_from_created_raises(self) -> None: + """Test stopping recording from CREATED state raises.""" + meeting = Meeting.create() + with pytest.raises(ValueError, match="Cannot stop recording"): + meeting.stop_recording() + + def test_complete_from_stopped(self) -> None: + """Test completing meeting from STOPPED state.""" + meeting = Meeting.create() + meeting.start_recording() + meeting.begin_stopping() + meeting.stop_recording() + meeting.complete() + assert meeting.state == MeetingState.COMPLETED + + def test_complete_invalid_state_raises(self) -> None: + """Test completing from invalid state raises.""" + meeting = Meeting.create() + with pytest.raises(ValueError, match="Cannot complete"): + meeting.complete() + + def test_mark_error(self) -> None: + """Test marking meeting as error state.""" + meeting = Meeting.create() + meeting.mark_error() + assert meeting.state == MeetingState.ERROR + + def test_stopping_to_recording_invalid(self) -> None: + """Test cannot transition from STOPPING back to RECORDING.""" + meeting = Meeting.create() + meeting.start_recording() + meeting.begin_stopping() + with pytest.raises(ValueError, match="Cannot start recording"): + meeting.start_recording() + + +class TestMeetingSegments: + """Tests for Meeting segment management.""" + + def test_add_segment(self) -> None: + """Test adding a segment to meeting.""" + meeting = Meeting.create() + segment = Segment(segment_id=0, text="Hello world", start_time=0.0, end_time=1.0) + meeting.add_segment(segment) + assert meeting.segment_count == 1 + assert meeting.segments[0] == segment + + def test_next_segment_id_empty(self) -> None: + """Test next segment ID when no segments exist.""" + meeting = Meeting.create() + assert meeting.next_segment_id == 0 + + def test_next_segment_id_with_segments(self) -> None: + """Test next segment ID increments correctly.""" + meeting = Meeting.create() + meeting.add_segment(Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0)) + meeting.add_segment(Segment(segment_id=1, text="Second", start_time=1.0, end_time=2.0)) + assert meeting.next_segment_id == 2 + + def test_next_segment_id_non_contiguous(self) -> None: + """Test next segment ID uses max + 1 for non-contiguous IDs.""" + meeting = Meeting.create() + meeting.add_segment(Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0)) + meeting.add_segment(Segment(segment_id=5, text="Sixth", start_time=1.0, end_time=2.0)) + assert meeting.next_segment_id == 6 + + def test_full_transcript(self) -> None: + """Test concatenating all segment text.""" + meeting = Meeting.create() + meeting.add_segment(Segment(segment_id=0, text="Hello", start_time=0.0, end_time=1.0)) + meeting.add_segment(Segment(segment_id=1, text="world", start_time=1.0, end_time=2.0)) + assert meeting.full_transcript == "Hello world" + + def test_full_transcript_empty(self) -> None: + """Test full_transcript is empty when there are no segments.""" + meeting = Meeting.create() + assert meeting.full_transcript == "" + + +class TestMeetingProperties: + """Tests for Meeting computed properties.""" + + def test_duration_seconds_not_started(self) -> None: + """Test duration is 0 when not started.""" + meeting = Meeting.create() + assert meeting.duration_seconds == 0.0 + + def test_duration_seconds_with_times(self) -> None: + 
"""Test duration calculation with start and end times.""" + meeting = Meeting.create() + meeting.started_at = datetime(2024, 1, 1, 10, 0, 0) + meeting.ended_at = datetime(2024, 1, 1, 10, 30, 0) + assert meeting.duration_seconds == 1800.0 + + def test_duration_seconds_in_progress(self) -> None: + """Test duration is > 0 when started but not ended.""" + meeting = Meeting.create() + meeting.started_at = datetime.now() - timedelta(seconds=5) + assert meeting.duration_seconds >= 5.0 + + def test_is_active_created(self) -> None: + """Test is_active returns True for CREATED state.""" + meeting = Meeting.create() + assert meeting.is_active() is True + + def test_is_active_recording(self) -> None: + """Test is_active returns True for RECORDING state.""" + meeting = Meeting.create() + meeting.start_recording() + assert meeting.is_active() is True + + def test_is_active_stopping(self) -> None: + """Test is_active returns False for STOPPING state.""" + meeting = Meeting.create() + meeting.start_recording() + meeting.begin_stopping() + assert meeting.is_active() is False + + def test_is_active_stopped(self) -> None: + """Test is_active returns False for STOPPED state.""" + meeting = Meeting.create() + meeting.start_recording() + meeting.begin_stopping() + meeting.stop_recording() + assert meeting.is_active() is False + + def test_has_summary_false(self) -> None: + """Test has_summary returns False when no summary.""" + meeting = Meeting.create() + assert meeting.has_summary() is False + + def test_has_summary_true(self) -> None: + """Test has_summary returns True when summary set.""" + meeting = Meeting.create() + summary = Summary(meeting_id=meeting.id) + meeting.set_summary(summary) + assert meeting.has_summary() is True +```` + +## File: tests/domain/test_triggers.py +````python +"""Tests for trigger domain entities.""" + +from __future__ import annotations + +import pytest + +from noteflow.domain.triggers import TriggerAction, TriggerDecision, TriggerSignal, TriggerSource + + +def test_trigger_signal_weight_bounds() -> None: + """TriggerSignal enforces weight bounds.""" + with pytest.raises(ValueError, match=r"Weight must be 0\.0-1\.0"): + TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=-0.1) + + with pytest.raises(ValueError, match=r"Weight must be 0\.0-1\.0"): + TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=1.1) + + signal = TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=0.5) + assert signal.weight == 0.5 + + +def test_trigger_decision_primary_signal_and_detected_app() -> None: + """TriggerDecision exposes primary signal and detected app.""" + audio = TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=0.2) + foreground = TriggerSignal( + source=TriggerSource.FOREGROUND_APP, + weight=0.4, + app_name="Zoom Meeting", + ) + decision = TriggerDecision( + action=TriggerAction.NOTIFY, + confidence=0.6, + signals=(audio, foreground), + ) + + assert decision.primary_signal == foreground + assert decision.detected_app == "Zoom Meeting" + + empty = TriggerDecision(action=TriggerAction.IGNORE, confidence=0.0, signals=()) + assert empty.primary_signal is None + assert empty.detected_app is None +```` + +## File: tests/grpc/test_generate_summary.py +````python +"""Tests for GenerateSummary RPC fallback behavior.""" + +from __future__ import annotations + +import pytest + +from noteflow.domain.entities import Segment +from noteflow.domain.summarization import ProviderUnavailableError +from noteflow.grpc.proto import noteflow_pb2 +from noteflow.grpc.service import 
NoteFlowServicer + + +class _DummyContext: + """Minimal gRPC context that raises if abort is invoked.""" + + async def abort(self, code, details): # type: ignore[override] + raise AssertionError(f"abort called: {code} - {details}") + + +@pytest.mark.asyncio +async def test_generate_summary_uses_placeholder_when_service_missing() -> None: + """Ensure RPC returns a placeholder when no summarization service is configured.""" + + servicer = NoteFlowServicer() + store = servicer._get_memory_store() + + meeting = store.create("Test Meeting") + store.add_segment( + str(meeting.id), + Segment(segment_id=0, text="Hello world", start_time=0.0, end_time=1.0, language="en"), + ) + + response = await servicer.GenerateSummary( + noteflow_pb2.GenerateSummaryRequest(meeting_id=str(meeting.id)), + _DummyContext(), + ) + + assert response.executive_summary != "" + assert response.model_version == "placeholder-v0" + retrieved_meeting = store.get(str(meeting.id)) + assert retrieved_meeting is not None, "Meeting should exist after creation" + assert retrieved_meeting.summary is not None + + +class _FailingSummarizationService: + """Summarization service that always reports provider unavailability.""" + + async def summarize(self, meeting_id, segments): # type: ignore[override] + raise ProviderUnavailableError("LLM unavailable") + + +@pytest.mark.asyncio +async def test_generate_summary_falls_back_when_provider_unavailable() -> None: + """Provider errors should fall back to placeholder instead of failing the RPC.""" + + servicer = NoteFlowServicer(summarization_service=_FailingSummarizationService()) + store = servicer._get_memory_store() + + meeting = store.create("Test Meeting") + store.add_segment( + str(meeting.id), + Segment(segment_id=1, text="Action item noted", start_time=0.0, end_time=2.0, language="en"), + ) + + response = await servicer.GenerateSummary( + noteflow_pb2.GenerateSummaryRequest(meeting_id=str(meeting.id)), + _DummyContext(), + ) + + assert response.executive_summary != "" + assert response.model_version == "placeholder-v0" +```` + +## File: tests/infrastructure/asr/test_dto.py +````python +"""Tests for ASR DTO validation and properties.""" + +from __future__ import annotations + +from dataclasses import FrozenInstanceError + +import pytest + +from noteflow.infrastructure.asr.dto import ( + AsrResult, + PartialUpdate, + VadEvent, + VadEventType, + WordTiming, +) + + +class TestWordTimingDto: + """Tests for WordTiming DTO.""" + + def test_word_timing_valid(self) -> None: + word = WordTiming(word="hello", start=0.0, end=0.5, probability=0.75) + assert word.word == "hello" + assert word.start == 0.0 + assert word.end == 0.5 + assert word.probability == 0.75 + + def test_word_timing_invalid_times_raises(self) -> None: + with pytest.raises(ValueError, match=r"Word end .* < start"): + WordTiming(word="bad", start=1.0, end=0.5, probability=0.5) + + @pytest.mark.parametrize("prob", [-0.1, 1.1]) + def test_word_timing_invalid_probability_raises(self, prob: float) -> None: + with pytest.raises(ValueError, match=r"Probability must be 0\.0-1\.0"): + WordTiming(word="bad", start=0.0, end=0.1, probability=prob) + + def test_word_timing_frozen(self) -> None: + word = WordTiming(word="hello", start=0.0, end=0.5, probability=0.9) + with pytest.raises(FrozenInstanceError): + word.word = "mutate" # type: ignore[misc] + + +class TestAsrResultDto: + """Tests for AsrResult DTO.""" + + def test_asr_result_duration(self) -> None: + result = AsrResult(text="hello", start=1.0, end=3.5) + assert result.duration == 
2.5 + + def test_asr_result_invalid_times_raises(self) -> None: + with pytest.raises(ValueError, match=r"Segment end .* < start"): + AsrResult(text="bad", start=2.0, end=1.0) + + +class TestPartialUpdateDto: + """Tests for PartialUpdate DTO.""" + + def test_partial_update_invalid_times_raises(self) -> None: + with pytest.raises(ValueError, match=r"Partial end .* < start"): + PartialUpdate(text="partial", start=2.0, end=1.0) + + +class TestVadEventDto: + """Tests for VadEvent DTO.""" + + def test_vad_event_invalid_timestamp_raises(self) -> None: + with pytest.raises(ValueError, match="Timestamp must be non-negative"): + VadEvent(event_type=VadEventType.SPEECH_START, timestamp=-1.0) + + @pytest.mark.parametrize("confidence", [-0.1, 1.1]) + def test_vad_event_invalid_confidence_raises(self, confidence: float) -> None: + with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"): + VadEvent(event_type=VadEventType.SPEECH_END, timestamp=0.5, confidence=confidence) +```` + +## File: tests/infrastructure/asr/test_segmenter.py +````python +"""Tests for Segmenter state machine.""" + +from __future__ import annotations + +import numpy as np +import pytest + +from noteflow.infrastructure.asr.segmenter import ( + AudioSegment, + Segmenter, + SegmenterConfig, + SegmenterState, +) + + +class TestSegmenterInitialization: + """Tests for Segmenter initialization.""" + + def test_default_config(self) -> None: + """Segmenter uses default config when not provided.""" + segmenter = Segmenter() + + assert segmenter.config.sample_rate == 16000 + assert segmenter.config.min_speech_duration == 0.3 + + def test_custom_config(self) -> None: + """Segmenter accepts custom configuration.""" + config = SegmenterConfig(sample_rate=44100, max_segment_duration=60.0) + segmenter = Segmenter(config=config) + + assert segmenter.config.sample_rate == 44100 + assert segmenter.config.max_segment_duration == 60.0 + + def test_initial_state_is_idle(self) -> None: + """Segmenter starts in IDLE state.""" + segmenter = Segmenter() + + assert segmenter.state == SegmenterState.IDLE + + +class TestSegmenterStateTransitions: + """Tests for Segmenter state machine transitions.""" + + @pytest.fixture + def segmenter(self) -> Segmenter: + """Create segmenter with test-friendly config.""" + return Segmenter( + config=SegmenterConfig( + sample_rate=16000, + trailing_silence=0.1, + leading_buffer=0.1, + min_speech_duration=0.1, + ) + ) + + @staticmethod + def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray: + """Create test audio of specified duration.""" + return np.zeros(int(duration * sample_rate), dtype=np.float32) + + def test_idle_to_speech_on_voice_detected(self, segmenter: Segmenter) -> None: + """Transition from IDLE to SPEECH when voice detected.""" + audio = self.make_audio(0.1) + + list(segmenter.process_audio(audio, is_speech=True)) + + assert segmenter.state == SegmenterState.SPEECH + + def test_idle_stays_idle_on_silence(self, segmenter: Segmenter) -> None: + """Stay in IDLE state when no speech detected.""" + audio = self.make_audio(0.1) + + list(segmenter.process_audio(audio, is_speech=False)) + + assert segmenter.state == SegmenterState.IDLE + + def test_speech_to_trailing_on_silence(self, segmenter: Segmenter) -> None: + """Transition from SPEECH to TRAILING when speech ends.""" + speech_audio = self.make_audio(0.1) + short_silence = self.make_audio(0.05) # Less than trailing_silence threshold + + list(segmenter.process_audio(speech_audio, is_speech=True)) + 
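+        # 0.05 s of silence is below trailing_silence (0.1 s), so the
+        # segmenter should hold in TRAILING rather than return to IDLE.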
list(segmenter.process_audio(short_silence, is_speech=False)) + + assert segmenter.state == SegmenterState.TRAILING + + def test_trailing_to_idle_after_silence_threshold(self, segmenter: Segmenter) -> None: + """Transition from TRAILING to IDLE after enough silence.""" + audio = self.make_audio(0.1) + + list(segmenter.process_audio(audio, is_speech=True)) + list(segmenter.process_audio(audio, is_speech=False)) + list(segmenter.process_audio(audio, is_speech=False)) + + assert segmenter.state == SegmenterState.IDLE + + def test_trailing_to_speech_if_voice_resumes(self, segmenter: Segmenter) -> None: + """Transition from TRAILING back to SPEECH if voice resumes.""" + audio = self.make_audio(0.05) + + list(segmenter.process_audio(audio, is_speech=True)) + list(segmenter.process_audio(audio, is_speech=False)) + assert segmenter.state == SegmenterState.TRAILING + + list(segmenter.process_audio(audio, is_speech=True)) + + assert segmenter.state == SegmenterState.SPEECH + + +class TestSegmenterEmission: + """Tests for segment emission behavior.""" + + @pytest.fixture + def segmenter(self) -> Segmenter: + """Create segmenter with test-friendly config.""" + return Segmenter( + config=SegmenterConfig( + sample_rate=16000, + trailing_silence=0.1, + leading_buffer=0.1, + min_speech_duration=0.0, + ) + ) + + @staticmethod + def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray: + """Create test audio of specified duration.""" + return np.ones(int(duration * sample_rate), dtype=np.float32) + + def test_emits_segment_after_trailing_silence(self, segmenter: Segmenter) -> None: + """Emit segment when trailing silence threshold is reached.""" + audio = self.make_audio(0.2) + + segments_speech = list(segmenter.process_audio(audio, is_speech=True)) + segments_silence = list(segmenter.process_audio(audio, is_speech=False)) + + assert not segments_speech + assert len(segments_silence) == 1 + assert isinstance(segments_silence[0], AudioSegment) + + def test_emitted_segment_has_correct_timing(self, segmenter: Segmenter) -> None: + """Emitted segment has correct start and end times.""" + audio = self.make_audio(0.2) + + list(segmenter.process_audio(audio, is_speech=True)) + segments = list(segmenter.process_audio(audio, is_speech=False)) + + segment = segments[0] + assert segment.start_time >= 0.0 + assert segment.end_time > segment.start_time + assert segment.duration > 0 + + def test_emitted_segment_contains_audio(self, segmenter: Segmenter) -> None: + """Emitted segment contains concatenated audio.""" + audio = self.make_audio(0.2) + + list(segmenter.process_audio(audio, is_speech=True)) + segments = list(segmenter.process_audio(audio, is_speech=False)) + + assert len(segments[0].audio) > 0 + + def test_emits_on_max_duration(self) -> None: + """Force emit segment when max duration is reached.""" + segmenter = Segmenter( + config=SegmenterConfig( + sample_rate=16000, + max_segment_duration=0.3, + ) + ) + audio = self.make_audio(0.2) + + segments_1 = list(segmenter.process_audio(audio, is_speech=True)) + segments_2 = list(segmenter.process_audio(audio, is_speech=True)) + + assert not segments_1 + assert len(segments_2) == 1 + + def test_min_speech_duration_filters_short_segments(self) -> None: + """Segments shorter than min_speech_duration should be ignored.""" + segmenter = Segmenter( + config=SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.5, + trailing_silence=0.1, + ) + ) + short_speech = self.make_audio(0.1) + silence = self.make_audio(0.1) + + 
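+        # 0.1 s of speech is below min_speech_duration (0.5 s), so no segment
+        # should be emitted when the silence lands.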
list(segmenter.process_audio(short_speech, is_speech=True)) + emitted = list(segmenter.process_audio(silence, is_speech=False)) + + assert not emitted + + +class TestSegmenterFlush: + """Tests for flush behavior.""" + + @pytest.fixture + def segmenter(self) -> Segmenter: + """Create segmenter with test-friendly config.""" + return Segmenter( + config=SegmenterConfig( + sample_rate=16000, + trailing_silence=0.5, + min_speech_duration=0.0, + ) + ) + + @staticmethod + def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray: + """Create test audio of specified duration.""" + return np.ones(int(duration * sample_rate), dtype=np.float32) + + def test_flush_returns_none_when_idle(self, segmenter: Segmenter) -> None: + """Flush returns None when no pending audio.""" + result = segmenter.flush() + + assert result is None + + def test_flush_returns_segment_when_in_speech(self, segmenter: Segmenter) -> None: + """Flush returns pending segment when in SPEECH state.""" + audio = self.make_audio(0.2) + list(segmenter.process_audio(audio, is_speech=True)) + + result = segmenter.flush() + + assert result is not None + assert isinstance(result, AudioSegment) + + def test_flush_returns_segment_when_in_trailing(self, segmenter: Segmenter) -> None: + """Flush returns pending segment when in TRAILING state.""" + audio = self.make_audio(0.1) + list(segmenter.process_audio(audio, is_speech=True)) + list(segmenter.process_audio(audio, is_speech=False)) + assert segmenter.state == SegmenterState.TRAILING + + result = segmenter.flush() + + assert result is not None + assert isinstance(result, AudioSegment) + + def test_flush_resets_to_idle(self, segmenter: Segmenter) -> None: + """Flush resets state to IDLE.""" + audio = self.make_audio(0.2) + list(segmenter.process_audio(audio, is_speech=True)) + + segmenter.flush() + + assert segmenter.state == SegmenterState.IDLE + + +class TestSegmenterReset: + """Tests for reset behavior.""" + + def test_reset_clears_state(self) -> None: + """Reset returns segmenter to initial state.""" + segmenter = Segmenter() + audio = np.ones(1600, dtype=np.float32) + + list(segmenter.process_audio(audio, is_speech=True)) + assert segmenter.state == SegmenterState.SPEECH + + segmenter.reset() + + assert segmenter.state == SegmenterState.IDLE + + +class TestAudioSegmentDataclass: + """Tests for AudioSegment dataclass.""" + + def test_duration_property(self) -> None: + """Duration property calculates correctly.""" + segment = AudioSegment( + audio=np.zeros(1600, dtype=np.float32), + start_time=1.0, + end_time=2.5, + ) + + assert segment.duration == 1.5 +```` + +## File: tests/infrastructure/asr/test_streaming_vad.py +````python +"""Tests for StreamingVad and EnergyVad.""" + +from __future__ import annotations + +import numpy as np + +from noteflow.infrastructure.asr.streaming_vad import ( + EnergyVad, + EnergyVadConfig, + StreamingVad, +) + + +class TestEnergyVadBasics: + """Basic tests for EnergyVad.""" + + def test_default_config(self) -> None: + """EnergyVad uses default config when not provided.""" + vad = EnergyVad() + + assert vad.config.speech_threshold == 0.01 + assert vad.config.silence_threshold == 0.005 + + def test_custom_config(self) -> None: + """EnergyVad accepts custom configuration.""" + config = EnergyVadConfig(speech_threshold=0.02, min_speech_frames=5) + vad = EnergyVad(config=config) + + assert vad.config.speech_threshold == 0.02 + assert vad.config.min_speech_frames == 5 + + def test_initial_state_is_silence(self) -> None: + """EnergyVad starts in 
silence state.""" + vad = EnergyVad() + + assert vad._is_speech is False + + +class TestEnergyVadDetection: + """Tests for EnergyVad speech detection.""" + + def test_detects_silence_for_zeros(self) -> None: + """Silent audio detected as non-speech.""" + vad = EnergyVad() + audio = np.zeros(1600, dtype=np.float32) + + result = vad.process(audio) + + assert result is False + + def test_detects_speech_for_high_energy(self) -> None: + """High energy audio eventually detected as speech.""" + vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=2)) + # Audio with energy above threshold + audio = np.ones(1600, dtype=np.float32) * 0.1 + + vad.process(audio) + result = vad.process(audio) + + assert result is True + + def test_speech_requires_consecutive_frames(self) -> None: + """Speech detection requires min_speech_frames consecutive frames.""" + vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=3)) + audio = np.ones(1600, dtype=np.float32) * 0.1 + + assert vad.process(audio) is False + assert vad.process(audio) is False + assert vad.process(audio) is True + + def test_silence_after_speech_requires_frames(self) -> None: + """Transition to silence requires min_silence_frames.""" + config = EnergyVadConfig(min_speech_frames=1, min_silence_frames=2) + vad = EnergyVad(config=config) + speech = np.ones(1600, dtype=np.float32) * 0.1 + silence = np.zeros(1600, dtype=np.float32) + + vad.process(speech) + assert vad._is_speech is True + + vad.process(silence) + assert vad._is_speech is True + + vad.process(silence) + assert vad._is_speech is False + + def test_hysteresis_prevents_chatter(self) -> None: + """Hysteresis prevents rapid speech/silence toggling.""" + config = EnergyVadConfig( + speech_threshold=0.01, + silence_threshold=0.005, + min_speech_frames=1, + min_silence_frames=1, + ) + vad = EnergyVad(config=config) + + # Just above speech threshold -> speech + high = np.ones(1600, dtype=np.float32) * 0.015 + vad.process(high) + assert vad._is_speech is True + + # Between thresholds (below speech, above silence) -> stays speech + mid = np.ones(1600, dtype=np.float32) * 0.007 + vad.process(mid) + assert vad._is_speech is True + + # Below silence threshold -> silence + low = np.ones(1600, dtype=np.float32) * 0.003 + vad.process(low) + assert vad._is_speech is False + + +class TestEnergyVadReset: + """Tests for EnergyVad reset behavior.""" + + def test_reset_clears_state(self) -> None: + """Reset returns VAD to initial state.""" + vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=1)) + audio = np.ones(1600, dtype=np.float32) * 0.1 + vad.process(audio) + + vad.reset() + + assert vad._is_speech is False + assert vad._speech_frame_count == 0 + assert vad._silence_frame_count == 0 + + +class TestStreamingVad: + """Tests for StreamingVad wrapper.""" + + def test_default_engine_is_energy_vad(self) -> None: + """StreamingVad uses EnergyVad by default.""" + vad = StreamingVad() + + assert isinstance(vad.engine, EnergyVad) + + def test_process_chunk_delegates_to_engine(self) -> None: + """process_chunk delegates to underlying engine.""" + vad = StreamingVad() + silence = np.zeros(1600, dtype=np.float32) + + result = vad.process_chunk(silence) + + assert result is False + + def test_reset_delegates_to_engine(self) -> None: + """reset delegates to underlying engine.""" + vad = StreamingVad() + speech = np.ones(1600, dtype=np.float32) * 0.1 + + vad.process_chunk(speech) + vad.process_chunk(speech) + vad.reset() + + assert vad.engine._is_speech is False +```` + +## File: 
tests/infrastructure/audio/test_levels.py +````python +"""Tests for RmsLevelProvider and compute_rms.""" + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +from noteflow.infrastructure.audio import RmsLevelProvider, compute_rms + +if TYPE_CHECKING: + from numpy.typing import NDArray + + +class TestComputeRms: + """Tests for compute_rms function.""" + + def test_empty_array_returns_zero(self) -> None: + """RMS of empty array is zero.""" + frames = np.array([], dtype=np.float32) + assert compute_rms(frames) == 0.0 + + def test_zeros_returns_zero(self) -> None: + """RMS of zeros is zero.""" + frames = np.zeros(100, dtype=np.float32) + assert compute_rms(frames) == 0.0 + + def test_ones_returns_one(self) -> None: + """RMS of all ones is one.""" + frames = np.ones(100, dtype=np.float32) + assert compute_rms(frames) == 1.0 + + def test_half_amplitude_returns_half(self) -> None: + """RMS of constant 0.5 is 0.5.""" + frames = np.full(100, 0.5, dtype=np.float32) + assert compute_rms(frames) == 0.5 + + def test_sine_wave_returns_sqrt_half(self) -> None: + """RMS of sine wave is approximately 1/sqrt(2).""" + t = np.linspace(0, 2 * np.pi, 1000, dtype=np.float32) + frames = np.sin(t).astype(np.float32) + result = compute_rms(frames) + assert 0.7 < result < 0.72 # ~0.707 + + +class TestRmsLevelProvider: + """Tests for RmsLevelProvider class.""" + + @pytest.fixture + def provider(self) -> RmsLevelProvider: + """Create RmsLevelProvider instance.""" + return RmsLevelProvider() + + def test_get_rms_empty_array_returns_zero(self, provider: RmsLevelProvider) -> None: + """Test RMS of empty array is zero.""" + frames = np.array([], dtype=np.float32) + assert provider.get_rms(frames) == 0.0 + + def test_get_rms_silence_returns_zero( + self, provider: RmsLevelProvider, silence_audio: NDArray[np.float32] + ) -> None: + """Test RMS of silence is zero.""" + assert provider.get_rms(silence_audio) == 0.0 + + def test_get_rms_full_scale_returns_one( + self, provider: RmsLevelProvider, full_scale_audio: NDArray[np.float32] + ) -> None: + """Test RMS of full scale signal is one.""" + assert provider.get_rms(full_scale_audio) == 1.0 + + def test_get_rms_half_scale_returns_half( + self, provider: RmsLevelProvider, half_scale_audio: NDArray[np.float32] + ) -> None: + """Test RMS of half scale signal is 0.5.""" + assert provider.get_rms(half_scale_audio) == 0.5 + + def test_get_rms_normalized_range(self, provider: RmsLevelProvider) -> None: + """Test RMS is always in 0.0-1.0 range.""" + # Test with values > 1.0 (should clamp) + frames = np.full(100, 2.0, dtype=np.float32) + rms = provider.get_rms(frames) + assert 0.0 <= rms <= 1.0 + + def test_get_db_silence_returns_min_db( + self, provider: RmsLevelProvider, silence_audio: NDArray[np.float32] + ) -> None: + """Test dB of silence returns MIN_DB.""" + assert provider.get_db(silence_audio) == provider.MIN_DB + + def test_get_db_full_scale_returns_zero( + self, provider: RmsLevelProvider, full_scale_audio: NDArray[np.float32] + ) -> None: + """Test dB of full scale signal is 0 dB.""" + assert provider.get_db(full_scale_audio) == 0.0 + + def test_get_db_half_scale_is_negative_six( + self, provider: RmsLevelProvider, half_scale_audio: NDArray[np.float32] + ) -> None: + """Test dB of half scale is approximately -6 dB.""" + db = provider.get_db(half_scale_audio) + # -6.02 dB for half amplitude + assert -7.0 < db < -5.0 + + def test_rms_to_db_zero_returns_min_db(self, provider: RmsLevelProvider) -> None: + 
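+        # Conversion sketch (assumed from the ~-6 dB half-scale test above):
+        # db = 20 * log10(rms), with rms == 0 clamped to MIN_DB.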
"""Test rms_to_db(0) returns MIN_DB.""" + assert provider.rms_to_db(0.0) == provider.MIN_DB + + def test_rms_to_db_one_returns_zero(self, provider: RmsLevelProvider) -> None: + """Test rms_to_db(1.0) returns 0 dB.""" + assert provider.rms_to_db(1.0) == 0.0 + + def test_db_to_rms_min_db_returns_zero(self, provider: RmsLevelProvider) -> None: + """Test db_to_rms(MIN_DB) returns 0.""" + assert provider.db_to_rms(provider.MIN_DB) == 0.0 + + def test_db_to_rms_zero_returns_one(self, provider: RmsLevelProvider) -> None: + """Test db_to_rms(0) returns 1.0.""" + assert provider.db_to_rms(0.0) == 1.0 + + @pytest.mark.parametrize("rms", [0.1, 0.25, 0.5, 0.75, 1.0]) + def test_rms_db_roundtrip(self, provider: RmsLevelProvider, rms: float) -> None: + """Test RMS -> dB -> RMS roundtrip preserves value.""" + db = provider.rms_to_db(rms) + recovered = provider.db_to_rms(db) + assert math.isclose(recovered, rms, rel_tol=1e-9) +```` + +## File: tests/infrastructure/security/test_keystore.py +````python +"""Tests for KeyringKeyStore and InMemoryKeyStore.""" + +from __future__ import annotations + +import types +from typing import Any + +import pytest + +from noteflow.infrastructure.security import keystore + + +def _install_fake_keyring(monkeypatch: pytest.MonkeyPatch) -> dict[tuple[str, str], str]: + """Install a fake keyring backend backed by a dictionary.""" + storage: dict[tuple[str, str], str] = {} + + class DummyErrors: + class KeyringError(Exception): ... + + class PasswordDeleteError(KeyringError): ... + + def get_password(service: str, key: str) -> str | None: + return storage.get((service, key)) + + def set_password(service: str, key: str, value: str) -> None: + storage[(service, key)] = value + + def delete_password(service: str, key: str) -> None: + storage.pop((service, key), None) + + monkeypatch.setattr( + keystore, + "keyring", + types.SimpleNamespace( + get_password=get_password, + set_password=set_password, + delete_password=delete_password, + errors=DummyErrors, + ), + ) + return storage + + +def test_get_or_create_master_key_creates_and_reuses(monkeypatch: pytest.MonkeyPatch) -> None: + """Master key should be created once and then reused.""" + storage = _install_fake_keyring(monkeypatch) + ks = keystore.KeyringKeyStore(service_name="svc", key_name="key") + + first = ks.get_or_create_master_key() + second = ks.get_or_create_master_key() + + assert len(first) == keystore.KEY_SIZE + assert first == second + assert ("svc", "key") in storage + + +def test_get_or_create_master_key_wraps_keyring_errors(monkeypatch: pytest.MonkeyPatch) -> None: + """Keyring errors should surface as RuntimeError.""" + + class DummyErrors: + class KeyringError(Exception): ... + + def raise_error(*_: Any, **__: Any) -> None: + raise DummyErrors.KeyringError("unavailable") + + monkeypatch.setattr( + keystore, + "keyring", + types.SimpleNamespace( + get_password=raise_error, + set_password=raise_error, + errors=DummyErrors, + delete_password=raise_error, + ), + ) + + ks = keystore.KeyringKeyStore() + with pytest.raises(RuntimeError, match="Keyring unavailable"): + ks.get_or_create_master_key() + + +def test_delete_master_key_handles_missing(monkeypatch: pytest.MonkeyPatch) -> None: + """delete_master_key should swallow missing-key errors.""" + storage = _install_fake_keyring(monkeypatch) + + class DummyErrors: + class KeyringError(Exception): ... + + class PasswordDeleteError(KeyringError): ... 
+ + # Reinstall with errors that raise on delete to exercise branch + def delete_password(*_: Any, **__: Any) -> None: + raise DummyErrors.PasswordDeleteError("not found") + + monkeypatch.setattr( + keystore, + "keyring", + types.SimpleNamespace( + get_password=lambda s, k: storage.get((s, k)), + set_password=lambda s, k, v: storage.setdefault((s, k), v), + delete_password=delete_password, + errors=DummyErrors, + ), + ) + + ks = keystore.KeyringKeyStore() + # Should not raise even when delete_password errors + ks.delete_master_key() + + +def test_has_master_key_false_on_errors(monkeypatch: pytest.MonkeyPatch) -> None: + """has_master_key should return False when keyring raises.""" + + class DummyErrors: + class KeyringError(Exception): ... + + def raise_error(*_: Any, **__: Any) -> None: + raise DummyErrors.KeyringError("oops") + + monkeypatch.setattr( + keystore, + "keyring", + types.SimpleNamespace( + get_password=raise_error, + errors=DummyErrors, + delete_password=lambda *a, **k: None, + set_password=lambda *a, **k: None, + ), + ) + + ks = keystore.KeyringKeyStore() + assert ks.has_master_key() is False +```` + +## File: tests/integration/test_trigger_settings.py +````python +"""Integration tests for trigger and retention settings loading.""" + +from __future__ import annotations + +import pytest + +from noteflow.config.settings import Settings, get_settings, get_trigger_settings + +pytestmark = pytest.mark.integration + + +@pytest.fixture(autouse=True) +def _clear_settings_cache() -> None: + get_trigger_settings.cache_clear() + get_settings.cache_clear() + + +def test_trigger_settings_env_parsing(monkeypatch: pytest.MonkeyPatch) -> None: + """TriggerSettings should parse CSV lists from environment variables.""" + monkeypatch.setenv("NOTEFLOW_TRIGGER_MEETING_APPS", "zoom, teams") + monkeypatch.setenv("NOTEFLOW_TRIGGER_SUPPRESSED_APPS", "spotify") + monkeypatch.setenv("NOTEFLOW_TRIGGER_AUDIO_MIN_SAMPLES", "5") + monkeypatch.setenv("NOTEFLOW_TRIGGER_POLL_INTERVAL_SECONDS", "1.5") + + settings = get_trigger_settings() + + assert settings.trigger_meeting_apps == ["zoom", "teams"] + assert settings.trigger_suppressed_apps == ["spotify"] + assert settings.trigger_audio_min_samples == 5 + assert settings.trigger_poll_interval_seconds == pytest.approx(1.5) + + +class TestRetentionSettings: + """Tests for retention settings.""" + + def test_retention_defaults(self) -> None: + """Retention settings should have correct defaults.""" + # Access via class to check field defaults without loading from env + assert Settings.model_fields["retention_enabled"].default is False + assert Settings.model_fields["retention_days"].default == 90 + assert Settings.model_fields["retention_check_interval_hours"].default == 24 + + def test_retention_env_parsing(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Retention settings should parse from environment variables.""" + monkeypatch.setenv("NOTEFLOW_DATABASE_URL", "postgresql+asyncpg://user:pass@localhost/db") + monkeypatch.setenv("NOTEFLOW_RETENTION_ENABLED", "true") + monkeypatch.setenv("NOTEFLOW_RETENTION_DAYS", "30") + monkeypatch.setenv("NOTEFLOW_RETENTION_CHECK_INTERVAL_HOURS", "12") + + settings = get_settings() + + assert settings.retention_enabled is True + assert settings.retention_days == 30 + assert settings.retention_check_interval_hours == 12 + + def test_retention_days_validation(self) -> None: + """Retention days should be validated within range.""" + from pydantic import ValidationError + + # ge=1, le=3650 + with 
pytest.raises(ValidationError): + Settings.model_validate( + {"database_url": "postgresql+asyncpg://x:x@x/x", "retention_days": 0} + ) + with pytest.raises(ValidationError): + Settings.model_validate( + {"database_url": "postgresql+asyncpg://x:x@x/x", "retention_days": 4000} + ) + + def test_retention_check_interval_validation(self) -> None: + """Retention check interval should be validated within range.""" + from pydantic import ValidationError + + # ge=1, le=168 + with pytest.raises(ValidationError): + Settings.model_validate( + { + "database_url": "postgresql+asyncpg://x:x@x/x", + "retention_check_interval_hours": 0, + } + ) + with pytest.raises(ValidationError): + Settings.model_validate( + { + "database_url": "postgresql+asyncpg://x:x@x/x", + "retention_check_interval_hours": 200, + } + ) +```` + +## File: src/noteflow/application/services/__init__.py +````python +"""Application services for NoteFlow use cases.""" + +from noteflow.application.services.export_service import ExportFormat, ExportService +from noteflow.application.services.meeting_service import MeetingService +from noteflow.application.services.recovery_service import RecoveryService +from noteflow.application.services.retention_service import RetentionReport, RetentionService +from noteflow.application.services.summarization_service import ( + SummarizationMode, + SummarizationService, + SummarizationServiceResult, + SummarizationServiceSettings, +) +from noteflow.application.services.trigger_service import TriggerService, TriggerServiceSettings + +__all__ = [ + "ExportFormat", + "ExportService", + "MeetingService", + "RecoveryService", + "RetentionReport", + "RetentionService", + "SummarizationMode", + "SummarizationService", + "SummarizationServiceResult", + "SummarizationServiceSettings", + "TriggerService", + "TriggerServiceSettings", +] +```` + +## File: src/noteflow/client/state.py +````python +"""Centralized application state for NoteFlow client. + +Composes existing types from grpc.client and infrastructure.audio. +Does not recreate any dataclasses - imports and uses existing ones. +""" + +from __future__ import annotations + +import logging +from collections.abc import Callable +from dataclasses import dataclass, field + +import flet as ft + +# REUSE existing types - do not recreate +from noteflow.domain.entities import Summary +from noteflow.domain.triggers import TriggerDecision +from noteflow.grpc.client import AnnotationInfo, MeetingInfo, ServerInfo, TranscriptSegment +from noteflow.infrastructure.audio import ( + RmsLevelProvider, + SoundDevicePlayback, + TimestampedAudio, +) + +logger = logging.getLogger(__name__) + +# Callback type aliases (follow NoteFlowClient pattern from grpc/client.py) +OnTranscriptCallback = Callable[[TranscriptSegment], None] +OnConnectionCallback = Callable[[bool, str], None] + + +@dataclass +class AppState: + """Centralized application state for NoteFlow client. + + Composes existing types from grpc.client and infrastructure.audio. + All state is centralized here for component access. 
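+
+    Typical wiring (a minimal sketch; assumes a Flet page instance is in scope):
+
+        state = AppState()
+        state.set_page(page)
+        # mutate fields from callbacks, then:
+        state.request_update()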
+ """ + + # Connection state + server_address: str = "localhost:50051" + connected: bool = False + server_info: ServerInfo | None = None # REUSE existing type + + # Recording state + recording: bool = False + current_meeting: MeetingInfo | None = None # REUSE existing type + recording_start_time: float | None = None + elapsed_seconds: int = 0 + + # Audio state (REUSE existing RmsLevelProvider) + level_provider: RmsLevelProvider = field(default_factory=RmsLevelProvider) + current_db_level: float = -60.0 + + # Transcript state (REUSE existing TranscriptSegment) + transcript_segments: list[TranscriptSegment] = field(default_factory=list) + current_partial_text: str = "" # Live partial transcript (not yet final) + + # Playback state (REUSE existing SoundDevicePlayback) + playback: SoundDevicePlayback = field(default_factory=SoundDevicePlayback) + playback_position: float = 0.0 + session_audio_buffer: list[TimestampedAudio] = field(default_factory=list) + + # Transcript sync state + highlighted_segment_index: int | None = None + + # Annotations state (REUSE existing AnnotationInfo) + annotations: list[AnnotationInfo] = field(default_factory=list) + + # Meeting library state (REUSE existing MeetingInfo) + meetings: list[MeetingInfo] = field(default_factory=list) + selected_meeting: MeetingInfo | None = None + + # Trigger state (REUSE existing TriggerDecision) + trigger_enabled: bool = True + trigger_pending: bool = False # True when prompt is shown + trigger_decision: TriggerDecision | None = None # Last trigger decision + + # Summary state (REUSE existing Summary entity) + current_summary: Summary | None = None + summary_loading: bool = False + summary_error: str | None = None + + # UI page reference (private) + _page: ft.Page | None = field(default=None, repr=False) + + def set_page(self, page: ft.Page) -> None: + """Set page reference for thread-safe updates. + + Args: + page: Flet page instance. + """ + self._page = page + + def request_update(self) -> None: + """Request UI update from any thread. + + Safe to call from background threads. + """ + if self._page: + self._page.update() + + def run_on_ui_thread(self, callback: Callable[[], None]) -> None: + """Schedule callback on the UI event loop safely. + + Follows NoteFlowClient callback pattern with error handling. + + Args: + callback: Function to execute on the UI event loop. + """ + if not self._page: + return + + try: + if hasattr(self._page, "run_task"): + + async def _run() -> None: + callback() + + self._page.run_task(_run) + else: + self._page.run_thread(callback) + except Exception as e: + logger.error("UI thread callback error: %s", e) + + def clear_transcript(self) -> None: + """Clear all transcript segments and partial text.""" + self.transcript_segments.clear() + self.current_partial_text = "" + + def reset_recording_state(self) -> None: + """Reset recording-related state.""" + self.recording = False + self.current_meeting = None + self.recording_start_time = None + self.elapsed_seconds = 0 + + def clear_session_audio(self) -> None: + """Clear session audio buffer and reset playback state.""" + self.session_audio_buffer.clear() + self.playback_position = 0.0 + + def find_segment_at_position(self, position: float) -> int | None: + """Find segment index containing the given position using binary search. + + Args: + position: Time in seconds. + + Returns: + Index of segment containing position, or None if not found. 
+ """ + segments = self.transcript_segments + if not segments: + return None + + left, right = 0, len(segments) - 1 + + while left <= right: + mid = (left + right) // 2 + segment = segments[mid] + + if segment.start_time <= position <= segment.end_time: + return mid + if position < segment.start_time: + right = mid - 1 + else: + left = mid + 1 + + return None +```` + +## File: src/noteflow/grpc/proto/noteflow_pb2_grpc.py +````python +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" + +import grpc +import warnings + +import noteflow_pb2 as noteflow__pb2 + +GRPC_VERSION = grpc.__version__ +_version_not_supported = False + +GRPC_GENERATED_VERSION = '1.76.0' +try: + from grpc._utilities import first_version_is_lower + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + raise RuntimeError( + f'The grpc package installed is at version {GRPC_VERSION}, but the generated code in noteflow_pb2_grpc.py depends on' + + f' grpcio>={GRPC_GENERATED_VERSION}.' + + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' + ) + + +class NoteFlowServiceStub(object): + """============================================================================= + Core Service + ============================================================================= + + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.StreamTranscription = channel.stream_stream( + '/noteflow.NoteFlowService/StreamTranscription', + request_serializer=noteflow__pb2.AudioChunk.SerializeToString, + response_deserializer=noteflow__pb2.TranscriptUpdate.FromString, + _registered_method=True) + self.CreateMeeting = channel.unary_unary( + '/noteflow.NoteFlowService/CreateMeeting', + request_serializer=noteflow__pb2.CreateMeetingRequest.SerializeToString, + response_deserializer=noteflow__pb2.Meeting.FromString, + _registered_method=True) + self.StopMeeting = channel.unary_unary( + '/noteflow.NoteFlowService/StopMeeting', + request_serializer=noteflow__pb2.StopMeetingRequest.SerializeToString, + response_deserializer=noteflow__pb2.Meeting.FromString, + _registered_method=True) + self.ListMeetings = channel.unary_unary( + '/noteflow.NoteFlowService/ListMeetings', + request_serializer=noteflow__pb2.ListMeetingsRequest.SerializeToString, + response_deserializer=noteflow__pb2.ListMeetingsResponse.FromString, + _registered_method=True) + self.GetMeeting = channel.unary_unary( + '/noteflow.NoteFlowService/GetMeeting', + request_serializer=noteflow__pb2.GetMeetingRequest.SerializeToString, + response_deserializer=noteflow__pb2.Meeting.FromString, + _registered_method=True) + self.DeleteMeeting = channel.unary_unary( + '/noteflow.NoteFlowService/DeleteMeeting', + request_serializer=noteflow__pb2.DeleteMeetingRequest.SerializeToString, + response_deserializer=noteflow__pb2.DeleteMeetingResponse.FromString, + _registered_method=True) + self.GenerateSummary = channel.unary_unary( + '/noteflow.NoteFlowService/GenerateSummary', + request_serializer=noteflow__pb2.GenerateSummaryRequest.SerializeToString, + response_deserializer=noteflow__pb2.Summary.FromString, + _registered_method=True) + self.AddAnnotation = channel.unary_unary( + '/noteflow.NoteFlowService/AddAnnotation', + 
request_serializer=noteflow__pb2.AddAnnotationRequest.SerializeToString, + response_deserializer=noteflow__pb2.Annotation.FromString, + _registered_method=True) + self.GetAnnotation = channel.unary_unary( + '/noteflow.NoteFlowService/GetAnnotation', + request_serializer=noteflow__pb2.GetAnnotationRequest.SerializeToString, + response_deserializer=noteflow__pb2.Annotation.FromString, + _registered_method=True) + self.ListAnnotations = channel.unary_unary( + '/noteflow.NoteFlowService/ListAnnotations', + request_serializer=noteflow__pb2.ListAnnotationsRequest.SerializeToString, + response_deserializer=noteflow__pb2.ListAnnotationsResponse.FromString, + _registered_method=True) + self.UpdateAnnotation = channel.unary_unary( + '/noteflow.NoteFlowService/UpdateAnnotation', + request_serializer=noteflow__pb2.UpdateAnnotationRequest.SerializeToString, + response_deserializer=noteflow__pb2.Annotation.FromString, + _registered_method=True) + self.DeleteAnnotation = channel.unary_unary( + '/noteflow.NoteFlowService/DeleteAnnotation', + request_serializer=noteflow__pb2.DeleteAnnotationRequest.SerializeToString, + response_deserializer=noteflow__pb2.DeleteAnnotationResponse.FromString, + _registered_method=True) + self.ExportTranscript = channel.unary_unary( + '/noteflow.NoteFlowService/ExportTranscript', + request_serializer=noteflow__pb2.ExportTranscriptRequest.SerializeToString, + response_deserializer=noteflow__pb2.ExportTranscriptResponse.FromString, + _registered_method=True) + self.RefineSpeakerDiarization = channel.unary_unary( + '/noteflow.NoteFlowService/RefineSpeakerDiarization', + request_serializer=noteflow__pb2.RefineSpeakerDiarizationRequest.SerializeToString, + response_deserializer=noteflow__pb2.RefineSpeakerDiarizationResponse.FromString, + _registered_method=True) + self.RenameSpeaker = channel.unary_unary( + '/noteflow.NoteFlowService/RenameSpeaker', + request_serializer=noteflow__pb2.RenameSpeakerRequest.SerializeToString, + response_deserializer=noteflow__pb2.RenameSpeakerResponse.FromString, + _registered_method=True) + self.GetDiarizationJobStatus = channel.unary_unary( + '/noteflow.NoteFlowService/GetDiarizationJobStatus', + request_serializer=noteflow__pb2.GetDiarizationJobStatusRequest.SerializeToString, + response_deserializer=noteflow__pb2.DiarizationJobStatus.FromString, + _registered_method=True) + self.GetServerInfo = channel.unary_unary( + '/noteflow.NoteFlowService/GetServerInfo', + request_serializer=noteflow__pb2.ServerInfoRequest.SerializeToString, + response_deserializer=noteflow__pb2.ServerInfo.FromString, + _registered_method=True) + + +class NoteFlowServiceServicer(object): + """============================================================================= + Core Service + ============================================================================= + + """ + + def StreamTranscription(self, request_iterator, context): + """Bidirectional streaming: client sends audio chunks, server returns transcripts + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def CreateMeeting(self, request, context): + """Meeting lifecycle management + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def StopMeeting(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + 
context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ListMeetings(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetMeeting(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def DeleteMeeting(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GenerateSummary(self, request, context): + """Summary generation + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def AddAnnotation(self, request, context): + """Annotation management + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetAnnotation(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ListAnnotations(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def UpdateAnnotation(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def DeleteAnnotation(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ExportTranscript(self, request, context): + """Export functionality + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def RefineSpeakerDiarization(self, request, context): + """Speaker diarization + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def RenameSpeaker(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetDiarizationJobStatus(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetServerInfo(self, request, context): + """Server health and capabilities + """ + 
context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_NoteFlowServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'StreamTranscription': grpc.stream_stream_rpc_method_handler( + servicer.StreamTranscription, + request_deserializer=noteflow__pb2.AudioChunk.FromString, + response_serializer=noteflow__pb2.TranscriptUpdate.SerializeToString, + ), + 'CreateMeeting': grpc.unary_unary_rpc_method_handler( + servicer.CreateMeeting, + request_deserializer=noteflow__pb2.CreateMeetingRequest.FromString, + response_serializer=noteflow__pb2.Meeting.SerializeToString, + ), + 'StopMeeting': grpc.unary_unary_rpc_method_handler( + servicer.StopMeeting, + request_deserializer=noteflow__pb2.StopMeetingRequest.FromString, + response_serializer=noteflow__pb2.Meeting.SerializeToString, + ), + 'ListMeetings': grpc.unary_unary_rpc_method_handler( + servicer.ListMeetings, + request_deserializer=noteflow__pb2.ListMeetingsRequest.FromString, + response_serializer=noteflow__pb2.ListMeetingsResponse.SerializeToString, + ), + 'GetMeeting': grpc.unary_unary_rpc_method_handler( + servicer.GetMeeting, + request_deserializer=noteflow__pb2.GetMeetingRequest.FromString, + response_serializer=noteflow__pb2.Meeting.SerializeToString, + ), + 'DeleteMeeting': grpc.unary_unary_rpc_method_handler( + servicer.DeleteMeeting, + request_deserializer=noteflow__pb2.DeleteMeetingRequest.FromString, + response_serializer=noteflow__pb2.DeleteMeetingResponse.SerializeToString, + ), + 'GenerateSummary': grpc.unary_unary_rpc_method_handler( + servicer.GenerateSummary, + request_deserializer=noteflow__pb2.GenerateSummaryRequest.FromString, + response_serializer=noteflow__pb2.Summary.SerializeToString, + ), + 'AddAnnotation': grpc.unary_unary_rpc_method_handler( + servicer.AddAnnotation, + request_deserializer=noteflow__pb2.AddAnnotationRequest.FromString, + response_serializer=noteflow__pb2.Annotation.SerializeToString, + ), + 'GetAnnotation': grpc.unary_unary_rpc_method_handler( + servicer.GetAnnotation, + request_deserializer=noteflow__pb2.GetAnnotationRequest.FromString, + response_serializer=noteflow__pb2.Annotation.SerializeToString, + ), + 'ListAnnotations': grpc.unary_unary_rpc_method_handler( + servicer.ListAnnotations, + request_deserializer=noteflow__pb2.ListAnnotationsRequest.FromString, + response_serializer=noteflow__pb2.ListAnnotationsResponse.SerializeToString, + ), + 'UpdateAnnotation': grpc.unary_unary_rpc_method_handler( + servicer.UpdateAnnotation, + request_deserializer=noteflow__pb2.UpdateAnnotationRequest.FromString, + response_serializer=noteflow__pb2.Annotation.SerializeToString, + ), + 'DeleteAnnotation': grpc.unary_unary_rpc_method_handler( + servicer.DeleteAnnotation, + request_deserializer=noteflow__pb2.DeleteAnnotationRequest.FromString, + response_serializer=noteflow__pb2.DeleteAnnotationResponse.SerializeToString, + ), + 'ExportTranscript': grpc.unary_unary_rpc_method_handler( + servicer.ExportTranscript, + request_deserializer=noteflow__pb2.ExportTranscriptRequest.FromString, + response_serializer=noteflow__pb2.ExportTranscriptResponse.SerializeToString, + ), + 'RefineSpeakerDiarization': grpc.unary_unary_rpc_method_handler( + servicer.RefineSpeakerDiarization, + request_deserializer=noteflow__pb2.RefineSpeakerDiarizationRequest.FromString, + response_serializer=noteflow__pb2.RefineSpeakerDiarizationResponse.SerializeToString, + ), + 'RenameSpeaker': 
grpc.unary_unary_rpc_method_handler( + servicer.RenameSpeaker, + request_deserializer=noteflow__pb2.RenameSpeakerRequest.FromString, + response_serializer=noteflow__pb2.RenameSpeakerResponse.SerializeToString, + ), + 'GetDiarizationJobStatus': grpc.unary_unary_rpc_method_handler( + servicer.GetDiarizationJobStatus, + request_deserializer=noteflow__pb2.GetDiarizationJobStatusRequest.FromString, + response_serializer=noteflow__pb2.DiarizationJobStatus.SerializeToString, + ), + 'GetServerInfo': grpc.unary_unary_rpc_method_handler( + servicer.GetServerInfo, + request_deserializer=noteflow__pb2.ServerInfoRequest.FromString, + response_serializer=noteflow__pb2.ServerInfo.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'noteflow.NoteFlowService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + server.add_registered_method_handlers('noteflow.NoteFlowService', rpc_method_handlers) + + + # This class is part of an EXPERIMENTAL API. +class NoteFlowService(object): + """============================================================================= + Core Service + ============================================================================= + + """ + + @staticmethod + def StreamTranscription(request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.stream_stream( + request_iterator, + target, + '/noteflow.NoteFlowService/StreamTranscription', + noteflow__pb2.AudioChunk.SerializeToString, + noteflow__pb2.TranscriptUpdate.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def CreateMeeting(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/CreateMeeting', + noteflow__pb2.CreateMeetingRequest.SerializeToString, + noteflow__pb2.Meeting.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def StopMeeting(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/StopMeeting', + noteflow__pb2.StopMeetingRequest.SerializeToString, + noteflow__pb2.Meeting.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def ListMeetings(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/ListMeetings', + noteflow__pb2.ListMeetingsRequest.SerializeToString, + noteflow__pb2.ListMeetingsResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def 
GetMeeting(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/GetMeeting', + noteflow__pb2.GetMeetingRequest.SerializeToString, + noteflow__pb2.Meeting.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def DeleteMeeting(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/DeleteMeeting', + noteflow__pb2.DeleteMeetingRequest.SerializeToString, + noteflow__pb2.DeleteMeetingResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def GenerateSummary(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/GenerateSummary', + noteflow__pb2.GenerateSummaryRequest.SerializeToString, + noteflow__pb2.Summary.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def AddAnnotation(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/AddAnnotation', + noteflow__pb2.AddAnnotationRequest.SerializeToString, + noteflow__pb2.Annotation.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def GetAnnotation(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/GetAnnotation', + noteflow__pb2.GetAnnotationRequest.SerializeToString, + noteflow__pb2.Annotation.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def ListAnnotations(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/ListAnnotations', + noteflow__pb2.ListAnnotationsRequest.SerializeToString, + noteflow__pb2.ListAnnotationsResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def UpdateAnnotation(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + 
insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/UpdateAnnotation', + noteflow__pb2.UpdateAnnotationRequest.SerializeToString, + noteflow__pb2.Annotation.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def DeleteAnnotation(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/DeleteAnnotation', + noteflow__pb2.DeleteAnnotationRequest.SerializeToString, + noteflow__pb2.DeleteAnnotationResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def ExportTranscript(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/ExportTranscript', + noteflow__pb2.ExportTranscriptRequest.SerializeToString, + noteflow__pb2.ExportTranscriptResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def RefineSpeakerDiarization(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/RefineSpeakerDiarization', + noteflow__pb2.RefineSpeakerDiarizationRequest.SerializeToString, + noteflow__pb2.RefineSpeakerDiarizationResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def RenameSpeaker(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/RenameSpeaker', + noteflow__pb2.RenameSpeakerRequest.SerializeToString, + noteflow__pb2.RenameSpeakerResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def GetDiarizationJobStatus(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/GetDiarizationJobStatus', + noteflow__pb2.GetDiarizationJobStatusRequest.SerializeToString, + noteflow__pb2.DiarizationJobStatus.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def GetServerInfo(request, + target, + options=(), + channel_credentials=None, + 
call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/GetServerInfo', + noteflow__pb2.ServerInfoRequest.SerializeToString, + noteflow__pb2.ServerInfo.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) +```` + +## File: src/noteflow/grpc/proto/noteflow_pb2.py +````python +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# NO CHECKED-IN PROTOBUF GENCODE +# source: noteflow.proto +# Protobuf Python Version: 6.31.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 6, + 31, + 1, + '', + 'noteflow.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0enoteflow.proto\x12\x08noteflow\"n\n\nAudioChunk\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\naudio_data\x18\x02 \x01(\x0c\x12\x11\n\ttimestamp\x18\x03 \x01(\x01\x12\x13\n\x0bsample_rate\x18\x04 \x01(\x05\x12\x10\n\x08\x63hannels\x18\x05 \x01(\x05\"\xaa\x01\n\x10TranscriptUpdate\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12)\n\x0bupdate_type\x18\x02 \x01(\x0e\x32\x14.noteflow.UpdateType\x12\x14\n\x0cpartial_text\x18\x03 \x01(\t\x12\'\n\x07segment\x18\x04 \x01(\x0b\x32\x16.noteflow.FinalSegment\x12\x18\n\x10server_timestamp\x18\x05 \x01(\x01\"\x87\x02\n\x0c\x46inalSegment\x12\x12\n\nsegment_id\x18\x01 \x01(\x05\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\x12#\n\x05words\x18\x05 \x03(\x0b\x32\x14.noteflow.WordTiming\x12\x10\n\x08language\x18\x06 \x01(\t\x12\x1b\n\x13language_confidence\x18\x07 \x01(\x02\x12\x13\n\x0b\x61vg_logprob\x18\x08 \x01(\x02\x12\x16\n\x0eno_speech_prob\x18\t \x01(\x02\x12\x12\n\nspeaker_id\x18\n \x01(\t\x12\x1a\n\x12speaker_confidence\x18\x0b \x01(\x02\"U\n\nWordTiming\x12\x0c\n\x04word\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\x12\x13\n\x0bprobability\x18\x04 \x01(\x02\"\xd1\x02\n\x07Meeting\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12%\n\x05state\x18\x03 \x01(\x0e\x32\x16.noteflow.MeetingState\x12\x12\n\ncreated_at\x18\x04 \x01(\x01\x12\x12\n\nstarted_at\x18\x05 \x01(\x01\x12\x10\n\x08\x65nded_at\x18\x06 \x01(\x01\x12\x18\n\x10\x64uration_seconds\x18\x07 \x01(\x01\x12(\n\x08segments\x18\x08 \x03(\x0b\x32\x16.noteflow.FinalSegment\x12\"\n\x07summary\x18\t \x01(\x0b\x32\x11.noteflow.Summary\x12\x31\n\x08metadata\x18\n \x03(\x0b\x32\x1f.noteflow.Meeting.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x96\x01\n\x14\x43reateMeetingRequest\x12\r\n\x05title\x18\x01 \x01(\t\x12>\n\x08metadata\x18\x02 \x03(\x0b\x32,.noteflow.CreateMeetingRequest.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"(\n\x12StopMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 
\x01(\t\"\x85\x01\n\x13ListMeetingsRequest\x12&\n\x06states\x18\x01 \x03(\x0e\x32\x16.noteflow.MeetingState\x12\r\n\x05limit\x18\x02 \x01(\x05\x12\x0e\n\x06offset\x18\x03 \x01(\x05\x12\'\n\nsort_order\x18\x04 \x01(\x0e\x32\x13.noteflow.SortOrder\"P\n\x14ListMeetingsResponse\x12#\n\x08meetings\x18\x01 \x03(\x0b\x32\x11.noteflow.Meeting\x12\x13\n\x0btotal_count\x18\x02 \x01(\x05\"Z\n\x11GetMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10include_segments\x18\x02 \x01(\x08\x12\x17\n\x0finclude_summary\x18\x03 \x01(\x08\"*\n\x14\x44\x65leteMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"(\n\x15\x44\x65leteMeetingResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\xb9\x01\n\x07Summary\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x19\n\x11\x65xecutive_summary\x18\x02 \x01(\t\x12&\n\nkey_points\x18\x03 \x03(\x0b\x32\x12.noteflow.KeyPoint\x12*\n\x0c\x61\x63tion_items\x18\x04 \x03(\x0b\x32\x14.noteflow.ActionItem\x12\x14\n\x0cgenerated_at\x18\x05 \x01(\x01\x12\x15\n\rmodel_version\x18\x06 \x01(\t\"S\n\x08KeyPoint\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x13\n\x0bsegment_ids\x18\x02 \x03(\x05\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\"y\n\nActionItem\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x10\n\x08\x61ssignee\x18\x02 \x01(\t\x12\x10\n\x08\x64ue_date\x18\x03 \x01(\x01\x12$\n\x08priority\x18\x04 \x01(\x0e\x32\x12.noteflow.Priority\x12\x13\n\x0bsegment_ids\x18\x05 \x03(\x05\"F\n\x16GenerateSummaryRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10\x66orce_regenerate\x18\x02 \x01(\x08\"\x13\n\x11ServerInfoRequest\"\xe4\x01\n\nServerInfo\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\x11\n\tasr_model\x18\x02 \x01(\t\x12\x11\n\tasr_ready\x18\x03 \x01(\x08\x12\x1e\n\x16supported_sample_rates\x18\x04 \x03(\x05\x12\x16\n\x0emax_chunk_size\x18\x05 \x01(\x05\x12\x16\n\x0euptime_seconds\x18\x06 \x01(\x01\x12\x17\n\x0f\x61\x63tive_meetings\x18\x07 \x01(\x05\x12\x1b\n\x13\x64iarization_enabled\x18\x08 \x01(\x08\x12\x19\n\x11\x64iarization_ready\x18\t \x01(\x08\"\xbc\x01\n\nAnnotation\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\nmeeting_id\x18\x02 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x03 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x12\n\nstart_time\x18\x05 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x06 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x07 \x03(\x05\x12\x12\n\ncreated_at\x18\x08 \x01(\x01\"\xa6\x01\n\x14\x41\x64\x64\x41nnotationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"-\n\x14GetAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"R\n\x16ListAnnotationsRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\"D\n\x17ListAnnotationsResponse\x12)\n\x0b\x61nnotations\x18\x01 \x03(\x0b\x32\x14.noteflow.Annotation\"\xac\x01\n\x17UpdateAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"0\n\x17\x44\x65leteAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"+\n\x18\x44\x65leteAnnotationResponse\x12\x0f\n\x07success\x18\x01 
\x01(\x08\"U\n\x17\x45xportTranscriptRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12&\n\x06\x66ormat\x18\x02 \x01(\x0e\x32\x16.noteflow.ExportFormat\"X\n\x18\x45xportTranscriptResponse\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x13\n\x0b\x66ormat_name\x18\x02 \x01(\t\x12\x16\n\x0e\x66ile_extension\x18\x03 \x01(\t\"K\n\x1fRefineSpeakerDiarizationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x14\n\x0cnum_speakers\x18\x02 \x01(\x05\"\x9d\x01\n RefineSpeakerDiarizationResponse\x12\x18\n\x10segments_updated\x18\x01 \x01(\x05\x12\x13\n\x0bspeaker_ids\x18\x02 \x03(\t\x12\x15\n\rerror_message\x18\x03 \x01(\t\x12\x0e\n\x06job_id\x18\x04 \x01(\t\x12#\n\x06status\x18\x05 \x01(\x0e\x32\x13.noteflow.JobStatus\"\\\n\x14RenameSpeakerRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x16\n\x0eold_speaker_id\x18\x02 \x01(\t\x12\x18\n\x10new_speaker_name\x18\x03 \x01(\t\"B\n\x15RenameSpeakerResponse\x12\x18\n\x10segments_updated\x18\x01 \x01(\x05\x12\x0f\n\x07success\x18\x02 \x01(\x08\"0\n\x1eGetDiarizationJobStatusRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x91\x01\n\x14\x44iarizationJobStatus\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12#\n\x06status\x18\x02 \x01(\x0e\x32\x13.noteflow.JobStatus\x12\x18\n\x10segments_updated\x18\x03 \x01(\x05\x12\x13\n\x0bspeaker_ids\x18\x04 \x03(\t\x12\x15\n\rerror_message\x18\x05 \x01(\t*\x8d\x01\n\nUpdateType\x12\x1b\n\x17UPDATE_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13UPDATE_TYPE_PARTIAL\x10\x01\x12\x15\n\x11UPDATE_TYPE_FINAL\x10\x02\x12\x19\n\x15UPDATE_TYPE_VAD_START\x10\x03\x12\x17\n\x13UPDATE_TYPE_VAD_END\x10\x04*\xb6\x01\n\x0cMeetingState\x12\x1d\n\x19MEETING_STATE_UNSPECIFIED\x10\x00\x12\x19\n\x15MEETING_STATE_CREATED\x10\x01\x12\x1b\n\x17MEETING_STATE_RECORDING\x10\x02\x12\x19\n\x15MEETING_STATE_STOPPED\x10\x03\x12\x1b\n\x17MEETING_STATE_COMPLETED\x10\x04\x12\x17\n\x13MEETING_STATE_ERROR\x10\x05*`\n\tSortOrder\x12\x1a\n\x16SORT_ORDER_UNSPECIFIED\x10\x00\x12\x1b\n\x17SORT_ORDER_CREATED_DESC\x10\x01\x12\x1a\n\x16SORT_ORDER_CREATED_ASC\x10\x02*^\n\x08Priority\x12\x18\n\x14PRIORITY_UNSPECIFIED\x10\x00\x12\x10\n\x0cPRIORITY_LOW\x10\x01\x12\x13\n\x0fPRIORITY_MEDIUM\x10\x02\x12\x11\n\rPRIORITY_HIGH\x10\x03*\xa4\x01\n\x0e\x41nnotationType\x12\x1f\n\x1b\x41NNOTATION_TYPE_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x41NNOTATION_TYPE_ACTION_ITEM\x10\x01\x12\x1c\n\x18\x41NNOTATION_TYPE_DECISION\x10\x02\x12\x18\n\x14\x41NNOTATION_TYPE_NOTE\x10\x03\x12\x18\n\x14\x41NNOTATION_TYPE_RISK\x10\x04*a\n\x0c\x45xportFormat\x12\x1d\n\x19\x45XPORT_FORMAT_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x45XPORT_FORMAT_MARKDOWN\x10\x01\x12\x16\n\x12\x45XPORT_FORMAT_HTML\x10\x02*\x87\x01\n\tJobStatus\x12\x1a\n\x16JOB_STATUS_UNSPECIFIED\x10\x00\x12\x15\n\x11JOB_STATUS_QUEUED\x10\x01\x12\x16\n\x12JOB_STATUS_RUNNING\x10\x02\x12\x18\n\x14JOB_STATUS_COMPLETED\x10\x03\x12\x15\n\x11JOB_STATUS_FAILED\x10\x04\x32\xe0\n\n\x0fNoteFlowService\x12K\n\x13StreamTranscription\x12\x14.noteflow.AudioChunk\x1a\x1a.noteflow.TranscriptUpdate(\x01\x30\x01\x12\x42\n\rCreateMeeting\x12\x1e.noteflow.CreateMeetingRequest\x1a\x11.noteflow.Meeting\x12>\n\x0bStopMeeting\x12\x1c.noteflow.StopMeetingRequest\x1a\x11.noteflow.Meeting\x12M\n\x0cListMeetings\x12\x1d.noteflow.ListMeetingsRequest\x1a\x1e.noteflow.ListMeetingsResponse\x12<\n\nGetMeeting\x12\x1b.noteflow.GetMeetingRequest\x1a\x11.noteflow.Meeting\x12P\n\rDeleteMeeting\x12\x1e.noteflow.DeleteMeetingRequest\x1a\x1f.noteflow.DeleteMeetingResponse\x12\x46\n\x0fGenerateSummary\x12 
.noteflow.GenerateSummaryRequest\x1a\x11.noteflow.Summary\x12\x45\n\rAddAnnotation\x12\x1e.noteflow.AddAnnotationRequest\x1a\x14.noteflow.Annotation\x12\x45\n\rGetAnnotation\x12\x1e.noteflow.GetAnnotationRequest\x1a\x14.noteflow.Annotation\x12V\n\x0fListAnnotations\x12 .noteflow.ListAnnotationsRequest\x1a!.noteflow.ListAnnotationsResponse\x12K\n\x10UpdateAnnotation\x12!.noteflow.UpdateAnnotationRequest\x1a\x14.noteflow.Annotation\x12Y\n\x10\x44\x65leteAnnotation\x12!.noteflow.DeleteAnnotationRequest\x1a\".noteflow.DeleteAnnotationResponse\x12Y\n\x10\x45xportTranscript\x12!.noteflow.ExportTranscriptRequest\x1a\".noteflow.ExportTranscriptResponse\x12q\n\x18RefineSpeakerDiarization\x12).noteflow.RefineSpeakerDiarizationRequest\x1a*.noteflow.RefineSpeakerDiarizationResponse\x12P\n\rRenameSpeaker\x12\x1e.noteflow.RenameSpeakerRequest\x1a\x1f.noteflow.RenameSpeakerResponse\x12\x63\n\x17GetDiarizationJobStatus\x12(.noteflow.GetDiarizationJobStatusRequest\x1a\x1e.noteflow.DiarizationJobStatus\x12\x42\n\rGetServerInfo\x12\x1b.noteflow.ServerInfoRequest\x1a\x14.noteflow.ServerInfob\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'noteflow_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + DESCRIPTOR._loaded_options = None + _globals['_MEETING_METADATAENTRY']._loaded_options = None + _globals['_MEETING_METADATAENTRY']._serialized_options = b'8\001' + _globals['_CREATEMEETINGREQUEST_METADATAENTRY']._loaded_options = None + _globals['_CREATEMEETINGREQUEST_METADATAENTRY']._serialized_options = b'8\001' + _globals['_UPDATETYPE']._serialized_start=3923 + _globals['_UPDATETYPE']._serialized_end=4064 + _globals['_MEETINGSTATE']._serialized_start=4067 + _globals['_MEETINGSTATE']._serialized_end=4249 + _globals['_SORTORDER']._serialized_start=4251 + _globals['_SORTORDER']._serialized_end=4347 + _globals['_PRIORITY']._serialized_start=4349 + _globals['_PRIORITY']._serialized_end=4443 + _globals['_ANNOTATIONTYPE']._serialized_start=4446 + _globals['_ANNOTATIONTYPE']._serialized_end=4610 + _globals['_EXPORTFORMAT']._serialized_start=4612 + _globals['_EXPORTFORMAT']._serialized_end=4709 + _globals['_JOBSTATUS']._serialized_start=4712 + _globals['_JOBSTATUS']._serialized_end=4847 + _globals['_AUDIOCHUNK']._serialized_start=28 + _globals['_AUDIOCHUNK']._serialized_end=138 + _globals['_TRANSCRIPTUPDATE']._serialized_start=141 + _globals['_TRANSCRIPTUPDATE']._serialized_end=311 + _globals['_FINALSEGMENT']._serialized_start=314 + _globals['_FINALSEGMENT']._serialized_end=577 + _globals['_WORDTIMING']._serialized_start=579 + _globals['_WORDTIMING']._serialized_end=664 + _globals['_MEETING']._serialized_start=667 + _globals['_MEETING']._serialized_end=1004 + _globals['_MEETING_METADATAENTRY']._serialized_start=957 + _globals['_MEETING_METADATAENTRY']._serialized_end=1004 + _globals['_CREATEMEETINGREQUEST']._serialized_start=1007 + _globals['_CREATEMEETINGREQUEST']._serialized_end=1157 + _globals['_CREATEMEETINGREQUEST_METADATAENTRY']._serialized_start=957 + _globals['_CREATEMEETINGREQUEST_METADATAENTRY']._serialized_end=1004 + _globals['_STOPMEETINGREQUEST']._serialized_start=1159 + _globals['_STOPMEETINGREQUEST']._serialized_end=1199 + _globals['_LISTMEETINGSREQUEST']._serialized_start=1202 + _globals['_LISTMEETINGSREQUEST']._serialized_end=1335 + _globals['_LISTMEETINGSRESPONSE']._serialized_start=1337 + _globals['_LISTMEETINGSRESPONSE']._serialized_end=1417 + 
_globals['_GETMEETINGREQUEST']._serialized_start=1419 + _globals['_GETMEETINGREQUEST']._serialized_end=1509 + _globals['_DELETEMEETINGREQUEST']._serialized_start=1511 + _globals['_DELETEMEETINGREQUEST']._serialized_end=1553 + _globals['_DELETEMEETINGRESPONSE']._serialized_start=1555 + _globals['_DELETEMEETINGRESPONSE']._serialized_end=1595 + _globals['_SUMMARY']._serialized_start=1598 + _globals['_SUMMARY']._serialized_end=1783 + _globals['_KEYPOINT']._serialized_start=1785 + _globals['_KEYPOINT']._serialized_end=1868 + _globals['_ACTIONITEM']._serialized_start=1870 + _globals['_ACTIONITEM']._serialized_end=1991 + _globals['_GENERATESUMMARYREQUEST']._serialized_start=1993 + _globals['_GENERATESUMMARYREQUEST']._serialized_end=2063 + _globals['_SERVERINFOREQUEST']._serialized_start=2065 + _globals['_SERVERINFOREQUEST']._serialized_end=2084 + _globals['_SERVERINFO']._serialized_start=2087 + _globals['_SERVERINFO']._serialized_end=2315 + _globals['_ANNOTATION']._serialized_start=2318 + _globals['_ANNOTATION']._serialized_end=2506 + _globals['_ADDANNOTATIONREQUEST']._serialized_start=2509 + _globals['_ADDANNOTATIONREQUEST']._serialized_end=2675 + _globals['_GETANNOTATIONREQUEST']._serialized_start=2677 + _globals['_GETANNOTATIONREQUEST']._serialized_end=2722 + _globals['_LISTANNOTATIONSREQUEST']._serialized_start=2724 + _globals['_LISTANNOTATIONSREQUEST']._serialized_end=2806 + _globals['_LISTANNOTATIONSRESPONSE']._serialized_start=2808 + _globals['_LISTANNOTATIONSRESPONSE']._serialized_end=2876 + _globals['_UPDATEANNOTATIONREQUEST']._serialized_start=2879 + _globals['_UPDATEANNOTATIONREQUEST']._serialized_end=3051 + _globals['_DELETEANNOTATIONREQUEST']._serialized_start=3053 + _globals['_DELETEANNOTATIONREQUEST']._serialized_end=3101 + _globals['_DELETEANNOTATIONRESPONSE']._serialized_start=3103 + _globals['_DELETEANNOTATIONRESPONSE']._serialized_end=3146 + _globals['_EXPORTTRANSCRIPTREQUEST']._serialized_start=3148 + _globals['_EXPORTTRANSCRIPTREQUEST']._serialized_end=3233 + _globals['_EXPORTTRANSCRIPTRESPONSE']._serialized_start=3235 + _globals['_EXPORTTRANSCRIPTRESPONSE']._serialized_end=3323 + _globals['_REFINESPEAKERDIARIZATIONREQUEST']._serialized_start=3325 + _globals['_REFINESPEAKERDIARIZATIONREQUEST']._serialized_end=3400 + _globals['_REFINESPEAKERDIARIZATIONRESPONSE']._serialized_start=3403 + _globals['_REFINESPEAKERDIARIZATIONRESPONSE']._serialized_end=3560 + _globals['_RENAMESPEAKERREQUEST']._serialized_start=3562 + _globals['_RENAMESPEAKERREQUEST']._serialized_end=3654 + _globals['_RENAMESPEAKERRESPONSE']._serialized_start=3656 + _globals['_RENAMESPEAKERRESPONSE']._serialized_end=3722 + _globals['_GETDIARIZATIONJOBSTATUSREQUEST']._serialized_start=3724 + _globals['_GETDIARIZATIONJOBSTATUSREQUEST']._serialized_end=3772 + _globals['_DIARIZATIONJOBSTATUS']._serialized_start=3775 + _globals['_DIARIZATIONJOBSTATUS']._serialized_end=3920 + _globals['_NOTEFLOWSERVICE']._serialized_start=4850 + _globals['_NOTEFLOWSERVICE']._serialized_end=6226 +# @@protoc_insertion_point(module_scope) +```` + +## File: src/noteflow/grpc/proto/noteflow.proto +````protobuf +// NoteFlow gRPC Service Definition +// Provides real-time ASR streaming and meeting management + +syntax = "proto3"; + +package noteflow; + +// ============================================================================= +// Core Service +// ============================================================================= + +service NoteFlowService { + // Bidirectional streaming: client sends audio chunks, server returns transcripts + rpc 
StreamTranscription(stream AudioChunk) returns (stream TranscriptUpdate); + + // Meeting lifecycle management + rpc CreateMeeting(CreateMeetingRequest) returns (Meeting); + rpc StopMeeting(StopMeetingRequest) returns (Meeting); + rpc ListMeetings(ListMeetingsRequest) returns (ListMeetingsResponse); + rpc GetMeeting(GetMeetingRequest) returns (Meeting); + rpc DeleteMeeting(DeleteMeetingRequest) returns (DeleteMeetingResponse); + + // Summary generation + rpc GenerateSummary(GenerateSummaryRequest) returns (Summary); + + // Annotation management + rpc AddAnnotation(AddAnnotationRequest) returns (Annotation); + rpc GetAnnotation(GetAnnotationRequest) returns (Annotation); + rpc ListAnnotations(ListAnnotationsRequest) returns (ListAnnotationsResponse); + rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation); + rpc DeleteAnnotation(DeleteAnnotationRequest) returns (DeleteAnnotationResponse); + + // Export functionality + rpc ExportTranscript(ExportTranscriptRequest) returns (ExportTranscriptResponse); + + // Speaker diarization + rpc RefineSpeakerDiarization(RefineSpeakerDiarizationRequest) returns (RefineSpeakerDiarizationResponse); + rpc RenameSpeaker(RenameSpeakerRequest) returns (RenameSpeakerResponse); + rpc GetDiarizationJobStatus(GetDiarizationJobStatusRequest) returns (DiarizationJobStatus); + + // Server health and capabilities + rpc GetServerInfo(ServerInfoRequest) returns (ServerInfo); +} + +// ============================================================================= +// Audio Streaming Messages +// ============================================================================= + +message AudioChunk { + // Meeting ID this audio belongs to + string meeting_id = 1; + + // Raw audio data (float32, mono, 16kHz expected) + bytes audio_data = 2; + + // Timestamp when audio was captured (monotonic, seconds) + double timestamp = 3; + + // Sample rate in Hz (default 16000) + int32 sample_rate = 4; + + // Number of channels (default 1 for mono) + int32 channels = 5; +} + +message TranscriptUpdate { + // Meeting ID this transcript belongs to + string meeting_id = 1; + + // Type of update + UpdateType update_type = 2; + + // For partial updates - tentative transcript text + string partial_text = 3; + + // For final segments - confirmed transcript + FinalSegment segment = 4; + + // Server-side processing timestamp + double server_timestamp = 5; +} + +enum UpdateType { + UPDATE_TYPE_UNSPECIFIED = 0; + UPDATE_TYPE_PARTIAL = 1; // Tentative, may change + UPDATE_TYPE_FINAL = 2; // Confirmed segment + UPDATE_TYPE_VAD_START = 3; // Voice activity started + UPDATE_TYPE_VAD_END = 4; // Voice activity ended +} + +message FinalSegment { + // Segment ID (sequential within meeting) + int32 segment_id = 1; + + // Transcript text + string text = 2; + + // Start time relative to meeting start (seconds) + double start_time = 3; + + // End time relative to meeting start (seconds) + double end_time = 4; + + // Word-level timestamps + repeated WordTiming words = 5; + + // Detected language + string language = 6; + + // Language detection confidence (0.0-1.0) + float language_confidence = 7; + + // Average log probability (quality indicator) + float avg_logprob = 8; + + // Probability that segment contains no speech + float no_speech_prob = 9; + + // Speaker identification (from diarization) + string speaker_id = 10; + + // Speaker assignment confidence (0.0-1.0) + float speaker_confidence = 11; +} + +message WordTiming { + string word = 1; + double start_time = 2; + double end_time = 3; + float 
probability = 4; +} + +// ============================================================================= +// Meeting Management Messages +// ============================================================================= + +message Meeting { + // Unique meeting identifier + string id = 1; + + // User-provided title + string title = 2; + + // Meeting state + MeetingState state = 3; + + // Creation timestamp (Unix epoch seconds) + double created_at = 4; + + // Start timestamp (when recording began) + double started_at = 5; + + // End timestamp (when recording stopped) + double ended_at = 6; + + // Duration in seconds + double duration_seconds = 7; + + // Full transcript segments + repeated FinalSegment segments = 8; + + // Generated summary (if available) + Summary summary = 9; + + // Metadata + map<string, string> metadata = 10; +} + +enum MeetingState { + MEETING_STATE_UNSPECIFIED = 0; + MEETING_STATE_CREATED = 1; // Created but not started + MEETING_STATE_RECORDING = 2; // Actively recording + MEETING_STATE_STOPPED = 3; // Recording stopped, processing may continue + MEETING_STATE_COMPLETED = 4; // All processing complete + MEETING_STATE_ERROR = 5; // Error occurred +} + +message CreateMeetingRequest { + // Optional title (generated if not provided) + string title = 1; + + // Optional metadata + map<string, string> metadata = 2; +} + +message StopMeetingRequest { + string meeting_id = 1; +} + +message ListMeetingsRequest { + // Optional filter by state + repeated MeetingState states = 1; + + // Pagination + int32 limit = 2; + int32 offset = 3; + + // Sort order + SortOrder sort_order = 4; +} + +enum SortOrder { + SORT_ORDER_UNSPECIFIED = 0; + SORT_ORDER_CREATED_DESC = 1; // Newest first (default) + SORT_ORDER_CREATED_ASC = 2; // Oldest first +} + +message ListMeetingsResponse { + repeated Meeting meetings = 1; + int32 total_count = 2; +} + +message GetMeetingRequest { + string meeting_id = 1; + + // Whether to include full transcript segments + bool include_segments = 2; + + // Whether to include summary + bool include_summary = 3; +} + +message DeleteMeetingRequest { + string meeting_id = 1; +} + +message DeleteMeetingResponse { + bool success = 1; +} + +// ============================================================================= +// Summary Messages +// ============================================================================= + +message Summary { + // Meeting this summary belongs to + string meeting_id = 1; + + // Executive summary (2-3 sentences) + string executive_summary = 2; + + // Key points / highlights + repeated KeyPoint key_points = 3; + + // Action items extracted + repeated ActionItem action_items = 4; + + // Generated timestamp + double generated_at = 5; + + // Model/version used for generation + string model_version = 6; +} + +message KeyPoint { + // The key point text + string text = 1; + + // Segment IDs that support this point (evidence linking) + repeated int32 segment_ids = 2; + + // Timestamp range this point covers + double start_time = 3; + double end_time = 4; +} + +message ActionItem { + // Action item text + string text = 1; + + // Assigned to (if mentioned) + string assignee = 2; + + // Due date (if mentioned, Unix epoch) + double due_date = 3; + + // Priority level + Priority priority = 4; + + // Segment IDs that mention this action + repeated int32 segment_ids = 5; +} + +enum Priority { + PRIORITY_UNSPECIFIED = 0; + PRIORITY_LOW = 1; + PRIORITY_MEDIUM = 2; + PRIORITY_HIGH = 3; +} + +message GenerateSummaryRequest { + string meeting_id = 1; + + // Force regeneration even if summary
exists + bool force_regenerate = 2; +} + +// ============================================================================= +// Server Info Messages +// ============================================================================= + +message ServerInfoRequest {} + +message ServerInfo { + // Server version + string version = 1; + + // ASR model loaded + string asr_model = 2; + + // Whether ASR is ready + bool asr_ready = 3; + + // Supported sample rates + repeated int32 supported_sample_rates = 4; + + // Maximum audio chunk size in bytes + int32 max_chunk_size = 5; + + // Server uptime in seconds + double uptime_seconds = 6; + + // Number of active meetings + int32 active_meetings = 7; + + // Whether diarization is enabled + bool diarization_enabled = 8; + + // Whether diarization models are ready + bool diarization_ready = 9; +} + +// ============================================================================= +// Annotation Messages +// ============================================================================= + +enum AnnotationType { + ANNOTATION_TYPE_UNSPECIFIED = 0; + ANNOTATION_TYPE_ACTION_ITEM = 1; + ANNOTATION_TYPE_DECISION = 2; + ANNOTATION_TYPE_NOTE = 3; + ANNOTATION_TYPE_RISK = 4; +} + +message Annotation { + // Unique annotation identifier + string id = 1; + + // Meeting this annotation belongs to + string meeting_id = 2; + + // Type of annotation + AnnotationType annotation_type = 3; + + // Annotation text + string text = 4; + + // Start time relative to meeting start (seconds) + double start_time = 5; + + // End time relative to meeting start (seconds) + double end_time = 6; + + // Linked segment IDs (evidence linking) + repeated int32 segment_ids = 7; + + // Creation timestamp (Unix epoch seconds) + double created_at = 8; +} + +message AddAnnotationRequest { + // Meeting ID to add annotation to + string meeting_id = 1; + + // Type of annotation + AnnotationType annotation_type = 2; + + // Annotation text + string text = 3; + + // Start time relative to meeting start (seconds) + double start_time = 4; + + // End time relative to meeting start (seconds) + double end_time = 5; + + // Optional linked segment IDs + repeated int32 segment_ids = 6; +} + +message GetAnnotationRequest { + string annotation_id = 1; +} + +message ListAnnotationsRequest { + // Meeting ID to list annotations for + string meeting_id = 1; + + // Optional time range filter + double start_time = 2; + double end_time = 3; +} + +message ListAnnotationsResponse { + repeated Annotation annotations = 1; +} + +message UpdateAnnotationRequest { + // Annotation ID to update + string annotation_id = 1; + + // Updated type (optional, keeps existing if not set) + AnnotationType annotation_type = 2; + + // Updated text (optional, keeps existing if empty) + string text = 3; + + // Updated start time (optional, keeps existing if 0) + double start_time = 4; + + // Updated end time (optional, keeps existing if 0) + double end_time = 5; + + // Updated segment IDs (replaces existing) + repeated int32 segment_ids = 6; +} + +message DeleteAnnotationRequest { + string annotation_id = 1; +} + +message DeleteAnnotationResponse { + bool success = 1; +} + +// ============================================================================= +// Export Messages +// ============================================================================= + +enum ExportFormat { + EXPORT_FORMAT_UNSPECIFIED = 0; + EXPORT_FORMAT_MARKDOWN = 1; + EXPORT_FORMAT_HTML = 2; +} + +enum JobStatus { + JOB_STATUS_UNSPECIFIED = 0; + JOB_STATUS_QUEUED = 1; + 
JOB_STATUS_RUNNING = 2; + JOB_STATUS_COMPLETED = 3; + JOB_STATUS_FAILED = 4; +} + +message ExportTranscriptRequest { + // Meeting ID to export + string meeting_id = 1; + + // Export format + ExportFormat format = 2; +} + +message ExportTranscriptResponse { + // Exported content + string content = 1; + + // Format name + string format_name = 2; + + // Suggested file extension + string file_extension = 3; +} + +// ============================================================================= +// Speaker Diarization Messages +// ============================================================================= + +message RefineSpeakerDiarizationRequest { + // Meeting ID to run diarization on + string meeting_id = 1; + + // Optional known number of speakers (auto-detect if not set or 0) + int32 num_speakers = 2; +} + +message RefineSpeakerDiarizationResponse { + // Number of segments updated with speaker labels + int32 segments_updated = 1; + + // Distinct speaker IDs found + repeated string speaker_ids = 2; + + // Error message if diarization failed + string error_message = 3; + + // Background job identifier (empty if request failed) + string job_id = 4; + + // Current job status + JobStatus status = 5; +} + +message RenameSpeakerRequest { + // Meeting ID + string meeting_id = 1; + + // Original speaker ID (e.g., "SPEAKER_00") + string old_speaker_id = 2; + + // New speaker name (e.g., "Alice") + string new_speaker_name = 3; +} + +message RenameSpeakerResponse { + // Number of segments updated + int32 segments_updated = 1; + + // Success flag + bool success = 2; +} + +message GetDiarizationJobStatusRequest { + // Job ID returned by RefineSpeakerDiarization + string job_id = 1; +} + +message DiarizationJobStatus { + // Job ID + string job_id = 1; + + // Current status + JobStatus status = 2; + + // Number of segments updated (when completed) + int32 segments_updated = 3; + + // Distinct speaker IDs found (when completed) + repeated string speaker_ids = 4; + + // Error message if failed + string error_message = 5; +} +```` + +## File: src/noteflow/infrastructure/audio/__init__.py +````python +"""Audio infrastructure module. + +Provide audio capture, level metering, buffering, playback, and encrypted storage. 
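
Example (illustrative sketch only; the exact ``compute_rms`` signature is an
assumption inferred from the exported names below):

    import numpy as np

    frame = np.zeros(1600, dtype=np.float32)  # 100 ms of silence at 16 kHz
    level = compute_rms(frame)  # hypothetical call: RMS level of one frame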
+""" + +from noteflow.infrastructure.audio.capture import SoundDeviceCapture +from noteflow.infrastructure.audio.dto import ( + AudioDeviceInfo, + AudioFrameCallback, + TimestampedAudio, +) +from noteflow.infrastructure.audio.levels import RmsLevelProvider, compute_rms +from noteflow.infrastructure.audio.playback import PlaybackState, SoundDevicePlayback +from noteflow.infrastructure.audio.protocols import ( + AudioCapture, + AudioLevelProvider, + AudioPlayback, + RingBuffer, +) +from noteflow.infrastructure.audio.reader import MeetingAudioReader +from noteflow.infrastructure.audio.ring_buffer import TimestampedRingBuffer +from noteflow.infrastructure.audio.writer import MeetingAudioWriter + +__all__ = [ + "AudioCapture", + "AudioDeviceInfo", + "AudioFrameCallback", + "AudioLevelProvider", + "AudioPlayback", + "MeetingAudioReader", + "MeetingAudioWriter", + "PlaybackState", + "RingBuffer", + "RmsLevelProvider", + "SoundDeviceCapture", + "SoundDevicePlayback", + "TimestampedAudio", + "TimestampedRingBuffer", + "compute_rms", +] +```` + +## File: src/noteflow/infrastructure/converters/orm_converters.py +````python +"""Convert between ORM models and domain entities.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from noteflow.domain.entities import ( + ActionItem, + Annotation, + KeyPoint, + Meeting, + Segment, + Summary, +) +from noteflow.domain.entities import ( + WordTiming as DomainWordTiming, +) +from noteflow.domain.value_objects import ( + AnnotationId, + AnnotationType, + MeetingId, + MeetingState, +) + +if TYPE_CHECKING: + from noteflow.infrastructure.persistence.models import ( + ActionItemModel, + AnnotationModel, + KeyPointModel, + MeetingModel, + SegmentModel, + SummaryModel, + WordTimingModel, + ) + + +class OrmConverter: + """Convert between ORM models and domain entities.""" + + # --- WordTiming --- + + @staticmethod + def word_timing_to_domain(model: WordTimingModel) -> DomainWordTiming: + """Convert ORM WordTiming model to domain entity. + + Args: + model: SQLAlchemy WordTimingModel instance. + + Returns: + Domain WordTiming entity. + + Raises: + ValueError: If timing validation fails during entity construction. + """ + return DomainWordTiming( + word=model.word, + start_time=model.start_time, + end_time=model.end_time, + probability=model.probability, + ) + + @staticmethod + def word_timing_to_orm_kwargs(word: DomainWordTiming) -> dict[str, str | float]: + """Convert domain WordTiming to ORM model kwargs. + + Return a dict of kwargs rather than instantiating WordTimingModel directly + to avoid circular imports and allow the repository to handle ORM construction. + + Args: + word: Domain WordTiming entity. + + Returns: + Dict with word, start_time, end_time, probability for ORM construction. + """ + return { + "word": word.word, + "start_time": word.start_time, + "end_time": word.end_time, + "probability": word.probability, + } + + # --- Meeting --- + + @staticmethod + def meeting_to_domain(model: MeetingModel) -> Meeting: + """Convert ORM Meeting model to domain entity. + + Args: + model: SQLAlchemy MeetingModel instance. + + Returns: + Domain Meeting entity. 
+ """ + return Meeting( + id=MeetingId(model.id), + title=model.title, + state=MeetingState(model.state), + created_at=model.created_at, + started_at=model.started_at, + ended_at=model.ended_at, + metadata=model.metadata_, + wrapped_dek=model.wrapped_dek, + ) + + # --- Segment --- + + @staticmethod + def segment_to_domain(model: SegmentModel, include_words: bool = True) -> Segment: + """Convert ORM Segment model to domain entity. + + Args: + model: SQLAlchemy SegmentModel instance. + include_words: Whether to include word-level timing. + + Returns: + Domain Segment entity. + """ + words: list[DomainWordTiming] = [] + if include_words: + words = [OrmConverter.word_timing_to_domain(w) for w in model.words] + + embedding = list(model.embedding) if model.embedding is not None else None + + return Segment( + segment_id=model.segment_id, + text=model.text, + start_time=model.start_time, + end_time=model.end_time, + meeting_id=MeetingId(model.meeting_id), + words=words, + language=model.language, + language_confidence=model.language_confidence, + avg_logprob=model.avg_logprob, + no_speech_prob=model.no_speech_prob, + embedding=embedding, + speaker_id=model.speaker_id, + speaker_confidence=model.speaker_confidence, + db_id=model.id, + ) + + # --- Annotation --- + + @staticmethod + def annotation_to_domain(model: AnnotationModel) -> Annotation: + """Convert ORM Annotation model to domain entity. + + Args: + model: SQLAlchemy AnnotationModel instance. + + Returns: + Domain Annotation entity. + """ + return Annotation( + id=AnnotationId(model.annotation_id), + meeting_id=MeetingId(model.meeting_id), + annotation_type=AnnotationType(model.annotation_type), + text=model.text, + start_time=model.start_time, + end_time=model.end_time, + segment_ids=model.segment_ids, + created_at=model.created_at, + db_id=model.id, + ) + + # --- Summary --- + + @staticmethod + def key_point_to_domain(model: KeyPointModel) -> KeyPoint: + """Convert ORM KeyPoint model to domain entity. + + Args: + model: SQLAlchemy KeyPointModel instance. + + Returns: + Domain KeyPoint entity. + """ + return KeyPoint( + text=model.text, + segment_ids=model.segment_ids, + start_time=model.start_time, + end_time=model.end_time, + db_id=model.id, + ) + + @staticmethod + def action_item_to_domain(model: ActionItemModel) -> ActionItem: + """Convert ORM ActionItem model to domain entity. + + Args: + model: SQLAlchemy ActionItemModel instance. + + Returns: + Domain ActionItem entity. + """ + return ActionItem( + text=model.text, + assignee=model.assignee, + due_date=model.due_date, + priority=model.priority, + segment_ids=model.segment_ids, + db_id=model.id, + ) + + @staticmethod + def summary_to_domain(model: SummaryModel, meeting_id: MeetingId) -> Summary: + """Convert ORM Summary model to domain entity. + + Args: + model: SQLAlchemy SummaryModel instance. + meeting_id: Meeting identifier (passed for type safety). + + Returns: + Domain Summary entity. + """ + return Summary( + meeting_id=meeting_id, + executive_summary=model.executive_summary or "", + key_points=[OrmConverter.key_point_to_domain(kp) for kp in model.key_points], + action_items=[OrmConverter.action_item_to_domain(ai) for ai in model.action_items], + generated_at=model.generated_at, + model_version=model.model_version or "", + db_id=model.id, + ) +```` + +## File: src/noteflow/infrastructure/export/html.py +````python +"""HTML exporter implementation. + +Export meeting transcripts to HTML format. 
+""" + +from __future__ import annotations + +import html +from datetime import datetime +from typing import TYPE_CHECKING + +from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp + +if TYPE_CHECKING: + from collections.abc import Sequence + + from noteflow.domain.entities.meeting import Meeting + from noteflow.domain.entities.segment import Segment + + +def _escape(text: str) -> str: + """Escape HTML special characters. + + Args: + text: Raw text to escape. + + Returns: + HTML-safe text. + """ + return html.escape(text) + + +# HTML template with embedded CSS for print-friendly output +_HTML_TEMPLATE = """ + + + + + {title} + + + +{content} + +""" + + +class HtmlExporter: + """Export meeting transcripts to HTML format. + + Produces clean, print-friendly HTML with embedded CSS styling, + meeting metadata, transcript with timestamps, and optional summary. + """ + + @property + def format_name(self) -> str: + """Human-readable format name.""" + return "HTML" + + @property + def file_extension(self) -> str: + """File extension for HTML.""" + return ".html" + + def export( + self, + meeting: Meeting, + segments: Sequence[Segment], + ) -> str: + """Export meeting transcript to HTML. + + Args: + meeting: Meeting entity with metadata. + segments: Ordered list of transcript segments. + + Returns: + HTML-formatted transcript string. + """ + content_parts: list[str] = [ + f"

{_escape(meeting.title)}

", + '", + "

Transcript

", + '
', + ) + ) + for segment in segments: + timestamp = format_timestamp(segment.start_time) + content_parts.append('
') + content_parts.append(f'[{timestamp}]') + content_parts.extend((f"{_escape(segment.text)}", "
")) + content_parts.append("
") + + # Summary section (if available) + if meeting.summary: + content_parts.extend(('
', "

Summary

")) + if meeting.summary.executive_summary: + content_parts.append(f"

{_escape(meeting.summary.executive_summary)}

") + + if meeting.summary.key_points: + content_parts.extend(("

Key Points

", '
    ')) + content_parts.extend( + f"
  • {_escape(point.text)}
  • " for point in meeting.summary.key_points + ) + content_parts.append("
") + + if meeting.summary.action_items: + content_parts.extend(("

Action Items

", '
    ')) + for item in meeting.summary.action_items: + assignee = ( + f' @{_escape(item.assignee)}' + if item.assignee + else "" + ) + content_parts.append(f"
  • {_escape(item.text)}{assignee}
  • ") + content_parts.append("
") + + content_parts.append("
") + + # Footer + content_parts.append("", + ) + ) + content = "\n".join(content_parts) + return _HTML_TEMPLATE.format(title=_escape(meeting.title), content=content) +```` + +## File: src/noteflow/infrastructure/persistence/repositories/meeting_repo.py +````python +"""SQLAlchemy implementation of MeetingRepository.""" + +from __future__ import annotations + +from collections.abc import Sequence +from datetime import datetime +from uuid import UUID + +from sqlalchemy import func, select + +from noteflow.domain.entities import Meeting +from noteflow.domain.value_objects import MeetingId, MeetingState +from noteflow.infrastructure.converters import OrmConverter +from noteflow.infrastructure.persistence.models import MeetingModel +from noteflow.infrastructure.persistence.repositories._base import BaseRepository + + +class SqlAlchemyMeetingRepository(BaseRepository): + """SQLAlchemy implementation of MeetingRepository.""" + + async def create(self, meeting: Meeting) -> Meeting: + """Persist a new meeting. + + Args: + meeting: Meeting to create. + + Returns: + Created meeting. + """ + model = MeetingModel( + id=UUID(str(meeting.id)), + title=meeting.title, + state=int(meeting.state), + created_at=meeting.created_at, + started_at=meeting.started_at, + ended_at=meeting.ended_at, + metadata_=meeting.metadata, + wrapped_dek=meeting.wrapped_dek, + ) + self._session.add(model) + await self._session.flush() + return meeting + + async def get(self, meeting_id: MeetingId) -> Meeting | None: + """Retrieve a meeting by ID. + + Args: + meeting_id: Meeting identifier. + + Returns: + Meeting if found, None otherwise. + """ + stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting_id))) + model = await self._execute_scalar(stmt) + + return None if model is None else OrmConverter.meeting_to_domain(model) + + async def update(self, meeting: Meeting) -> Meeting: + """Update an existing meeting. + + Args: + meeting: Meeting with updated fields. + + Returns: + Updated meeting. + + Raises: + ValueError: If meeting does not exist. + """ + stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting.id))) + model = await self._execute_scalar(stmt) + + if model is None: + raise ValueError(f"Meeting {meeting.id} not found") + + model.title = meeting.title + model.state = int(meeting.state) + model.started_at = meeting.started_at + model.ended_at = meeting.ended_at + model.metadata_ = meeting.metadata + model.wrapped_dek = meeting.wrapped_dek + + await self._session.flush() + return meeting + + async def delete(self, meeting_id: MeetingId) -> bool: + """Delete a meeting and all associated data. + + Args: + meeting_id: Meeting identifier. + + Returns: + True if deleted, False if not found. + """ + stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting_id))) + model = await self._execute_scalar(stmt) + + if model is None: + return False + + await self._delete_and_flush(model) + return True + + async def list_all( + self, + states: list[MeetingState] | None = None, + limit: int = 100, + offset: int = 0, + sort_desc: bool = True, + ) -> tuple[Sequence[Meeting], int]: + """List meetings with optional filtering. + + Args: + states: Optional list of states to filter by. + limit: Maximum number of meetings to return. + offset: Number of meetings to skip. + sort_desc: Sort by created_at descending if True. + + Returns: + Tuple of (meetings list, total count matching filter). 
+ """ + # Build base query + stmt = select(MeetingModel) + + # Filter by states + if states: + state_values = [int(s) for s in states] + stmt = stmt.where(MeetingModel.state.in_(state_values)) + + # Count total + count_stmt = select(func.count()).select_from(stmt.subquery()) + total_result = await self._session.execute(count_stmt) + total = total_result.scalar() or 0 + + # Sort and paginate + order_col = MeetingModel.created_at.desc() if sort_desc else MeetingModel.created_at.asc() + stmt = stmt.order_by(order_col).offset(offset).limit(limit) + + result = await self._session.execute(stmt) + models = result.scalars().all() + + meetings = [OrmConverter.meeting_to_domain(m) for m in models] + return meetings, total + + async def count_by_state(self, state: MeetingState) -> int: + """Count meetings in a specific state. + + Args: + state: Meeting state to count. + + Returns: + Number of meetings in the specified state. + """ + stmt = ( + select(func.count()).select_from(MeetingModel).where(MeetingModel.state == int(state)) + ) + result = await self._session.execute(stmt) + return result.scalar() or 0 + + async def find_older_than(self, cutoff: datetime) -> Sequence[Meeting]: + """Find completed meetings older than cutoff date. + + Args: + cutoff: Cutoff datetime; meetings ended before this are returned. + + Returns: + Sequence of meetings with ended_at before cutoff. + """ + # Only consider completed meetings (have ended_at set) + stmt = ( + select(MeetingModel) + .where(MeetingModel.ended_at.isnot(None)) + .where(MeetingModel.ended_at < cutoff) + .order_by(MeetingModel.ended_at.asc()) + ) + result = await self._session.execute(stmt) + models = result.scalars().all() + return [OrmConverter.meeting_to_domain(m) for m in models] +```` + +## File: src/noteflow/infrastructure/persistence/repositories/segment_repo.py +````python +"""SQLAlchemy implementation of SegmentRepository.""" + +from __future__ import annotations + +from collections.abc import Sequence +from uuid import UUID + +from sqlalchemy import func, select + +from noteflow.domain.entities import Segment +from noteflow.domain.value_objects import MeetingId +from noteflow.infrastructure.converters import OrmConverter +from noteflow.infrastructure.persistence.models import SegmentModel, WordTimingModel +from noteflow.infrastructure.persistence.repositories._base import BaseRepository + + +class SqlAlchemySegmentRepository(BaseRepository): + """SQLAlchemy implementation of SegmentRepository.""" + + async def add(self, meeting_id: MeetingId, segment: Segment) -> Segment: + """Add a segment to a meeting. + + Args: + meeting_id: Meeting identifier. + segment: Segment to add. + + Returns: + Added segment with db_id populated. 
+ """ + model = SegmentModel( + meeting_id=UUID(str(meeting_id)), + segment_id=segment.segment_id, + text=segment.text, + start_time=segment.start_time, + end_time=segment.end_time, + language=segment.language, + language_confidence=segment.language_confidence, + avg_logprob=segment.avg_logprob, + no_speech_prob=segment.no_speech_prob, + embedding=segment.embedding, + speaker_id=segment.speaker_id, + speaker_confidence=segment.speaker_confidence, + ) + + # Add word timings + for word in segment.words: + word_kwargs = OrmConverter.word_timing_to_orm_kwargs(word) + word_model = WordTimingModel(**word_kwargs) + model.words.append(word_model) + + self._session.add(model) + await self._session.flush() + + # Update segment with db_id + segment.db_id = model.id + segment.meeting_id = meeting_id + return segment + + async def add_batch( + self, + meeting_id: MeetingId, + segments: Sequence[Segment], + ) -> Sequence[Segment]: + """Add multiple segments to a meeting in batch. + + Args: + meeting_id: Meeting identifier. + segments: Segments to add. + + Returns: + Added segments with db_ids populated. + """ + result_segments: list[Segment] = [] + + for segment in segments: + added = await self.add(meeting_id, segment) + result_segments.append(added) + + return result_segments + + async def get_by_meeting( + self, + meeting_id: MeetingId, + include_words: bool = True, + ) -> Sequence[Segment]: + """Get all segments for a meeting. + + Args: + meeting_id: Meeting identifier. + include_words: Include word-level timing. + + Returns: + List of segments ordered by segment_id. + """ + stmt = ( + select(SegmentModel) + .where(SegmentModel.meeting_id == UUID(str(meeting_id))) + .order_by(SegmentModel.segment_id) + ) + + models = await self._execute_scalars(stmt) + + return [OrmConverter.segment_to_domain(m, include_words) for m in models] + + async def search_semantic( + self, + query_embedding: list[float], + limit: int = 10, + meeting_id: MeetingId | None = None, + ) -> Sequence[tuple[Segment, float]]: + """Search segments by semantic similarity. + + Args: + query_embedding: Query embedding vector. + limit: Maximum number of results. + meeting_id: Optional meeting to restrict search to. + + Returns: + List of (segment, similarity_score) tuples. + """ + # Build query with cosine similarity + similarity = SegmentModel.embedding.cosine_distance(query_embedding) + + stmt = select(SegmentModel, similarity.label("distance")).where( + SegmentModel.embedding.is_not(None) + ) + + if meeting_id: + stmt = stmt.where(SegmentModel.meeting_id == UUID(str(meeting_id))) + + stmt = stmt.order_by(similarity).limit(limit) + + result = await self._session.execute(stmt) + rows = result.all() + + results: list[tuple[Segment, float]] = [] + for row in rows: + model = row[0] + distance = row[1] + # Convert distance to similarity (1 - distance for cosine) + similarity_score = 1.0 - float(distance) + segment = OrmConverter.segment_to_domain(model, include_words=False) + results.append((segment, similarity_score)) + + return results + + async def update_embedding( + self, + segment_db_id: int, + embedding: list[float], + ) -> None: + """Update the embedding for a segment. + + Args: + segment_db_id: Segment database primary key. + embedding: New embedding vector. 
+ """ + stmt = select(SegmentModel).where(SegmentModel.id == segment_db_id) + result = await self._session.execute(stmt) + if model := result.scalar_one_or_none(): + model.embedding = embedding + await self._session.flush() + + async def update_speaker( + self, + segment_db_id: int, + speaker_id: str | None, + speaker_confidence: float, + ) -> None: + """Update speaker diarization fields for a segment. + + Args: + segment_db_id: Segment database primary key. + speaker_id: Speaker identifier from diarization. + speaker_confidence: Confidence of speaker assignment (0.0-1.0). + """ + stmt = select(SegmentModel).where(SegmentModel.id == segment_db_id) + result = await self._session.execute(stmt) + if model := result.scalar_one_or_none(): + model.speaker_id = speaker_id + model.speaker_confidence = speaker_confidence + await self._session.flush() + + async def get_next_segment_id(self, meeting_id: MeetingId) -> int: + """Get the next segment_id for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + Next segment_id (max + 1), or 0 if no segments exist. + """ + stmt = select(func.max(SegmentModel.segment_id)).where( + SegmentModel.meeting_id == UUID(str(meeting_id)) + ) + result = await self._session.execute(stmt) + max_segment_id = result.scalar_one_or_none() + return 0 if max_segment_id is None else int(max_segment_id) + 1 +```` + +## File: src/noteflow/infrastructure/persistence/unit_of_work.py +````python +"""SQLAlchemy implementation of Unit of Work pattern.""" + +from __future__ import annotations + +from collections.abc import Callable +from typing import Self + +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + +from noteflow.config.settings import Settings +from noteflow.infrastructure.persistence.database import ( + create_async_engine, + get_async_session_factory, +) + +from .repositories import ( + SqlAlchemyAnnotationRepository, + SqlAlchemyMeetingRepository, + SqlAlchemySegmentRepository, + SqlAlchemySummaryRepository, +) + + +class SqlAlchemyUnitOfWork: + """SQLAlchemy implementation of Unit of Work. + + Provides transactional consistency across repositories. + Use as an async context manager for automatic commit/rollback. + + Example: + async with SqlAlchemyUnitOfWork(session_factory) as uow: + meeting = await uow.meetings.get(meeting_id) + await uow.segments.add(meeting_id, segment) + await uow.commit() + """ + + def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None: + """Initialize unit of work with session factory. + + Args: + session_factory: Factory for creating async sessions. + """ + self._session_factory = session_factory + self._session: AsyncSession | None = None + self._annotations_repo: SqlAlchemyAnnotationRepository | None = None + self._meetings_repo: SqlAlchemyMeetingRepository | None = None + self._segments_repo: SqlAlchemySegmentRepository | None = None + self._summaries_repo: SqlAlchemySummaryRepository | None = None + + # --- Constructors ------------------------------------------------- + + @classmethod + def from_settings(cls, settings: Settings) -> SqlAlchemyUnitOfWork: + """Create a unit of work from application settings. + + Builds an async engine and session factory using configured database + settings (URL, pool size, echo), then returns a new unit of work + instance bound to that factory. 
+ """ + + engine = create_async_engine(settings) + session_factory = get_async_session_factory(engine) + return cls(session_factory) + + @classmethod + def factory_from_settings(cls, settings: Settings) -> Callable[[], SqlAlchemyUnitOfWork]: + """Create a reusable factory that yields fresh UoW instances. + + The factory reuses a shared async session factory (and engine) while + returning a new `SqlAlchemyUnitOfWork` object each time. Useful when + callers need independent UoW instances for sequential operations + (e.g., retention cleanup) to avoid re-entrancy issues. + """ + + engine = create_async_engine(settings) + session_factory = get_async_session_factory(engine) + + def _factory() -> SqlAlchemyUnitOfWork: + return cls(session_factory) + + return _factory + + @property + def annotations(self) -> SqlAlchemyAnnotationRepository: + """Get annotations repository.""" + if self._annotations_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._annotations_repo + + @property + def meetings(self) -> SqlAlchemyMeetingRepository: + """Get meetings repository.""" + if self._meetings_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._meetings_repo + + @property + def segments(self) -> SqlAlchemySegmentRepository: + """Get segments repository.""" + if self._segments_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._segments_repo + + @property + def summaries(self) -> SqlAlchemySummaryRepository: + """Get summaries repository.""" + if self._summaries_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._summaries_repo + + async def __aenter__(self) -> Self: + """Enter the unit of work context. + + Creates session and caches repository instances. + + Returns: + Self for use in async with statement. + """ + self._session = self._session_factory() + self._annotations_repo = SqlAlchemyAnnotationRepository(self._session) + self._meetings_repo = SqlAlchemyMeetingRepository(self._session) + self._segments_repo = SqlAlchemySegmentRepository(self._session) + self._summaries_repo = SqlAlchemySummaryRepository(self._session) + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: object, + ) -> None: + """Exit the unit of work context. + + Rolls back on exception, otherwise does nothing (explicit commit required). + + Args: + exc_type: Exception type if raised. + exc_val: Exception value if raised. + exc_tb: Exception traceback if raised. 
+ """ + if self._session is None: + return + + if exc_type is not None: + await self.rollback() + + await self._session.close() + self._session = None + self._annotations_repo = None + self._meetings_repo = None + self._segments_repo = None + self._summaries_repo = None + + async def commit(self) -> None: + """Commit the current transaction.""" + if self._session is None: + raise RuntimeError("UnitOfWork not in context") + await self._session.commit() + + async def rollback(self) -> None: + """Rollback the current transaction.""" + if self._session is None: + raise RuntimeError("UnitOfWork not in context") + await self._session.rollback() +```` + +## File: tests/application/test_meeting_service.py +````python +"""Tests for MeetingService application service.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING +from unittest.mock import AsyncMock, MagicMock +from uuid import uuid4 + +import pytest + +from noteflow.application.services.meeting_service import MeetingService +from noteflow.domain.entities import Annotation, Meeting, Segment, Summary +from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState + +if TYPE_CHECKING: + from collections.abc import Sequence + + +class TestMeetingServiceCreation: + """Tests for meeting creation operations.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.rollback = AsyncMock() + uow.meetings = MagicMock() + uow.segments = MagicMock() + uow.summaries = MagicMock() + return uow + + async def test_create_meeting_success(self, mock_uow: MagicMock) -> None: + """Test successful meeting creation.""" + created_meeting = Meeting.create(title="Test Meeting") + mock_uow.meetings.create = AsyncMock(return_value=created_meeting) + + service = MeetingService(mock_uow) + result = await service.create_meeting(title="Test Meeting") + + assert result.title == "Test Meeting" + mock_uow.meetings.create.assert_called_once() + mock_uow.commit.assert_called_once() + + async def test_create_meeting_with_metadata(self, mock_uow: MagicMock) -> None: + """Test meeting creation with metadata.""" + metadata = {"project": "NoteFlow"} + created_meeting = Meeting.create(title="Test", metadata=metadata) + mock_uow.meetings.create = AsyncMock(return_value=created_meeting) + + service = MeetingService(mock_uow) + result = await service.create_meeting(title="Test", metadata=metadata) + + assert result.metadata == metadata + + +class TestMeetingServiceRetrieval: + """Tests for meeting retrieval operations.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.meetings = MagicMock() + uow.segments = MagicMock() + uow.summaries = MagicMock() + return uow + + async def test_get_meeting_found(self, mock_uow: MagicMock) -> None: + """Test retrieving existing meeting.""" + meeting_id = MeetingId(uuid4()) + expected_meeting = Meeting.create(title="Found") + mock_uow.meetings.get = AsyncMock(return_value=expected_meeting) + + service = MeetingService(mock_uow) + result = await service.get_meeting(meeting_id) + + assert result is not None + assert result.title == "Found" + + async def 
test_get_meeting_not_found(self, mock_uow: MagicMock) -> None: + """Test retrieving non-existent meeting.""" + meeting_id = MeetingId(uuid4()) + mock_uow.meetings.get = AsyncMock(return_value=None) + + service = MeetingService(mock_uow) + result = await service.get_meeting(meeting_id) + + assert result is None + + async def test_list_meetings(self, mock_uow: MagicMock) -> None: + """Test listing meetings with pagination.""" + meetings: Sequence[Meeting] = [ + Meeting.create(title="Meeting 1"), + Meeting.create(title="Meeting 2"), + ] + mock_uow.meetings.list_all = AsyncMock(return_value=(meetings, 10)) + + service = MeetingService(mock_uow) + result, total = await service.list_meetings(limit=2, offset=0) + + assert len(result) == 2 + assert total == 10 + mock_uow.meetings.list_all.assert_called_once_with( + states=None, limit=2, offset=0, sort_desc=True + ) + + +class TestMeetingServiceStateTransitions: + """Tests for meeting state transition operations.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.meetings = MagicMock() + return uow + + async def test_start_recording_success(self, mock_uow: MagicMock) -> None: + """Test starting recording on existing meeting.""" + meeting = Meeting.create(title="Test") + meeting_id = meeting.id + mock_uow.meetings.get = AsyncMock(return_value=meeting) + mock_uow.meetings.update = AsyncMock(return_value=meeting) + + service = MeetingService(mock_uow) + result = await service.start_recording(meeting_id) + + assert result is not None + assert result.state == MeetingState.RECORDING + mock_uow.commit.assert_called_once() + + async def test_start_recording_invalid_state_raises(self, mock_uow: MagicMock) -> None: + """Test start_recording propagates invalid transition errors.""" + meeting = Meeting.create(title="Test") + meeting.start_recording() + mock_uow.meetings.get = AsyncMock(return_value=meeting) + + service = MeetingService(mock_uow) + + with pytest.raises(ValueError, match="Cannot start recording"): + await service.start_recording(meeting.id) + + mock_uow.commit.assert_not_called() + + async def test_start_recording_not_found(self, mock_uow: MagicMock) -> None: + """Test starting recording on non-existent meeting.""" + meeting_id = MeetingId(uuid4()) + mock_uow.meetings.get = AsyncMock(return_value=None) + + service = MeetingService(mock_uow) + result = await service.start_recording(meeting_id) + + assert result is None + mock_uow.commit.assert_not_called() + + async def test_stop_meeting_success(self, mock_uow: MagicMock) -> None: + """Test stopping recording on meeting.""" + meeting = Meeting.create(title="Test") + meeting.start_recording() # Move to RECORDING state + meeting_id = meeting.id + mock_uow.meetings.get = AsyncMock(return_value=meeting) + mock_uow.meetings.update = AsyncMock(return_value=meeting) + + service = MeetingService(mock_uow) + result = await service.stop_meeting(meeting_id) + + assert result is not None + assert result.state == MeetingState.STOPPED + mock_uow.commit.assert_called_once() + + async def test_stop_meeting_invalid_state_raises(self, mock_uow: MagicMock) -> None: + """Test stop_meeting raises when not in RECORDING state.""" + meeting = Meeting.create(title="Test") + mock_uow.meetings.get = AsyncMock(return_value=meeting) + + service = MeetingService(mock_uow) + + with pytest.raises(ValueError, match="Cannot begin stopping"): + 
await service.stop_meeting(meeting.id) + + mock_uow.commit.assert_not_called() + + async def test_complete_meeting_success(self, mock_uow: MagicMock) -> None: + """Test completing a stopped meeting.""" + meeting = Meeting.create(title="Test") + meeting.start_recording() + meeting.begin_stopping() + meeting.stop_recording() # Move to STOPPED state (via STOPPING) + meeting_id = meeting.id + mock_uow.meetings.get = AsyncMock(return_value=meeting) + mock_uow.meetings.update = AsyncMock(return_value=meeting) + + service = MeetingService(mock_uow) + result = await service.complete_meeting(meeting_id) + + assert result is not None + assert result.state == MeetingState.COMPLETED + mock_uow.commit.assert_called_once() + + async def test_complete_meeting_invalid_state_raises(self, mock_uow: MagicMock) -> None: + """Test complete_meeting raises from invalid state.""" + meeting = Meeting.create(title="Test") + mock_uow.meetings.get = AsyncMock(return_value=meeting) + + service = MeetingService(mock_uow) + + with pytest.raises(ValueError, match="Cannot complete"): + await service.complete_meeting(meeting.id) + + mock_uow.commit.assert_not_called() + + +class TestMeetingServiceDeletion: + """Tests for meeting deletion operations.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.meetings = MagicMock() + return uow + + async def test_delete_meeting_success(self, mock_uow: MagicMock) -> None: + """Test successful meeting deletion.""" + meeting_id = MeetingId(uuid4()) + mock_meeting = Meeting.create(title="Test Meeting") + mock_uow.meetings.get = AsyncMock(return_value=mock_meeting) + mock_uow.meetings.delete = AsyncMock(return_value=True) + + service = MeetingService(mock_uow) + result = await service.delete_meeting(meeting_id) + + assert result is True + mock_uow.commit.assert_called_once() + + async def test_delete_meeting_not_found(self, mock_uow: MagicMock) -> None: + """Test deleting non-existent meeting returns False.""" + meeting_id = MeetingId(uuid4()) + mock_uow.meetings.get = AsyncMock(return_value=None) + mock_uow.meetings.delete = AsyncMock(return_value=False) + + service = MeetingService(mock_uow) + result = await service.delete_meeting(meeting_id) + + assert result is False + mock_uow.meetings.delete.assert_not_called() + mock_uow.commit.assert_not_called() + + async def test_delete_meeting_removes_filesystem_assets( + self, mock_uow: MagicMock, tmp_path: Path + ) -> None: + """Test deletion removes filesystem assets when directory provided.""" + meeting_id = MeetingId(uuid4()) + mock_meeting = Meeting.create(title="Test Meeting") + mock_uow.meetings.get = AsyncMock(return_value=mock_meeting) + mock_uow.meetings.delete = AsyncMock(return_value=True) + + # Create meeting directory with test files + meeting_dir = tmp_path / str(meeting_id) + meeting_dir.mkdir() + (meeting_dir / "audio.wav").touch() + (meeting_dir / "manifest.json").touch() + + service = MeetingService(mock_uow) + result = await service.delete_meeting(meeting_id, meetings_dir=tmp_path) + + assert result is True + assert not meeting_dir.exists() + + async def test_delete_meeting_handles_missing_assets( + self, mock_uow: MagicMock, tmp_path: Path + ) -> None: + """Test deletion succeeds even when assets directory doesn't exist.""" + meeting_id = MeetingId(uuid4()) + mock_meeting = Meeting.create(title="Test Meeting") + mock_uow.meetings.get = 
AsyncMock(return_value=mock_meeting) + mock_uow.meetings.delete = AsyncMock(return_value=True) + + # Don't create the meeting directory + service = MeetingService(mock_uow) + result = await service.delete_meeting(meeting_id, meetings_dir=tmp_path) + + assert result is True + mock_uow.commit.assert_called_once() + + async def test_delete_meeting_without_dir_only_deletes_db(self, mock_uow: MagicMock) -> None: + """Test deletion without meetings_dir only deletes database records.""" + meeting_id = MeetingId(uuid4()) + mock_meeting = Meeting.create(title="Test Meeting") + mock_uow.meetings.get = AsyncMock(return_value=mock_meeting) + mock_uow.meetings.delete = AsyncMock(return_value=True) + + service = MeetingService(mock_uow) + result = await service.delete_meeting(meeting_id) + + assert result is True + mock_uow.meetings.delete.assert_called_once_with(meeting_id) + mock_uow.commit.assert_called_once() + + +class TestMeetingServiceSegments: + """Tests for segment operations.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.segments = MagicMock() + return uow + + async def test_add_segment_success(self, mock_uow: MagicMock) -> None: + """Test adding a segment to meeting.""" + meeting_id = MeetingId(uuid4()) + segment = Segment( + segment_id=0, text="Hello", start_time=0.0, end_time=1.0, meeting_id=meeting_id + ) + mock_uow.segments.add = AsyncMock(return_value=segment) + + service = MeetingService(mock_uow) + result = await service.add_segment( + meeting_id=meeting_id, + segment_id=0, + text="Hello", + start_time=0.0, + end_time=1.0, + ) + + assert result.text == "Hello" + mock_uow.segments.add.assert_called_once() + mock_uow.commit.assert_called_once() + + async def test_get_segments(self, mock_uow: MagicMock) -> None: + """Test retrieving segments for meeting.""" + meeting_id = MeetingId(uuid4()) + segments: Sequence[Segment] = [ + Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0), + Segment(segment_id=1, text="Second", start_time=1.0, end_time=2.0), + ] + mock_uow.segments.get_by_meeting = AsyncMock(return_value=segments) + + service = MeetingService(mock_uow) + result = await service.get_segments(meeting_id) + + assert len(result) == 2 + mock_uow.segments.get_by_meeting.assert_called_once_with(meeting_id, include_words=True) + + async def test_add_segments_batch(self, mock_uow: MagicMock) -> None: + """Test batch adding segments commits once.""" + meeting_id = MeetingId(uuid4()) + segments = [ + Segment(segment_id=0, text="A", start_time=0.0, end_time=1.0), + Segment(segment_id=1, text="B", start_time=1.0, end_time=2.0), + ] + mock_uow.segments.add_batch = AsyncMock(return_value=segments) + + service = MeetingService(mock_uow) + result = await service.add_segments_batch(meeting_id=meeting_id, segments=segments) + + assert len(result) == 2 + mock_uow.segments.add_batch.assert_called_once_with(meeting_id, segments) + mock_uow.commit.assert_called_once() + + +class TestMeetingServiceSummaries: + """Tests for summary operations.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.summaries = MagicMock() + return uow + + async def test_save_summary_success(self, mock_uow: MagicMock) -> None: + """Test saving 
a meeting summary.""" + meeting_id = MeetingId(uuid4()) + summary = Summary( + meeting_id=meeting_id, + executive_summary="Test summary", + generated_at=datetime.now(UTC), + model_version="test-v1", + ) + mock_uow.summaries.save = AsyncMock(return_value=summary) + + service = MeetingService(mock_uow) + result = await service.save_summary( + meeting_id=meeting_id, + executive_summary="Test summary", + model_version="test-v1", + ) + + assert result.executive_summary == "Test summary" + mock_uow.summaries.save.assert_called_once() + mock_uow.commit.assert_called_once() + + async def test_get_summary_found(self, mock_uow: MagicMock) -> None: + """Test retrieving existing summary.""" + meeting_id = MeetingId(uuid4()) + summary = Summary(meeting_id=meeting_id, executive_summary="Found") + mock_uow.summaries.get_by_meeting = AsyncMock(return_value=summary) + + service = MeetingService(mock_uow) + result = await service.get_summary(meeting_id) + + assert result is not None + assert result.executive_summary == "Found" + + async def test_get_summary_not_found(self, mock_uow: MagicMock) -> None: + """Test retrieving non-existent summary.""" + meeting_id = MeetingId(uuid4()) + mock_uow.summaries.get_by_meeting = AsyncMock(return_value=None) + + service = MeetingService(mock_uow) + result = await service.get_summary(meeting_id) + + assert result is None + + +class TestMeetingServiceSearch: + """Tests for semantic search operations.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.segments = MagicMock() + return uow + + async def test_search_segments_delegates(self, mock_uow: MagicMock) -> None: + """Test search_segments delegates to repository.""" + meeting_id = MeetingId(uuid4()) + segment = Segment(segment_id=0, text="A", start_time=0.0, end_time=1.0) + mock_uow.segments.search_semantic = AsyncMock(return_value=[(segment, 0.9)]) + + service = MeetingService(mock_uow) + result = await service.search_segments(query_embedding=[0.1], meeting_id=meeting_id) + + assert len(result) == 1 + mock_uow.segments.search_semantic.assert_called_once_with( + query_embedding=[0.1], limit=10, meeting_id=meeting_id + ) + + +class TestMeetingServiceAnnotations: + """Tests for annotation operations.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.annotations = MagicMock() + return uow + + async def test_add_annotation_success(self, mock_uow: MagicMock) -> None: + """Test adding an annotation commits and returns saved entity.""" + meeting_id = MeetingId(uuid4()) + mock_uow.annotations.add = AsyncMock() + + service = MeetingService(mock_uow) + await service.add_annotation( + meeting_id=meeting_id, + annotation_type=AnnotationType.NOTE, + text="Note", + start_time=0.0, + end_time=1.0, + ) + + mock_uow.annotations.add.assert_called_once() + mock_uow.commit.assert_called_once() + + async def test_get_annotations_in_range(self, mock_uow: MagicMock) -> None: + """Test get_annotations_in_range delegates to repository.""" + meeting_id = MeetingId(uuid4()) + mock_uow.annotations.get_by_time_range = AsyncMock(return_value=[]) + + service = MeetingService(mock_uow) + await service.get_annotations_in_range(meeting_id, start_time=1.0, end_time=2.0) + + 
mock_uow.annotations.get_by_time_range.assert_called_once_with(meeting_id, 1.0, 2.0) + + async def test_update_annotation_not_found_raises(self, mock_uow: MagicMock) -> None: + """Test update_annotation propagates repository errors.""" + meeting_id = MeetingId(uuid4()) + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=meeting_id, + annotation_type=AnnotationType.NOTE, + text="Note", + start_time=0.0, + end_time=1.0, + ) + mock_uow.annotations.update = AsyncMock(side_effect=ValueError("Annotation not found")) + + service = MeetingService(mock_uow) + with pytest.raises(ValueError, match="Annotation not found"): + await service.update_annotation(annotation) + + mock_uow.commit.assert_not_called() + + async def test_delete_annotation_not_found(self, mock_uow: MagicMock) -> None: + """Test delete_annotation returns False when missing.""" + annotation_id = AnnotationId(uuid4()) + mock_uow.annotations.delete = AsyncMock(return_value=False) + + service = MeetingService(mock_uow) + result = await service.delete_annotation(annotation_id) + + assert result is False + mock_uow.commit.assert_not_called() + + +class TestMeetingServiceAdditionalBranches: + """Additional branch coverage for MeetingService.""" + + @pytest.fixture + def mock_uow(self) -> MagicMock: + """Create a mock UnitOfWork with all repos.""" + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.meetings = MagicMock() + uow.segments = MagicMock() + uow.summaries = MagicMock() + uow.annotations = MagicMock() + return uow + + async def test_stop_meeting_not_found(self, mock_uow: MagicMock) -> None: + """stop_meeting should return None when meeting is missing.""" + mock_uow.meetings.get = AsyncMock(return_value=None) + service = MeetingService(mock_uow) + + result = await service.stop_meeting(MeetingId(uuid4())) + + assert result is None + mock_uow.commit.assert_not_called() + + async def test_complete_meeting_not_found(self, mock_uow: MagicMock) -> None: + """complete_meeting should return None when meeting is missing.""" + mock_uow.meetings.get = AsyncMock(return_value=None) + service = MeetingService(mock_uow) + + result = await service.complete_meeting(MeetingId(uuid4())) + + assert result is None + mock_uow.commit.assert_not_called() + + async def test_get_annotation_delegates_repository(self, mock_uow: MagicMock) -> None: + """get_annotation should delegate to repository.""" + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=MeetingId(uuid4()), + annotation_type=AnnotationType.NOTE, + text="note", + start_time=0.0, + end_time=1.0, + ) + mock_uow.annotations.get = AsyncMock(return_value=annotation) + service = MeetingService(mock_uow) + + result = await service.get_annotation(annotation.id) + + assert result is annotation + mock_uow.annotations.get.assert_called_once_with(annotation.id) + + async def test_get_annotations_delegates_repository(self, mock_uow: MagicMock) -> None: + """get_annotations should delegate to repository.""" + meeting_id = MeetingId(uuid4()) + mock_uow.annotations.get_by_meeting = AsyncMock(return_value=[]) + service = MeetingService(mock_uow) + + await service.get_annotations(meeting_id) + + mock_uow.annotations.get_by_meeting.assert_called_once_with(meeting_id) + + async def test_delete_annotation_success_commits(self, mock_uow: MagicMock) -> None: + """delete_annotation should commit on success.""" + annotation_id = AnnotationId(uuid4()) + mock_uow.annotations.delete = 
AsyncMock(return_value=True) + service = MeetingService(mock_uow) + + result = await service.delete_annotation(annotation_id) + + assert result is True + mock_uow.commit.assert_called_once() +```` + +## File: tests/application/test_summarization_service.py +````python +"""Tests for summarization service.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from uuid import uuid4 + +import pytest + +from noteflow.application.services import ( + SummarizationMode, + SummarizationService, + SummarizationServiceSettings, +) +from noteflow.domain.entities import KeyPoint, Segment, Summary +from noteflow.domain.summarization import ( + CitationVerificationResult, + ProviderUnavailableError, + SummarizationRequest, + SummarizationResult, +) +from noteflow.domain.value_objects import MeetingId + + +def _segment(segment_id: int, text: str = "Test") -> Segment: + """Create a test segment.""" + return Segment( + segment_id=segment_id, + text=text, + start_time=segment_id * 5.0, + end_time=(segment_id + 1) * 5.0, + ) + + +class MockProvider: + """Mock summarizer provider for testing.""" + + def __init__( + self, + name: str = "mock", + available: bool = True, + requires_consent: bool = False, + ) -> None: + self._name = name + self._available = available + self._requires_consent = requires_consent + self.call_count = 0 + + @property + def provider_name(self) -> str: + return self._name + + @property + def is_available(self) -> bool: + return self._available + + @property + def requires_cloud_consent(self) -> bool: + return self._requires_consent + + async def summarize(self, request: SummarizationRequest) -> SummarizationResult: + self.call_count += 1 + summary = Summary( + meeting_id=request.meeting_id, + executive_summary=f"Summary from {self._name}", + key_points=[KeyPoint(text=f"Point from {self._name}", segment_ids=[0])], + generated_at=datetime.now(UTC), + ) + return SummarizationResult( + summary=summary, + model_name=f"{self._name}-model", + provider_name=self._name, + ) + + +class MockVerifier: + """Mock citation verifier for testing.""" + + def __init__(self, is_valid: bool = True) -> None: + self._is_valid = is_valid + self.verify_call_count = 0 + self.filter_call_count = 0 + + def verify_citations( + self, summary: Summary, segments: list[Segment] + ) -> CitationVerificationResult: + self.verify_call_count += 1 + if self._is_valid: + return CitationVerificationResult(is_valid=True) + return CitationVerificationResult( + is_valid=False, + invalid_key_point_indices=(0,), + missing_segment_ids=(99,), + ) + + def filter_invalid_citations(self, summary: Summary, segments: list[Segment]) -> Summary: + self.filter_call_count += 1 + # Return summary with empty segment_ids for key points + return Summary( + meeting_id=summary.meeting_id, + executive_summary=summary.executive_summary, + key_points=[KeyPoint(text=kp.text, segment_ids=[]) for kp in summary.key_points], + action_items=[], + generated_at=summary.generated_at, + ) + + +class TestSummarizationServiceConfiguration: + """Tests for SummarizationService configuration.""" + + def test_register_provider(self) -> None: + """Provider should be registered for mode.""" + service = SummarizationService() + provider = MockProvider() + + service.register_provider(SummarizationMode.LOCAL, provider) + + assert SummarizationMode.LOCAL in service.providers + + def test_set_verifier(self) -> None: + """Verifier should be set.""" + service = SummarizationService() + verifier = MockVerifier() + + service.set_verifier(verifier) + + 
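# Identity check: the service should keep the exact verifier instance that was set. + 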
assert service.verifier is verifier + + def test_get_available_modes_with_local(self) -> None: + """Available modes should include local when provider is available.""" + service = SummarizationService() + service.register_provider(SummarizationMode.LOCAL, MockProvider()) + + available = service.get_available_modes() + + assert SummarizationMode.LOCAL in available + + def test_get_available_modes_excludes_unavailable(self) -> None: + """Unavailable providers should not be in available modes.""" + service = SummarizationService() + service.register_provider(SummarizationMode.LOCAL, MockProvider(available=False)) + + available = service.get_available_modes() + + assert SummarizationMode.LOCAL not in available + + def test_cloud_requires_consent(self) -> None: + """Cloud mode should require consent to be available.""" + service = SummarizationService() + service.register_provider( + SummarizationMode.CLOUD, + MockProvider(name="cloud", requires_consent=True), + ) + + available_without_consent = service.get_available_modes() + service.grant_cloud_consent() + available_with_consent = service.get_available_modes() + + assert SummarizationMode.CLOUD not in available_without_consent + assert SummarizationMode.CLOUD in available_with_consent + + def test_revoke_cloud_consent(self) -> None: + """Revoking consent should remove cloud from available modes.""" + service = SummarizationService() + service.register_provider( + SummarizationMode.CLOUD, + MockProvider(name="cloud", requires_consent=True), + ) + service.grant_cloud_consent() + + service.revoke_cloud_consent() + available = service.get_available_modes() + + assert SummarizationMode.CLOUD not in available + + +class TestSummarizationServiceSummarize: + """Tests for SummarizationService.summarize method.""" + + @pytest.fixture + def meeting_id(self) -> MeetingId: + """Create test meeting ID.""" + return MeetingId(uuid4()) + + @pytest.mark.asyncio + async def test_summarize_uses_default_mode(self, meeting_id: MeetingId) -> None: + """Summarize should use default mode when not specified.""" + provider = MockProvider() + service = SummarizationService( + settings=SummarizationServiceSettings(default_mode=SummarizationMode.LOCAL) + ) + service.register_provider(SummarizationMode.LOCAL, provider) + + segments = [_segment(0)] + result = await service.summarize(meeting_id, segments) + + assert result.provider_used == "mock" + assert provider.call_count == 1 + + @pytest.mark.asyncio + async def test_summarize_uses_specified_mode(self, meeting_id: MeetingId) -> None: + """Summarize should use specified mode.""" + local_provider = MockProvider(name="local") + mock_provider = MockProvider(name="mock") + service = SummarizationService() + service.register_provider(SummarizationMode.LOCAL, local_provider) + service.register_provider(SummarizationMode.MOCK, mock_provider) + + segments = [_segment(0)] + result = await service.summarize(meeting_id, segments, mode=SummarizationMode.MOCK) + + assert result.provider_used == "mock" + assert mock_provider.call_count == 1 + assert local_provider.call_count == 0 + + @pytest.mark.asyncio + async def test_summarize_falls_back_on_unavailable(self, meeting_id: MeetingId) -> None: + """Should fall back to available provider when primary unavailable.""" + unavailable = MockProvider(name="cloud", available=False) + fallback = MockProvider(name="local") + service = SummarizationService( + settings=SummarizationServiceSettings( + fallback_to_local=True, + cloud_consent_granted=True, + ) + ) + 
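# Register the unavailable cloud provider alongside a healthy local fallback. + 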
service.register_provider(SummarizationMode.CLOUD, unavailable) + service.register_provider(SummarizationMode.LOCAL, fallback) + + segments = [_segment(0)] + result = await service.summarize(meeting_id, segments, mode=SummarizationMode.CLOUD) + + assert result.provider_used == "local" + assert result.fallback_used is True + + @pytest.mark.asyncio + async def test_summarize_raises_when_no_fallback(self, meeting_id: MeetingId) -> None: + """Should raise error when no fallback available.""" + unavailable = MockProvider(name="local", available=False) + service = SummarizationService( + settings=SummarizationServiceSettings(fallback_to_local=False) + ) + service.register_provider(SummarizationMode.LOCAL, unavailable) + + segments = [_segment(0)] + with pytest.raises(ProviderUnavailableError): + await service.summarize(meeting_id, segments, mode=SummarizationMode.LOCAL) + + @pytest.mark.asyncio + async def test_summarize_verifies_citations(self, meeting_id: MeetingId) -> None: + """Citations should be verified when enabled.""" + provider = MockProvider() + verifier = MockVerifier(is_valid=True) + service = SummarizationService(settings=SummarizationServiceSettings(verify_citations=True)) + service.register_provider(SummarizationMode.LOCAL, provider) + service.set_verifier(verifier) + + segments = [_segment(0)] + result = await service.summarize(meeting_id, segments) + + assert verifier.verify_call_count == 1 + assert result.verification is not None + assert result.verification.is_valid is True + + @pytest.mark.asyncio + async def test_summarize_filters_invalid_citations(self, meeting_id: MeetingId) -> None: + """Invalid citations should be filtered when enabled.""" + provider = MockProvider() + verifier = MockVerifier(is_valid=False) + service = SummarizationService( + settings=SummarizationServiceSettings( + verify_citations=True, + filter_invalid_citations=True, + ) + ) + service.register_provider(SummarizationMode.LOCAL, provider) + service.set_verifier(verifier) + + segments = [_segment(0)] + result = await service.summarize(meeting_id, segments) + + assert verifier.filter_call_count == 1 + assert result.filtered_summary is not None + assert result.has_invalid_citations is True + + @pytest.mark.asyncio + async def test_summarize_passes_max_limits(self, meeting_id: MeetingId) -> None: + """Max limits should be passed to provider.""" + captured_request: SummarizationRequest | None = None + + class CapturingProvider(MockProvider): + async def summarize(self, request: SummarizationRequest) -> SummarizationResult: + nonlocal captured_request + captured_request = request + return await super().summarize(request) + + provider = CapturingProvider() + service = SummarizationService() + service.register_provider(SummarizationMode.LOCAL, provider) + + segments = [_segment(0)] + await service.summarize(meeting_id, segments, max_key_points=3, max_action_items=5) + + assert captured_request is not None + assert captured_request.max_key_points == 3 + assert captured_request.max_action_items == 5 + + @pytest.mark.asyncio + async def test_summarize_requires_cloud_consent(self, meeting_id: MeetingId) -> None: + """Cloud mode should require consent.""" + cloud = MockProvider(name="cloud", requires_consent=True) + fallback = MockProvider(name="local") + service = SummarizationService( + settings=SummarizationServiceSettings( + cloud_consent_granted=False, fallback_to_local=True + ) + ) + service.register_provider(SummarizationMode.CLOUD, cloud) + service.register_provider(SummarizationMode.LOCAL, fallback) + + 
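# With consent withheld, the cloud provider must never be invoked. + 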
segments = [_segment(0)] + result = await service.summarize(meeting_id, segments, mode=SummarizationMode.CLOUD) + + assert result.provider_used == "local" + assert result.fallback_used is True + assert cloud.call_count == 0 + + @pytest.mark.asyncio + async def test_summarize_calls_persist_callback(self, meeting_id: MeetingId) -> None: + """Persist callback should be called with final summary.""" + persisted: list[Summary] = [] + + async def mock_persist(summary: Summary) -> None: + persisted.append(summary) + + provider = MockProvider() + service = SummarizationService(on_persist=mock_persist) + service.register_provider(SummarizationMode.LOCAL, provider) + + segments = [_segment(0)] + await service.summarize(meeting_id, segments) + + assert len(persisted) == 1 + assert persisted[0].meeting_id == meeting_id + + @pytest.mark.asyncio + async def test_summarize_persist_callback_receives_filtered_summary( + self, meeting_id: MeetingId + ) -> None: + """Persist callback should receive filtered summary when available.""" + persisted: list[Summary] = [] + + async def mock_persist(summary: Summary) -> None: + persisted.append(summary) + + provider = MockProvider() + verifier = MockVerifier(is_valid=False) + service = SummarizationService( + settings=SummarizationServiceSettings( + verify_citations=True, + filter_invalid_citations=True, + ), + on_persist=mock_persist, + ) + service.register_provider(SummarizationMode.LOCAL, provider) + service.set_verifier(verifier) + + segments = [_segment(0)] + result = await service.summarize(meeting_id, segments) + + assert len(persisted) == 1 + # Should persist the filtered summary, not original + assert persisted[0] is result.filtered_summary + + +class TestSummarizationServiceResult: + """Tests for SummarizationServiceResult.""" + + def test_summary_returns_filtered_when_available(self) -> None: + """summary property should return filtered_summary if available.""" + from noteflow.application.services import SummarizationServiceResult + + original = Summary( + meeting_id=MeetingId(uuid4()), + executive_summary="Original", + key_points=[KeyPoint(text="Point", segment_ids=[99])], + ) + filtered = Summary( + meeting_id=original.meeting_id, + executive_summary="Original", + key_points=[KeyPoint(text="Point", segment_ids=[])], + ) + result = SummarizationServiceResult( + result=SummarizationResult( + summary=original, + model_name="test", + provider_name="test", + ), + filtered_summary=filtered, + ) + + assert result.summary is filtered + + def test_summary_returns_original_when_no_filter(self) -> None: + """summary property should return original when no filter applied.""" + from noteflow.application.services import SummarizationServiceResult + + original = Summary( + meeting_id=MeetingId(uuid4()), + executive_summary="Original", + key_points=[], + ) + result = SummarizationServiceResult( + result=SummarizationResult( + summary=original, + model_name="test", + provider_name="test", + ), + ) + + assert result.summary is original + + def test_has_invalid_citations_true(self) -> None: + """has_invalid_citations should be True when verification fails.""" + from noteflow.application.services import SummarizationServiceResult + + result = SummarizationServiceResult( + result=SummarizationResult( + summary=Summary( + meeting_id=MeetingId(uuid4()), + executive_summary="Test", + key_points=[], + ), + model_name="test", + provider_name="test", + ), + verification=CitationVerificationResult(is_valid=False, invalid_key_point_indices=(0,)), + ) + + assert 
result.has_invalid_citations is True + + def test_has_invalid_citations_false_when_valid(self) -> None: + """has_invalid_citations should be False when verification passes.""" + from noteflow.application.services import SummarizationServiceResult + + result = SummarizationServiceResult( + result=SummarizationResult( + summary=Summary( + meeting_id=MeetingId(uuid4()), + executive_summary="Test", + key_points=[], + ), + model_name="test", + provider_name="test", + ), + verification=CitationVerificationResult(is_valid=True), + ) + + assert result.has_invalid_citations is False + + +class TestSummarizationServiceAdditionalBranches: + """Additional branch and utility coverage.""" + + @pytest.fixture + def meeting_id(self) -> MeetingId: + """Create test meeting ID.""" + return MeetingId(uuid4()) + + def test_is_mode_available_false_when_not_registered(self) -> None: + """is_mode_available should respect registered providers.""" + service = SummarizationService() + assert service.is_mode_available(SummarizationMode.LOCAL) is False + + @pytest.mark.asyncio + async def test_cloud_without_consent_and_no_fallback_raises( + self, meeting_id: MeetingId + ) -> None: + """Requesting cloud without consent should raise when fallback disabled.""" + provider = MockProvider(name="cloud", available=True) + service = SummarizationService( + providers={SummarizationMode.CLOUD: provider}, + settings=SummarizationServiceSettings( + default_mode=SummarizationMode.CLOUD, + cloud_consent_granted=False, + fallback_to_local=False, + ), + ) + + with pytest.raises(ProviderUnavailableError): + await service.summarize(meeting_id, [_segment(0)], mode=SummarizationMode.CLOUD) + + @pytest.mark.asyncio + async def test_no_fallback_provider_available_raises(self, meeting_id: MeetingId) -> None: + """When no fallback provider exists, provider selection should fail.""" + unavailable = MockProvider(name="cloud", available=False) + service = SummarizationService( + providers={SummarizationMode.CLOUD: unavailable}, + settings=SummarizationServiceSettings(fallback_to_local=True), + ) + + with pytest.raises(ProviderUnavailableError): + await service.summarize(meeting_id, [_segment(0)], mode=SummarizationMode.CLOUD) + + def test_filter_citations_returns_summary_when_no_verifier(self) -> None: + """_filter_citations should return original summary when verifier is absent.""" + summary = Summary( + meeting_id=MeetingId(uuid4()), + executive_summary="Exec", + generated_at=datetime.now(UTC), + ) + service = SummarizationService() + + result = service._filter_citations(summary, []) + + assert result is summary + + def test_set_default_mode_updates_settings(self) -> None: + """set_default_mode should update default mode.""" + service = SummarizationService() + service.set_default_mode(SummarizationMode.MOCK) + assert service.settings.default_mode == SummarizationMode.MOCK + + def test_set_persist_callback_updates_callback(self) -> None: + """set_persist_callback should update on_persist field.""" + + async def callback(summary: Summary) -> None: + pass + + service = SummarizationService() + assert service.on_persist is None + + service.set_persist_callback(callback) + assert service.on_persist is callback + + service.set_persist_callback(None) + assert service.on_persist is None +```` + +## File: tests/application/test_trigger_service.py +````python +"""Tests for TriggerService application logic.""" + +from __future__ import annotations + +import time +from dataclasses import dataclass + +import pytest + +from 
noteflow.application.services.trigger_service import ( + TriggerService, + TriggerServiceSettings, +) +from noteflow.domain.triggers import TriggerAction, TriggerSignal, TriggerSource + + +@dataclass +class FakeProvider: + """Simple signal provider for testing.""" + + signal: TriggerSignal | None + enabled: bool = True + calls: int = 0 + + @property + def source(self) -> TriggerSource: + return TriggerSource.AUDIO_ACTIVITY + + @property + def max_weight(self) -> float: + return 1.0 + + def is_enabled(self) -> bool: + return self.enabled + + def get_signal(self) -> TriggerSignal | None: + self.calls += 1 + return self.signal + + +def _settings( + *, + enabled: bool = True, + auto_start: bool = False, + rate_limit_seconds: int = 60, + snooze_seconds: int = 30, + threshold_ignore: float = 0.2, + threshold_auto: float = 0.8, +) -> TriggerServiceSettings: + return TriggerServiceSettings( + enabled=enabled, + auto_start_enabled=auto_start, + rate_limit_seconds=rate_limit_seconds, + snooze_seconds=snooze_seconds, + threshold_ignore=threshold_ignore, + threshold_auto_start=threshold_auto, + ) + + +def test_trigger_service_disabled_skips_providers() -> None: + """Disabled trigger service should ignore without evaluating providers.""" + provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5)) + service = TriggerService([provider], settings=_settings(enabled=False)) + + decision = service.evaluate() + + assert decision.action == TriggerAction.IGNORE + assert decision.confidence == 0.0 + assert decision.signals == () + assert provider.calls == 0 + + +def test_trigger_service_snooze_ignores_signals(monkeypatch: pytest.MonkeyPatch) -> None: + """Snoozed trigger service ignores signals until snooze expires.""" + provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5)) + service = TriggerService([provider], settings=_settings()) + + monkeypatch.setattr(time, "monotonic", lambda: 100.0) + service.snooze(seconds=20) + + monkeypatch.setattr(time, "monotonic", lambda: 110.0) + decision = service.evaluate() + assert decision.action == TriggerAction.IGNORE + + monkeypatch.setattr(time, "monotonic", lambda: 130.0) + decision = service.evaluate() + assert decision.action == TriggerAction.NOTIFY + + +def test_trigger_service_rate_limit(monkeypatch: pytest.MonkeyPatch) -> None: + """TriggerService enforces rate limit between prompts.""" + provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5)) + service = TriggerService([provider], settings=_settings(rate_limit_seconds=60)) + + monkeypatch.setattr(time, "monotonic", lambda: 100.0) + first = service.evaluate() + assert first.action == TriggerAction.NOTIFY + + monkeypatch.setattr(time, "monotonic", lambda: 120.0) + second = service.evaluate() + assert second.action == TriggerAction.IGNORE + + monkeypatch.setattr(time, "monotonic", lambda: 200.0) + third = service.evaluate() + assert third.action == TriggerAction.NOTIFY + + +def test_trigger_service_auto_start(monkeypatch: pytest.MonkeyPatch) -> None: + """Auto-start fires when confidence passes threshold and auto-start is enabled.""" + provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9)) + service = TriggerService([provider], settings=_settings(auto_start=True, threshold_auto=0.8)) + + monkeypatch.setattr(time, "monotonic", lambda: 100.0) + decision = service.evaluate() + + assert decision.action == TriggerAction.AUTO_START + + +def 
test_trigger_service_auto_start_disabled_notifies(monkeypatch: pytest.MonkeyPatch) -> None: + """High confidence should still notify when auto-start is disabled.""" + provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9)) + service = TriggerService([provider], settings=_settings(auto_start=False, threshold_auto=0.8)) + + monkeypatch.setattr(time, "monotonic", lambda: 100.0) + decision = service.evaluate() + + assert decision.action == TriggerAction.NOTIFY + + +def test_trigger_service_below_ignore_threshold(monkeypatch: pytest.MonkeyPatch) -> None: + """Signals below ignore threshold should be ignored.""" + provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.1)) + service = TriggerService([provider], settings=_settings(threshold_ignore=0.2)) + + monkeypatch.setattr(time, "monotonic", lambda: 100.0) + decision = service.evaluate() + + assert decision.action == TriggerAction.IGNORE + + +def test_trigger_service_threshold_validation() -> None: + """Invalid threshold ordering should raise.""" + with pytest.raises(ValueError, match="threshold_auto_start"): + TriggerServiceSettings( + enabled=True, + auto_start_enabled=False, + rate_limit_seconds=10, + snooze_seconds=5, + threshold_ignore=0.9, + threshold_auto_start=0.2, + ) + + +def test_trigger_service_skips_disabled_providers() -> None: + """Disabled providers should be skipped when evaluating.""" + enabled_signal = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.3)) + disabled_signal = FakeProvider( + signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.7), enabled=False + ) + service = TriggerService([enabled_signal, disabled_signal], settings=_settings()) + + decision = service.evaluate() + + assert decision.confidence == pytest.approx(0.3) + assert enabled_signal.calls == 1 + assert disabled_signal.calls == 0 + + +def test_trigger_service_snooze_state_properties(monkeypatch: pytest.MonkeyPatch) -> None: + """is_snoozed and remaining seconds should reflect snooze window.""" + service = TriggerService([], settings=_settings()) + monkeypatch.setattr(time, "monotonic", lambda: 50.0) + service.snooze(seconds=10) + + monkeypatch.setattr(time, "monotonic", lambda: 55.0) + assert service.is_snoozed is True + assert service.snooze_remaining_seconds == pytest.approx(5.0) + + service.clear_snooze() + assert service.is_snoozed is False + assert service.snooze_remaining_seconds == 0.0 + + +def test_trigger_service_rate_limit_with_existing_prompt(monkeypatch: pytest.MonkeyPatch) -> None: + """Existing prompt time inside rate limit should short-circuit to IGNORE.""" + provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9)) + service = TriggerService([provider], settings=_settings(rate_limit_seconds=30)) + + monkeypatch.setattr(time, "monotonic", lambda: 100.0) + service._last_prompt = 90.0 # Pretend we prompted 10s ago + decision = service.evaluate() + + assert decision.action == TriggerAction.IGNORE + assert service.is_enabled is True + + +def test_trigger_service_enable_toggles() -> None: + """set_enabled and set_auto_start should update settings.""" + service = TriggerService([], settings=_settings(enabled=True, auto_start=False)) + + service.set_enabled(False) + assert service.is_enabled is False + + service.set_auto_start(True) + assert service._settings.auto_start_enabled is True +```` + +## File: tests/client/test_async_mixin.py +````python +"""Tests for AsyncOperationMixin.""" + +from __future__ import annotations + 
+import asyncio +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from noteflow.client.components._async_mixin import AsyncOperationMixin + + +class ConcreteComponent(AsyncOperationMixin[str]): + """Concrete implementation for testing.""" + + def __init__(self, page: MagicMock | None = None) -> None: + self._page = page + + +class TestAsyncOperationMixin: + """Tests for AsyncOperationMixin.""" + + @pytest.fixture + def mock_page(self) -> MagicMock: + """Create mock Flet page.""" + page = MagicMock() + + def _run_task(fn): + try: + loop = asyncio.get_running_loop() + return loop.create_task(fn()) + except RuntimeError: + # No running loop (sync tests); run immediately + return asyncio.run(fn()) + + page.run_task = MagicMock(side_effect=_run_task) + return page + + @pytest.fixture + def component(self, mock_page: MagicMock) -> ConcreteComponent: + """Create component with mock page.""" + return ConcreteComponent(page=mock_page) + + @pytest.mark.asyncio + async def test_run_async_operation_success_calls_callbacks( + self, component: ConcreteComponent + ) -> None: + """Successful operation calls on_success and set_loading.""" + operation = AsyncMock(return_value="result") + on_success = MagicMock() + on_error = MagicMock() + set_loading = MagicMock() + + result = await component.run_async_operation( + operation=operation, + on_success=on_success, + on_error=on_error, + set_loading=set_loading, + ) + + await asyncio.sleep(0) + + assert result == "result" + operation.assert_awaited_once() + on_success.assert_called_once_with("result") + on_error.assert_not_called() + # Loading: True then False + assert set_loading.call_count == 2 + set_loading.assert_any_call(True) + set_loading.assert_any_call(False) + + @pytest.mark.asyncio + async def test_run_async_operation_error_calls_on_error( + self, component: ConcreteComponent + ) -> None: + """Failed operation calls on_error and returns None.""" + operation = AsyncMock(side_effect=ValueError("test error")) + on_success = MagicMock() + on_error = MagicMock() + set_loading = MagicMock() + + result = await component.run_async_operation( + operation=operation, + on_success=on_success, + on_error=on_error, + set_loading=set_loading, + ) + + await asyncio.sleep(0) + + assert result is None + on_success.assert_not_called() + on_error.assert_called_once_with("test error") + # Loading: True then False (finally block) + assert set_loading.call_count == 2 + + @pytest.mark.asyncio + async def test_run_async_operation_always_clears_loading( + self, component: ConcreteComponent + ) -> None: + """Loading state always cleared in finally block.""" + operation = AsyncMock(side_effect=RuntimeError("boom")) + set_loading = MagicMock() + + await component.run_async_operation( + operation=operation, + on_success=MagicMock(), + on_error=MagicMock(), + set_loading=set_loading, + ) + + await asyncio.sleep(0) + + # Final call should be set_loading(False) + assert set_loading.call_args_list[-1][0][0] is False + + def test_dispatch_ui_no_page_is_noop(self) -> None: + """Dispatch with no page does nothing.""" + component = ConcreteComponent(page=None) + callback = MagicMock() + + # Should not raise + component._dispatch_ui(callback) + + callback.assert_not_called() + + def test_dispatch_ui_with_page_calls_run_task( + self, component: ConcreteComponent, mock_page: MagicMock + ) -> None: + """Dispatch with page calls page.run_task.""" + callback = MagicMock() + + component._dispatch_ui(callback) + + mock_page.run_task.assert_called_once() + 
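# In sync tests the stubbed run_task falls back to asyncio.run, so the callback has already executed. + 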
callback.assert_called_once() +```` + +## File: tests/client/test_summary_panel.py +````python +"""Tests for SummaryPanelComponent.""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass, field +from unittest.mock import Mock +from uuid import uuid4 + +import flet as ft +import pytest + +from noteflow.client.components.summary_panel import ( + PRIORITY_COLORS, + PRIORITY_LABELS, + SummaryPanelComponent, +) +from noteflow.domain.entities import ActionItem, KeyPoint, Summary +from noteflow.domain.value_objects import MeetingId + + +@dataclass +class MockAppState: + """Minimal mock AppState for testing.""" + + transcript_segments: list = field(default_factory=list) + current_meeting: Mock | None = None + current_summary: Summary | None = None + summary_loading: bool = False + summary_error: str | None = None + _page: Mock | None = None + + def request_update(self) -> None: + """No-op for tests.""" + + def run_on_ui_thread(self, callback: Callable[[], None]) -> None: + """Execute callback immediately for tests.""" + if callable(callback): + callback() + + +def _create_mock_state() -> MockAppState: + """Create mock AppState with meeting.""" + state = MockAppState() + state.current_meeting = Mock() + state.current_meeting.id = str(uuid4()) + return state + + +def _create_summary( + key_points: list[KeyPoint] | None = None, + action_items: list[ActionItem] | None = None, +) -> Summary: + """Create test Summary.""" + return Summary( + meeting_id=MeetingId(uuid4()), + executive_summary="Test executive summary.", + key_points=key_points or [], + action_items=action_items or [], + ) + + +class TestSummaryPanelBuild: + """Tests for SummaryPanelComponent.build().""" + + def test_build_returns_container(self) -> None: + """build() should return ft.Container.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + result = panel.build() + + assert isinstance(result, ft.Container) + + def test_build_initially_hidden(self) -> None: + """Panel should be hidden by default.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + container = panel.build() + + assert container.visible is False + + def test_build_creates_ui_elements(self) -> None: + """build() should create all UI elements.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + panel.build() + + assert panel._summary_text is not None + assert panel._key_points_list is not None + assert panel._action_items_list is not None + assert panel._generate_btn is not None + assert panel._loading_indicator is not None + assert panel._error_text is not None + + +class TestSummaryPanelVisibility: + """Tests for visibility control.""" + + def test_set_visible_shows_panel(self) -> None: + """set_visible(True) should show panel.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + + panel.set_visible(True) + + assert panel._container is not None + assert panel._container.visible is True + + def test_set_visible_hides_panel(self) -> None: + """set_visible(False) should hide panel.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + panel.set_visible(True) + + panel.set_visible(False) + + assert panel._container is not None + assert panel._container.visible is False + + +class TestSummaryPanelEnabled: + """Tests for enabled state control.""" + + def test_set_enabled_enables_button(self) 
-> None: + """set_enabled(True) should enable generate button.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + + panel.set_enabled(True) + + assert panel._generate_btn is not None + assert panel._generate_btn.disabled is False + + def test_set_enabled_disables_button(self) -> None: + """set_enabled(False) should disable generate button.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + panel.set_enabled(True) + + panel.set_enabled(False) + + assert panel._generate_btn is not None + assert panel._generate_btn.disabled is True + + +class TestSummaryPanelRender: + """Tests for rendering summary content.""" + + def test_render_summary_shows_executive_summary(self) -> None: + """_render_summary should display executive summary text.""" + state = _create_mock_state() + state.current_summary = _create_summary() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + + panel._render_summary() + + assert panel._summary_text is not None + assert panel._summary_text.value == "Test executive summary." + + def test_render_summary_populates_key_points(self) -> None: + """_render_summary should populate key points list.""" + state = _create_mock_state() + state.current_summary = _create_summary( + key_points=[ + KeyPoint(text="Point 1", segment_ids=[0]), + KeyPoint(text="Point 2", segment_ids=[1]), + ] + ) + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + + panel._render_summary() + + assert panel._key_points_list is not None + assert len(panel._key_points_list.controls) == 2 + + def test_render_summary_populates_action_items(self) -> None: + """_render_summary should populate action items list.""" + state = _create_mock_state() + state.current_summary = _create_summary( + action_items=[ + ActionItem(text="Action 1", segment_ids=[0], priority=1), + ActionItem(text="Action 2", segment_ids=[1], priority=2, assignee="Alice"), + ] + ) + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + + panel._render_summary() + + assert panel._action_items_list is not None + assert len(panel._action_items_list.controls) == 2 + + +class TestCitationChips: + """Tests for citation chip functionality.""" + + def test_create_citation_chip_returns_container(self) -> None: + """_create_citation_chip should return Container.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + chip = panel._create_citation_chip(5) + + assert isinstance(chip, ft.Container) + + def test_citation_chip_has_correct_label(self) -> None: + """Citation chip should display [#N] format.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + chip = panel._create_citation_chip(42) + text = chip.content + + assert isinstance(text, ft.Text) + assert text.value == "[#42]" + + def test_citation_chip_click_calls_callback(self) -> None: + """Clicking citation chip should call on_citation_click.""" + clicked_ids: list[int] = [] + state = _create_mock_state() + panel = SummaryPanelComponent( + state, + get_service=lambda: None, + on_citation_click=lambda sid: clicked_ids.append(sid), + ) + + panel._handle_citation_click(7) + + assert clicked_ids == [7] + + def test_citation_click_no_callback_is_noop(self) -> None: + """Citation click with no callback should not raise.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, 
get_service=lambda: None, on_citation_click=None) + + panel._handle_citation_click(5) # Should not raise + + +class TestPriorityBadge: + """Tests for priority badge functionality.""" + + @pytest.mark.parametrize( + ("priority", "expected_label"), + [ + (0, "—"), + (1, "Low"), + (2, "Med"), + (3, "High"), + ], + ) + def test_priority_badge_labels(self, priority: int, expected_label: str) -> None: + """Priority badge should show correct label.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + badge = panel._create_priority_badge(priority) + text = badge.content + + assert isinstance(text, ft.Text) + assert text.value == expected_label + + @pytest.mark.parametrize( + ("priority", "expected_color"), + [ + (0, ft.Colors.GREY_400), + (1, ft.Colors.BLUE_400), + (2, ft.Colors.ORANGE_400), + (3, ft.Colors.RED_400), + ], + ) + def test_priority_badge_colors(self, priority: int, expected_color: str) -> None: + """Priority badge should have correct background color.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + badge = panel._create_priority_badge(priority) + + assert badge.bgcolor == expected_color + + +class TestLoadingAndError: + """Tests for loading and error states.""" + + def test_update_loading_state_shows_indicator(self) -> None: + """Loading indicator should be visible when loading.""" + state = _create_mock_state() + state.summary_loading = True + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + + panel._update_loading_state() + + assert panel._loading_indicator is not None + assert panel._generate_btn is not None + assert panel._loading_indicator.visible is True + assert panel._generate_btn.disabled is True + + def test_update_loading_state_hides_indicator(self) -> None: + """Loading indicator should be hidden when not loading.""" + state = _create_mock_state() + state.summary_loading = False + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + assert panel._loading_indicator is not None + panel._loading_indicator.visible = True + + panel._update_loading_state() + + assert not panel._loading_indicator.visible + + def test_show_error_displays_message(self) -> None: + """_show_error should display error message.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + + panel._show_error("Test error message") + + assert panel._error_text is not None + assert panel._error_text.value == "Test error message" + assert panel._error_text.visible is True + + def test_clear_error_hides_message(self) -> None: + """_clear_error should hide error message.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + panel._show_error("Error") + + panel._clear_error() + + assert panel._error_text is not None + assert panel._error_text.value == "" + assert panel._error_text.visible is False + + +class TestPriorityConstants: + """Tests for priority constant values.""" + + def test_priority_colors_has_all_levels(self) -> None: + """PRIORITY_COLORS should have entries for all priority levels.""" + assert 0 in PRIORITY_COLORS + assert 1 in PRIORITY_COLORS + assert 2 in PRIORITY_COLORS + assert 3 in PRIORITY_COLORS + + def test_priority_labels_has_all_levels(self) -> None: + """PRIORITY_LABELS should have entries for all priority levels.""" + assert 0 in PRIORITY_LABELS + assert 1 in PRIORITY_LABELS + assert 2 in PRIORITY_LABELS 
+ assert 3 in PRIORITY_LABELS + + +class TestUncitedDraftsToggle: + """Tests for uncited drafts toggle functionality.""" + + def test_build_creates_toggle_ui(self) -> None: + """build() should create uncited toggle and count text.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + panel.build() + + assert panel._uncited_toggle is not None + assert panel._uncited_count_text is not None + + def test_toggle_initially_hidden(self) -> None: + """Uncited toggle should be hidden by default.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + + assert panel._uncited_toggle is not None + assert panel._uncited_toggle.visible is False + + def test_calculate_uncited_counts_with_no_summaries(self) -> None: + """Uncited counts should be zero when no summaries.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + panel._calculate_uncited_counts() + + assert panel._uncited_key_points == 0 + assert panel._uncited_action_items == 0 + + def test_calculate_uncited_counts_with_filtered_items(self) -> None: + """Uncited counts should reflect difference between original and filtered.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + + # Original has 3 key points + panel._original_summary = _create_summary( + key_points=[ + KeyPoint(text="Point 1", segment_ids=[0]), + KeyPoint(text="Point 2", segment_ids=[1]), + KeyPoint(text="Point 3", segment_ids=[]), # uncited + ], + action_items=[ + ActionItem(text="Action 1", segment_ids=[0]), + ActionItem(text="Action 2", segment_ids=[]), # uncited + ], + ) + # Filtered has 2 key points (1 filtered out) + panel._filtered_summary = _create_summary( + key_points=[ + KeyPoint(text="Point 1", segment_ids=[0]), + KeyPoint(text="Point 2", segment_ids=[1]), + ], + action_items=[ + ActionItem(text="Action 1", segment_ids=[0]), + ], + ) + + panel._calculate_uncited_counts() + + assert panel._uncited_key_points == 1 + assert panel._uncited_action_items == 1 + + def test_has_uncited_items_true_when_filtered(self) -> None: + """_has_uncited_items should return True when items filtered.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel._uncited_key_points = 2 + panel._uncited_action_items = 0 + + assert panel._has_uncited_items() is True + + def test_has_uncited_items_false_when_none_filtered(self) -> None: + """_has_uncited_items should return False when nothing filtered.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel._uncited_key_points = 0 + panel._uncited_action_items = 0 + + assert panel._has_uncited_items() is False + + def test_update_uncited_ui_shows_toggle_when_uncited(self) -> None: + """Toggle should be visible when uncited items exist.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + panel._uncited_key_points = 2 + panel._uncited_action_items = 1 + + panel._update_uncited_ui() + + assert panel._uncited_toggle is not None + assert panel._uncited_toggle.visible is True + + def test_update_uncited_ui_hides_toggle_when_no_uncited(self) -> None: + """Toggle should be hidden when no uncited items.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + panel._uncited_key_points = 0 + panel._uncited_action_items = 0 + + 
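# With zero uncited counts, the update should hide the toggle again. + 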
panel._update_uncited_ui() + + assert panel._uncited_toggle is not None + assert panel._uncited_toggle.visible is False + + def test_update_uncited_ui_shows_count_text(self) -> None: + """Count text should show total uncited when toggle is off.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + panel._uncited_key_points = 2 + panel._uncited_action_items = 3 + panel._show_uncited = False + + panel._update_uncited_ui() + + assert panel._uncited_count_text is not None + assert panel._uncited_count_text.visible is True + assert panel._uncited_count_text.value == "(5 hidden)" + + def test_update_uncited_ui_hides_count_when_showing_uncited(self) -> None: + """Count text should be hidden when showing uncited items.""" + state = _create_mock_state() + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + panel._uncited_key_points = 2 + panel._uncited_action_items = 0 + panel._show_uncited = True + + panel._update_uncited_ui() + + assert panel._uncited_count_text is not None + assert panel._uncited_count_text.visible is False + + def test_get_display_summary_returns_original_when_toggled(self) -> None: + """_get_display_summary should return original when showing uncited.""" + state = _create_mock_state() + original = _create_summary(key_points=[KeyPoint(text="Original", segment_ids=[])]) + filtered = _create_summary(key_points=[]) + state.current_summary = filtered + + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel._original_summary = original + panel._filtered_summary = filtered + panel._show_uncited = True + + result = panel._get_display_summary() + + assert result is original + + def test_get_display_summary_returns_current_when_not_toggled(self) -> None: + """_get_display_summary should return current_summary when toggle off.""" + state = _create_mock_state() + original = _create_summary(key_points=[KeyPoint(text="Original", segment_ids=[])]) + filtered = _create_summary(key_points=[]) + state.current_summary = filtered + + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel._original_summary = original + panel._filtered_summary = filtered + panel._show_uncited = False + + result = panel._get_display_summary() + + assert result is filtered + + def test_render_summary_switches_on_toggle(self) -> None: + """Rendering should switch content based on toggle state.""" + state = _create_mock_state() + original = _create_summary( + key_points=[ + KeyPoint(text="Point 1", segment_ids=[0]), + KeyPoint(text="Uncited", segment_ids=[]), + ] + ) + filtered = _create_summary(key_points=[KeyPoint(text="Point 1", segment_ids=[0])]) + state.current_summary = filtered + + panel = SummaryPanelComponent(state, get_service=lambda: None) + panel.build() + panel._original_summary = original + panel._filtered_summary = filtered + panel._uncited_key_points = 1 + + # First render with toggle off + panel._show_uncited = False + panel._render_summary() + assert panel._key_points_list is not None + assert len(panel._key_points_list.controls) == 1 + + # Toggle on and re-render + panel._show_uncited = True + panel._render_summary() + assert len(panel._key_points_list.controls) == 2 +```` + +## File: tests/client/test_transcript_component.py +````python +"""Tests for TranscriptComponent including partial rendering.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING +from unittest.mock import MagicMock + +import flet as ft + 
+if TYPE_CHECKING: + from collections.abc import Callable + +from noteflow.client.components.transcript import TranscriptComponent + + +@dataclass +class MockTranscriptSegment: + """Mock TranscriptSegment for testing.""" + + text: str + start_time: float + end_time: float + is_final: bool = True + speaker_id: str = "" + speaker_confidence: float = 0.0 + + +@dataclass +class MockServerInfo: + """Mock ServerInfo for testing.""" + + version: str = "1.0.0" + asr_model: str = "base" + asr_ready: bool = True + active_meetings: int = 0 + + +@dataclass +class MockAppState: + """Minimal mock AppState for testing transcript component.""" + + transcript_segments: list[MockTranscriptSegment] = field(default_factory=list) + current_partial_text: str = "" + _page: MagicMock | None = None + + def request_update(self) -> None: + """No-op for tests.""" + + def run_on_ui_thread(self, callback: Callable[[], None]) -> None: + """Execute callback immediately for tests.""" + callback() + + def clear_transcript(self) -> None: + """Clear transcript segments and partial text.""" + self.transcript_segments.clear() + self.current_partial_text = "" + + +class TestTranscriptComponentBuild: + """Tests for TranscriptComponent.build().""" + + def test_build_returns_column(self) -> None: + """build() should return ft.Column.""" + state = MockAppState() + component = TranscriptComponent(state) + + result = component.build() + + assert isinstance(result, ft.Column) + + def test_build_creates_search_field(self) -> None: + """build() should create search field.""" + state = MockAppState() + component = TranscriptComponent(state) + + component.build() + + assert component._search_field is not None + assert isinstance(component._search_field, ft.TextField) + + def test_build_creates_list_view(self) -> None: + """build() should create ListView.""" + state = MockAppState() + component = TranscriptComponent(state) + + component.build() + + assert component._list_view is not None + assert isinstance(component._list_view, ft.ListView) + + +class TestTranscriptPartialRendering: + """Tests for partial transcript rendering.""" + + def test_add_partial_segment_updates_state(self) -> None: + """Adding partial segment should update state partial text.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + partial = MockTranscriptSegment( + text="Hello, I am speaking...", + start_time=0.0, + end_time=1.0, + is_final=False, + ) + component.add_segment(partial) + + assert state.current_partial_text == "Hello, I am speaking..." 
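+ + # Shorthand for the partial-row contract exercised below (hypothetical flow): + # add_segment(is_final=False) -> a single [LIVE] row is created or updated in place + # add_segment(is_final=True) -> the partial row is removed and partial text cleared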
+ + def test_add_partial_creates_partial_row(self) -> None: + """Adding partial segment should create partial row in ListView.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + partial = MockTranscriptSegment( + text="Speaking now...", + start_time=0.0, + end_time=1.0, + is_final=False, + ) + component.add_segment(partial) + + assert component._partial_row is not None + assert component._list_view is not None + assert component._partial_row in component._list_view.controls + + def test_partial_row_has_live_indicator(self) -> None: + """Partial row should contain [LIVE] indicator.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + partial = MockTranscriptSegment( + text="Testing...", + start_time=0.0, + end_time=1.0, + is_final=False, + ) + component.add_segment(partial) + + # Check that partial row content contains LIVE indicator + assert component._partial_row is not None + partial_content = component._partial_row.content + assert isinstance(partial_content, ft.Row) + # First element should be the LIVE text + live_text = partial_content.controls[0] + assert isinstance(live_text, ft.Text) + assert live_text.value is not None + assert "[LIVE]" in live_text.value + + def test_partial_row_has_italic_styling(self) -> None: + """Partial row text should be italicized.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + partial = MockTranscriptSegment( + text="Testing...", + start_time=0.0, + end_time=1.0, + is_final=False, + ) + component.add_segment(partial) + + assert component._partial_row is not None + partial_content = component._partial_row.content + assert isinstance(partial_content, ft.Row) + text_element = partial_content.controls[1] + assert isinstance(text_element, ft.Text) + assert text_element.italic is True + + def test_partial_row_updated_on_new_partial(self) -> None: + """Subsequent partials should update existing row, not create new.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + # First partial + component.add_segment( + MockTranscriptSegment(text="First", start_time=0.0, end_time=1.0, is_final=False) + ) + first_row = component._partial_row + assert component._list_view is not None + initial_count = len(component._list_view.controls) + + # Second partial + component.add_segment( + MockTranscriptSegment(text="Second", start_time=1.0, end_time=2.0, is_final=False) + ) + + # Should update same row, not add new + assert component._partial_row is first_row + assert component._list_view is not None + assert len(component._list_view.controls) == initial_count + + +class TestTranscriptFinalSegment: + """Tests for final segment handling.""" + + def test_add_final_segment_clears_partial_text(self) -> None: + """Adding final segment should clear partial text state.""" + state = MockAppState() + state.current_partial_text = "Partial text..." 
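+ # The pre-seeded partial text above must be cleared once a final segment arrives.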
+ component = TranscriptComponent(state) + component.build() + + final = MockTranscriptSegment( + text="Final transcript.", + start_time=0.0, + end_time=2.0, + is_final=True, + ) + component.add_segment(final) + + assert not state.current_partial_text + + def test_add_final_removes_partial_row(self) -> None: + """Adding final segment should remove partial row.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + # Add partial first + partial = MockTranscriptSegment( + text="Speaking...", + start_time=0.0, + end_time=1.0, + is_final=False, + ) + component.add_segment(partial) + assert component._partial_row is not None + + # Add final + final = MockTranscriptSegment( + text="Final text.", + start_time=0.0, + end_time=2.0, + is_final=True, + ) + component.add_segment(final) + + # Partial row should be removed + assert component._partial_row is None + + def test_add_final_appends_to_segments(self) -> None: + """Adding final segment should append to state transcript_segments.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + final = MockTranscriptSegment( + text="Final text.", + start_time=0.0, + end_time=2.0, + is_final=True, + ) + component.add_segment(final) + + assert len(state.transcript_segments) == 1 + assert state.transcript_segments[0].text == "Final text." + + +class TestTranscriptClear: + """Tests for transcript clearing.""" + + def test_clear_removes_partial_row(self) -> None: + """clear() should remove partial row.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + # Add partial + partial = MockTranscriptSegment( + text="Partial...", + start_time=0.0, + end_time=1.0, + is_final=False, + ) + component.add_segment(partial) + + component.clear() + + assert component._partial_row is None + + def test_clear_empties_list_view(self) -> None: + """clear() should empty ListView controls.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + # Add some segments + component.add_segment( + MockTranscriptSegment(text="First", start_time=0.0, end_time=1.0, is_final=True) + ) + component.add_segment( + MockTranscriptSegment(text="Second", start_time=1.0, end_time=2.0, is_final=True) + ) + + component.clear() + + assert component._list_view is not None + assert len(component._list_view.controls) == 0 + + def test_clear_clears_search_field(self) -> None: + """clear() should clear search field.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + assert component._search_field is not None + component._search_field.value = "test query" + + component.clear() + + assert component._search_field is not None + assert not component._search_field.value + + +class TestTranscriptSearch: + """Tests for transcript search functionality.""" + + def test_search_filters_segments(self) -> None: + """Search should filter visible segments.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + # Add segments to state + state.transcript_segments = [ + MockTranscriptSegment(text="Hello world", start_time=0.0, end_time=1.0), + MockTranscriptSegment(text="Goodbye world", start_time=1.0, end_time=2.0), + MockTranscriptSegment(text="Something else", start_time=2.0, end_time=3.0), + ] + + # Simulate search + component._search_query = "world" + component._rerender_all_segments() + + # Should only show segments containing "world" + visible_count = sum(row is not None for row in 
component._segment_rows) + assert visible_count == 2 + + def test_search_is_case_insensitive(self) -> None: + """Search should be case-insensitive.""" + state = MockAppState() + component = TranscriptComponent(state) + component.build() + + state.transcript_segments = [ + MockTranscriptSegment(text="Hello WORLD", start_time=0.0, end_time=1.0), + MockTranscriptSegment(text="something else", start_time=1.0, end_time=2.0), + ] + + component._search_query = "world" + component._rerender_all_segments() + + visible_count = sum(row is not None for row in component._segment_rows) + assert visible_count == 1 + + +class TestTranscriptSegmentClick: + """Tests for segment click handling.""" + + def test_click_callback_receives_segment_index(self) -> None: + """Clicking segment should call callback with segment index.""" + clicked_indices: list[int] = [] + state = MockAppState() + component = TranscriptComponent( + state, + on_segment_click=lambda idx: clicked_indices.append(idx), + ) + component.build() + + component._handle_click(5) + + assert clicked_indices == [5] + + def test_click_without_callback_is_noop(self) -> None: + """Click without callback should not raise.""" + state = MockAppState() + component = TranscriptComponent(state, on_segment_click=None) + component.build() + + component._handle_click(3) # Should not raise +```` + +## File: tests/infrastructure/asr/test_engine.py +````python +"""Tests for FasterWhisperEngine behavior without loading models.""" + +from __future__ import annotations + +import sys +import types + +import numpy as np +import pytest + +from noteflow.infrastructure.asr.engine import FasterWhisperEngine + + +class TestFasterWhisperEngine: + """Tests for FasterWhisperEngine.""" + + def test_transcribe_without_load_raises(self) -> None: + """Calling transcribe before load_model raises RuntimeError.""" + engine = FasterWhisperEngine() + audio = np.zeros(1600, dtype=np.float32) + with pytest.raises(RuntimeError, match="Model not loaded"): + list(engine.transcribe(audio)) + + def test_load_invalid_model_size_raises(self) -> None: + """Invalid model size raises ValueError when faster-whisper is available.""" + pytest.importorskip("faster_whisper") + engine = FasterWhisperEngine() + with pytest.raises(ValueError, match="Invalid model size"): + engine.load_model(model_size="not-a-model") + + def test_load_model_with_stub_sets_state(self, monkeypatch: pytest.MonkeyPatch) -> None: + """load_model should set model and size when stubbed module is present.""" + + class DummyModel: + def __init__( + self, model_size: str, device: str, compute_type: str, num_workers: int + ) -> None: + self.args = (model_size, device, compute_type, num_workers) + + fake_module = types.SimpleNamespace(WhisperModel=DummyModel) + monkeypatch.setitem(sys.modules, "faster_whisper", fake_module) + + engine = FasterWhisperEngine(compute_type="float32", device="cpu", num_workers=2) + engine.load_model(model_size="base") + + assert engine.is_loaded is True + assert engine.model_size == "base" + assert engine._model.args == ("base", "cpu", "float32", 2) # type: ignore[attr-defined] + + def test_load_model_wraps_errors(self, monkeypatch: pytest.MonkeyPatch) -> None: + """load_model should surface model construction errors as RuntimeError.""" + + class FailingModel: + def __init__(self, *_: object, **__: object) -> None: + raise ValueError("boom") + + fake_module = types.SimpleNamespace(WhisperModel=FailingModel) + monkeypatch.setitem(sys.modules, "faster_whisper", fake_module) + + engine = FasterWhisperEngine() 
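+        # The stub raises in WhisperModel.__init__; load_model should wrap it as RuntimeError.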
+ with pytest.raises(RuntimeError, match="Failed to load model"): + engine.load_model(model_size="base") + + def test_transcribe_with_stubbed_model(self) -> None: + """transcribe should yield AsrResult objects when model is preset.""" + engine = FasterWhisperEngine() + + class DummyWord: + def __init__(self) -> None: + self.word = "hi" + self.start = 0.0 + self.end = 0.5 + self.probability = 0.9 + + class DummySegment: + def __init__(self) -> None: + self.text = " hi " + self.start = 0.0 + self.end = 1.0 + self.words = [DummyWord()] + self.avg_logprob = -0.1 + self.no_speech_prob = 0.01 + + class DummyInfo: + language = "en" + language_probability = 0.95 + + class DummyModel: + def transcribe(self, audio: np.ndarray, **_: object): + return [DummySegment()], DummyInfo() + + engine._model = DummyModel() + engine._model_size = "base" + + audio = np.zeros(1600, dtype=np.float32) + results = list(engine.transcribe(audio)) + + assert len(results) == 1 + first = results[0] + assert first.text == "hi" + assert first.words[0].word == "hi" + assert engine.is_loaded is True + + engine.unload() + assert engine.is_loaded is False +```` + +## File: tests/infrastructure/audio/test_capture.py +````python +"""Tests for SoundDeviceCapture.""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +from noteflow.infrastructure.audio import SoundDeviceCapture + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + + +class TestSoundDeviceCapture: + """Tests for SoundDeviceCapture class.""" + + @pytest.fixture + def capture(self) -> SoundDeviceCapture: + """Create SoundDeviceCapture instance.""" + return SoundDeviceCapture() + + def test_init_defaults(self, capture: SoundDeviceCapture) -> None: + """Test capture initializes with correct defaults.""" + assert capture.sample_rate == 16000 + assert capture.channels == 1 + assert capture.current_device_id is None + + def test_is_capturing_initially_false(self, capture: SoundDeviceCapture) -> None: + """Test is_capturing returns False when not started.""" + assert capture.is_capturing() is False + + def test_list_devices_returns_list(self, capture: SoundDeviceCapture) -> None: + """Test list_devices returns a list (may be empty in CI).""" + devices = capture.list_devices() + assert isinstance(devices, list) + + def test_get_default_device_returns_device_or_none(self, capture: SoundDeviceCapture) -> None: + """Test get_default_device returns device info or None.""" + device = capture.get_default_device() + # May be None in CI environments without audio + if device is not None: + assert device.device_id >= 0 + assert isinstance(device.name, str) + assert device.channels > 0 + + def test_stop_when_not_capturing_is_safe(self, capture: SoundDeviceCapture) -> None: + """Test stop() is safe to call when not capturing.""" + # Should not raise + capture.stop() + assert capture.is_capturing() is False + + def test_start_when_already_capturing_raises(self, capture: SoundDeviceCapture) -> None: + """Test start() raises if already capturing. + + Note: This test may be skipped in CI without audio devices. 
+ """ + devices = capture.list_devices() + if not devices: + pytest.skip("No audio devices available") + + def dummy_callback(frames: NDArray[np.float32], timestamp: float) -> None: + pass + + try: + capture.start( + device_id=None, + on_frames=dummy_callback, + sample_rate=16000, + channels=1, + ) + + # Second start should raise + with pytest.raises(RuntimeError, match="Already capturing"): + capture.start( + device_id=None, + on_frames=dummy_callback, + ) + finally: + capture.stop() + + def test_properties_after_start(self, capture: SoundDeviceCapture) -> None: + """Test properties reflect configured values after start. + + Note: This test may be skipped in CI without audio devices. + """ + devices = capture.list_devices() + if not devices: + pytest.skip("No audio devices available") + + def dummy_callback(frames: NDArray[np.float32], timestamp: float) -> None: + pass + + try: + capture.start( + device_id=None, + on_frames=dummy_callback, + sample_rate=44100, + channels=1, + ) + + assert capture.sample_rate == 44100 + assert capture.channels == 1 + assert capture.is_capturing() is True + finally: + capture.stop() + + def test_start_with_stubbed_stream_invokes_callback( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """start should configure and invoke callback when stream is stubbed.""" + captured: list[np.ndarray] = [] + + class DummyStream: + def __init__(self, *, callback, **_: object) -> None: + self.callback = callback + self.active = False + + def start(self) -> None: + self.active = True + data = np.zeros((4, 1), dtype=np.float32) + self.callback(data, len(data), None, 0) + + def stop(self) -> None: + self.active = False + + def close(self) -> None: + self.active = False + + monkeypatch.setattr( + "noteflow.infrastructure.audio.capture.sd.InputStream", + DummyStream, + ) + monkeypatch.setattr( + "noteflow.infrastructure.audio.capture.sd.PortAudioError", + RuntimeError, + ) + monkeypatch.setattr( + "noteflow.infrastructure.audio.capture.sd.CallbackFlags", + int, + ) + monkeypatch.setattr( + "noteflow.infrastructure.audio.capture.sd.query_devices", + lambda: [{"name": "Mic", "max_input_channels": 1, "default_samplerate": 16000}], + ) + monkeypatch.setattr( + "noteflow.infrastructure.audio.capture.sd.default", + SimpleNamespace(device=(0, 1)), + ) + + def on_frames(frames: NDArray[np.float32], timestamp: float) -> None: # type: ignore[name-defined] + captured.append(frames) + assert isinstance(timestamp, float) + + capture = SoundDeviceCapture() + capture.start(device_id=None, on_frames=on_frames, sample_rate=16000, channels=1) + + assert captured, "callback should have been invoked" + assert capture.is_capturing() is True + capture.stop() + assert capture.is_capturing() is False + + def test_start_wraps_portaudio_error(self, monkeypatch: pytest.MonkeyPatch) -> None: + """PortAudio errors should be converted to RuntimeError.""" + + class DummyError(Exception): ... 
+ + def failing_stream(**_: object) -> None: + raise DummyError("boom") + + monkeypatch.setattr("noteflow.infrastructure.audio.capture.sd.InputStream", failing_stream) + monkeypatch.setattr("noteflow.infrastructure.audio.capture.sd.PortAudioError", DummyError) + + capture = SoundDeviceCapture() + with pytest.raises(RuntimeError, match="Failed to start audio capture"): + capture.start(device_id=None, on_frames=lambda *_: None) +```` + +## File: tests/infrastructure/summarization/test_cloud_provider.py +````python +"""Tests for cloud summarization provider.""" + +from __future__ import annotations + +import json +import sys +import types +from typing import Any +from uuid import uuid4 + +import pytest + +from noteflow.domain.entities import Segment +from noteflow.domain.summarization import ( + InvalidResponseError, + ProviderUnavailableError, + SummarizationRequest, +) +from noteflow.domain.value_objects import MeetingId +from noteflow.infrastructure.summarization import CloudBackend + + +def _segment( + segment_id: int, + text: str, + start: float = 0.0, + end: float = 5.0, +) -> Segment: + """Create a test segment.""" + return Segment( + segment_id=segment_id, + text=text, + start_time=start, + end_time=end, + ) + + +def _valid_json_response( + summary: str = "Test summary.", + key_points: list[dict[str, Any]] | None = None, + action_items: list[dict[str, Any]] | None = None, +) -> str: + """Build a valid JSON response string.""" + return json.dumps( + { + "executive_summary": summary, + "key_points": key_points or [], + "action_items": action_items or [], + } + ) + + +class TestCloudSummarizerProperties: + """Tests for CloudSummarizer properties.""" + + def test_provider_name_openai(self) -> None: + """Provider name should be 'openai' for OpenAI backend.""" + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(backend=CloudBackend.OPENAI) + assert summarizer.provider_name == "openai" + + def test_provider_name_anthropic(self) -> None: + """Provider name should be 'anthropic' for Anthropic backend.""" + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC) + assert summarizer.provider_name == "anthropic" + + def test_requires_cloud_consent_true(self) -> None: + """Cloud providers should require consent.""" + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer() + assert summarizer.requires_cloud_consent is True + + def test_is_available_with_api_key(self) -> None: + """is_available should be True when API key is provided.""" + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(api_key="test-key") + assert summarizer.is_available is True + + def test_is_available_without_api_key(self, monkeypatch: pytest.MonkeyPatch) -> None: + """is_available should be False without API key or env var.""" + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer() + assert summarizer.is_available is False + + def test_is_available_with_openai_env_var(self, monkeypatch: pytest.MonkeyPatch) -> None: + """is_available should be True with OPENAI_API_KEY env var.""" + monkeypatch.setenv("OPENAI_API_KEY", "sk-test") + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = 
CloudSummarizer(backend=CloudBackend.OPENAI) + assert summarizer.is_available is True + + def test_is_available_with_anthropic_env_var(self, monkeypatch: pytest.MonkeyPatch) -> None: + """is_available should be True with ANTHROPIC_API_KEY env var.""" + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test") + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC) + assert summarizer.is_available is True + + def test_default_model_openai(self) -> None: + """Default model for OpenAI should be gpt-4o-mini.""" + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(backend=CloudBackend.OPENAI) + assert summarizer._model == "gpt-4o-mini" + + def test_default_model_anthropic(self) -> None: + """Default model for Anthropic should be claude-3-haiku.""" + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC) + assert summarizer._model == "claude-3-haiku-20240307" + + def test_custom_model(self) -> None: + """Custom model should override default.""" + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(model="gpt-4-turbo") + assert summarizer._model == "gpt-4-turbo" + + def test_openai_base_url_is_passed(self, monkeypatch: pytest.MonkeyPatch) -> None: + """OPENAI_BASE_URL should be forwarded to the client when provided.""" + captured = {} + + def fake_openai_client(**kwargs: Any) -> types.SimpleNamespace: + captured.update(kwargs) + return types.SimpleNamespace( + chat=types.SimpleNamespace( + completions=types.SimpleNamespace( + create=lambda **_: types.SimpleNamespace( + choices=[ + types.SimpleNamespace( + message=types.SimpleNamespace(content=_valid_json_response()) + ) + ], + usage=None, + ) + ) + ) + ) + + mock_module = types.ModuleType("openai") + mock_module.OpenAI = fake_openai_client + monkeypatch.setitem(sys.modules, "openai", mock_module) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer( + api_key="key", backend=CloudBackend.OPENAI, base_url="https://custom" + ) + # Trigger client creation + _ = summarizer._get_openai_client() + assert captured.get("base_url") == "https://custom" + + +class TestCloudSummarizerOpenAI: + """Tests for CloudSummarizer with OpenAI backend.""" + + @pytest.fixture + def meeting_id(self) -> MeetingId: + """Create test meeting ID.""" + return MeetingId(uuid4()) + + @pytest.fixture + def mock_openai(self, monkeypatch: pytest.MonkeyPatch) -> types.ModuleType: + """Mock openai module.""" + + def create_response(content: str, tokens: int = 100) -> types.SimpleNamespace: + """Create mock OpenAI response.""" + return types.SimpleNamespace( + choices=[types.SimpleNamespace(message=types.SimpleNamespace(content=content))], + usage=types.SimpleNamespace(total_tokens=tokens), + ) + + mock_client = types.SimpleNamespace( + chat=types.SimpleNamespace( + completions=types.SimpleNamespace( + create=lambda **_: create_response(_valid_json_response()) + ) + ) + ) + mock_module = types.ModuleType("openai") + mock_module.OpenAI = lambda **_: mock_client + monkeypatch.setitem(sys.modules, "openai", mock_module) + return mock_module + + @pytest.mark.asyncio + async def test_summarize_empty_segments( + self, meeting_id: MeetingId, mock_openai: types.ModuleType + ) -> None: + """Empty segments should return empty summary.""" + from noteflow.infrastructure.summarization import 
CloudSummarizer + + summarizer = CloudSummarizer(api_key="test-key") + request = SummarizationRequest(meeting_id=meeting_id, segments=[]) + + result = await summarizer.summarize(request) + + assert result.summary.key_points == [] + assert result.summary.action_items == [] + + @pytest.mark.asyncio + async def test_summarize_returns_result( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Summarize should return SummarizationResult.""" + response_content = _valid_json_response( + summary="Project meeting summary.", + key_points=[{"text": "Key point", "segment_ids": [0]}], + action_items=[{"text": "Action", "assignee": "Bob", "priority": 1, "segment_ids": [1]}], + ) + + def create_response(**_: Any) -> types.SimpleNamespace: + return types.SimpleNamespace( + choices=[ + types.SimpleNamespace(message=types.SimpleNamespace(content=response_content)) + ], + usage=types.SimpleNamespace(total_tokens=150), + ) + + mock_client = types.SimpleNamespace( + chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response)) + ) + mock_module = types.ModuleType("openai") + mock_module.OpenAI = lambda **_: mock_client + monkeypatch.setitem(sys.modules, "openai", mock_module) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.OPENAI) + segments = [_segment(0, "Key point"), _segment(1, "Action item")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert result.provider_name == "openai" + assert result.summary.executive_summary == "Project meeting summary." + assert result.tokens_used == 150 + + @pytest.mark.asyncio + async def test_raises_unavailable_on_auth_error( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Should raise ProviderUnavailableError on auth failure.""" + + def raise_auth_error(**_: Any) -> None: + raise ValueError("Invalid API key provided") + + mock_client = types.SimpleNamespace( + chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=raise_auth_error)) + ) + mock_module = types.ModuleType("openai") + mock_module.OpenAI = lambda **_: mock_client + monkeypatch.setitem(sys.modules, "openai", mock_module) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(api_key="bad-key") + segments = [_segment(0, "Test")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + with pytest.raises(ProviderUnavailableError, match="authentication failed"): + await summarizer.summarize(request) + + @pytest.mark.asyncio + async def test_raises_invalid_response_on_empty_content( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Should raise InvalidResponseError on empty response.""" + + def create_empty_response(**_: Any) -> types.SimpleNamespace: + return types.SimpleNamespace( + choices=[types.SimpleNamespace(message=types.SimpleNamespace(content=""))], + usage=None, + ) + + mock_client = types.SimpleNamespace( + chat=types.SimpleNamespace( + completions=types.SimpleNamespace(create=create_empty_response) + ) + ) + mock_module = types.ModuleType("openai") + mock_module.OpenAI = lambda **_: mock_client + monkeypatch.setitem(sys.modules, "openai", mock_module) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(api_key="test-key") + segments = [_segment(0, "Test")] + request = 
SummarizationRequest(meeting_id=meeting_id, segments=segments) + + with pytest.raises(InvalidResponseError, match="Empty response"): + await summarizer.summarize(request) + + +class TestCloudSummarizerAnthropic: + """Tests for CloudSummarizer with Anthropic backend.""" + + @pytest.fixture + def meeting_id(self) -> MeetingId: + """Create test meeting ID.""" + return MeetingId(uuid4()) + + @pytest.mark.asyncio + async def test_summarize_returns_result( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Summarize should return SummarizationResult.""" + response_content = _valid_json_response( + summary="Anthropic summary.", + key_points=[{"text": "Point", "segment_ids": [0]}], + ) + + def create_response(**_: Any) -> types.SimpleNamespace: + return types.SimpleNamespace( + content=[types.SimpleNamespace(text=response_content)], + usage=types.SimpleNamespace(input_tokens=50, output_tokens=100), + ) + + mock_client = types.SimpleNamespace(messages=types.SimpleNamespace(create=create_response)) + mock_module = types.ModuleType("anthropic") + mock_module.Anthropic = lambda **_: mock_client + monkeypatch.setitem(sys.modules, "anthropic", mock_module) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.ANTHROPIC) + segments = [_segment(0, "Test point")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert result.provider_name == "anthropic" + assert result.summary.executive_summary == "Anthropic summary." + assert result.tokens_used == 150 + + @pytest.mark.asyncio + async def test_raises_unavailable_when_package_missing( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Should raise ProviderUnavailableError when package not installed.""" + monkeypatch.delitem(sys.modules, "anthropic", raising=False) + + import builtins + + original_import = builtins.__import__ + + def mock_import(name: str, *args: Any, **kwargs: Any) -> Any: + if name == "anthropic": + raise ImportError("No module named 'anthropic'") + return original_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", mock_import) + + from noteflow.infrastructure.summarization import cloud_provider + + summarizer = cloud_provider.CloudSummarizer( + api_key="test-key", backend=CloudBackend.ANTHROPIC + ) + summarizer._client = None + + segments = [_segment(0, "Test")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + with pytest.raises(ProviderUnavailableError, match="anthropic package"): + await summarizer.summarize(request) + + @pytest.mark.asyncio + async def test_raises_invalid_response_on_empty_content( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Should raise InvalidResponseError on empty response.""" + + def create_empty_response(**_: Any) -> types.SimpleNamespace: + return types.SimpleNamespace( + content=[], + usage=types.SimpleNamespace(input_tokens=10, output_tokens=0), + ) + + mock_client = types.SimpleNamespace( + messages=types.SimpleNamespace(create=create_empty_response) + ) + mock_module = types.ModuleType("anthropic") + mock_module.Anthropic = lambda **_: mock_client + monkeypatch.setitem(sys.modules, "anthropic", mock_module) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.ANTHROPIC) + segments = [_segment(0, 
"Test")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + with pytest.raises(InvalidResponseError, match="Empty response"): + await summarizer.summarize(request) + + +class TestCloudSummarizerFiltering: + """Tests for response filtering in CloudSummarizer.""" + + @pytest.fixture + def meeting_id(self) -> MeetingId: + """Create test meeting ID.""" + return MeetingId(uuid4()) + + @pytest.mark.asyncio + async def test_filters_invalid_segment_ids( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Invalid segment_ids should be filtered from response.""" + response_content = _valid_json_response( + summary="Test", + key_points=[{"text": "Point", "segment_ids": [0, 99, 100]}], + ) + + def create_response(**_: Any) -> types.SimpleNamespace: + return types.SimpleNamespace( + choices=[ + types.SimpleNamespace(message=types.SimpleNamespace(content=response_content)) + ], + usage=None, + ) + + mock_client = types.SimpleNamespace( + chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response)) + ) + mock_module = types.ModuleType("openai") + mock_module.OpenAI = lambda **_: mock_client + monkeypatch.setitem(sys.modules, "openai", mock_module) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(api_key="test-key") + segments = [_segment(0, "Only valid segment")] + request = SummarizationRequest(meeting_id=meeting_id, segments=segments) + + result = await summarizer.summarize(request) + + assert result.summary.key_points[0].segment_ids == [0] + + @pytest.mark.asyncio + async def test_respects_max_limits( + self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Max limits should truncate response items.""" + response_content = _valid_json_response( + summary="Test", + key_points=[{"text": f"Point {i}", "segment_ids": [0]} for i in range(10)], + action_items=[{"text": f"Action {i}", "segment_ids": [0]} for i in range(10)], + ) + + def create_response(**_: Any) -> types.SimpleNamespace: + return types.SimpleNamespace( + choices=[ + types.SimpleNamespace(message=types.SimpleNamespace(content=response_content)) + ], + usage=None, + ) + + mock_client = types.SimpleNamespace( + chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response)) + ) + mock_module = types.ModuleType("openai") + mock_module.OpenAI = lambda **_: mock_client + monkeypatch.setitem(sys.modules, "openai", mock_module) + + from noteflow.infrastructure.summarization import CloudSummarizer + + summarizer = CloudSummarizer(api_key="test-key") + segments = [_segment(0, "Test")] + request = SummarizationRequest( + meeting_id=meeting_id, + segments=segments, + max_key_points=2, + max_action_items=3, + ) + + result = await summarizer.summarize(request) + + assert len(result.summary.key_points) == 2 + assert len(result.summary.action_items) == 3 +```` + +## File: tests/integration/conftest.py +````python +"""Pytest fixtures for integration tests.""" + +from __future__ import annotations + +import time +from collections.abc import AsyncGenerator +from importlib import import_module +from typing import TYPE_CHECKING +from urllib.parse import quote + +import pytest +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine + +if TYPE_CHECKING: + from collections.abc import Self + +from noteflow.infrastructure.persistence.models import Base + + +# Store container reference at module level to reuse +class 
PgTestContainer: + """Minimal Postgres testcontainer wrapper with custom readiness wait.""" + + def __init__( + self, + image: str = "pgvector/pgvector:pg16", + username: str = "test", + password: str = "test", + dbname: str = "noteflow_test", + port: int = 5432, + ) -> None: + self.username = username + self.password = password + self.dbname = dbname + self.port = port + + container_module = import_module("testcontainers.core.container") + docker_container_cls = container_module.DockerContainer + self._container = ( + docker_container_cls(image) + .with_env("POSTGRES_USER", username) + .with_env("POSTGRES_PASSWORD", password) + .with_env("POSTGRES_DB", dbname) + .with_exposed_ports(port) + ) + + def start(self) -> Self: + """Start the container.""" + self._container.start() + self._wait_until_ready() + return self + + def stop(self) -> None: + """Stop the container.""" + self._container.stop() + + def get_connection_url(self) -> str: + """Return a SQLAlchemy-style connection URL.""" + host = self._container.get_container_host_ip() + port = self._container._get_exposed_port(self.port) + quoted_password = quote(self.password, safe=" +") + return f"postgresql+psycopg2://{self.username}:{quoted_password}@{host}:{port}/{self.dbname}" + + def _wait_until_ready(self, timeout: float = 30.0, interval: float = 0.5) -> None: + """Wait for Postgres to accept connections by running a simple query.""" + start_time = time.time() + escaped_password = self.password.replace("'", "'\"'\"'") + cmd = [ + "sh", + "-c", + ( + f"PGPASSWORD='{escaped_password}' " + f"psql --username {self.username} --dbname {self.dbname} --host 127.0.0.1 " + "-c 'select 1;'" + ), + ] + last_error: str | None = None + + while True: + result = self._container.exec(cmd) + if result.exit_code == 0: + return + if result.output: + last_error = result.output.decode(errors="ignore") + if time.time() - start_time > timeout: + raise TimeoutError( + "Postgres container did not become ready in time" + + (f": {last_error}" if last_error else "") + ) + time.sleep(interval) + + +_container: PgTestContainer | None = None +_database_url: str | None = None + + +def get_or_create_container() -> tuple[PgTestContainer, str]: + """Get or create the PostgreSQL container.""" + global _container, _database_url + + if _container is None: + container = PgTestContainer().start() + _container = container + url = container.get_connection_url() + _database_url = url.replace("postgresql+psycopg2://", "postgresql+asyncpg://") + + assert _container is not None, "Container should be initialized" + assert _database_url is not None, "Database URL should be initialized" + return _container, _database_url + + +@pytest.fixture +async def session_factory() -> AsyncGenerator[async_sessionmaker[AsyncSession], None]: + """Create a session factory and initialize the database schema.""" + _, database_url = get_or_create_container() + + engine = create_async_engine(database_url, echo=False) + + async with engine.begin() as conn: + # Create pgvector extension and schema + await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) + await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE")) + await conn.execute(text("CREATE SCHEMA noteflow")) + # Create all tables + await conn.run_sync(Base.metadata.create_all) + + yield async_sessionmaker( + engine, + class_=AsyncSession, + expire_on_commit=False, + autocommit=False, + autoflush=False, + ) + # Cleanup - drop schema to reset for next test + async with engine.begin() as conn: + await conn.execute(text("DROP SCHEMA 
IF EXISTS noteflow CASCADE")) + + await engine.dispose() + + +@pytest.fixture +async def session( + session_factory: async_sessionmaker[AsyncSession], +) -> AsyncGenerator[AsyncSession, None]: + """Provide a database session for each test.""" + async with session_factory() as session: + yield session + # Rollback any uncommitted changes + await session.rollback() + + +def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: + """Cleanup container after all tests complete.""" + global _container + if _container is not None: + _container.stop() + _container = None +```` + +## File: src/noteflow/client/components/transcript.py +````python +"""Transcript display component with click-to-seek and highlighting. + +Uses TranscriptSegment from grpc.client and format_timestamp from _formatting. +Does not recreate any types - imports and uses existing ones. +""" + +from __future__ import annotations + +import hashlib +from collections.abc import Callable +from typing import TYPE_CHECKING + +import flet as ft + +# REUSE existing formatting - do not recreate +from noteflow.infrastructure.export._formatting import format_timestamp + +if TYPE_CHECKING: + from noteflow.client.state import AppState + + # REUSE existing types - do not recreate + from noteflow.grpc.client import ServerInfo, TranscriptSegment + + +class TranscriptComponent: + """Transcript segment display with click-to-seek, highlighting, and search. + + Uses TranscriptSegment from grpc.client and format_timestamp from _formatting. + """ + + def __init__( + self, + state: AppState, + on_segment_click: Callable[[int], None] | None = None, + ) -> None: + """Initialize transcript component. + + Args: + state: Centralized application state. + on_segment_click: Callback when segment clicked (receives segment index). + """ + self._state = state + self._on_segment_click = on_segment_click + self._list_view: ft.ListView | None = None + self._segment_rows: list[ft.Container | None] = [] # Track rows for highlighting + self._search_field: ft.TextField | None = None + self._search_query: str = "" + self._partial_row: ft.Container | None = None # Live partial at bottom + + def build(self) -> ft.Column: + """Build transcript list view with search. + + Returns: + Column with search field and bordered ListView. + """ + self._search_field = ft.TextField( + label="Search transcript", + prefix_icon=ft.Icons.SEARCH, + on_change=self._on_search_change, + dense=True, + height=40, + ) + + self._list_view = ft.ListView( + spacing=10, + padding=10, + auto_scroll=False, # We control scrolling for sync + height=260, + ) + self._segment_rows.clear() + + return ft.Column( + [ + self._search_field, + ft.Container( + content=self._list_view, + border=ft.border.all(1, ft.Colors.GREY_400), + border_radius=8, + ), + ], + spacing=5, + ) + + def add_segment(self, segment: TranscriptSegment) -> None: + """Add transcript segment to display. + + For final segments, adds to transcript list. + For partials, updates the live partial row at bottom. + + Args: + segment: Transcript segment from server. 
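+
+        Note:
+            Safe to call from any thread; rendering is marshalled to the
+            UI thread via state.run_on_ui_thread().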
+ """ + if segment.is_final: + # Clear partial text when we get a final + self._state.current_partial_text = "" + self._state.transcript_segments.append(segment) + self._state.run_on_ui_thread(lambda: self._render_final_segment(segment)) + else: + # Update partial text + self._state.current_partial_text = segment.text + self._state.run_on_ui_thread(lambda: self._render_partial(segment.text)) + + def display_server_info(self, info: ServerInfo) -> None: + """Display server info in transcript area. + + Args: + info: Server info from connection. + """ + self._state.run_on_ui_thread(lambda: self._render_server_info(info)) + + def clear(self) -> None: + """Clear all transcript segments and partials.""" + self._state.clear_transcript() + self._segment_rows.clear() + self._partial_row = None + self._search_query = "" + if self._search_field: + self._search_field.value = "" + if self._list_view: + self._list_view.controls.clear() + self._state.request_update() + + def _on_search_change(self, e: ft.ControlEvent) -> None: + """Handle search field change. + + Args: + e: Control event with new search value. + """ + self._search_query = (e.control.value or "").lower() + self._rerender_all_segments() + + def _rerender_all_segments(self) -> None: + """Re-render all segments with current search filter.""" + if not self._list_view: + return + + self._list_view.controls.clear() + self._segment_rows.clear() + + for idx, segment in enumerate(self._state.transcript_segments): + # Filter by search query + if self._search_query and self._search_query not in segment.text.lower(): + # Add placeholder to maintain index alignment + self._segment_rows.append(None) + continue + + # Use original index for click handling + container = self._create_segment_row(segment, idx) + self._segment_rows.append(container) + self._list_view.controls.append(container) + + self._state.request_update() + + def _render_final_segment(self, segment: TranscriptSegment) -> None: + """Render final segment with click handler (UI thread only). + + Args: + segment: Transcript segment to render. + """ + if not self._list_view: + return + + # Remove partial row if present (final replaces partial) + if self._partial_row and self._partial_row in self._list_view.controls: + self._list_view.controls.remove(self._partial_row) + self._partial_row = None + + # Use the actual index from state (segments are appended before rendering) + segment_index = len(self._state.transcript_segments) - 1 + + # Filter by search query during live rendering + if self._search_query and self._search_query not in segment.text.lower(): + self._segment_rows.append(None) + return + + container = self._create_segment_row(segment, segment_index) + + self._segment_rows.append(container) + self._list_view.controls.append(container) + self._state.request_update() + + def _render_partial(self, text: str) -> None: + """Render or update the partial text row at the bottom (UI thread only). + + Args: + text: Partial transcript text. 
+ """ + if not self._list_view or not text: + return + + # Create or update partial row + partial_content = ft.Row( + [ + ft.Text("[LIVE]", size=11, color=ft.Colors.BLUE_400, width=120, italic=True), + ft.Text( + text, + size=14, + color=ft.Colors.GREY_500, + weight=ft.FontWeight.W_300, + italic=True, + expand=True, + ), + ] + ) + + if self._partial_row: + # Update existing row + self._partial_row.content = partial_content + else: + # Create new row + self._partial_row = ft.Container( + content=partial_content, + padding=5, + border_radius=4, + bgcolor=ft.Colors.BLUE_50, + ) + self._list_view.controls.append(self._partial_row) + + self._state.request_update() + + def _create_segment_row(self, segment: TranscriptSegment, segment_index: int) -> ft.Container: + """Create a segment row container. + + Args: + segment: Transcript segment to render. + segment_index: Index for click handling. + + Returns: + Container with segment content. + """ + # REUSE existing format_timestamp from _formatting.py + # Format as time range for transcript display + time_str = ( + f"[{format_timestamp(segment.start_time)} - {format_timestamp(segment.end_time)}]" + ) + + # Style based on finality + color = ft.Colors.BLACK if segment.is_final else ft.Colors.GREY_600 + weight = ft.FontWeight.NORMAL if segment.is_final else ft.FontWeight.W_300 + + # Build row content with optional speaker label + row_controls: list[ft.Control] = [ + ft.Text(time_str, size=11, color=ft.Colors.GREY_500, width=120), + ] + + # Add speaker label if present + if segment.speaker_id: + speaker_color = self._get_speaker_color(segment.speaker_id) + row_controls.append( + ft.Container( + content=ft.Text( + segment.speaker_id, + size=10, + color=ft.Colors.WHITE, + weight=ft.FontWeight.BOLD, + ), + bgcolor=speaker_color, + border_radius=10, + padding=ft.padding.symmetric(horizontal=6, vertical=2), + margin=ft.margin.only(right=8), + ) + ) + + row_controls.append( + ft.Text( + segment.text, + size=14, + color=color, + weight=weight, + expand=True, + ) + ) + + row = ft.Row(row_controls) + + # Wrap in container for click handling and highlighting + return ft.Container( + content=row, + padding=5, + border_radius=4, + on_click=lambda e, idx=segment_index: self._handle_click(idx), + ink=True, + ) + + def _get_speaker_color(self, speaker_id: str) -> str: + """Get consistent color for a speaker. + + Args: + speaker_id: Speaker identifier. + + Returns: + Color string for the speaker label. + """ + # Use hash to get consistent color index + colors = [ + ft.Colors.BLUE_400, + ft.Colors.GREEN_400, + ft.Colors.PURPLE_400, + ft.Colors.ORANGE_400, + ft.Colors.TEAL_400, + ft.Colors.PINK_400, + ft.Colors.INDIGO_400, + ft.Colors.AMBER_600, + ] + digest = hashlib.md5(speaker_id.encode("utf-8")).hexdigest() + return colors[int(digest, 16) % len(colors)] + + def _handle_click(self, segment_index: int) -> None: + """Handle segment row click. + + Args: + segment_index: Index of clicked segment. + """ + if self._on_segment_click: + self._on_segment_click(segment_index) + + def _render_server_info(self, info: ServerInfo) -> None: + """Render server info (UI thread only). + + Args: + info: Server info to display. 
+ """ + if not self._list_view: + return + + asr_status = "ready" if info.asr_ready else "not ready" + info_text = ( + f"Connected to server v{info.version} | " + f"ASR: {info.asr_model} ({asr_status}) | " + f"Active meetings: {info.active_meetings}" + ) + + self._list_view.controls.append( + ft.Text( + info_text, + size=12, + color=ft.Colors.GREEN_700, + italic=True, + ) + ) + self._state.request_update() + + def update_highlight(self, highlighted_index: int | None) -> None: + """Update visual highlight on segments. + + Args: + highlighted_index: Index of segment to highlight, or None to clear. + """ + for idx, container in enumerate(self._segment_rows): + if container is None: + continue + if idx == highlighted_index: + container.bgcolor = ft.Colors.YELLOW_100 + container.border = ft.border.all(1, ft.Colors.YELLOW_700) + else: + container.bgcolor = None + container.border = None + + # Scroll to highlighted segment + if highlighted_index is not None: + self._scroll_to_segment(highlighted_index) + + self._state.request_update() + + def _scroll_to_segment(self, segment_index: int) -> None: + """Scroll ListView to show specified segment. + + Args: + segment_index: Index of segment to scroll to. + """ + if not self._list_view or segment_index >= len(self._segment_rows): + return + + container = self._segment_rows[segment_index] + if container is None: + return + + # Estimate row height for scroll calculation + estimated_row_height = 50 + offset = segment_index * estimated_row_height + self._list_view.scroll_to(offset=offset, duration=200) +```` + +## File: src/noteflow/config/settings.py +````python +"""NoteFlow application settings using Pydantic settings.""" + +from __future__ import annotations + +import json +from functools import lru_cache +from pathlib import Path +from typing import Annotated, cast + +from pydantic import Field, PostgresDsn, field_validator +from pydantic_settings import BaseSettings, SettingsConfigDict + + +def _default_meetings_dir() -> Path: + """Return default meetings directory path.""" + return Path.home() / ".noteflow" / "meetings" + + +class TriggerSettings(BaseSettings): + """Client trigger settings loaded from environment variables.""" + + model_config = SettingsConfigDict( + env_prefix="NOTEFLOW_", + env_file=".env", + env_file_encoding="utf-8", + enable_decoding=False, + extra="ignore", + ) + + # Trigger settings (client-side) + trigger_enabled: Annotated[ + bool, + Field(default=False, description="Enable smart recording triggers (opt-in)"), + ] + trigger_auto_start: Annotated[ + bool, + Field(default=False, description="Auto-start recording on high confidence"), + ] + trigger_rate_limit_minutes: Annotated[ + int, + Field(default=10, ge=1, le=60, description="Minimum minutes between trigger prompts"), + ] + trigger_snooze_minutes: Annotated[ + int, + Field(default=30, ge=5, le=480, description="Default snooze duration in minutes"), + ] + trigger_poll_interval_seconds: Annotated[ + float, + Field(default=2.0, ge=0.5, le=30.0, description="Trigger polling interval in seconds"), + ] + trigger_confidence_ignore: Annotated[ + float, + Field(default=0.40, ge=0.0, le=1.0, description="Confidence below which to ignore"), + ] + trigger_confidence_auto: Annotated[ + float, + Field(default=0.80, ge=0.0, le=1.0, description="Confidence to auto-start recording"), + ] + + # App audio trigger tuning (system output from whitelisted apps) + trigger_audio_enabled: Annotated[ + bool, + Field(default=True, description="Enable app audio activity detection"), + ] + 
trigger_audio_threshold_db: Annotated[ + float, + Field(default=-40.0, ge=-60.0, le=0.0, description="Audio activity threshold in dB"), + ] + trigger_audio_window_seconds: Annotated[ + float, + Field(default=5.0, ge=1.0, le=30.0, description="Audio activity window in seconds"), + ] + trigger_audio_min_active_ratio: Annotated[ + float, + Field(default=0.6, ge=0.0, le=1.0, description="Minimum active ratio in window"), + ] + trigger_audio_min_samples: Annotated[ + int, + Field(default=10, ge=1, le=200, description="Minimum samples before evaluating audio"), + ] + trigger_audio_max_history: Annotated[ + int, + Field(default=50, ge=10, le=1000, description="Max audio activity samples to retain"), + ] + + # Calendar trigger tuning (optional integration) + trigger_calendar_enabled: Annotated[ + bool, + Field(default=False, description="Enable calendar-based trigger detection"), + ] + trigger_calendar_lookahead_minutes: Annotated[ + int, + Field(default=5, ge=0, le=60, description="Minutes before event start to trigger"), + ] + trigger_calendar_lookbehind_minutes: Annotated[ + int, + Field(default=5, ge=0, le=60, description="Minutes after event start to keep triggering"), + ] + trigger_calendar_events: Annotated[ + list[dict[str, object]], + Field( + default_factory=list, + description="Calendar events as JSON list of {start, end, title}", + ), + ] + + # Foreground app trigger tuning + trigger_foreground_enabled: Annotated[ + bool, + Field(default=True, description="Enable foreground app detection"), + ] + trigger_meeting_apps: Annotated[ + list[str], + Field( + default_factory=lambda: [ + "zoom", + "teams", + "microsoft teams", + "meet", + "google meet", + "slack", + "webex", + "discord", + "skype", + "gotomeeting", + "facetime", + "webinar", + "ringcentral", + ], + description="Meeting app name substrings to detect", + ), + ] + trigger_suppressed_apps: Annotated[ + list[str], + Field(default_factory=list, description="Meeting app substrings to ignore"), + ] + + # Signal weights + trigger_weight_audio: Annotated[ + float, + Field(default=0.30, ge=0.0, le=1.0, description="Audio signal confidence weight"), + ] + trigger_weight_foreground: Annotated[ + float, + Field( + default=0.40, + ge=0.0, + le=1.0, + description="Foreground app signal confidence weight", + ), + ] + trigger_weight_calendar: Annotated[ + float, + Field(default=0.30, ge=0.0, le=1.0, description="Calendar signal confidence weight"), + ] + + @field_validator("trigger_meeting_apps", "trigger_suppressed_apps", mode="before") + @classmethod + def _parse_csv_list(cls, value: object) -> list[str]: + if not isinstance(value, str): + return [] if value is None else list(value) + stripped = value.strip() + if stripped.startswith("[") and stripped.endswith("]"): + try: + parsed = json.loads(stripped) + except json.JSONDecodeError: + parsed = None + if isinstance(parsed, list): + return [str(item).strip() for item in parsed if str(item).strip()] + return [item.strip() for item in value.split(",") if item.strip()] + + @field_validator("trigger_calendar_events", mode="before") + @classmethod + def _parse_calendar_events(cls, value: object) -> list[dict[str, object]]: + if value is None: + return [] + if isinstance(value, str): + stripped = value.strip() + if not stripped: + return [] + try: + parsed = json.loads(stripped) + except json.JSONDecodeError: + return [] + if isinstance(parsed, list): + return [item for item in parsed if isinstance(item, dict)] + return [parsed] if isinstance(parsed, dict) else [] + if isinstance(value, dict): + 
return [value] + if isinstance(value, list): + return [item for item in value if isinstance(item, dict)] + return [] + + +class Settings(TriggerSettings): + """Application settings loaded from environment variables. + + Environment variables: + NOTEFLOW_DATABASE_URL: PostgreSQL connection URL + Example: postgresql+asyncpg://user:pass@host:5432/dbname? + options=-csearch_path%3Dnoteflow + NOTEFLOW_DB_POOL_SIZE: Connection pool size (default: 5) + NOTEFLOW_DB_ECHO: Echo SQL statements (default: False) + NOTEFLOW_ASR_MODEL_SIZE: Whisper model size (default: base) + NOTEFLOW_ASR_DEVICE: ASR device (default: cpu) + NOTEFLOW_ASR_COMPUTE_TYPE: ASR compute type (default: int8) + NOTEFLOW_MEETINGS_DIR: Directory for meeting audio storage (default: ~/.noteflow/meetings) + NOTEFLOW_RETENTION_ENABLED: Enable automatic retention policy (default: False) + NOTEFLOW_RETENTION_DAYS: Days to retain completed meetings (default: 90) + NOTEFLOW_RETENTION_CHECK_INTERVAL_HOURS: Hours between retention checks (default: 24) + """ + + # Database settings + database_url: Annotated[ + PostgresDsn, + Field( + description="PostgreSQL connection URL with asyncpg driver", + examples=["postgresql+asyncpg://user:pass@localhost:5432/noteflow"], + ), + ] + db_pool_size: Annotated[ + int, + Field(default=5, ge=1, le=50, description="Database connection pool size"), + ] + db_echo: Annotated[ + bool, + Field(default=False, description="Echo SQL statements to log"), + ] + + # ASR settings + asr_model_size: Annotated[ + str, + Field(default="base", description="Whisper model size"), + ] + asr_device: Annotated[ + str, + Field(default="cpu", description="ASR device (cpu or cuda)"), + ] + asr_compute_type: Annotated[ + str, + Field(default="int8", description="ASR compute type"), + ] + + # Server settings + grpc_port: Annotated[ + int, + Field(default=50051, ge=1, le=65535, description="gRPC server port"), + ] + + # Storage settings + meetings_dir: Annotated[ + Path, + Field( + default_factory=_default_meetings_dir, + description="Directory for meeting audio and metadata storage", + ), + ] + + # Retention settings + retention_enabled: Annotated[ + bool, + Field(default=False, description="Enable automatic retention policy"), + ] + retention_days: Annotated[ + int, + Field(default=90, ge=1, le=3650, description="Days to retain completed meetings"), + ] + retention_check_interval_hours: Annotated[ + int, + Field(default=24, ge=1, le=168, description="Hours between retention checks"), + ] + + # Diarization settings + diarization_enabled: Annotated[ + bool, + Field(default=False, description="Enable speaker diarization"), + ] + diarization_hf_token: Annotated[ + str | None, + Field(default=None, description="HuggingFace token for pyannote models"), + ] + diarization_device: Annotated[ + str, + Field(default="auto", description="Diarization device (auto, cpu, cuda, mps)"), + ] + diarization_streaming_latency: Annotated[ + float, + Field(default=0.5, ge=0.1, le=5.0, description="Streaming diarization latency in seconds"), + ] + diarization_min_speakers: Annotated[ + int, + Field(default=1, ge=1, le=20, description="Minimum expected speakers"), + ] + diarization_max_speakers: Annotated[ + int, + Field(default=10, ge=1, le=50, description="Maximum expected speakers"), + ] + diarization_refinement_enabled: Annotated[ + bool, + Field(default=True, description="Enable post-meeting diarization refinement"), + ] + + @property + def database_url_str(self) -> str: + """Return database URL as string.""" + return str(self.database_url) + + +def 
_load_settings() -> Settings: + """Load settings from environment. + + Returns: + Settings instance. + + Raises: + ValidationError: If required environment variables are not set. + """ + # pydantic-settings reads from environment; model_validate handles this + return cast("Settings", Settings.model_validate({})) + + +def _load_trigger_settings() -> TriggerSettings: + """Load trigger settings from environment.""" + return cast("TriggerSettings", TriggerSettings.model_validate({})) + + +@lru_cache +def get_settings() -> Settings: + """Get cached settings instance. + + Returns: + Cached Settings instance loaded from environment. + + Raises: + ValidationError: If required environment variables are not set. + """ + return _load_settings() + + +@lru_cache +def get_trigger_settings() -> TriggerSettings: + """Get cached trigger settings instance.""" + return _load_trigger_settings() +```` + +## File: src/noteflow/grpc/client.py +````python +"""NoteFlow gRPC client for Flet app integration.""" + +from __future__ import annotations + +import logging +import queue +import threading +import time +from collections.abc import Callable, Iterator +from dataclasses import dataclass +from typing import TYPE_CHECKING, Final + +import grpc + +from noteflow.config.constants import DEFAULT_SAMPLE_RATE + +from .proto import noteflow_pb2, noteflow_pb2_grpc + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + +logger = logging.getLogger(__name__) + +DEFAULT_SERVER: Final[str] = "localhost:50051" +CHUNK_TIMEOUT: Final[float] = 0.1 # Timeout for getting chunks from queue + + +@dataclass +class TranscriptSegment: + """Transcript segment from server.""" + + segment_id: int + text: str + start_time: float + end_time: float + language: str + is_final: bool + speaker_id: str = "" # Speaker identifier from diarization + speaker_confidence: float = 0.0 # Speaker assignment confidence + + +@dataclass +class ServerInfo: + """Server information.""" + + version: str + asr_model: str + asr_ready: bool + uptime_seconds: float + active_meetings: int + diarization_enabled: bool = False + diarization_ready: bool = False + + +@dataclass +class MeetingInfo: + """Meeting information.""" + + id: str + title: str + state: str + created_at: float + started_at: float + ended_at: float + duration_seconds: float + segment_count: int + + +@dataclass +class AnnotationInfo: + """Annotation information.""" + + id: str + meeting_id: str + annotation_type: str + text: str + start_time: float + end_time: float + segment_ids: list[int] + created_at: float + + +@dataclass +class ExportResult: + """Export result.""" + + content: str + format_name: str + file_extension: str + + +@dataclass +class DiarizationResult: + """Result of speaker diarization refinement.""" + + job_id: str + status: str + segments_updated: int + speaker_ids: list[str] + error_message: str = "" + + @property + def success(self) -> bool: + """Check if diarization succeeded.""" + return self.status == "completed" and not self.error_message + + @property + def is_terminal(self) -> bool: + """Check if job reached a terminal state.""" + return self.status in {"completed", "failed"} + + +@dataclass +class RenameSpeakerResult: + """Result of speaker rename operation.""" + + segments_updated: int + success: bool + + +# Callback types +TranscriptCallback = Callable[[TranscriptSegment], None] +ConnectionCallback = Callable[[bool, str], None] + + +class NoteFlowClient: + """gRPC client for NoteFlow server. + + Provides async-safe methods for Flet app integration. 
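+
+    Example (sketch; assumes a reachable server and audio supplied by the
+    capture layer):
+
+        client = NoteFlowClient(on_transcript=print)
+        if client.connect():
+            meeting = client.create_meeting("Standup")
+            if meeting is not None:
+                client.start_streaming(meeting.id)
+                ...  # queue chunks with send_audio() as they are captured
+                client.stop_streaming()
+            client.disconnect()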
+ """ + + def __init__( + self, + server_address: str = DEFAULT_SERVER, + on_transcript: TranscriptCallback | None = None, + on_connection_change: ConnectionCallback | None = None, + ) -> None: + """Initialize the client. + + Args: + server_address: Server address (host:port). + on_transcript: Callback for transcript updates. + on_connection_change: Callback for connection state changes. + """ + self._server_address = server_address + self._on_transcript = on_transcript + self._on_connection_change = on_connection_change + + self._channel: grpc.Channel | None = None + self._stub: noteflow_pb2_grpc.NoteFlowServiceStub | None = None + self._connected = False + + # Streaming state + self._stream_thread: threading.Thread | None = None + self._audio_queue: queue.Queue[tuple[str, NDArray[np.float32], float]] = queue.Queue() + self._stop_streaming = threading.Event() + self._current_meeting_id: str | None = None + + @property + def connected(self) -> bool: + """Check if connected to server.""" + return self._connected + + @property + def server_address(self) -> str: + """Get server address.""" + return self._server_address + + def connect(self, timeout: float = 5.0) -> bool: + """Connect to the server. + + Args: + timeout: Connection timeout in seconds. + + Returns: + True if connected successfully. + """ + try: + self._channel = grpc.insecure_channel( + self._server_address, + options=[ + ("grpc.max_send_message_length", 100 * 1024 * 1024), + ("grpc.max_receive_message_length", 100 * 1024 * 1024), + ], + ) + + # Wait for channel to be ready + grpc.channel_ready_future(self._channel).result(timeout=timeout) + + self._stub = noteflow_pb2_grpc.NoteFlowServiceStub(self._channel) + self._connected = True + + logger.info("Connected to server at %s", self._server_address) + self._notify_connection(True, "Connected") + + return True + + except grpc.FutureTimeoutError: + logger.error("Connection timeout: %s", self._server_address) + self._notify_connection(False, "Connection timeout") + return False + except grpc.RpcError as e: + logger.error("Connection failed: %s", e) + self._notify_connection(False, str(e)) + return False + + def disconnect(self) -> None: + """Disconnect from the server.""" + self.stop_streaming() + + if self._channel: + self._channel.close() + self._channel = None + self._stub = None + + self._connected = False + logger.info("Disconnected from server") + self._notify_connection(False, "Disconnected") + + def get_server_info(self) -> ServerInfo | None: + """Get server information. + + Returns: + ServerInfo or None if request fails. + """ + if not self._stub: + return None + + try: + response = self._stub.GetServerInfo(noteflow_pb2.ServerInfoRequest()) + return ServerInfo( + version=response.version, + asr_model=response.asr_model, + asr_ready=response.asr_ready, + uptime_seconds=response.uptime_seconds, + active_meetings=response.active_meetings, + diarization_enabled=response.diarization_enabled, + diarization_ready=response.diarization_ready, + ) + except grpc.RpcError as e: + logger.error("Failed to get server info: %s", e) + return None + + def create_meeting(self, title: str = "") -> MeetingInfo | None: + """Create a new meeting. + + Args: + title: Optional meeting title. + + Returns: + MeetingInfo or None if request fails. 
+ """ + if not self._stub: + return None + + try: + request = noteflow_pb2.CreateMeetingRequest(title=title) + response = self._stub.CreateMeeting(request) + return self._proto_to_meeting_info(response) + except grpc.RpcError as e: + logger.error("Failed to create meeting: %s", e) + return None + + def stop_meeting(self, meeting_id: str) -> MeetingInfo | None: + """Stop a meeting. + + Args: + meeting_id: Meeting ID. + + Returns: + Updated MeetingInfo or None if request fails. + """ + if not self._stub: + return None + + try: + request = noteflow_pb2.StopMeetingRequest(meeting_id=meeting_id) + response = self._stub.StopMeeting(request) + return self._proto_to_meeting_info(response) + except grpc.RpcError as e: + logger.error("Failed to stop meeting: %s", e) + return None + + def get_meeting(self, meeting_id: str) -> MeetingInfo | None: + """Get meeting details. + + Args: + meeting_id: Meeting ID. + + Returns: + MeetingInfo or None if not found. + """ + if not self._stub: + return None + + try: + request = noteflow_pb2.GetMeetingRequest( + meeting_id=meeting_id, + include_segments=False, + include_summary=False, + ) + response = self._stub.GetMeeting(request) + return self._proto_to_meeting_info(response) + except grpc.RpcError as e: + logger.error("Failed to get meeting: %s", e) + return None + + def get_meeting_segments(self, meeting_id: str) -> list[TranscriptSegment]: + """Retrieve transcript segments for a meeting. + + Uses existing GetMeetingRequest with include_segments=True. + + Args: + meeting_id: Meeting ID. + + Returns: + List of TranscriptSegment or empty list if not found. + """ + if not self._stub: + return [] + + try: + request = noteflow_pb2.GetMeetingRequest( + meeting_id=meeting_id, + include_segments=True, + include_summary=False, + ) + response = self._stub.GetMeeting(request) + return [ + TranscriptSegment( + segment_id=seg.segment_id, + text=seg.text, + start_time=seg.start_time, + end_time=seg.end_time, + language=seg.language, + is_final=True, + speaker_id=seg.speaker_id, + speaker_confidence=seg.speaker_confidence, + ) + for seg in response.segments + ] + except grpc.RpcError as e: + logger.error("Failed to get meeting segments: %s", e) + return [] + + def list_meetings(self, limit: int = 20) -> list[MeetingInfo]: + """List recent meetings. + + Args: + limit: Maximum number to return. + + Returns: + List of MeetingInfo. + """ + if not self._stub: + return [] + + try: + request = noteflow_pb2.ListMeetingsRequest( + limit=limit, + sort_order=noteflow_pb2.SORT_ORDER_CREATED_DESC, + ) + response = self._stub.ListMeetings(request) + return [self._proto_to_meeting_info(m) for m in response.meetings] + except grpc.RpcError as e: + logger.error("Failed to list meetings: %s", e) + return [] + + def start_streaming(self, meeting_id: str) -> bool: + """Start streaming audio for a meeting. + + Args: + meeting_id: Meeting ID to stream to. + + Returns: + True if streaming started. 
+ """ + if not self._stub: + logger.error("Not connected") + return False + + if self._stream_thread and self._stream_thread.is_alive(): + logger.warning("Already streaming") + return False + + self._current_meeting_id = meeting_id + self._stop_streaming.clear() + + # Clear any pending audio + while not self._audio_queue.empty(): + try: + self._audio_queue.get_nowait() + except queue.Empty: + break + + # Start streaming thread + self._stream_thread = threading.Thread( + target=self._stream_worker, + daemon=True, + ) + self._stream_thread.start() + + logger.info("Started streaming for meeting %s", meeting_id) + return True + + def stop_streaming(self) -> None: + """Stop streaming audio.""" + self._stop_streaming.set() + + if self._stream_thread: + self._stream_thread.join(timeout=2.0) + self._stream_thread = None + + self._current_meeting_id = None + logger.info("Stopped streaming") + + def send_audio( + self, + audio: NDArray[np.float32], + timestamp: float | None = None, + ) -> None: + """Send audio chunk to server. + + Non-blocking - queues audio for streaming thread. + + Args: + audio: Audio samples (float32, mono, 16kHz). + timestamp: Optional capture timestamp. + """ + if not self._current_meeting_id: + return + + if timestamp is None: + timestamp = time.time() + + self._audio_queue.put( + ( + self._current_meeting_id, + audio, + timestamp, + ) + ) + + def _stream_worker(self) -> None: + """Background thread for audio streaming.""" + if not self._stub: + return + + def audio_generator() -> Iterator[noteflow_pb2.AudioChunk]: + """Generate audio chunks from queue.""" + while not self._stop_streaming.is_set(): + try: + meeting_id, audio, timestamp = self._audio_queue.get( + timeout=CHUNK_TIMEOUT, + ) + yield noteflow_pb2.AudioChunk( + meeting_id=meeting_id, + audio_data=audio.tobytes(), + timestamp=timestamp, + sample_rate=DEFAULT_SAMPLE_RATE, + channels=1, + ) + except queue.Empty: + continue + + try: + # Start bidirectional stream + responses = self._stub.StreamTranscription(audio_generator()) + + # Process responses + for response in responses: + if self._stop_streaming.is_set(): + break + + if response.update_type == noteflow_pb2.UPDATE_TYPE_FINAL: + segment = TranscriptSegment( + segment_id=response.segment.segment_id, + text=response.segment.text, + start_time=response.segment.start_time, + end_time=response.segment.end_time, + language=response.segment.language, + is_final=True, + speaker_id=response.segment.speaker_id, + speaker_confidence=response.segment.speaker_confidence, + ) + self._notify_transcript(segment) + + elif response.update_type == noteflow_pb2.UPDATE_TYPE_PARTIAL: + segment = TranscriptSegment( + segment_id=0, + text=response.partial_text, + start_time=0, + end_time=0, + language="", + is_final=False, + ) + self._notify_transcript(segment) + + except grpc.RpcError as e: + logger.error("Stream error: %s", e) + self._notify_connection(False, f"Stream error: {e}") + + def _notify_transcript(self, segment: TranscriptSegment) -> None: + """Notify transcript callback. + + Args: + segment: Transcript segment. + """ + if self._on_transcript: + try: + self._on_transcript(segment) + except Exception as e: + logger.error("Transcript callback error: %s", e) + + def _notify_connection(self, connected: bool, message: str) -> None: + """Notify connection callback. + + Args: + connected: Connection state. + message: Status message. 
+ """ + if self._on_connection_change: + try: + self._on_connection_change(connected, message) + except Exception as e: + logger.error("Connection callback error: %s", e) + + @staticmethod + def _proto_to_meeting_info(meeting: noteflow_pb2.Meeting) -> MeetingInfo: + """Convert proto Meeting to MeetingInfo. + + Args: + meeting: Proto meeting. + + Returns: + MeetingInfo dataclass. + """ + state_map = { + noteflow_pb2.MEETING_STATE_UNSPECIFIED: "unknown", + noteflow_pb2.MEETING_STATE_CREATED: "created", + noteflow_pb2.MEETING_STATE_RECORDING: "recording", + noteflow_pb2.MEETING_STATE_STOPPED: "stopped", + noteflow_pb2.MEETING_STATE_COMPLETED: "completed", + noteflow_pb2.MEETING_STATE_ERROR: "error", + } + + return MeetingInfo( + id=meeting.id, + title=meeting.title, + state=state_map.get(meeting.state, "unknown"), + created_at=meeting.created_at, + started_at=meeting.started_at, + ended_at=meeting.ended_at, + duration_seconds=meeting.duration_seconds, + segment_count=len(meeting.segments), + ) + + # ========================================================================= + # Annotation Methods + # ========================================================================= + + def add_annotation( + self, + meeting_id: str, + annotation_type: str, + text: str, + start_time: float, + end_time: float, + segment_ids: list[int] | None = None, + ) -> AnnotationInfo | None: + """Add an annotation to a meeting. + + Args: + meeting_id: Meeting ID. + annotation_type: Type of annotation (action_item, decision, note). + text: Annotation text. + start_time: Start time in seconds. + end_time: End time in seconds. + segment_ids: Optional list of linked segment IDs. + + Returns: + AnnotationInfo or None if request fails. + """ + if not self._stub: + return None + + try: + proto_type = self._annotation_type_to_proto(annotation_type) + request = noteflow_pb2.AddAnnotationRequest( + meeting_id=meeting_id, + annotation_type=proto_type, + text=text, + start_time=start_time, + end_time=end_time, + segment_ids=segment_ids or [], + ) + response = self._stub.AddAnnotation(request) + return self._proto_to_annotation_info(response) + except grpc.RpcError as e: + logger.error("Failed to add annotation: %s", e) + return None + + def get_annotation(self, annotation_id: str) -> AnnotationInfo | None: + """Get an annotation by ID. + + Args: + annotation_id: Annotation ID. + + Returns: + AnnotationInfo or None if not found. + """ + if not self._stub: + return None + + try: + request = noteflow_pb2.GetAnnotationRequest(annotation_id=annotation_id) + response = self._stub.GetAnnotation(request) + return self._proto_to_annotation_info(response) + except grpc.RpcError as e: + logger.error("Failed to get annotation: %s", e) + return None + + def list_annotations( + self, + meeting_id: str, + start_time: float = 0, + end_time: float = 0, + ) -> list[AnnotationInfo]: + """List annotations for a meeting. + + Args: + meeting_id: Meeting ID. + start_time: Optional start time filter. + end_time: Optional end time filter. + + Returns: + List of AnnotationInfo. 
+ """ + if not self._stub: + return [] + + try: + request = noteflow_pb2.ListAnnotationsRequest( + meeting_id=meeting_id, + start_time=start_time, + end_time=end_time, + ) + response = self._stub.ListAnnotations(request) + return [self._proto_to_annotation_info(a) for a in response.annotations] + except grpc.RpcError as e: + logger.error("Failed to list annotations: %s", e) + return [] + + def update_annotation( + self, + annotation_id: str, + annotation_type: str | None = None, + text: str | None = None, + start_time: float | None = None, + end_time: float | None = None, + segment_ids: list[int] | None = None, + ) -> AnnotationInfo | None: + """Update an existing annotation. + + Args: + annotation_id: Annotation ID. + annotation_type: Optional new type. + text: Optional new text. + start_time: Optional new start time. + end_time: Optional new end time. + segment_ids: Optional new segment IDs. + + Returns: + Updated AnnotationInfo or None if request fails. + """ + if not self._stub: + return None + + try: + proto_type = ( + self._annotation_type_to_proto(annotation_type) + if annotation_type + else noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED + ) + request = noteflow_pb2.UpdateAnnotationRequest( + annotation_id=annotation_id, + annotation_type=proto_type, + text=text or "", + start_time=start_time or 0, + end_time=end_time or 0, + segment_ids=segment_ids or [], + ) + response = self._stub.UpdateAnnotation(request) + return self._proto_to_annotation_info(response) + except grpc.RpcError as e: + logger.error("Failed to update annotation: %s", e) + return None + + def delete_annotation(self, annotation_id: str) -> bool: + """Delete an annotation. + + Args: + annotation_id: Annotation ID. + + Returns: + True if deleted successfully. + """ + if not self._stub: + return False + + try: + request = noteflow_pb2.DeleteAnnotationRequest(annotation_id=annotation_id) + response = self._stub.DeleteAnnotation(request) + return response.success + except grpc.RpcError as e: + logger.error("Failed to delete annotation: %s", e) + return False + + @staticmethod + def _proto_to_annotation_info( + annotation: noteflow_pb2.Annotation, + ) -> AnnotationInfo: + """Convert proto Annotation to AnnotationInfo. + + Args: + annotation: Proto annotation. + + Returns: + AnnotationInfo dataclass. + """ + type_map = { + noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED: "note", + noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM: "action_item", + noteflow_pb2.ANNOTATION_TYPE_DECISION: "decision", + noteflow_pb2.ANNOTATION_TYPE_NOTE: "note", + noteflow_pb2.ANNOTATION_TYPE_RISK: "risk", + } + + return AnnotationInfo( + id=annotation.id, + meeting_id=annotation.meeting_id, + annotation_type=type_map.get(annotation.annotation_type, "note"), + text=annotation.text, + start_time=annotation.start_time, + end_time=annotation.end_time, + segment_ids=list(annotation.segment_ids), + created_at=annotation.created_at, + ) + + @staticmethod + def _annotation_type_to_proto(annotation_type: str) -> int: + """Convert annotation type string to proto enum. + + Args: + annotation_type: Type string. + + Returns: + Proto enum value. 
+ """ + type_map = { + "action_item": noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM, + "decision": noteflow_pb2.ANNOTATION_TYPE_DECISION, + "note": noteflow_pb2.ANNOTATION_TYPE_NOTE, + "risk": noteflow_pb2.ANNOTATION_TYPE_RISK, + } + return type_map.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_NOTE) + + # ========================================================================= + # Export Methods + # ========================================================================= + + def export_transcript( + self, + meeting_id: str, + format_name: str = "markdown", + ) -> ExportResult | None: + """Export meeting transcript. + + Args: + meeting_id: Meeting ID. + format_name: Export format (markdown, html). + + Returns: + ExportResult or None if request fails. + """ + if not self._stub: + return None + + try: + proto_format = self._export_format_to_proto(format_name) + request = noteflow_pb2.ExportTranscriptRequest( + meeting_id=meeting_id, + format=proto_format, + ) + response = self._stub.ExportTranscript(request) + return ExportResult( + content=response.content, + format_name=response.format_name, + file_extension=response.file_extension, + ) + except grpc.RpcError as e: + logger.error("Failed to export transcript: %s", e) + return None + + @staticmethod + def _export_format_to_proto(format_name: str) -> int: + """Convert export format string to proto enum. + + Args: + format_name: Format string. + + Returns: + Proto enum value. + """ + format_map = { + "markdown": noteflow_pb2.EXPORT_FORMAT_MARKDOWN, + "md": noteflow_pb2.EXPORT_FORMAT_MARKDOWN, + "html": noteflow_pb2.EXPORT_FORMAT_HTML, + } + return format_map.get(format_name.lower(), noteflow_pb2.EXPORT_FORMAT_MARKDOWN) + + @staticmethod + def _job_status_to_str(status: int) -> str: + """Convert job status enum to string.""" + status_map = { + noteflow_pb2.JOB_STATUS_UNSPECIFIED: "unspecified", + noteflow_pb2.JOB_STATUS_QUEUED: "queued", + noteflow_pb2.JOB_STATUS_RUNNING: "running", + noteflow_pb2.JOB_STATUS_COMPLETED: "completed", + noteflow_pb2.JOB_STATUS_FAILED: "failed", + } + return status_map.get(status, "unspecified") + + # ========================================================================= + # Speaker Diarization Methods + # ========================================================================= + + def refine_speaker_diarization( + self, + meeting_id: str, + num_speakers: int | None = None, + ) -> DiarizationResult | None: + """Run post-meeting speaker diarization refinement. + + Requests the server to run offline diarization on the meeting audio + as a background job and update segment speaker assignments. + + Args: + meeting_id: Meeting ID. + num_speakers: Optional known number of speakers (auto-detect if None). + + Returns: + DiarizationResult with job status or None if request fails. 
+ """ + if not self._stub: + return None + + try: + request = noteflow_pb2.RefineSpeakerDiarizationRequest( + meeting_id=meeting_id, + num_speakers=num_speakers or 0, + ) + response = self._stub.RefineSpeakerDiarization(request) + return DiarizationResult( + job_id=response.job_id, + status=self._job_status_to_str(response.status), + segments_updated=response.segments_updated, + speaker_ids=list(response.speaker_ids), + error_message=response.error_message, + ) + except grpc.RpcError as e: + logger.error("Failed to refine speaker diarization: %s", e) + return None + + def get_diarization_job_status(self, job_id: str) -> DiarizationResult | None: + """Get status for a diarization background job.""" + if not self._stub: + return None + + try: + request = noteflow_pb2.GetDiarizationJobStatusRequest(job_id=job_id) + response = self._stub.GetDiarizationJobStatus(request) + return DiarizationResult( + job_id=response.job_id, + status=self._job_status_to_str(response.status), + segments_updated=response.segments_updated, + speaker_ids=list(response.speaker_ids), + error_message=response.error_message, + ) + except grpc.RpcError as e: + logger.error("Failed to get diarization job status: %s", e) + return None + + def rename_speaker( + self, + meeting_id: str, + old_speaker_id: str, + new_speaker_name: str, + ) -> RenameSpeakerResult | None: + """Rename a speaker in all segments of a meeting. + + Args: + meeting_id: Meeting ID. + old_speaker_id: Current speaker ID (e.g., "SPEAKER_00"). + new_speaker_name: New speaker name (e.g., "Alice"). + + Returns: + RenameSpeakerResult or None if request fails. + """ + if not self._stub: + return None + + try: + request = noteflow_pb2.RenameSpeakerRequest( + meeting_id=meeting_id, + old_speaker_id=old_speaker_id, + new_speaker_name=new_speaker_name, + ) + response = self._stub.RenameSpeaker(request) + return RenameSpeakerResult( + segments_updated=response.segments_updated, + success=response.success, + ) + except grpc.RpcError as e: + logger.error("Failed to rename speaker: %s", e) + return None +```` + +## File: tests/integration/test_repositories.py +````python +"""Integration tests for SQLAlchemy repositories.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import TYPE_CHECKING +from uuid import uuid4 + +import pytest + +from noteflow.domain.entities import Annotation, Meeting, Segment, Summary, WordTiming +from noteflow.domain.entities.summary import ActionItem, KeyPoint +from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState +from noteflow.infrastructure.persistence.repositories import ( + SqlAlchemyAnnotationRepository, + SqlAlchemyMeetingRepository, + SqlAlchemySegmentRepository, + SqlAlchemySummaryRepository, +) + +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession + + +@pytest.mark.integration +class TestMeetingRepository: + """Integration tests for SqlAlchemyMeetingRepository.""" + + async def test_create_and_get_meeting(self, session: AsyncSession) -> None: + """Test creating and retrieving a meeting.""" + repo = SqlAlchemyMeetingRepository(session) + meeting = Meeting.create(title="Test Meeting", metadata={"key": "value"}) + + # Create + await repo.create(meeting) + await session.commit() + + # Get + retrieved = await repo.get(meeting.id) + + assert retrieved is not None + assert retrieved.id == meeting.id + assert retrieved.title == "Test Meeting" + assert retrieved.state == MeetingState.CREATED + assert retrieved.metadata == {"key": "value"} + + 
async def test_get_meeting_not_found(self, session: AsyncSession) -> None: + """Test retrieving non-existent meeting returns None.""" + repo = SqlAlchemyMeetingRepository(session) + meeting_id = MeetingId(Meeting.create().id) + + result = await repo.get(meeting_id) + + assert result is None + + async def test_update_meeting(self, session: AsyncSession) -> None: + """Test updating a meeting.""" + repo = SqlAlchemyMeetingRepository(session) + meeting = Meeting.create(title="Original") + await repo.create(meeting) + await session.commit() + + # Update state and title + meeting.start_recording() + await repo.update(meeting) + await session.commit() + + # Verify + retrieved = await repo.get(meeting.id) + assert retrieved is not None + assert retrieved.state == MeetingState.RECORDING + assert retrieved.started_at is not None + + async def test_delete_meeting(self, session: AsyncSession) -> None: + """Test deleting a meeting.""" + repo = SqlAlchemyMeetingRepository(session) + meeting = Meeting.create(title="To Delete") + await repo.create(meeting) + await session.commit() + + # Delete + result = await repo.delete(meeting.id) + await session.commit() + + assert result is True + + # Verify deleted + retrieved = await repo.get(meeting.id) + assert retrieved is None + + async def test_delete_meeting_not_found(self, session: AsyncSession) -> None: + """Test deleting non-existent meeting returns False.""" + repo = SqlAlchemyMeetingRepository(session) + meeting_id = MeetingId(Meeting.create().id) + + result = await repo.delete(meeting_id) + + assert result is False + + async def test_list_all_meetings(self, session: AsyncSession) -> None: + """Test listing all meetings with pagination.""" + repo = SqlAlchemyMeetingRepository(session) + + # Create multiple meetings + meetings = [Meeting.create(title=f"Meeting {i}") for i in range(5)] + for m in meetings: + await repo.create(m) + await session.commit() + + # List with pagination + result, total = await repo.list_all(limit=3, offset=0) + + assert len(result) == 3 + assert total == 5 + + async def test_list_meetings_filter_by_state(self, session: AsyncSession) -> None: + """Test filtering meetings by state.""" + repo = SqlAlchemyMeetingRepository(session) + + # Create meetings in different states + created = Meeting.create(title="Created") + await repo.create(created) + + recording = Meeting.create(title="Recording") + recording.start_recording() + await repo.create(recording) + await session.commit() + + # Filter by RECORDING state + result, _ = await repo.list_all(states=[MeetingState.RECORDING]) + + assert len(result) == 1 + assert result[0].title == "Recording" + + async def test_count_by_state(self, session: AsyncSession) -> None: + """Test counting meetings by state.""" + repo = SqlAlchemyMeetingRepository(session) + + # Create meetings + for _ in range(3): + await repo.create(Meeting.create()) + await session.commit() + + count = await repo.count_by_state(MeetingState.CREATED) + + assert count == 3 + + +@pytest.mark.integration +class TestSegmentRepository: + """Integration tests for SqlAlchemySegmentRepository.""" + + async def test_add_and_get_segments(self, session: AsyncSession) -> None: + """Test adding and retrieving segments.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + segment_repo = SqlAlchemySegmentRepository(session) + + # Create meeting first + meeting = Meeting.create(title="Test") + await meeting_repo.create(meeting) + await session.commit() + + # Add segments + segment = Segment( + segment_id=0, + text="Hello world", + 
start_time=0.0, + end_time=2.5, + meeting_id=meeting.id, + language="en", + ) + await segment_repo.add(meeting.id, segment) + await session.commit() + + # Get segments + result = await segment_repo.get_by_meeting(meeting.id) + + assert len(result) == 1 + assert result[0].text == "Hello world" + assert result[0].db_id is not None + + async def test_add_segment_with_words(self, session: AsyncSession) -> None: + """Test adding segment with word-level timing.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + segment_repo = SqlAlchemySegmentRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + words = [ + WordTiming(word="Hello", start_time=0.0, end_time=0.5, probability=0.95), + WordTiming(word="world", start_time=0.5, end_time=1.0, probability=0.98), + ] + segment = Segment( + segment_id=0, + text="Hello world", + start_time=0.0, + end_time=1.0, + meeting_id=meeting.id, + words=words, + ) + await segment_repo.add(meeting.id, segment) + await session.commit() + + result = await segment_repo.get_by_meeting(meeting.id, include_words=True) + + assert len(result[0].words) == 2 + assert result[0].words[0].word == "Hello" + + async def test_add_batch_segments(self, session: AsyncSession) -> None: + """Test batch adding segments.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + segment_repo = SqlAlchemySegmentRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + segments = [ + Segment(segment_id=i, text=f"Segment {i}", start_time=float(i), end_time=float(i + 1)) + for i in range(3) + ] + await segment_repo.add_batch(meeting.id, segments) + await session.commit() + + result = await segment_repo.get_by_meeting(meeting.id) + + assert len(result) == 3 + + async def test_get_next_segment_id(self, session: AsyncSession) -> None: + """Test get_next_segment_id returns max + 1 or 0 when empty.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + segment_repo = SqlAlchemySegmentRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + assert await segment_repo.get_next_segment_id(meeting.id) == 0 + + segments = [ + Segment(segment_id=0, text="Segment 0", start_time=0.0, end_time=1.0), + Segment(segment_id=5, text="Segment 5", start_time=1.0, end_time=2.0), + ] + await segment_repo.add_batch(meeting.id, segments) + await session.commit() + + assert await segment_repo.get_next_segment_id(meeting.id) == 6 + + async def test_update_embedding_and_retrieve(self, session: AsyncSession) -> None: + """Test updating a segment embedding persists to the database.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + segment_repo = SqlAlchemySegmentRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + segment = Segment(segment_id=0, text="Hello", start_time=0.0, end_time=1.0) + await segment_repo.add(meeting.id, segment) + await session.commit() + + assert segment.db_id is not None + embedding = [0.1] * 1536 + await segment_repo.update_embedding(segment.db_id, embedding) + await session.commit() + + result = await segment_repo.get_by_meeting(meeting.id) + assert result[0].embedding == pytest.approx(embedding) + + async def test_search_semantic_orders_by_similarity(self, session: AsyncSession) -> None: + """Test semantic search returns closest matches first.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + segment_repo = 
SqlAlchemySegmentRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + emb1 = [1.0] + [0.0] * 1535 + emb2 = [0.0, 1.0] + [0.0] * 1534 + + segment1 = Segment( + segment_id=0, + text="First", + start_time=0.0, + end_time=1.0, + embedding=emb1, + ) + segment2 = Segment( + segment_id=1, + text="Second", + start_time=1.0, + end_time=2.0, + embedding=emb2, + ) + await segment_repo.add_batch(meeting.id, [segment1, segment2]) + await session.commit() + + results = await segment_repo.search_semantic(query_embedding=emb1, limit=2) + assert len(results) == 2 + assert results[0][0].segment_id == 0 + assert results[0][1] >= results[1][1] + + +@pytest.mark.integration +class TestSummaryRepository: + """Integration tests for SqlAlchemySummaryRepository.""" + + async def test_save_and_get_summary(self, session: AsyncSession) -> None: + """Test saving and retrieving summary.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + summary_repo = SqlAlchemySummaryRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + summary = Summary( + meeting_id=meeting.id, + executive_summary="This was a productive meeting.", + generated_at=datetime.now(UTC), + model_version="test-v1", + ) + await summary_repo.save(summary) + await session.commit() + + result = await summary_repo.get_by_meeting(meeting.id) + + assert result is not None + assert result.executive_summary == "This was a productive meeting." + assert result.model_version == "test-v1" + + async def test_save_summary_with_key_points(self, session: AsyncSession) -> None: + """Test saving summary with key points.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + summary_repo = SqlAlchemySummaryRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + key_points = [ + KeyPoint(text="Point 1", segment_ids=[0, 1]), + KeyPoint(text="Point 2", segment_ids=[2]), + ] + summary = Summary( + meeting_id=meeting.id, + executive_summary="Summary", + key_points=key_points, + ) + await summary_repo.save(summary) + await session.commit() + + result = await summary_repo.get_by_meeting(meeting.id) + + assert result is not None + assert len(result.key_points) == 2 + assert result.key_points[0].text == "Point 1" + + async def test_save_summary_with_action_items(self, session: AsyncSession) -> None: + """Test saving summary with action items.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + summary_repo = SqlAlchemySummaryRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + action_items = [ + ActionItem(text="Review PR", assignee="Alice", priority=2), + ] + summary = Summary( + meeting_id=meeting.id, + executive_summary="Summary", + action_items=action_items, + ) + await summary_repo.save(summary) + await session.commit() + + result = await summary_repo.get_by_meeting(meeting.id) + + assert result is not None + assert len(result.action_items) == 1 + assert result.action_items[0].text == "Review PR" + assert result.action_items[0].assignee == "Alice" + + async def test_delete_summary(self, session: AsyncSession) -> None: + """Test deleting summary.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + summary_repo = SqlAlchemySummaryRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + summary = Summary(meeting_id=meeting.id, executive_summary="To 
delete") + await summary_repo.save(summary) + await session.commit() + + result = await summary_repo.delete_by_meeting(meeting.id) + await session.commit() + + assert result is True + + retrieved = await summary_repo.get_by_meeting(meeting.id) + assert retrieved is None + + async def test_update_summary_replaces_items(self, session: AsyncSession) -> None: + """Test saving a summary twice replaces key points and action items.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + summary_repo = SqlAlchemySummaryRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + summary_v1 = Summary( + meeting_id=meeting.id, + executive_summary="v1", + key_points=[KeyPoint(text="Old KP")], + action_items=[ActionItem(text="Old AI")], + ) + await summary_repo.save(summary_v1) + await session.commit() + + summary_v2 = Summary( + meeting_id=meeting.id, + executive_summary="v2", + key_points=[KeyPoint(text="New KP")], + action_items=[ActionItem(text="New AI")], + ) + await summary_repo.save(summary_v2) + await session.commit() + + result = await summary_repo.get_by_meeting(meeting.id) + + assert result is not None + assert result.executive_summary == "v2" + assert [kp.text for kp in result.key_points] == ["New KP"] + assert [ai.text for ai in result.action_items] == ["New AI"] + + +@pytest.mark.integration +class TestAnnotationRepository: + """Integration tests for SqlAlchemyAnnotationRepository.""" + + async def test_add_and_get_annotation(self, session: AsyncSession) -> None: + """Test adding and retrieving annotation.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + annotation_repo = SqlAlchemyAnnotationRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=meeting.id, + annotation_type=AnnotationType.NOTE, + text="Decision made", + start_time=1.0, + end_time=2.0, + segment_ids=[0], + ) + await annotation_repo.add(annotation) + await session.commit() + + retrieved = await annotation_repo.get(annotation.id) + + assert retrieved is not None + assert retrieved.text == "Decision made" + assert retrieved.segment_ids == [0] + + async def test_get_by_meeting_ordered(self, session: AsyncSession) -> None: + """Test annotations returned in start_time order.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + annotation_repo = SqlAlchemyAnnotationRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + a1 = Annotation( + id=AnnotationId(uuid4()), + meeting_id=meeting.id, + annotation_type=AnnotationType.NOTE, + text="Second", + start_time=2.0, + end_time=3.0, + ) + a2 = Annotation( + id=AnnotationId(uuid4()), + meeting_id=meeting.id, + annotation_type=AnnotationType.NOTE, + text="First", + start_time=1.0, + end_time=2.0, + ) + await annotation_repo.add(a1) + await annotation_repo.add(a2) + await session.commit() + + result = await annotation_repo.get_by_meeting(meeting.id) + + assert [a.text for a in result] == ["First", "Second"] + + async def test_get_by_time_range_inclusive(self, session: AsyncSession) -> None: + """Test time range query includes boundary overlaps.""" + meeting_repo = SqlAlchemyMeetingRepository(session) + annotation_repo = SqlAlchemyAnnotationRepository(session) + + meeting = Meeting.create() + await meeting_repo.create(meeting) + await session.commit() + + a1 = Annotation( + id=AnnotationId(uuid4()), + meeting_id=meeting.id, + 
annotation_type=AnnotationType.NOTE, + text="Ends at boundary", + start_time=0.0, + end_time=1.0, + ) + a2 = Annotation( + id=AnnotationId(uuid4()), + meeting_id=meeting.id, + annotation_type=AnnotationType.NOTE, + text="Starts at boundary", + start_time=1.0, + end_time=2.0, + ) + await annotation_repo.add(a1) + await annotation_repo.add(a2) + await session.commit() + + result = await annotation_repo.get_by_time_range(meeting.id, start_time=1.0, end_time=1.0) + + assert {a.text for a in result} == {"Ends at boundary", "Starts at boundary"} + + async def test_update_annotation_not_found_raises(self, session: AsyncSession) -> None: + """Test update raises when annotation does not exist.""" + annotation_repo = SqlAlchemyAnnotationRepository(session) + + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=MeetingId(uuid4()), + annotation_type=AnnotationType.NOTE, + text="Missing", + start_time=0.0, + end_time=1.0, + ) + + with pytest.raises(ValueError, match=r"Annotation .* not found"): + await annotation_repo.update(annotation) + + async def test_delete_annotation_not_found(self, session: AsyncSession) -> None: + """Test deleting unknown annotation returns False.""" + annotation_repo = SqlAlchemyAnnotationRepository(session) + + result = await annotation_repo.delete(AnnotationId(uuid4())) + + assert result is False +```` + +## File: src/noteflow/client/components/__init__.py +````python +"""UI components for NoteFlow client. + +All components use existing types and utilities - no recreation. +""" + +from noteflow.client.components._async_mixin import AsyncOperationMixin +from noteflow.client.components._thread_mixin import BackgroundWorkerMixin +from noteflow.client.components.annotation_display import AnnotationDisplayComponent +from noteflow.client.components.annotation_toolbar import AnnotationToolbarComponent +from noteflow.client.components.connection_panel import ConnectionPanelComponent +from noteflow.client.components.meeting_library import MeetingLibraryComponent +from noteflow.client.components.playback_controls import PlaybackControlsComponent +from noteflow.client.components.playback_sync import PlaybackSyncController +from noteflow.client.components.recording_timer import RecordingTimerComponent +from noteflow.client.components.summary_panel import SummaryPanelComponent +from noteflow.client.components.transcript import TranscriptComponent +from noteflow.client.components.vu_meter import VuMeterComponent + +__all__ = [ + "AnnotationDisplayComponent", + "AnnotationToolbarComponent", + "AsyncOperationMixin", + "BackgroundWorkerMixin", + "ConnectionPanelComponent", + "MeetingLibraryComponent", + "PlaybackControlsComponent", + "PlaybackSyncController", + "RecordingTimerComponent", + "SummaryPanelComponent", + "TranscriptComponent", + "VuMeterComponent", +] +```` + +## File: src/noteflow/client/app.py +````python +"""NoteFlow Flet client application. + +Captures audio locally and streams to NoteFlow gRPC server for transcription. +Orchestrates UI components - does not contain component logic. 
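+
+Run as a script (module path assumed from the src layout), for example:
+    python -m noteflow.client.app -s localhost:50051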
+""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import queue +import threading +import time +from typing import TYPE_CHECKING, Final + +import flet as ft + +from noteflow.application.services import TriggerService +from noteflow.client._trigger_mixin import TriggerMixin +from noteflow.client.components import ( + AnnotationDisplayComponent, + AnnotationToolbarComponent, + ConnectionPanelComponent, + MeetingLibraryComponent, + PlaybackControlsComponent, + PlaybackSyncController, + RecordingTimerComponent, + SummaryPanelComponent, + TranscriptComponent, + VuMeterComponent, +) +from noteflow.client.state import AppState +from noteflow.config.constants import DEFAULT_SAMPLE_RATE +from noteflow.config.settings import TriggerSettings, get_settings +from noteflow.infrastructure.audio import ( + MeetingAudioReader, + PlaybackState, + SoundDeviceCapture, + TimestampedAudio, +) +from noteflow.infrastructure.security import AesGcmCryptoBox, KeyringKeyStore +from noteflow.infrastructure.summarization import create_summarization_service + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + + from noteflow.application.services.summarization_service import SummarizationService + from noteflow.grpc.client import ( + AnnotationInfo, + MeetingInfo, + NoteFlowClient, + ServerInfo, + TranscriptSegment, + ) + from noteflow.infrastructure.triggers import AppAudioProvider, CalendarProvider + +logger = logging.getLogger(__name__) + +DEFAULT_SERVER: Final[str] = "localhost:50051" + + +class NoteFlowClientApp(TriggerMixin): + """Flet client application for NoteFlow. + + Orchestrates UI components and recording logic. + Inherits trigger detection from TriggerMixin. + """ + + def __init__(self, server_address: str = DEFAULT_SERVER) -> None: + """Initialize the app. + + Args: + server_address: NoteFlow server address. 
+ """ + # Centralized state + self._state = AppState(server_address=server_address) + + # Audio capture (REUSE existing SoundDeviceCapture) + self._audio_capture: SoundDeviceCapture | None = None + + # Client reference (managed by ConnectionPanelComponent) + self._client: NoteFlowClient | None = None + + # UI components (initialized in _build_ui) + self._connection_panel: ConnectionPanelComponent | None = None + self._vu_meter: VuMeterComponent | None = None + self._timer: RecordingTimerComponent | None = None + self._transcript: TranscriptComponent | None = None + self._playback_controls: PlaybackControlsComponent | None = None + self._sync_controller: PlaybackSyncController | None = None + self._annotation_toolbar: AnnotationToolbarComponent | None = None + + # Meeting library (M4) + self._meeting_library: MeetingLibraryComponent | None = None + + # Summarization (M6) + self._summarization_service: SummarizationService | None = None + self._summary_panel: SummaryPanelComponent | None = None + + # Annotation display for review mode (M4) + self._annotation_display: AnnotationDisplayComponent | None = None + + # Audio reader for archived meetings (M4) + self._audio_reader: MeetingAudioReader | None = None + + # Trigger detection (M5) + self._trigger_settings: TriggerSettings | None = None + self._trigger_service: TriggerService | None = None + self._app_audio: AppAudioProvider | None = None + self._calendar_provider: CalendarProvider | None = None + self._trigger_poll_interval: float = 0.0 + self._trigger_task: asyncio.Task | None = None + + # Recording buttons + self._record_btn: ft.ElevatedButton | None = None + self._stop_btn: ft.ElevatedButton | None = None + + # Audio frame consumer thread (process frames from audio callback thread) + self._audio_frame_queue: queue.Queue[tuple[NDArray[np.float32], float]] = queue.Queue() + self._audio_consumer_stop = threading.Event() + self._audio_consumer_thread: threading.Thread | None = None + + def run(self) -> None: + """Run the Flet application.""" + ft.app(target=self._main) + + def _main(self, page: ft.Page) -> None: + """Flet app entry point. + + Args: + page: Flet page. + """ + self._state.set_page(page) + page.title = "NoteFlow Client" + page.window.width = 800 + page.window.height = 600 + page.padding = 20 + + page.add(self._build_ui()) + page.update() + + # Initialize trigger detection (M5) + self._initialize_triggers() + + # Start trigger check loop if enabled (opt-in via settings) + if self._state.trigger_enabled: + self._trigger_task = page.run_task(self._trigger_check_loop) + + # Ensure background tasks are cancelled when the UI closes + page.on_disconnect = lambda _e: self._shutdown() + + def _build_ui(self) -> ft.Column: + """Build the main UI by composing components. + + Returns: + Main UI column. 
+ """ + # Create components with state + self._connection_panel = ConnectionPanelComponent( + state=self._state, + on_connected=self._on_connected, + on_disconnected=self._on_disconnected, + on_transcript_callback=self._on_transcript, + on_connection_change_callback=self._on_connection_change, + ) + self._vu_meter = VuMeterComponent(state=self._state) + self._timer = RecordingTimerComponent(state=self._state) + + # Transcript with click handler for playback sync + self._transcript = TranscriptComponent( + state=self._state, + on_segment_click=self._on_segment_click, + ) + + # Playback controls and sync + self._playback_controls = PlaybackControlsComponent( + state=self._state, + on_position_change=self._on_playback_position_change, + ) + self._sync_controller = PlaybackSyncController( + state=self._state, + on_highlight_change=self._on_highlight_change, + ) + + # Annotation toolbar + self._annotation_toolbar = AnnotationToolbarComponent( + state=self._state, + get_client=lambda: self._client, + ) + + # Annotation display for review mode + self._annotation_display = AnnotationDisplayComponent( + state=self._state, + on_annotation_seek=self._on_annotation_seek, + ) + + # Meeting library (M4) + self._meeting_library = MeetingLibraryComponent( + state=self._state, + get_client=lambda: self._client, + on_meeting_selected=self._on_meeting_selected, + ) + + # Initialize summarization service - auto-detects LOCAL/MOCK providers + self._summarization_service = create_summarization_service() + + # Summary panel + self._summary_panel = SummaryPanelComponent( + state=self._state, + get_service=lambda: self._summarization_service, + on_citation_click=self._on_citation_click, + ) + + # Recording controls (still in app.py - orchestration) + self._record_btn = ft.ElevatedButton( + "Start Recording", + on_click=self._on_record_click, + icon=ft.Icons.MIC, + disabled=True, + ) + self._stop_btn = ft.ElevatedButton( + "Stop", + on_click=self._on_stop_click, + icon=ft.Icons.STOP, + disabled=True, + ) + + recording_row = ft.Row([self._record_btn, self._stop_btn]) + + # Main layout - compose component builds + return ft.Column( + [ + ft.Text("NoteFlow Client", size=24, weight=ft.FontWeight.BOLD), + ft.Divider(), + self._connection_panel.build(), + ft.Divider(), + recording_row, + self._vu_meter.build(), + self._timer.build(), + self._annotation_toolbar.build(), + self._annotation_display.build(), + ft.Divider(), + ft.Text("Transcript:", size=16, weight=ft.FontWeight.BOLD), + self._transcript.build(), + self._playback_controls.build(), + ft.Divider(), + self._summary_panel.build(), + ft.Divider(), + ft.Text("Meeting Library:", size=16, weight=ft.FontWeight.BOLD), + self._meeting_library.build(), + ], + spacing=10, + ) + + def _ensure_audio_reader(self) -> MeetingAudioReader | None: + """Lazily initialize MeetingAudioReader (for review playback).""" + if self._audio_reader: + return self._audio_reader + + try: + settings = get_settings() + keystore = KeyringKeyStore() + crypto = AesGcmCryptoBox(keystore) + self._audio_reader = MeetingAudioReader(crypto, settings.meetings_dir) + except (OSError, ValueError, KeyError, RuntimeError) as exc: + logger.exception("Failed to initialize meeting audio reader: %s", exc) + self._audio_reader = None + + return self._audio_reader + + def _load_meeting_audio(self, meeting: MeetingInfo) -> list[TimestampedAudio]: + """Load archived audio for a meeting, if available.""" + reader = self._ensure_audio_reader() + if not reader: + return [] + + try: + if not 
reader.audio_exists(meeting.id):
+                logger.info("No archived audio for meeting %s", meeting.id)
+                return []
+            return reader.load_meeting_audio(meeting.id)
+        except FileNotFoundError:
+            logger.info("Audio file missing for meeting %s", meeting.id)
+            return []
+        except (OSError, ValueError, RuntimeError) as exc:
+            logger.exception("Failed to load audio for meeting %s: %s", meeting.id, exc)
+            return []
+
+    def _ensure_audio_capture(self) -> bool:
+        """Start audio capture if needed.
+
+        Returns:
+            True if audio capture is running, False if start failed.
+        """
+        if self._audio_capture:
+            return True
+
+        try:
+            self._audio_capture = SoundDeviceCapture()
+            self._audio_capture.start(
+                device_id=None,
+                on_frames=self._on_audio_frames,
+                sample_rate=DEFAULT_SAMPLE_RATE,
+                channels=1,
+                chunk_duration_ms=100,
+            )
+        except (RuntimeError, OSError) as exc:
+            logger.exception("Failed to start audio capture: %s", exc)
+            self._audio_capture = None
+            return False
+
+        return True
+
+    def _on_connected(self, client: NoteFlowClient, info: ServerInfo) -> None:
+        """Handle successful connection.
+
+        Args:
+            client: Connected NoteFlowClient.
+            info: Server info.
+        """
+        self._client = client
+        if self._transcript:
+            self._transcript.display_server_info(info)
+        if (
+            self._state.recording
+            and self._state.current_meeting
+            and not self._client.start_streaming(self._state.current_meeting.id)
+        ):
+            logger.error("Failed to resume streaming after reconnect")
+            self._stop_recording()
+            self._update_recording_buttons()
+
+        # Refresh meeting library on connection
+        if self._meeting_library:
+            self._meeting_library.refresh_meetings()
+
+    def _on_disconnected(self) -> None:
+        """Handle disconnection."""
+        self._shutdown()
+        if self._state.recording:
+            self._stop_recording()
+        self._client = None
+        self._update_recording_buttons()
+
+    def _on_connection_change(self, _connected: bool, _message: str) -> None:
+        """Handle connection state change from client.
+
+        Args:
+            _connected: Connection state (unused).
+            _message: Status message (unused).
+        """
+        self._update_recording_buttons()
+
+    def _on_transcript(self, segment: TranscriptSegment) -> None:
+        """Handle transcript update callback.
+
+        Args:
+            segment: Transcript segment from server.
+        """
+        if self._transcript:
+            self._transcript.add_segment(segment)
+        self._ensure_summary_panel_ready()
+
+    def _on_record_click(self, e: ft.ControlEvent) -> None:
+        """Handle record button click.
+
+        Args:
+            e: Control event.
+        """
+        self._start_recording()
+
+    def _on_stop_click(self, e: ft.ControlEvent) -> None:
+        """Handle stop button click.
+
+        Args:
+            e: Control event.
+ """ + self._stop_recording() + + def _start_recording(self) -> None: + """Start recording audio.""" + if not self._client or not self._state.connected: + return + + # Create meeting + meeting = self._client.create_meeting(title=f"Recording {time.strftime('%Y-%m-%d %H:%M')}") + if not meeting: + logger.error("Failed to create meeting") + return + + self._state.current_meeting = meeting + + # Make summary panel visible once we have meeting context + self._ensure_summary_panel_ready() + + # Start streaming + if not self._client.start_streaming(meeting.id): + logger.error("Failed to start streaming") + self._client.stop_meeting(meeting.id) + self._state.current_meeting = None + return + + # Start audio capture (reuse existing capture if already running) + if not self._ensure_audio_capture(): + self._client.stop_streaming() + self._client.stop_meeting(meeting.id) + self._state.reset_recording_state() + self._update_recording_buttons() + return + + self._state.recording = True + + # Start audio frame consumer thread + self._start_audio_consumer() + + # Clear audio buffer for new recording + self._state.session_audio_buffer.clear() + + # Start timer + if self._timer: + self._timer.start() + + # Clear transcript + if self._transcript: + self._transcript.clear() + + # Enable annotation toolbar + if self._annotation_toolbar: + self._annotation_toolbar.set_visible(True) + self._annotation_toolbar.set_enabled(True) + + self._update_recording_buttons() + + def _stop_recording(self) -> None: + """Stop recording audio.""" + # Stop audio frame consumer thread + self._stop_audio_consumer() + + # Stop audio capture + if self._audio_capture and not self._should_keep_capture_running(): + self._audio_capture.stop() + self._audio_capture = None + + # Stop streaming + if self._client: + self._client.stop_streaming() + + # Stop meeting + if self._state.current_meeting: + self._client.stop_meeting(self._state.current_meeting.id) + + # Load buffered audio for playback + if self._state.session_audio_buffer and self._playback_controls: + self._playback_controls.load_audio() + self._playback_controls.set_visible(True) + + # Start sync controller for playback + if self._sync_controller: + self._sync_controller.start() + + # Keep annotation toolbar visible for playback annotations + if self._annotation_toolbar: + self._annotation_toolbar.set_enabled(True) + + # Ensure summary panel reflects current data after recording ends + self._ensure_summary_panel_ready() + + # Reset recording state (but keep meeting/transcript for playback) + self._state.recording = False + + # Stop timer + if self._timer: + self._timer.stop() + + self._update_recording_buttons() + + def _on_audio_frames( + self, + frames: NDArray[np.float32], + timestamp: float, + ) -> None: + """Handle audio frames from capture (called from audio thread). + + Enqueues frames for processing by consumer thread to avoid blocking + the real-time audio callback. + + Args: + frames: Audio samples. + timestamp: Capture timestamp. 
+ """ + self._audio_frame_queue.put_nowait((frames.copy(), timestamp)) + + def _start_audio_consumer(self) -> None: + """Start the audio frame consumer thread.""" + if self._audio_consumer_thread is not None and self._audio_consumer_thread.is_alive(): + return + self._audio_consumer_stop.clear() + self._audio_consumer_thread = threading.Thread( + target=self._audio_consumer_loop, + daemon=True, + name="audio-consumer", + ) + self._audio_consumer_thread.start() + + def _stop_audio_consumer(self) -> None: + """Stop the audio frame consumer thread.""" + self._audio_consumer_stop.set() + if self._audio_consumer_thread is not None: + self._audio_consumer_thread.join(timeout=1.0) + self._audio_consumer_thread = None + # Drain remaining frames + while not self._audio_frame_queue.empty(): + try: + self._audio_frame_queue.get_nowait() + except queue.Empty: + break + + def _audio_consumer_loop(self) -> None: + """Consumer loop that processes audio frames from the queue.""" + while not self._audio_consumer_stop.is_set(): + try: + frames, timestamp = self._audio_frame_queue.get(timeout=0.1) + self._process_audio_frames(frames, timestamp) + except queue.Empty: + continue + + def _process_audio_frames( + self, + frames: NDArray[np.float32], + timestamp: float, + ) -> None: + """Process audio frames from consumer thread. + + Args: + frames: Audio samples. + timestamp: Capture timestamp. + """ + # Send to server + if self._client and self._state.recording: + self._client.send_audio(frames, timestamp) + + # Buffer for playback + if self._state.recording: + duration = len(frames) / DEFAULT_SAMPLE_RATE + self._state.session_audio_buffer.append( + TimestampedAudio(frames=frames, timestamp=timestamp, duration=duration) + ) + + # Update VU meter + if self._vu_meter: + self._vu_meter.on_audio_frames(frames) + + # Trigger detection uses system output + calendar; no mic-derived updates here. + + def _on_segment_click(self, segment_index: int) -> None: + """Handle transcript segment click - seek playback to segment. + + Args: + segment_index: Index of clicked segment. + """ + if self._sync_controller: + self._sync_controller.seek_to_segment(segment_index) + + def _on_citation_click(self, segment_id: int) -> None: + """Handle citation chip click - seek to segment by segment_id. + + Args: + segment_id: Segment ID from citation. + """ + # Find segment index by segment_id + for idx, seg in enumerate(self._state.transcript_segments): + if seg.segment_id == segment_id: + self._on_segment_click(idx) + break + + def _on_annotation_seek(self, timestamp: float) -> None: + """Handle annotation click - seek to timestamp. + + Args: + timestamp: Timestamp in seconds to seek to. + """ + if self._playback_controls: + self._playback_controls.seek(timestamp) + + def _on_meeting_selected(self, meeting: MeetingInfo) -> None: + """Handle meeting selection from library. + + Loads transcript segments, annotations, and prepares for playback review. + + Args: + meeting: Selected meeting info. + """ + if not self._client: + return + + # 1. 
Stop any existing playback + if self._state.playback.state != PlaybackState.STOPPED: + self._state.playback.stop() + if self._sync_controller: + self._sync_controller.stop() + + # Capture client reference for closure (may run in background thread) + client = self._client + + def load_and_apply() -> None: + if not client: + return + try: + segments = client.get_meeting_segments(meeting.id) + annotations = client.list_annotations(meeting.id) + audio_chunks = self._load_meeting_audio(meeting) + except (ConnectionError, ValueError, OSError, RuntimeError) as exc: + logger.exception("Failed to load meeting %s: %s", meeting.id, exc) + return + + # Apply results on UI thread to avoid race conditions + self._state.run_on_ui_thread( + lambda: self._apply_meeting_data(meeting, segments, annotations, audio_chunks) + ) + + page = self._state._page + if page and hasattr(page, "run_thread"): + page.run_thread(load_and_apply) + else: + load_and_apply() + + def _apply_meeting_data( + self, + meeting: MeetingInfo, + segments: list[TranscriptSegment], + annotations: list[AnnotationInfo], + audio_chunks: list[TimestampedAudio], + ) -> None: + """Apply loaded meeting data to state and UI (UI thread only).""" + # Clear state and UI before populating with fresh data + self._state.clear_transcript() + self._state.annotations.clear() + self._state.current_summary = None + self._state.highlighted_segment_index = None + self._state.clear_session_audio() + + if self._transcript: + self._transcript.clear() + if self._annotation_display: + self._annotation_display.clear() + + # Populate transcript + if self._transcript: + for segment in segments: + self._transcript.add_segment(segment) + + # Populate annotations + self._state.annotations = annotations + if self._annotation_display: + self._annotation_display.load_annotations(annotations) + + # Update meeting state + self._state.current_meeting = meeting + self._state.selected_meeting = meeting + + # Enable annotation toolbar for adding new annotations + if self._annotation_toolbar: + self._annotation_toolbar.set_visible(True) + self._annotation_toolbar.set_enabled(True) + + # Load audio for playback if available + if audio_chunks: + self._state.session_audio_buffer = audio_chunks + if self._playback_controls: + self._playback_controls.load_audio() + self._playback_controls.set_visible(True) + else: + # Hide controls when no audio is available + if self._playback_controls: + self._playback_controls.set_visible(False) + self._state.playback.stop() + self._state.playback_position = 0.0 + + # Update summary panel visibility/enabled state + self._ensure_summary_panel_ready() + + # Start sync controller for playback highlighting + if self._sync_controller: + self._sync_controller.start() + + logger.info( + "Loaded meeting: %s (%d segments, %d annotations, %d audio chunks)", + meeting.title, + len(segments), + len(annotations), + len(audio_chunks), + ) + + def _ensure_summary_panel_ready(self) -> None: + """Update summary panel visibility/enabled state based on data availability.""" + if not self._summary_panel: + return + + has_meeting = self._state.current_meeting is not None + has_segments = bool(self._state.transcript_segments) + + # Visible once there is a meeting context; enabled when segments exist. + self._summary_panel.set_visible(has_meeting or has_segments) + self._summary_panel.set_enabled(has_segments and not self._state.summary_loading) + + def _on_highlight_change(self, index: int | None) -> None: + """Handle highlight change from sync controller. 
+ + Args: + index: Segment index to highlight, or None to clear. + """ + if self._transcript: + self._transcript.update_highlight(index) + + def _on_playback_position_change(self, position: float) -> None: + """Handle playback position change. + + Args: + position: Current playback position in seconds. + """ + # Sync controller handles segment matching internally + _ = position # Position tracked in state + + def _shutdown(self) -> None: + """Stop background tasks and capture started for triggers.""" + if self._trigger_task: + self._trigger_task.cancel() + self._trigger_task = None + + # Stop audio consumer if running + self._stop_audio_consumer() + + if self._app_audio: + self._app_audio.close() + + if self._audio_capture and not self._state.recording: + try: + self._audio_capture.stop() + except RuntimeError: + logger.debug("Error stopping audio capture during shutdown", exc_info=True) + self._audio_capture = None + + def _update_recording_buttons(self) -> None: + """Update recording button states.""" + if self._record_btn: + self._record_btn.disabled = not self._state.connected or self._state.recording + + if self._stop_btn: + self._stop_btn.disabled = not self._state.recording + + self._state.request_update() + + +def main() -> None: + """Run the NoteFlow client application.""" + parser = argparse.ArgumentParser(description="NoteFlow Client") + parser.add_argument( + "-s", + "--server", + type=str, + default=DEFAULT_SERVER, + help=f"Server address (default: {DEFAULT_SERVER})", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Enable verbose logging", + ) + args = parser.parse_args() + + # Configure logging + log_level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig( + level=log_level, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", + ) + + # Run app + app = NoteFlowClientApp(server_address=args.server) + app.run() + + +if __name__ == "__main__": + main() +```` + +## File: src/noteflow/grpc/service.py +````python +"""NoteFlow gRPC service implementation (async with UoW).""" + +from __future__ import annotations + +import asyncio +import logging +import struct +import time +from collections.abc import AsyncIterator +from dataclasses import dataclass, field +from pathlib import Path +from typing import TYPE_CHECKING, ClassVar, Final +from uuid import UUID, uuid4 + +import grpc.aio +import numpy as np +from numpy.typing import NDArray + +from noteflow.application.services.export_service import ExportFormat, ExportService +from noteflow.application.services.summarization_service import SummarizationService +from noteflow.config.constants import DEFAULT_SAMPLE_RATE as _DEFAULT_SAMPLE_RATE +from noteflow.domain.entities import Annotation, Meeting, Segment, Summary +from noteflow.domain.summarization import ProviderUnavailableError +from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState +from noteflow.infrastructure.asr import Segmenter, SegmenterConfig, StreamingVad +from noteflow.infrastructure.audio.reader import MeetingAudioReader +from noteflow.infrastructure.audio.writer import MeetingAudioWriter +from noteflow.infrastructure.converters import AsrConverter +from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker +from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork +from noteflow.infrastructure.security.crypto import AesGcmCryptoBox +from noteflow.infrastructure.security.keystore import KeyringKeyStore + +from .meeting_store import 
MeetingStore +from .proto import noteflow_pb2, noteflow_pb2_grpc + +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + + from noteflow.infrastructure.asr import FasterWhisperEngine + from noteflow.infrastructure.asr.dto import AsrResult + from noteflow.infrastructure.diarization import DiarizationEngine + +logger = logging.getLogger(__name__) + + +@dataclass +class _StreamSessionInit: + """Result of stream session initialization.""" + + next_segment_id: int + error_code: grpc.StatusCode | None = None + error_message: str | None = None + + @property + def success(self) -> bool: + """Check if initialization succeeded.""" + return self.error_code is None + + +@dataclass +class _DiarizationJob: + """Track background diarization job state.""" + + job_id: str + meeting_id: str + status: int + segments_updated: int = 0 + speaker_ids: list[str] = field(default_factory=list) + error_message: str = "" + created_at: float = field(default_factory=time.time) + updated_at: float = field(default_factory=time.time) + task: asyncio.Task[None] | None = None + + def to_proto(self) -> noteflow_pb2.DiarizationJobStatus: + return noteflow_pb2.DiarizationJobStatus( + job_id=self.job_id, + status=self.status, + segments_updated=self.segments_updated, + speaker_ids=self.speaker_ids, + error_message=self.error_message, + ) + + +class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): + """Async gRPC service implementation for NoteFlow with PostgreSQL persistence.""" + + VERSION: Final[str] = "0.2.0" + MAX_CHUNK_SIZE: Final[int] = 1024 * 1024 # 1MB + DEFAULT_SAMPLE_RATE: Final[int] = _DEFAULT_SAMPLE_RATE + SUPPORTED_SAMPLE_RATES: ClassVar[list[int]] = [16000, 44100, 48000] + PARTIAL_CADENCE_SECONDS: Final[float] = 2.0 # Emit partials every 2 seconds + MIN_PARTIAL_AUDIO_SECONDS: Final[float] = 0.5 # Minimum audio for partial inference + + def __init__( + self, + asr_engine: FasterWhisperEngine | None = None, + session_factory: async_sessionmaker[AsyncSession] | None = None, + meetings_dir: Path | None = None, + summarization_service: SummarizationService | None = None, + diarization_engine: DiarizationEngine | None = None, + ) -> None: + """Initialize the service. + + Args: + asr_engine: Optional ASR engine. + session_factory: Optional async session factory for database persistence. + If not provided, falls back to in-memory MeetingStore. + meetings_dir: Optional directory for meeting audio storage. + Defaults to ~/.noteflow/meetings. + summarization_service: Optional summarization service for generating summaries. + diarization_engine: Optional diarization engine for speaker identification.
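+ + Note: + With session_factory=None the servicer keeps all state in memory; + annotation and export RPCs require database persistence and respond + with UNIMPLEMENTED in that mode.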
+ """ + self._asr_engine = asr_engine + self._session_factory = session_factory + self._summarization_service = summarization_service + self._diarization_engine = diarization_engine + self._start_time = time.time() + # Fallback to in-memory store if no database configured + self._memory_store: MeetingStore | None = ( + MeetingStore() if session_factory is None else None + ) + + # Audio writing infrastructure + self._meetings_dir = meetings_dir or (Path.home() / ".noteflow" / "meetings") + self._keystore = KeyringKeyStore() + self._crypto = AesGcmCryptoBox(self._keystore) + self._audio_writers: dict[str, MeetingAudioWriter] = {} + + # VAD and segmentation state per meeting + self._vad_instances: dict[str, StreamingVad] = {} + self._segmenters: dict[str, Segmenter] = {} + self._was_speaking: dict[str, bool] = {} + self._segment_counters: dict[str, int] = {} + self._stream_formats: dict[str, tuple[int, int]] = {} + self._active_streams: set[str] = set() + + # Partial transcription state per meeting + self._partial_buffers: dict[str, list[NDArray[np.float32]]] = {} + self._last_partial_time: dict[str, float] = {} + self._last_partial_text: dict[str, str] = {} + + # Streaming diarization state per meeting + self._diarization_turns: dict[str, list[SpeakerTurn]] = {} + self._diarization_stream_time: dict[str, float] = {} + self._diarization_streaming_failed: set[str] = set() + + # Track audio write failures to avoid log spam + self._audio_write_failed: set[str] = set() + + # Background diarization jobs + self._diarization_jobs: dict[str, _DiarizationJob] = {} + + @property + def asr_engine(self) -> FasterWhisperEngine | None: + """Get the ASR engine.""" + return self._asr_engine + + def set_asr_engine(self, engine: FasterWhisperEngine) -> None: + """Set the ASR engine.""" + self._asr_engine = engine + + @property + def diarization_engine(self) -> DiarizationEngine | None: + """Get the diarization engine.""" + return self._diarization_engine + + def set_diarization_engine(self, engine: DiarizationEngine) -> None: + """Set the diarization engine.""" + self._diarization_engine = engine + + def _use_database(self) -> bool: + """Check if database persistence is configured.""" + return self._session_factory is not None + + def _get_memory_store(self) -> MeetingStore: + """Get the in-memory store, raising if not configured.""" + if self._memory_store is None: + raise RuntimeError("Memory store not configured") + return self._memory_store + + def _create_uow(self) -> SqlAlchemyUnitOfWork: + """Create a new Unit of Work.""" + if self._session_factory is None: + raise RuntimeError("Database not configured") + return SqlAlchemyUnitOfWork(self._session_factory) + + def _init_streaming_state(self, meeting_id: str, next_segment_id: int) -> None: + """Initialize VAD, Segmenter, speaking state, and partial buffers for a meeting.""" + self._vad_instances[meeting_id] = StreamingVad() + self._segmenters[meeting_id] = Segmenter( + config=SegmenterConfig(sample_rate=self.DEFAULT_SAMPLE_RATE) + ) + self._was_speaking[meeting_id] = False + self._segment_counters[meeting_id] = next_segment_id + self._partial_buffers[meeting_id] = [] + self._last_partial_time[meeting_id] = time.time() + self._last_partial_text[meeting_id] = "" + self._diarization_turns[meeting_id] = [] + self._diarization_stream_time[meeting_id] = 0.0 + self._diarization_streaming_failed.discard(meeting_id) + if self._diarization_engine is not None: + self._diarization_engine.reset_streaming() + + def _cleanup_streaming_state(self, meeting_id: str) -> 
None: + """Clean up VAD, Segmenter, speaking state, and partial buffers for a meeting.""" + self._vad_instances.pop(meeting_id, None) + self._segmenters.pop(meeting_id, None) + self._was_speaking.pop(meeting_id, None) + self._segment_counters.pop(meeting_id, None) + self._stream_formats.pop(meeting_id, None) + self._partial_buffers.pop(meeting_id, None) + self._last_partial_time.pop(meeting_id, None) + self._last_partial_text.pop(meeting_id, None) + self._diarization_turns.pop(meeting_id, None) + self._diarization_stream_time.pop(meeting_id, None) + self._diarization_streaming_failed.discard(meeting_id) + + def _ensure_meeting_dek(self, meeting: Meeting) -> tuple[bytes, bytes, bool]: + """Ensure meeting has a DEK, generating one if needed. + + Args: + meeting: Meeting entity. + + Returns: + Tuple of (dek, wrapped_dek, needs_update). + """ + if meeting.wrapped_dek is None: + dek = self._crypto.generate_dek() + wrapped_dek = self._crypto.wrap_dek(dek) + meeting.wrapped_dek = wrapped_dek + return dek, wrapped_dek, True + wrapped_dek = meeting.wrapped_dek + dek = self._crypto.unwrap_dek(wrapped_dek) + return dek, wrapped_dek, False + + def _start_meeting_if_needed(self, meeting: Meeting) -> tuple[bool, str | None]: + """Start recording on meeting if not already recording. + + Args: + meeting: Meeting entity. + + Returns: + Tuple of (needs_update, error_message). + """ + if meeting.state == MeetingState.RECORDING: + return False, None + try: + meeting.start_recording() + return True, None + except ValueError as e: + return False, str(e) + + def _open_meeting_audio_writer( + self, + meeting_id: str, + dek: bytes, + wrapped_dek: bytes, + ) -> None: + """Open audio writer for a meeting. + + Args: + meeting_id: Meeting ID string. + dek: Data encryption key. + wrapped_dek: Wrapped DEK. + """ + writer = MeetingAudioWriter(self._crypto, self._meetings_dir) + writer.open( + meeting_id=meeting_id, + dek=dek, + wrapped_dek=wrapped_dek, + sample_rate=self.DEFAULT_SAMPLE_RATE, + ) + self._audio_writers[meeting_id] = writer + logger.info("Audio writer opened for meeting %s", meeting_id) + + async def _init_stream_session_db(self, meeting_id: str) -> _StreamSessionInit: + """Initialize stream session using database persistence. + + Args: + meeting_id: Meeting ID string. + + Returns: + Stream session initialization result. + """ + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.NOT_FOUND, + error_message=f"Meeting {meeting_id} not found", + ) + + dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) + recording_updated, error_msg = self._start_meeting_if_needed(meeting) + + if error_msg: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.INVALID_ARGUMENT, + error_message=error_msg, + ) + + if dek_updated or recording_updated: + await uow.meetings.update(meeting) + await uow.commit() + + next_segment_id = await uow.segments.get_next_segment_id(meeting.id) + self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) + self._init_streaming_state(meeting_id, next_segment_id) + + return _StreamSessionInit(next_segment_id=next_segment_id) + + def _init_stream_session_memory(self, meeting_id: str) -> _StreamSessionInit: + """Initialize stream session using in-memory store. + + Args: + meeting_id: Meeting ID string. + + Returns: + Stream session initialization result. 
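+ + Note: + Mirrors _init_stream_session_db, but takes next_segment_id from the + in-memory meeting rather than the segments repository.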
+ """ + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.NOT_FOUND, + error_message=f"Meeting {meeting_id} not found", + ) + + dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) + recording_updated, error_msg = self._start_meeting_if_needed(meeting) + + if error_msg: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.INVALID_ARGUMENT, + error_message=error_msg, + ) + + if dek_updated or recording_updated: + store.update(meeting) + + next_segment_id = meeting.next_segment_id + self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) + self._init_streaming_state(meeting_id, next_segment_id) + + return _StreamSessionInit(next_segment_id=next_segment_id) + + def _next_segment_id(self, meeting_id: str, fallback: int = 0) -> int: + """Get and increment the next segment id for a meeting.""" + next_id = self._segment_counters.get(meeting_id) + if next_id is None: + next_id = fallback + self._segment_counters[meeting_id] = next_id + 1 + return next_id + + def _normalize_stream_format( + self, + meeting_id: str, + sample_rate: int, + channels: int, + ) -> tuple[int, int]: + """Validate and persist stream audio format for a meeting.""" + normalized_rate = sample_rate or self.DEFAULT_SAMPLE_RATE + normalized_channels = channels or 1 + + if normalized_rate not in self.SUPPORTED_SAMPLE_RATES: + raise ValueError( + "Unsupported sample_rate " + f"{normalized_rate}; supported: {self.SUPPORTED_SAMPLE_RATES}" + ) + if normalized_channels < 1: + raise ValueError("channels must be >= 1") + + existing = self._stream_formats.get(meeting_id) + if existing and existing != (normalized_rate, normalized_channels): + raise ValueError("Stream audio format cannot change mid-stream") + + self._stream_formats.setdefault(meeting_id, (normalized_rate, normalized_channels)) + return normalized_rate, normalized_channels + + def _convert_audio_format( + self, + audio: NDArray[np.float32], + sample_rate: int, + channels: int, + ) -> NDArray[np.float32]: + """Downmix/resample audio to the server's expected format.""" + if channels > 1: + if audio.size % channels != 0: + raise ValueError("Audio buffer size is not divisible by channel count") + audio = audio.reshape(-1, channels).mean(axis=1) + + if sample_rate != self.DEFAULT_SAMPLE_RATE: + audio = self._resample_audio(audio, sample_rate, self.DEFAULT_SAMPLE_RATE) + + return audio + + @staticmethod + def _resample_audio( + audio: NDArray[np.float32], + src_rate: int, + dst_rate: int, + ) -> NDArray[np.float32]: + """Resample audio using linear interpolation.""" + if src_rate == dst_rate or audio.size == 0: + return audio + + ratio = dst_rate / src_rate + new_length = round(audio.shape[0] * ratio) + if new_length <= 0: + return np.array([], dtype=np.float32) + + old_indices = np.arange(audio.shape[0]) + new_indices = np.arange(new_length) / ratio + return np.interp(new_indices, old_indices, audio).astype(np.float32) + + def _close_audio_writer(self, meeting_id: str) -> None: + """Close and remove the audio writer for a meeting.""" + # Clean up write failure tracking + self._audio_write_failed.discard(meeting_id) + + if meeting_id not in self._audio_writers: + return + + try: + writer = self._audio_writers.pop(meeting_id) + writer.close() + logger.info( + "Audio writer closed for meeting %s: %d bytes written", + meeting_id, + writer.bytes_written, + ) + except Exception as e: + logger.error( + "Failed to close audio 
writer for meeting %s: %s", + meeting_id, + e, + ) + + async def _count_active_meetings_db(self) -> int: + """Count active meetings using database state.""" + async with self._create_uow() as uow: + total = 0 + for state in (MeetingState.RECORDING, MeetingState.STOPPING): + total += await uow.meetings.count_by_state(state) + return total + + async def StreamTranscription( + self, + request_iterator: AsyncIterator[noteflow_pb2.AudioChunk], + context: grpc.aio.ServicerContext, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Handle bidirectional audio streaming with persistence. + + Receives audio chunks from client, processes through ASR, + persists segments, and yields transcript updates. + """ + if self._asr_engine is None or not self._asr_engine.is_loaded: + await context.abort( + grpc.StatusCode.FAILED_PRECONDITION, + "ASR engine not loaded", + ) + + current_meeting_id: str | None = None + + try: + async for chunk in request_iterator: + meeting_id = chunk.meeting_id + if not meeting_id: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "meeting_id required", + ) + + # Initialize stream on first chunk + if current_meeting_id is None: + init_result = await self._init_stream_for_meeting(meeting_id, context) + if init_result is None: + return # Error already sent via context.abort + current_meeting_id = meeting_id + elif meeting_id != current_meeting_id: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "Stream may only contain a single meeting_id", + ) + + # Process audio chunk + async for update in self._process_stream_chunk(current_meeting_id, chunk, context): + yield update + + # Flush any remaining audio from segmenter + if current_meeting_id and current_meeting_id in self._segmenters: + async for update in self._flush_segmenter(current_meeting_id): + yield update + finally: + if current_meeting_id: + self._cleanup_streaming_state(current_meeting_id) + self._close_audio_writer(current_meeting_id) + self._active_streams.discard(current_meeting_id) + + async def _init_stream_for_meeting( + self, + meeting_id: str, + context: grpc.aio.ServicerContext, + ) -> _StreamSessionInit | None: + """Initialize streaming for a meeting. + + Args: + meeting_id: Meeting ID string. + context: gRPC context for error handling. + + Returns: + Initialization result, or None if error was sent. + """ + if meeting_id in self._active_streams: + await context.abort( + grpc.StatusCode.FAILED_PRECONDITION, + f"Meeting {meeting_id} already streaming", + ) + + self._active_streams.add(meeting_id) + + if self._use_database(): + init_result = await self._init_stream_session_db(meeting_id) + else: + init_result = self._init_stream_session_memory(meeting_id) + + if not init_result.success: + self._active_streams.discard(meeting_id) + await context.abort(init_result.error_code, init_result.error_message or "") + + return init_result + + async def _process_stream_chunk( + self, + meeting_id: str, + chunk: noteflow_pb2.AudioChunk, + context: grpc.aio.ServicerContext, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Process a single audio chunk from the stream. + + Args: + meeting_id: Meeting ID string. + chunk: Audio chunk from client. + context: gRPC context for error handling. + + Yields: + Transcript updates from processing. 
+ """ + try: + sample_rate, channels = self._normalize_stream_format( + meeting_id, + chunk.sample_rate, + chunk.channels, + ) + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + + audio = self._decode_audio_chunk(chunk) + if audio is None: + return + + try: + audio = self._convert_audio_format(audio, sample_rate, channels) + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + + # Write to encrypted audio file + self._write_audio_chunk_safe(meeting_id, audio) + + # VAD-driven segmentation + async for update in self._process_audio_with_vad(meeting_id, audio): + yield update + + def _write_audio_chunk_safe( + self, + meeting_id: str, + audio: NDArray[np.float32], + ) -> None: + """Write audio chunk to encrypted file, logging errors without raising. + + Args: + meeting_id: Meeting ID string. + audio: Audio samples to write. + """ + if meeting_id not in self._audio_writers: + return + if meeting_id in self._audio_write_failed: + return # Already failed, skip to avoid log spam + try: + self._audio_writers[meeting_id].write_chunk(audio) + except Exception as e: + logger.error( + "Audio write failed for meeting %s: %s. Recording may be incomplete.", + meeting_id, + e, + ) + self._audio_write_failed.add(meeting_id) + + def _decode_audio_chunk( + self, + chunk: noteflow_pb2.AudioChunk, + ) -> NDArray[np.float32] | None: + """Decode audio chunk from protobuf to numpy array.""" + if not chunk.audio_data: + return None + try: + return np.frombuffer(chunk.audio_data, dtype=np.float32) + except (ValueError, struct.error) as e: + logger.warning("Failed to decode audio chunk: %s", e) + return None + + async def _process_audio_with_vad( + self, + meeting_id: str, + audio: NDArray[np.float32], + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Process audio chunk through VAD and Segmenter. + + Args: + meeting_id: Meeting identifier. + audio: Audio samples (float32, mono). + + Yields: + TranscriptUpdates for VAD events, partials, and finals. 
+ """ + vad = self._vad_instances.get(meeting_id) + segmenter = self._segmenters.get(meeting_id) + + if vad is None or segmenter is None: + return + + # Get VAD decision + is_speech = vad.process_chunk(audio) + + # Streaming diarization (optional) + self._process_streaming_diarization(meeting_id, audio) + + # Emit VAD state change events + was_speaking = self._was_speaking.get(meeting_id, False) + if is_speech and not was_speaking: + # Speech started + yield self._create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_START) + self._was_speaking[meeting_id] = True + elif not is_speech and was_speaking: + # Speech ended + yield self._create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_END) + self._was_speaking[meeting_id] = False + + # Buffer audio for partial transcription + if is_speech: + if meeting_id in self._partial_buffers: + self._partial_buffers[meeting_id].append(audio.copy()) + + # Check if we should emit a partial + partial_update = await self._maybe_emit_partial(meeting_id) + if partial_update is not None: + yield partial_update + + # Process through segmenter + for audio_segment in segmenter.process_audio(audio, is_speech): + # Clear partial buffer when we get a final segment + self._clear_partial_buffer(meeting_id) + async for update in self._process_audio_segment( + meeting_id, + audio_segment.audio, + audio_segment.start_time, + ): + yield update + + async def _maybe_emit_partial( + self, + meeting_id: str, + ) -> noteflow_pb2.TranscriptUpdate | None: + """Check if it's time to emit a partial and generate if so. + + Args: + meeting_id: Meeting identifier. + + Returns: + TranscriptUpdate with partial text, or None if not time yet. + """ + if self._asr_engine is None or not self._asr_engine.is_loaded: + return None + + last_time = self._last_partial_time.get(meeting_id, 0) + now = time.time() + + # Check if enough time has passed since last partial + if now - last_time < self.PARTIAL_CADENCE_SECONDS: + return None + + # Check if we have enough audio + buffer = self._partial_buffers.get(meeting_id, []) + if not buffer: + return None + + # Concatenate buffered audio + combined = np.concatenate(buffer) + audio_seconds = len(combined) / self.DEFAULT_SAMPLE_RATE + + if audio_seconds < self.MIN_PARTIAL_AUDIO_SECONDS: + return None + + # Run inference on buffered audio (async to avoid blocking event loop) + results = await self._asr_engine.transcribe_async(combined) + partial_text = " ".join(result.text for result in results) + + # Clear buffer after inference to keep partials incremental and bounded + self._partial_buffers[meeting_id] = [] + + # Only emit if text changed (debounce) + last_text = self._last_partial_text.get(meeting_id, "") + if partial_text and partial_text != last_text: + self._last_partial_time[meeting_id] = now + self._last_partial_text[meeting_id] = partial_text + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=noteflow_pb2.UPDATE_TYPE_PARTIAL, + partial_text=partial_text, + server_timestamp=now, + ) + + self._last_partial_time[meeting_id] = now + return None + + def _clear_partial_buffer(self, meeting_id: str) -> None: + """Clear the partial buffer and reset state after a final is emitted. + + Args: + meeting_id: Meeting identifier. 
+ """ + if meeting_id in self._partial_buffers: + self._partial_buffers[meeting_id] = [] + if meeting_id in self._last_partial_text: + self._last_partial_text[meeting_id] = "" + if meeting_id in self._last_partial_time: + self._last_partial_time[meeting_id] = time.time() + + def _process_streaming_diarization( + self, + meeting_id: str, + audio: NDArray[np.float32], + ) -> None: + """Process an audio chunk for streaming diarization (best-effort).""" + if self._diarization_engine is None: + return + if meeting_id in self._diarization_streaming_failed: + return + if audio.size == 0: + return + + if not self._diarization_engine.is_streaming_loaded: + try: + self._diarization_engine.load_streaming_model() + except (RuntimeError, ValueError) as exc: + logger.warning( + "Streaming diarization disabled for meeting %s: %s", + meeting_id, + exc, + ) + self._diarization_streaming_failed.add(meeting_id) + return + + stream_time = self._diarization_stream_time.get(meeting_id, 0.0) + duration = len(audio) / self.DEFAULT_SAMPLE_RATE + + try: + turns = self._diarization_engine.process_chunk( + audio, + sample_rate=self.DEFAULT_SAMPLE_RATE, + ) + except Exception as exc: + logger.warning( + "Streaming diarization failed for meeting %s: %s", + meeting_id, + exc, + ) + self._diarization_streaming_failed.add(meeting_id) + return + + diarization_turns = self._diarization_turns.setdefault(meeting_id, []) + for turn in turns: + diarization_turns.append( + SpeakerTurn( + speaker=turn.speaker, + start=turn.start + stream_time, + end=turn.end + stream_time, + confidence=turn.confidence, + ) + ) + + self._diarization_stream_time[meeting_id] = stream_time + duration + + async def _flush_segmenter( + self, + meeting_id: str, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Flush remaining audio from segmenter at stream end. + + Args: + meeting_id: Meeting identifier. + + Yields: + TranscriptUpdates for final segment. + """ + segmenter = self._segmenters.get(meeting_id) + if segmenter is None: + return + + # Clear partial buffer since we're flushing to final + self._clear_partial_buffer(meeting_id) + + final_segment = segmenter.flush() + if final_segment is not None: + async for update in self._process_audio_segment( + meeting_id, + final_segment.audio, + final_segment.start_time, + ): + yield update + + async def _process_audio_segment( + self, + meeting_id: str, + audio: NDArray[np.float32], + segment_start_time: float, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Process a complete audio segment through ASR. + + Args: + meeting_id: Meeting identifier. + audio: Complete audio segment. + segment_start_time: Segment start time in stream seconds. + + Yields: + TranscriptUpdates for transcribed segments. 
+ """ + if len(audio) == 0 or self._asr_engine is None: + return + + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + return + + results = await self._asr_engine.transcribe_async(audio) + for result in results: + segment_id = self._next_segment_id( + meeting_id, + fallback=meeting.next_segment_id, + ) + segment = self._create_segment_from_asr( + meeting.id, + segment_id, + result, + segment_start_time, + ) + self._maybe_assign_speaker(meeting_id, segment) + meeting.add_segment(segment) + await uow.segments.add(meeting.id, segment) + await uow.commit() + yield self._segment_to_proto_update(meeting_id, segment) + else: + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + return + results = await self._asr_engine.transcribe_async(audio) + for result in results: + segment_id = self._next_segment_id( + meeting_id, + fallback=meeting.next_segment_id, + ) + segment = self._create_segment_from_asr( + meeting.id, + segment_id, + result, + segment_start_time, + ) + self._maybe_assign_speaker(meeting_id, segment) + store.add_segment(meeting_id, segment) + yield self._segment_to_proto_update(meeting_id, segment) + + def _create_vad_update( + self, + meeting_id: str, + update_type: int, + ) -> noteflow_pb2.TranscriptUpdate: + """Create a VAD event update. + + Args: + meeting_id: Meeting identifier. + update_type: VAD_START or VAD_END. + + Returns: + TranscriptUpdate with VAD event. + """ + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=update_type, + server_timestamp=time.time(), + ) + + def _create_segment_from_asr( + self, + meeting_id: MeetingId, + segment_id: int, + result: AsrResult, + segment_start_time: float, + ) -> Segment: + """Create a Segment from ASR result. + + Use converters to transform ASR DTO to domain entities. 
+ """ + words = AsrConverter.result_to_domain_words(result) + if segment_start_time: + for word in words: + word.start_time += segment_start_time + word.end_time += segment_start_time + + return Segment( + segment_id=segment_id, + text=result.text, + start_time=result.start + segment_start_time, + end_time=result.end + segment_start_time, + meeting_id=meeting_id, + words=words, + language=result.language, + language_confidence=result.language_probability, + avg_logprob=result.avg_logprob, + no_speech_prob=result.no_speech_prob, + ) + + def _maybe_assign_speaker(self, meeting_id: str, segment: Segment) -> None: + """Assign speaker to a segment using streaming diarization turns (best-effort).""" + if self._diarization_engine is None: + return + if meeting_id in self._diarization_streaming_failed: + return + turns = self._diarization_turns.get(meeting_id) + if not turns: + return + + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + return + + segment.speaker_id = speaker_id + segment.speaker_confidence = confidence + + def _segment_to_proto_update( + self, + meeting_id: str, + segment: Segment, + ) -> noteflow_pb2.TranscriptUpdate: + """Convert domain Segment to protobuf TranscriptUpdate.""" + words = [ + noteflow_pb2.WordTiming( + word=w.word, + start_time=w.start_time, + end_time=w.end_time, + probability=w.probability, + ) + for w in segment.words + ] + final_segment = noteflow_pb2.FinalSegment( + segment_id=segment.segment_id, + text=segment.text, + start_time=segment.start_time, + end_time=segment.end_time, + words=words, + language=segment.language, + language_confidence=segment.language_confidence, + avg_logprob=segment.avg_logprob, + no_speech_prob=segment.no_speech_prob, + speaker_id=segment.speaker_id or "", + speaker_confidence=segment.speaker_confidence, + ) + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=noteflow_pb2.UPDATE_TYPE_FINAL, + segment=final_segment, + server_timestamp=time.time(), + ) + + async def CreateMeeting( + self, + request: noteflow_pb2.CreateMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + """Create a new meeting.""" + metadata = dict(request.metadata) if request.metadata else {} + + if self._use_database(): + async with self._create_uow() as uow: + meeting = Meeting.create(title=request.title, metadata=metadata) + saved = await uow.meetings.create(meeting) + await uow.commit() + return self._meeting_to_proto(saved) + else: + store = self._get_memory_store() + meeting = store.create(title=request.title, metadata=metadata) + return self._meeting_to_proto(meeting) + + async def StopMeeting( + self, + request: noteflow_pb2.StopMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + """Stop a meeting using graceful STOPPING -> STOPPED transition.""" + meeting_id = request.meeting_id + + # Close audio writer if open + if meeting_id in self._audio_writers: + self._close_audio_writer(meeting_id) + + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {meeting_id} not found", + ) + try: + # Graceful shutdown: RECORDING -> STOPPING -> STOPPED + meeting.begin_stopping() + meeting.stop_recording() + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + await uow.meetings.update(meeting) + await uow.commit() + 
return self._meeting_to_proto(meeting) + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {meeting_id} not found", + ) + try: + # Graceful shutdown: RECORDING -> STOPPING -> STOPPED + meeting.begin_stopping() + meeting.stop_recording() + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + store.update(meeting) + return self._meeting_to_proto(meeting) + + async def refine_speaker_diarization( + self, + meeting_id: str, + num_speakers: int | None = None, + ) -> int: + """Run post-meeting speaker diarization refinement. + + Loads the full meeting audio, runs offline diarization, and updates + segment speaker assignments. This provides higher quality speaker + labels than streaming diarization. + + Args: + meeting_id: Meeting UUID string. + num_speakers: Known number of speakers (None for auto-detect). + + Returns: + Number of segments updated with speaker labels. + + Raises: + RuntimeError: If diarization engine not available or meeting not found. + """ + turns = await asyncio.to_thread( + self._run_diarization_inference, + meeting_id, + num_speakers, + ) + + updated_count = await self._apply_diarization_turns(meeting_id, turns) + + logger.info( + "Updated %d segments with speaker labels for meeting %s", + updated_count, + meeting_id, + ) + + return updated_count + + def _run_diarization_inference( + self, + meeting_id: str, + num_speakers: int | None, + ) -> list[SpeakerTurn]: + """Run offline diarization and return speaker turns (blocking).""" + if self._diarization_engine is None: + raise RuntimeError("Diarization engine not configured") + + if not self._diarization_engine.is_offline_loaded: + logger.info("Loading offline diarization model for refinement...") + self._diarization_engine.load_offline_model() + + audio_reader = MeetingAudioReader(self._crypto, self._meetings_dir) + if not audio_reader.audio_exists(meeting_id): + raise RuntimeError("No audio file found for meeting") + + logger.info("Loading audio for meeting %s", meeting_id) + try: + audio_chunks = audio_reader.load_meeting_audio(meeting_id) + except (FileNotFoundError, ValueError) as exc: + raise RuntimeError(f"Failed to load audio: {exc}") from exc + + if not audio_chunks: + raise RuntimeError("No audio chunks loaded for meeting") + + sample_rate = audio_reader.sample_rate + all_audio = np.concatenate([chunk.frames for chunk in audio_chunks]) + + logger.info( + "Running offline diarization on %.2f seconds of audio", + len(all_audio) / sample_rate, + ) + + turns = self._diarization_engine.diarize_full( + all_audio, + sample_rate=sample_rate, + num_speakers=num_speakers, + ) + + logger.info("Diarization found %d speaker turns", len(turns)) + return list(turns) + + async def _apply_diarization_turns( + self, + meeting_id: str, + turns: list[SpeakerTurn], + ) -> int: + """Apply diarization turns to segments and return updated count.""" + updated_count = 0 + + if self._use_database(): + async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) + for segment in segments: + if segment.db_id is None: + continue + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + continue + await uow.segments.update_speaker( + segment.db_id, + speaker_id, + confidence, + ) + updated_count += 1 + await uow.commit() + else: + store = self._get_memory_store() + if meeting := 
store.get(meeting_id): + for segment in meeting.segments: + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + continue + segment.speaker_id = speaker_id + segment.speaker_confidence = confidence + updated_count += 1 + + return updated_count + + async def _collect_speaker_ids(self, meeting_id: str) -> list[str]: + """Collect distinct speaker IDs for a meeting.""" + if self._use_database(): + async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) + return sorted({s.speaker_id for s in segments if s.speaker_id}) + store = self._get_memory_store() + if meeting := store.get(meeting_id): + return sorted({s.speaker_id for s in meeting.segments if s.speaker_id}) + return [] + + async def ListMeetings( + self, + request: noteflow_pb2.ListMeetingsRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ListMeetingsResponse: + """List meetings.""" + limit = request.limit or 100 + offset = request.offset or 0 + sort_desc = request.sort_order != noteflow_pb2.SORT_ORDER_CREATED_ASC + + if self._use_database(): + states = [MeetingState(s) for s in request.states] if request.states else None + async with self._create_uow() as uow: + meetings, total = await uow.meetings.list_all( + states=states, + limit=limit, + offset=offset, + sort_desc=sort_desc, + ) + return noteflow_pb2.ListMeetingsResponse( + meetings=[self._meeting_to_proto(m, include_segments=False) for m in meetings], + total_count=total, + ) + else: + store = self._get_memory_store() + states = [MeetingState(s) for s in request.states] if request.states else None + meetings, total = store.list_all( + states=states, + limit=limit, + offset=offset, + sort_desc=sort_desc, + ) + return noteflow_pb2.ListMeetingsResponse( + meetings=[self._meeting_to_proto(m, include_segments=False) for m in meetings], + total_count=total, + ) + + async def GetMeeting( + self, + request: noteflow_pb2.GetMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + """Get meeting details.""" + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(request.meeting_id))) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + # Load segments if requested + if request.include_segments: + segments = await uow.segments.get_by_meeting(meeting.id) + meeting.segments = list(segments) + # Load summary if requested + if request.include_summary: + summary = await uow.summaries.get_by_meeting(meeting.id) + meeting.summary = summary + return self._meeting_to_proto( + meeting, + include_segments=request.include_segments, + include_summary=request.include_summary, + ) + store = self._get_memory_store() + meeting = store.get(request.meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + return self._meeting_to_proto( + meeting, + include_segments=request.include_segments, + include_summary=request.include_summary, + ) + + async def DeleteMeeting( + self, + request: noteflow_pb2.DeleteMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DeleteMeetingResponse: + """Delete a meeting.""" + if self._use_database(): + async with self._create_uow() as uow: + success = await uow.meetings.delete(MeetingId(UUID(request.meeting_id))) + if success: + await uow.commit() + return 
noteflow_pb2.DeleteMeetingResponse(success=True) + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + store = self._get_memory_store() + success = store.delete(request.meeting_id) + if not success: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + return noteflow_pb2.DeleteMeetingResponse(success=True) + + async def GenerateSummary( + self, + request: noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + """Generate meeting summary using SummarizationService with fallback.""" + if self._use_database(): + return await self._generate_summary_db(request, context) + + return await self._generate_summary_memory(request, context) + + async def _generate_summary_db( + self, + request: noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + """Generate summary for a meeting stored in the database. + + The potentially slow summarization step is executed outside the UoW to + avoid holding database connections while waiting on LLMs. + """ + + meeting_id = MeetingId(UUID(request.meeting_id)) + + # 1) Load meeting, existing summary, and segments inside a short UoW + async with self._create_uow() as uow: + meeting = await uow.meetings.get(meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + + existing = await uow.summaries.get_by_meeting(meeting.id) + if existing and not request.force_regenerate: + return self._summary_to_proto(existing) + + segments = list(await uow.segments.get_by_meeting(meeting.id)) + + # 2) Run summarization outside DB transaction + summary = await self._summarize_or_placeholder(meeting_id, segments) + + # 3) Persist in a fresh UoW + async with self._create_uow() as uow: + saved = await uow.summaries.save(summary) + await uow.commit() + + return self._summary_to_proto(saved) + + async def _generate_summary_memory( + self, + request: noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + """Generate summary for meetings held in the in-memory store.""" + + store = self._get_memory_store() + meeting = store.get(request.meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + + if meeting.summary and not request.force_regenerate: + return self._summary_to_proto(meeting.summary) + + summary = await self._summarize_or_placeholder(meeting.id, meeting.segments) + store.set_summary(request.meeting_id, summary) + return self._summary_to_proto(summary) + + async def _summarize_or_placeholder( + self, + meeting_id: MeetingId, + segments: list[Segment], + ) -> Summary: + """Try to summarize via service, fallback to placeholder on failure.""" + if self._summarization_service is None: + logger.warning("SummarizationService not configured; using placeholder summary") + return self._generate_placeholder_summary(meeting_id, segments) + + try: + result = await self._summarization_service.summarize( + meeting_id=meeting_id, + segments=segments, + ) + logger.info( + "Generated summary using %s (fallback=%s)", + result.provider_used, + result.fallback_used, + ) + return result.summary + except ProviderUnavailableError as exc: + logger.warning("Summarization provider unavailable; using placeholder: %s", exc) + except (TimeoutError, RuntimeError, ValueError) as exc: + logger.exception( + 
"Summarization failed (%s); using placeholder summary", type(exc).__name__ + ) + + return self._generate_placeholder_summary(meeting_id, segments) + + def _generate_placeholder_summary( + self, + meeting_id: MeetingId, + segments: list[Segment], + ) -> Summary: + """Generate a lightweight placeholder summary when summarization fails.""" + full_text = " ".join(s.text for s in segments) + executive = f"{full_text[:200]}..." if len(full_text) > 200 else full_text + executive = executive or "No transcript available." + + return Summary( + meeting_id=meeting_id, + executive_summary=executive, + key_points=[], + action_items=[], + model_version="placeholder-v0", + ) + + async def GetServerInfo( + self, + request: noteflow_pb2.ServerInfoRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ServerInfo: + """Get server information.""" + asr_model = "" + asr_ready = False + if self._asr_engine: + asr_ready = self._asr_engine.is_loaded + asr_model = self._asr_engine.model_size or "" + + diarization_enabled = self._diarization_engine is not None + diarization_ready = self._diarization_engine is not None and ( + self._diarization_engine.is_streaming_loaded + or self._diarization_engine.is_offline_loaded + ) + + if self._use_database(): + active = await self._count_active_meetings_db() + else: + active = self._get_memory_store().active_count + + return noteflow_pb2.ServerInfo( + version=self.VERSION, + asr_model=asr_model, + asr_ready=asr_ready, + supported_sample_rates=self.SUPPORTED_SAMPLE_RATES, + max_chunk_size=self.MAX_CHUNK_SIZE, + uptime_seconds=time.time() - self._start_time, + active_meetings=active, + diarization_enabled=diarization_enabled, + diarization_ready=diarization_ready, + ) + + def _meeting_to_proto( + self, + meeting: Meeting, + include_segments: bool = True, + include_summary: bool = True, + ) -> noteflow_pb2.Meeting: + """Convert domain Meeting to protobuf.""" + segments = [] + if include_segments: + for seg in meeting.segments: + words = [ + noteflow_pb2.WordTiming( + word=w.word, + start_time=w.start_time, + end_time=w.end_time, + probability=w.probability, + ) + for w in seg.words + ] + segments.append( + noteflow_pb2.FinalSegment( + segment_id=seg.segment_id, + text=seg.text, + start_time=seg.start_time, + end_time=seg.end_time, + words=words, + language=seg.language, + language_confidence=seg.language_confidence, + avg_logprob=seg.avg_logprob, + no_speech_prob=seg.no_speech_prob, + speaker_id=seg.speaker_id or "", + speaker_confidence=seg.speaker_confidence, + ) + ) + + summary = None + if include_summary and meeting.summary: + summary = self._summary_to_proto(meeting.summary) + + return noteflow_pb2.Meeting( + id=str(meeting.id), + title=meeting.title, + state=meeting.state.value, + created_at=meeting.created_at.timestamp(), + started_at=meeting.started_at.timestamp() if meeting.started_at else 0, + ended_at=meeting.ended_at.timestamp() if meeting.ended_at else 0, + duration_seconds=meeting.duration_seconds, + segments=segments, + summary=summary, + metadata=meeting.metadata, + ) + + def _summary_to_proto(self, summary: Summary) -> noteflow_pb2.Summary: + """Convert domain Summary to protobuf.""" + key_points = [ + noteflow_pb2.KeyPoint( + text=kp.text, + segment_ids=kp.segment_ids, + start_time=kp.start_time, + end_time=kp.end_time, + ) + for kp in summary.key_points + ] + action_items = [ + noteflow_pb2.ActionItem( + text=ai.text, + assignee=ai.assignee, + due_date=ai.due_date.timestamp() if ai.due_date is not None else 0, + priority=ai.priority, + 
segment_ids=ai.segment_ids, + ) + for ai in summary.action_items + ] + return noteflow_pb2.Summary( + meeting_id=str(summary.meeting_id), + executive_summary=summary.executive_summary, + key_points=key_points, + action_items=action_items, + generated_at=( + summary.generated_at.timestamp() if summary.generated_at is not None else 0 + ), + model_version=summary.model_version, + ) + + # ========================================================================= + # Annotation Methods + # ========================================================================= + + async def AddAnnotation( + self, + request: noteflow_pb2.AddAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + """Add an annotation to a meeting.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + annotation_type = self._proto_to_annotation_type(request.annotation_type) + + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=MeetingId(UUID(request.meeting_id)), + annotation_type=annotation_type, + text=request.text, + start_time=request.start_time, + end_time=request.end_time, + segment_ids=list(request.segment_ids), + ) + + async with self._create_uow() as uow: + saved = await uow.annotations.add(annotation) + await uow.commit() + return self._annotation_to_proto(saved) + + async def GetAnnotation( + self, + request: noteflow_pb2.GetAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + """Get an annotation by ID.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + async with self._create_uow() as uow: + annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) + if annotation is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) + return self._annotation_to_proto(annotation) + + async def ListAnnotations( + self, + request: noteflow_pb2.ListAnnotationsRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ListAnnotationsResponse: + """List annotations for a meeting.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + async with self._create_uow() as uow: + meeting_id = MeetingId(UUID(request.meeting_id)) + # Check if time range filter is specified + if request.start_time > 0 or request.end_time > 0: + annotations = await uow.annotations.get_by_time_range( + meeting_id, + request.start_time, + request.end_time, + ) + else: + annotations = await uow.annotations.get_by_meeting(meeting_id) + + return noteflow_pb2.ListAnnotationsResponse( + annotations=[self._annotation_to_proto(a) for a in annotations] + ) + + async def UpdateAnnotation( + self, + request: noteflow_pb2.UpdateAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + """Update an existing annotation.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + async with self._create_uow() as uow: + annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) + if annotation is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) + + # Update fields if provided + if request.annotation_type !=
noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED: + annotation.annotation_type = self._proto_to_annotation_type(request.annotation_type) + if request.text: + annotation.text = request.text + if request.start_time > 0: + annotation.start_time = request.start_time + if request.end_time > 0: + annotation.end_time = request.end_time + if request.segment_ids: + annotation.segment_ids = list(request.segment_ids) + + updated = await uow.annotations.update(annotation) + await uow.commit() + return self._annotation_to_proto(updated) + + async def DeleteAnnotation( + self, + request: noteflow_pb2.DeleteAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DeleteAnnotationResponse: + """Delete an annotation.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + async with self._create_uow() as uow: + success = await uow.annotations.delete(AnnotationId(UUID(request.annotation_id))) + if success: + await uow.commit() + return noteflow_pb2.DeleteAnnotationResponse(success=True) + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) + + def _annotation_to_proto( + self, + annotation: Annotation, + ) -> noteflow_pb2.Annotation: + """Convert domain Annotation to protobuf.""" + return noteflow_pb2.Annotation( + id=str(annotation.id), + meeting_id=str(annotation.meeting_id), + annotation_type=self._annotation_type_to_proto(annotation.annotation_type), + text=annotation.text, + start_time=annotation.start_time, + end_time=annotation.end_time, + segment_ids=annotation.segment_ids, + created_at=annotation.created_at.timestamp(), + ) + + def _annotation_type_to_proto( + self, + annotation_type: AnnotationType, + ) -> int: + """Convert domain AnnotationType to protobuf enum.""" + mapping = { + AnnotationType.ACTION_ITEM: noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM, + AnnotationType.DECISION: noteflow_pb2.ANNOTATION_TYPE_DECISION, + AnnotationType.NOTE: noteflow_pb2.ANNOTATION_TYPE_NOTE, + AnnotationType.RISK: noteflow_pb2.ANNOTATION_TYPE_RISK, + } + return mapping.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED) + + def _proto_to_annotation_type( + self, + proto_type: int, + ) -> AnnotationType: + """Convert protobuf enum to domain AnnotationType.""" + mapping: dict[int, AnnotationType] = { + int(noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM): AnnotationType.ACTION_ITEM, + int(noteflow_pb2.ANNOTATION_TYPE_DECISION): AnnotationType.DECISION, + int(noteflow_pb2.ANNOTATION_TYPE_NOTE): AnnotationType.NOTE, + int(noteflow_pb2.ANNOTATION_TYPE_RISK): AnnotationType.RISK, + } + return mapping.get(proto_type, AnnotationType.NOTE) + + # ========================================================================= + # Export Methods + # ========================================================================= + + async def ExportTranscript( + self, + request: noteflow_pb2.ExportTranscriptRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ExportTranscriptResponse: + """Export meeting transcript to specified format.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Export requires database persistence", + ) + + # Map proto format to ExportFormat + fmt = self._proto_to_export_format(request.format) + + export_service = ExportService(self._create_uow()) + try: + content = await export_service.export_transcript( + MeetingId(UUID(request.meeting_id)), + fmt, + ) + exporter_info = export_service.get_supported_formats() 
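+ # Resolve the display name and file extension for the chosen format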
+ fmt_name = "" + fmt_ext = "" + for name, ext in exporter_info: + if fmt == ExportFormat.MARKDOWN and ext == ".md": + fmt_name, fmt_ext = name, ext + break + if fmt == ExportFormat.HTML and ext == ".html": + fmt_name, fmt_ext = name, ext + break + + return noteflow_pb2.ExportTranscriptResponse( + content=content, + format_name=fmt_name, + file_extension=fmt_ext, + ) + except ValueError as e: + await context.abort( + grpc.StatusCode.NOT_FOUND, + str(e), + ) + + def _proto_to_export_format(self, proto_format: int) -> ExportFormat: + """Convert protobuf ExportFormat to domain ExportFormat.""" + if proto_format == noteflow_pb2.EXPORT_FORMAT_HTML: + return ExportFormat.HTML + return ExportFormat.MARKDOWN # Default to Markdown + + # ========================================================================= + # Speaker Diarization Methods + # ========================================================================= + + async def RefineSpeakerDiarization( + self, + request: noteflow_pb2.RefineSpeakerDiarizationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.RefineSpeakerDiarizationResponse: + """Run post-meeting speaker diarization refinement. + + Loads the full meeting audio, runs offline diarization, and updates + segment speaker assignments. + """ + if self._diarization_engine is None: + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "Diarization not enabled on server" + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + + try: + meeting_uuid = UUID(request.meeting_id) + except ValueError: + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "Invalid meeting_id" + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(meeting_uuid)) + else: + store = self._get_memory_store() + meeting = store.get(request.meeting_id) + if meeting is None: + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "Meeting not found" + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + meeting_state = meeting.state + if meeting_state in ( + MeetingState.UNSPECIFIED, + MeetingState.CREATED, + MeetingState.RECORDING, + MeetingState.STOPPING, + ): + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = ( + "Meeting must be stopped before refinement " + f"(state: {meeting_state.name.lower()})" + ) + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + + num_speakers = request.num_speakers if request.num_speakers > 0 else None + + job_id = str(uuid4()) + job = _DiarizationJob( + job_id=job_id, + meeting_id=request.meeting_id, + status=noteflow_pb2.JOB_STATUS_QUEUED, + ) + self._diarization_jobs[job_id] = job + + # Task runs in background, no need to await + task = asyncio.create_task(self._run_diarization_job(job_id, num_speakers)) + job.task = task + + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "" + response.job_id = job_id + response.status = 
noteflow_pb2.JOB_STATUS_QUEUED + return response + + async def _run_diarization_job(self, job_id: str, num_speakers: int | None) -> None: + job = self._diarization_jobs.get(job_id) + if job is None: + return + + job.status = noteflow_pb2.JOB_STATUS_RUNNING + job.updated_at = time.time() + + try: + updated_count = await self.refine_speaker_diarization( + meeting_id=job.meeting_id, + num_speakers=num_speakers, + ) + speaker_ids = await self._collect_speaker_ids(job.meeting_id) + job.segments_updated = updated_count + job.speaker_ids = speaker_ids + job.status = noteflow_pb2.JOB_STATUS_COMPLETED + except Exception as exc: + logger.exception("Diarization failed for meeting %s", job.meeting_id) + job.error_message = str(exc) + job.status = noteflow_pb2.JOB_STATUS_FAILED + finally: + job.updated_at = time.time() + + async def RenameSpeaker( + self, + request: noteflow_pb2.RenameSpeakerRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.RenameSpeakerResponse: + """Rename a speaker ID in all segments of a meeting. + + Updates all segments where speaker_id matches old_speaker_id + to use new_speaker_name instead. + """ + if not request.old_speaker_id or not request.new_speaker_name: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "old_speaker_id and new_speaker_name are required", + ) + + try: + meeting_uuid = UUID(request.meeting_id) + except ValueError: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "Invalid meeting_id", + ) + + updated_count = 0 + + if self._use_database(): + async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(meeting_uuid)) + + for segment in segments: + if segment.speaker_id == request.old_speaker_id and segment.db_id: + await uow.segments.update_speaker( + segment.db_id, + request.new_speaker_name, + segment.speaker_confidence, + ) + updated_count += 1 + + await uow.commit() + else: + store = self._get_memory_store() + if meeting := store.get(request.meeting_id): + for segment in meeting.segments: + if segment.speaker_id == request.old_speaker_id: + segment.speaker_id = request.new_speaker_name + updated_count += 1 + + return noteflow_pb2.RenameSpeakerResponse( + segments_updated=updated_count, + success=updated_count > 0, + ) + + async def GetDiarizationJobStatus( + self, + request: noteflow_pb2.GetDiarizationJobStatusRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DiarizationJobStatus: + """Return current status for a diarization job.""" + job = self._diarization_jobs.get(request.job_id) + if job is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + "Diarization job not found", + ) + return job.to_proto() +```` diff --git a/repomix.config.json b/repomix.config.json new file mode 100644 index 0000000..949807f --- /dev/null +++ b/repomix.config.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://repomix.com/schemas/latest/schema.json", + "input": { + "maxFileSize": 52428800 + }, + "output": { + "filePath": "repomix-output.md", + "style": "markdown", + "parsableStyle": false, + "fileSummary": true, + "directoryStructure": true, + "files": true, + "removeComments": false, + "removeEmptyLines": false, + "compress": false, + "topFilesLength": 5, + "showLineNumbers": false, + "truncateBase64": false, + "copyToClipboard": false, + "tokenCountTree": false, + "git": { + "sortByChanges": true, + "sortByChangesMaxCommits": 100, + "includeDiffs": false, + "includeLogs": false, + "includeLogsCount": 50 + } + }, + "include": ["src/", "tests/"], + "ignore": { + "useGitignore": true, + 
"useDefaultPatterns": true, + "customPatterns": [] + }, + "security": { + "enableSecurityCheck": true + }, + "tokenCount": { + "encoding": "o200k_base" + } +} \ No newline at end of file diff --git a/scripts/dev_watch_server.py b/scripts/dev_watch_server.py new file mode 100644 index 0000000..396e19d --- /dev/null +++ b/scripts/dev_watch_server.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +"""Run the gRPC server with auto-reload. + +Watches only the core server code (and alembic.ini) to avoid noisy directories. +""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +from watchfiles import PythonFilter, run_process + + +def run_server() -> None: + """Start the gRPC server process.""" + subprocess.run([sys.executable, "-m", "noteflow.grpc.server"], check=False) + + +def main() -> None: + root = Path(__file__).resolve().parents[1] + watch_paths = [root / "src" / "noteflow", root / "alembic.ini"] + existing_paths = [str(path) for path in watch_paths if path.exists()] or [str(root / "src" / "noteflow")] + + run_process( + *existing_paths, + target=run_server, + watch_filter=PythonFilter(), + ) + + +if __name__ == "__main__": + main() diff --git a/spikes/spike_02_audio_capture/capture_impl.py b/spikes/spike_02_audio_capture/capture_impl.py index 9e78f24..aa0c96e 100644 --- a/spikes/spike_02_audio_capture/capture_impl.py +++ b/spikes/spike_02_audio_capture/capture_impl.py @@ -54,12 +54,12 @@ class SoundDeviceCapture: AudioDeviceInfo( device_id=idx, name=dev["name"], - channels=dev["max_input_channels"], + channels=int(dev["max_input_channels"]), sample_rate=int(dev["default_samplerate"]), is_default=(idx == default_input), ) for idx, dev in enumerate(device_list) - if dev["max_input_channels"] > 0 + if int(dev.get("max_input_channels", 0)) > 0 ) return devices diff --git a/src/noteflow/application/services/trigger_service.py b/src/noteflow/application/services/trigger_service.py index ec82baa..48591bd 100644 --- a/src/noteflow/application/services/trigger_service.py +++ b/src/noteflow/application/services/trigger_service.py @@ -144,19 +144,20 @@ class TriggerService: Returns: TriggerAction to take. """ - # Check rate limit + # Check threshold_ignore first + if confidence < self._settings.threshold_ignore: + return TriggerAction.IGNORE + + # AUTO_START bypasses rate limit (high-confidence trigger should not be delayed) + if confidence >= self._settings.threshold_auto_start and self._settings.auto_start_enabled: + return TriggerAction.AUTO_START + + # Rate limit applies only to NOTIFY actions if self._last_prompt is not None: elapsed = now - self._last_prompt if elapsed < self._settings.rate_limit_seconds: return TriggerAction.IGNORE - # Apply thresholds - if confidence < self._settings.threshold_ignore: - return TriggerAction.IGNORE - - if confidence >= self._settings.threshold_auto_start and self._settings.auto_start_enabled: - return TriggerAction.AUTO_START - return TriggerAction.NOTIFY def _make_decision( diff --git a/src/noteflow/client/_trigger_mixin.py b/src/noteflow/client/_trigger_mixin.py index 45ee90c..8316d77 100644 --- a/src/noteflow/client/_trigger_mixin.py +++ b/src/noteflow/client/_trigger_mixin.py @@ -1,7 +1,7 @@ """Trigger detection mixin for NoteFlow client. Extracts trigger detection logic from app.py to keep file under 750 lines. -Handles meeting detection triggers via audio activity and foreground app monitoring. +Handles meeting detection triggers via app audio activity and calendar proximity. 
""" from __future__ import annotations @@ -16,11 +16,12 @@ from noteflow.application.services import TriggerService, TriggerServiceSettings from noteflow.config.settings import TriggerSettings, get_trigger_settings from noteflow.domain.triggers import TriggerAction, TriggerDecision from noteflow.infrastructure.triggers import ( - AudioActivityProvider, - AudioActivitySettings, - ForegroundAppProvider, - ForegroundAppSettings, + AppAudioProvider, + AppAudioSettings, + CalendarProvider, + CalendarSettings, ) +from noteflow.infrastructure.triggers.calendar import parse_calendar_events if TYPE_CHECKING: from noteflow.client.state import AppState @@ -34,8 +35,8 @@ class TriggerHost(Protocol): _state: AppState _trigger_settings: TriggerSettings | None _trigger_service: TriggerService | None - _audio_activity: AudioActivityProvider | None - _foreground_app: ForegroundAppProvider | None + _app_audio: AppAudioProvider | None + _calendar_provider: CalendarProvider | None _trigger_poll_interval: float _trigger_task: asyncio.Task | None @@ -59,8 +60,10 @@ class TriggerMixin: self._trigger_settings = get_trigger_settings() self._state.trigger_enabled = self._trigger_settings.trigger_enabled self._trigger_poll_interval = self._trigger_settings.trigger_poll_interval_seconds + meeting_apps = {app.lower() for app in self._trigger_settings.trigger_meeting_apps} + suppressed_apps = {app.lower() for app in self._trigger_settings.trigger_suppressed_apps} - audio_settings = AudioActivitySettings( + app_audio_settings = AppAudioSettings( enabled=self._trigger_settings.trigger_audio_enabled, threshold_db=self._trigger_settings.trigger_audio_threshold_db, window_seconds=self._trigger_settings.trigger_audio_window_seconds, @@ -68,23 +71,21 @@ class TriggerMixin: min_samples=self._trigger_settings.trigger_audio_min_samples, max_history=self._trigger_settings.trigger_audio_max_history, weight=self._trigger_settings.trigger_weight_audio, - ) - meeting_apps = {app.lower() for app in self._trigger_settings.trigger_meeting_apps} - suppressed_apps = {app.lower() for app in self._trigger_settings.trigger_suppressed_apps} - foreground_settings = ForegroundAppSettings( - enabled=self._trigger_settings.trigger_foreground_enabled, - weight=self._trigger_settings.trigger_weight_foreground, meeting_apps=meeting_apps, suppressed_apps=suppressed_apps, ) - - self._audio_activity = AudioActivityProvider( - self._state.level_provider, - audio_settings, + calendar_settings = CalendarSettings( + enabled=self._trigger_settings.trigger_calendar_enabled, + weight=self._trigger_settings.trigger_weight_calendar, + lookahead_minutes=self._trigger_settings.trigger_calendar_lookahead_minutes, + lookbehind_minutes=self._trigger_settings.trigger_calendar_lookbehind_minutes, + events=parse_calendar_events(self._trigger_settings.trigger_calendar_events), ) - self._foreground_app = ForegroundAppProvider(foreground_settings) + + self._app_audio = AppAudioProvider(app_audio_settings) + self._calendar_provider = CalendarProvider(calendar_settings) self._trigger_service = TriggerService( - providers=[self._audio_activity, self._foreground_app], + providers=[self._app_audio, self._calendar_provider], settings=TriggerServiceSettings( enabled=self._trigger_settings.trigger_enabled, auto_start_enabled=self._trigger_settings.trigger_auto_start, @@ -97,11 +98,7 @@ class TriggerMixin: def _should_keep_capture_running(self: TriggerHost) -> bool: """Return True if background audio capture should remain active.""" - if not self._trigger_settings: - return False 
- return ( - self._trigger_settings.trigger_enabled and self._trigger_settings.trigger_audio_enabled - ) + return False async def _trigger_check_loop(self: TriggerHost) -> None: """Background loop to check trigger conditions. @@ -121,10 +118,6 @@ class TriggerMixin: if not self._state.trigger_enabled or not self._trigger_service: continue - # Start background audio capture only when needed for triggers - if self._should_keep_capture_running(): - self._ensure_audio_capture() - # Evaluate triggers decision = self._trigger_service.evaluate() self._state.trigger_decision = decision diff --git a/src/noteflow/client/app.py b/src/noteflow/client/app.py index b3ec76a..cc31f1b 100644 --- a/src/noteflow/client/app.py +++ b/src/noteflow/client/app.py @@ -9,6 +9,8 @@ from __future__ import annotations import argparse import asyncio import logging +import queue +import threading import time from typing import TYPE_CHECKING, Final @@ -29,6 +31,7 @@ from noteflow.client.components import ( VuMeterComponent, ) from noteflow.client.state import AppState +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.config.settings import TriggerSettings, get_settings from noteflow.infrastructure.audio import ( MeetingAudioReader, @@ -38,7 +41,6 @@ from noteflow.infrastructure.audio import ( ) from noteflow.infrastructure.security import AesGcmCryptoBox, KeyringKeyStore from noteflow.infrastructure.summarization import create_summarization_service -from noteflow.infrastructure.triggers import AudioActivityProvider, ForegroundAppProvider if TYPE_CHECKING: import numpy as np @@ -52,6 +54,7 @@ if TYPE_CHECKING: ServerInfo, TranscriptSegment, ) + from noteflow.infrastructure.triggers import AppAudioProvider, CalendarProvider logger = logging.getLogger(__name__) @@ -105,8 +108,8 @@ class NoteFlowClientApp(TriggerMixin): # Trigger detection (M5) self._trigger_settings: TriggerSettings | None = None self._trigger_service: TriggerService | None = None - self._audio_activity: AudioActivityProvider | None = None - self._foreground_app: ForegroundAppProvider | None = None + self._app_audio: AppAudioProvider | None = None + self._calendar_provider: CalendarProvider | None = None self._trigger_poll_interval: float = 0.0 self._trigger_task: asyncio.Task | None = None @@ -114,6 +117,11 @@ class NoteFlowClientApp(TriggerMixin): self._record_btn: ft.ElevatedButton | None = None self._stop_btn: ft.ElevatedButton | None = None + # Audio frame consumer thread (process frames from audio callback thread) + self._audio_frame_queue: queue.Queue[tuple[NDArray[np.float32], float]] = queue.Queue() + self._audio_consumer_stop = threading.Event() + self._audio_consumer_thread: threading.Thread | None = None + def run(self) -> None: """Run the Flet application.""" ft.app(target=self._main) @@ -256,8 +264,8 @@ class NoteFlowClientApp(TriggerMixin): keystore = KeyringKeyStore() crypto = AesGcmCryptoBox(keystore) self._audio_reader = MeetingAudioReader(crypto, settings.meetings_dir) - except Exception: - logger.exception("Failed to initialize meeting audio reader") + except (OSError, ValueError, KeyError, RuntimeError) as exc: + logger.exception("Failed to initialize meeting audio reader: %s", exc) self._audio_reader = None return self._audio_reader @@ -276,8 +284,8 @@ class NoteFlowClientApp(TriggerMixin): except FileNotFoundError: logger.info("Audio file missing for meeting %s", meeting.id) return [] - except Exception: - logger.exception("Failed to load audio for meeting %s", meeting.id) + except (OSError, ValueError, 
RuntimeError) as exc: + logger.exception("Failed to load audio for meeting %s: %s", meeting.id, exc) return [] def _ensure_audio_capture(self) -> bool: @@ -294,12 +302,12 @@ class NoteFlowClientApp(TriggerMixin): self._audio_capture.start( device_id=None, on_frames=self._on_audio_frames, - sample_rate=16000, + sample_rate=DEFAULT_SAMPLE_RATE, channels=1, chunk_duration_ms=100, ) - except Exception: - logger.exception("Failed to start audio capture") + except (RuntimeError, OSError) as exc: + logger.exception("Failed to start audio capture: %s", exc) self._audio_capture = None return False @@ -404,6 +412,9 @@ class NoteFlowClientApp(TriggerMixin): self._state.recording = True + # Start audio frame consumer thread + self._start_audio_consumer() + # Clear audio buffer for new recording self._state.session_audio_buffer.clear() @@ -424,7 +435,10 @@ class NoteFlowClientApp(TriggerMixin): def _stop_recording(self) -> None: """Stop recording audio.""" - # Stop audio capture first + # Stop audio frame consumer thread + self._stop_audio_consumer() + + # Stop audio capture if self._audio_capture and not self._should_keep_capture_running(): self._audio_capture.stop() self._audio_capture = None @@ -467,7 +481,57 @@ class NoteFlowClientApp(TriggerMixin): frames: NDArray[np.float32], timestamp: float, ) -> None: - """Handle audio frames from capture. + """Handle audio frames from capture (called from audio thread). + + Enqueues frames for processing by consumer thread to avoid blocking + the real-time audio callback. + + Args: + frames: Audio samples. + timestamp: Capture timestamp. + """ + self._audio_frame_queue.put_nowait((frames.copy(), timestamp)) + + def _start_audio_consumer(self) -> None: + """Start the audio frame consumer thread.""" + if self._audio_consumer_thread is not None and self._audio_consumer_thread.is_alive(): + return + self._audio_consumer_stop.clear() + self._audio_consumer_thread = threading.Thread( + target=self._audio_consumer_loop, + daemon=True, + name="audio-consumer", + ) + self._audio_consumer_thread.start() + + def _stop_audio_consumer(self) -> None: + """Stop the audio frame consumer thread.""" + self._audio_consumer_stop.set() + if self._audio_consumer_thread is not None: + self._audio_consumer_thread.join(timeout=1.0) + self._audio_consumer_thread = None + # Drain remaining frames + while not self._audio_frame_queue.empty(): + try: + self._audio_frame_queue.get_nowait() + except queue.Empty: + break + + def _audio_consumer_loop(self) -> None: + """Consumer loop that processes audio frames from the queue.""" + while not self._audio_consumer_stop.is_set(): + try: + frames, timestamp = self._audio_frame_queue.get(timeout=0.1) + self._process_audio_frames(frames, timestamp) + except queue.Empty: + continue + + def _process_audio_frames( + self, + frames: NDArray[np.float32], + timestamp: float, + ) -> None: + """Process audio frames from consumer thread. Args: frames: Audio samples. 
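Note that `_audio_frame_queue` above is unbounded, so frames accumulate if the consumer thread ever stalls. A hedged variant (not what this diff does) that bounds the queue and sheds the oldest chunk, so the real-time callback still never blocks:

```python
# Sketch: bounded frame queue that drops the oldest chunk when full.
# Assumes a single producer (the audio callback) and numpy frame arrays.
import queue

import numpy as np

_frames: queue.Queue[tuple[np.ndarray, float]] = queue.Queue(maxsize=256)


def on_audio_frames(frames: np.ndarray, timestamp: float) -> None:
    """Called from the audio callback thread; must never block."""
    item = (frames.copy(), timestamp)
    try:
        _frames.put_nowait(item)
    except queue.Full:
        try:
            _frames.get_nowait()  # shed the oldest chunk under pressure
        except queue.Empty:
            pass
        _frames.put_nowait(item)
```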
@@ -477,20 +541,18 @@ class NoteFlowClientApp(TriggerMixin): if self._client and self._state.recording: self._client.send_audio(frames, timestamp) - # Buffer for playback (estimate duration from chunk size) + # Buffer for playback if self._state.recording: - duration = len(frames) / 16000.0 # Sample rate is 16kHz + duration = len(frames) / DEFAULT_SAMPLE_RATE self._state.session_audio_buffer.append( - TimestampedAudio(frames=frames.copy(), timestamp=timestamp, duration=duration) + TimestampedAudio(frames=frames, timestamp=timestamp, duration=duration) ) # Update VU meter if self._vu_meter: self._vu_meter.on_audio_frames(frames) - # Feed audio activity provider for trigger detection - if self._audio_activity: - self._audio_activity.update(frames, timestamp) + # Trigger detection uses system output + calendar; no mic-derived updates here. def _on_segment_click(self, segment_index: int) -> None: """Handle transcript segment click - seek playback to segment. @@ -549,8 +611,8 @@ class NoteFlowClientApp(TriggerMixin): segments = client.get_meeting_segments(meeting.id) annotations = client.list_annotations(meeting.id) audio_chunks = self._load_meeting_audio(meeting) - except Exception: - logger.exception("Failed to load meeting: %s", meeting.id) + except (ConnectionError, ValueError, OSError, RuntimeError) as exc: + logger.exception("Failed to load meeting %s: %s", meeting.id, exc) return # Apply results on UI thread to avoid race conditions @@ -667,10 +729,16 @@ class NoteFlowClientApp(TriggerMixin): self._trigger_task.cancel() self._trigger_task = None + # Stop audio consumer if running + self._stop_audio_consumer() + + if self._app_audio: + self._app_audio.close() + if self._audio_capture and not self._state.recording: try: self._audio_capture.stop() - except Exception: + except RuntimeError: logger.debug("Error stopping audio capture during shutdown", exc_info=True) self._audio_capture = None diff --git a/src/noteflow/client/components/meeting_library.py b/src/noteflow/client/components/meeting_library.py index 1bc29bf..dfe6564 100644 --- a/src/noteflow/client/components/meeting_library.py +++ b/src/noteflow/client/components/meeting_library.py @@ -7,6 +7,8 @@ Does not recreate any types - imports and uses existing ones. from __future__ import annotations import logging +import threading +import time from collections.abc import Callable from datetime import datetime from typing import TYPE_CHECKING @@ -29,6 +31,8 @@ class MeetingLibraryComponent: Uses NoteFlowClient.list_meetings() and export_transcript() for data. """ + DIARIZATION_POLL_INTERVAL_SECONDS: float = 2.0 + def __init__( self, state: AppState, @@ -50,6 +54,8 @@ class MeetingLibraryComponent: self._search_field: ft.TextField | None = None self._list_view: ft.ListView | None = None self._export_btn: ft.ElevatedButton | None = None + self._analyze_btn: ft.ElevatedButton | None = None + self._rename_btn: ft.ElevatedButton | None = None self._refresh_btn: ft.IconButton | None = None self._column: ft.Column | None = None @@ -57,6 +63,14 @@ class MeetingLibraryComponent: self._export_dialog: ft.AlertDialog | None = None self._format_dropdown: ft.Dropdown | None = None + # Analyze speakers dialog + self._analyze_dialog: ft.AlertDialog | None = None + self._num_speakers_field: ft.TextField | None = None + + # Rename speakers dialog + self._rename_dialog: ft.AlertDialog | None = None + self._rename_fields: dict[str, ft.TextField] = {} + def build(self) -> ft.Column: """Build meeting library UI. 
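The `_load_meeting` flow above follows a load-off-thread / apply-on-UI-thread shape. A generic sketch of that pattern, assuming only that `run_on_ui_thread` marshals a callable onto the UI loop as `AppState` does:

```python
# Sketch of the load-then-apply pattern: blocking I/O on a worker thread,
# UI state mutation only on the UI thread.
import threading
from collections.abc import Callable


def load_then_apply(
    load: Callable[[], object],
    apply: Callable[[object], None],
    run_on_ui_thread: Callable[[Callable[[], None]], None],
) -> None:
    def worker() -> None:
        result = load()  # blocking I/O happens off the UI thread
        run_on_ui_thread(lambda: apply(result))  # apply results without racing the UI

    threading.Thread(target=worker, daemon=True, name="meeting-load").start()
```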
@@ -80,6 +94,18 @@ class MeetingLibraryComponent: on_click=self._show_export_dialog, disabled=True, ) + self._analyze_btn = ft.ElevatedButton( + "Refine Speakers", + icon=ft.Icons.RECORD_VOICE_OVER, + on_click=self._show_analyze_dialog, + disabled=True, + ) + self._rename_btn = ft.ElevatedButton( + "Rename Speakers", + icon=ft.Icons.EDIT, + on_click=self._show_rename_dialog, + disabled=True, + ) self._list_view = ft.ListView( spacing=5, @@ -95,7 +121,11 @@ class MeetingLibraryComponent: border=ft.border.all(1, ft.Colors.GREY_400), border_radius=8, ), - ft.Row([self._export_btn], alignment=ft.MainAxisAlignment.END), + ft.Row( + [self._analyze_btn, self._rename_btn, self._export_btn], + alignment=ft.MainAxisAlignment.END, + spacing=10, + ), ], spacing=10, ) @@ -192,9 +222,13 @@ class MeetingLibraryComponent: """ self._state.selected_meeting = meeting - # Enable export button + # Enable action buttons if self._export_btn: self._export_btn.disabled = False + if self._analyze_btn: + self._analyze_btn.disabled = not self._can_refine_speakers(meeting) + if self._rename_btn: + self._rename_btn.disabled = not self._can_refine_speakers(meeting) # Re-render to update selection self._render_meetings() @@ -304,3 +338,437 @@ class MeetingLibraryComponent: file_name=filename, allowed_extensions=[extension], ) + + # ========================================================================= + # Speaker Refinement Methods + # ========================================================================= + + def _show_analyze_dialog(self, e: ft.ControlEvent) -> None: + """Show speaker refinement dialog.""" + if not self._state.selected_meeting: + return + + if not self._can_refine_speakers(self._state.selected_meeting): + self._show_simple_dialog( + "Meeting still active", + ft.Text("Stop the meeting before refining speakers."), + ) + return + + self._num_speakers_field = ft.TextField( + label="Number of speakers (optional)", + hint_text="Leave empty for auto-detect", + width=200, + keyboard_type=ft.KeyboardType.NUMBER, + ) + + self._analyze_dialog = ft.AlertDialog( + title=ft.Text("Refine Speakers"), + content=ft.Column( + [ + ft.Text(f"Meeting: {self._state.selected_meeting.title}"), + ft.Text( + "Refine speaker labels using offline diarization.", + size=12, + color=ft.Colors.GREY_600, + ), + self._num_speakers_field, + ], + spacing=10, + tight=True, + ), + actions=[ + ft.TextButton("Cancel", on_click=self._close_analyze_dialog), + ft.ElevatedButton("Analyze", on_click=self._do_analyze), + ], + actions_alignment=ft.MainAxisAlignment.END, + ) + + if self._state._page: + self._state._page.dialog = self._analyze_dialog + self._analyze_dialog.open = True + self._state.request_update() + + def _close_analyze_dialog(self, e: ft.ControlEvent | None = None) -> None: + """Close the analyze dialog.""" + if self._analyze_dialog: + self._analyze_dialog.open = False + self._state.request_update() + + def _do_analyze(self, e: ft.ControlEvent) -> None: + """Perform speaker analysis.""" + if not self._state.selected_meeting: + return + + # Parse number of speakers (optional) + num_speakers: int | None = None + if self._num_speakers_field and self._num_speakers_field.value: + try: + num_speakers = int(self._num_speakers_field.value) + if num_speakers < 1: + num_speakers = None + except ValueError: + logger.debug("Invalid speaker count input '%s', using auto-detection", self._num_speakers_field.value) + + meeting_id = self._state.selected_meeting.id + self._close_analyze_dialog() + + client = self._get_client() + if not client: + 
logger.warning("No gRPC client available for analysis") + return + + # Show progress indicator + self._show_analysis_progress("Starting...") + + try: + result = client.refine_speaker_diarization(meeting_id, num_speakers) + except Exception as exc: + logger.error("Error analyzing speakers: %s", exc) + self._show_analysis_error(str(exc)) + return + + if not result: + self._show_analysis_error("Analysis failed - no response from server") + return + + if result.is_terminal: + if result.success: + self._show_analysis_result(result.segments_updated, result.speaker_ids) + else: + self._show_analysis_error(result.error_message or "Analysis failed") + return + + if not result.job_id: + self._show_analysis_error(result.error_message or "Server did not return job ID") + return + + # Job queued/running - poll for completion + self._show_analysis_progress(self._format_job_status(result.status)) + self._start_diarization_poll(result.job_id) + + def _show_analysis_progress(self, status: str = "Refining...") -> None: + """Show refinement in progress indicator.""" + if self._analyze_btn: + self._analyze_btn.disabled = True + self._analyze_btn.text = status + self._state.request_update() + + def _show_analysis_result(self, segments_updated: int, speaker_ids: list[str]) -> None: + """Show refinement success result. + + Args: + segments_updated: Number of segments with speaker labels. + speaker_ids: List of detected speaker IDs. + """ + if self._analyze_btn: + self._analyze_btn.disabled = False + self._analyze_btn.text = "Refine Speakers" + + speaker_list = ", ".join(speaker_ids) if speaker_ids else "None found" + + result_dialog = ft.AlertDialog( + title=ft.Text("Refinement Complete"), + content=ft.Column( + [ + ft.Text(f"Segments updated: {segments_updated}"), + ft.Text(f"Speakers found: {speaker_list}"), + ft.Text( + "Reload the meeting to see speaker labels.", + size=12, + color=ft.Colors.GREY_600, + italic=True, + ), + ], + spacing=5, + tight=True, + ), + actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))], + ) + + if self._state._page: + self._state._page.dialog = result_dialog + result_dialog.open = True + self._state.request_update() + + def _show_analysis_error(self, error_message: str) -> None: + """Show analysis error. + + Args: + error_message: Error description. 
+ """ + if self._analyze_btn: + self._analyze_btn.disabled = False + self._analyze_btn.text = "Refine Speakers" + self._show_simple_dialog("Refinement Failed", ft.Text(error_message)) + + def _close_result_dialog(self, e: ft.ControlEvent) -> None: + """Close any result dialog.""" + if self._state._page and self._state._page.dialog: + self._state._page.dialog.open = False + self._state.request_update() + + def _start_diarization_poll(self, job_id: str) -> None: + """Start polling for diarization job completion.""" + page = self._state._page + if page and hasattr(page, "run_thread"): + page.run_thread(lambda: self._poll_diarization_job(job_id)) + return + + threading.Thread( + target=self._poll_diarization_job, + args=(job_id,), + daemon=True, + name="diarization-poll", + ).start() + + def _poll_diarization_job(self, job_id: str) -> None: + """Poll background diarization job until completion.""" + client = self._get_client() + if not client: + self._state.run_on_ui_thread( + lambda: self._show_analysis_error("No gRPC client available for polling") + ) + return + + while True: + result = client.get_diarization_job_status(job_id) + if not result: + self._state.run_on_ui_thread( + lambda: self._show_analysis_error("Failed to fetch diarization status") + ) + return + + if result.is_terminal: + if result.success: + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_result( + r.segments_updated, + r.speaker_ids, + ) + ) + else: + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_error( + r.error_message or "Diarization failed" + ) + ) + return + + # Update status text while running + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_progress(self._format_job_status(r.status)) + ) + time.sleep(self.DIARIZATION_POLL_INTERVAL_SECONDS) + + @staticmethod + def _format_job_status(status: str) -> str: + """Format job status for button label.""" + return { + "queued": "Queued...", + "running": "Refining...", + }.get(status, "Refining...") + + def _show_simple_dialog(self, title: str, content: ft.Control) -> None: + """Show a simple dialog with title, content, and OK button. + + Args: + title: Dialog title. + content: Dialog content control. 
+ """ + dialog = ft.AlertDialog( + title=ft.Text(title), + content=content, + actions=[ft.TextButton("OK", on_click=self._close_result_dialog)], + ) + if self._state._page: + self._state._page.dialog = dialog + dialog.open = True + self._state.request_update() + + # ========================================================================= + # Speaker Rename Methods + # ========================================================================= + + def _show_rename_dialog(self, e: ft.ControlEvent) -> None: + """Show speaker rename dialog with current speaker IDs.""" + if not self._state.selected_meeting: + return + + if not self._can_refine_speakers(self._state.selected_meeting): + self._show_simple_dialog( + "Meeting still active", + ft.Text("Stop the meeting before renaming speakers."), + ) + return + + client = self._get_client() + if not client: + logger.warning("No gRPC client available") + return + + # Get segments to extract distinct speaker IDs + meeting_id = self._state.selected_meeting.id + segments = client.get_meeting_segments(meeting_id) + + # Extract distinct speaker IDs + speaker_ids = sorted({s.speaker_id for s in segments if s.speaker_id}) + + if not speaker_ids: + self._show_no_speakers_message() + return + + # Create text fields for each speaker + self._rename_fields.clear() + speaker_controls: list[ft.Control] = [] + + for speaker_id in speaker_ids: + field = ft.TextField( + label=f"{speaker_id}", + hint_text="Enter new name", + width=200, + ) + self._rename_fields[speaker_id] = field + speaker_controls.append( + ft.Row( + [ + ft.Text(speaker_id, width=120, size=12), + ft.Icon(ft.Icons.ARROW_RIGHT, size=16), + field, + ], + alignment=ft.MainAxisAlignment.START, + ) + ) + + self._rename_dialog = ft.AlertDialog( + title=ft.Text("Rename Speakers"), + content=ft.Column( + [ + ft.Text(f"Meeting: {self._state.selected_meeting.title}"), + ft.Text( + "Enter new names for speakers (leave blank to keep current):", + size=12, + color=ft.Colors.GREY_600, + ), + ft.Divider(), + *speaker_controls, + ], + spacing=10, + scroll=ft.ScrollMode.AUTO, + height=300, + ), + actions=[ + ft.TextButton("Cancel", on_click=self._close_rename_dialog), + ft.ElevatedButton("Apply", on_click=self._do_rename), + ], + actions_alignment=ft.MainAxisAlignment.END, + ) + + if self._state._page: + self._state._page.dialog = self._rename_dialog + self._rename_dialog.open = True + self._state.request_update() + + def _close_rename_dialog(self, e: ft.ControlEvent | None = None) -> None: + """Close the rename dialog.""" + if self._rename_dialog: + self._rename_dialog.open = False + self._state.request_update() + + def _show_no_speakers_message(self) -> None: + """Show message when no speakers found.""" + self._show_simple_dialog( + "No Speakers Found", + ft.Text( + "This meeting has no speaker labels. " + "Run 'Refine Speakers' first to identify speakers." 
+ ), + ) + + def _do_rename(self, e: ft.ControlEvent) -> None: + """Apply speaker renames.""" + if not self._state.selected_meeting: + return + + client = self._get_client() + if not client: + logger.warning("No gRPC client available") + return + + meeting_id = self._state.selected_meeting.id + self._close_rename_dialog() + + # Collect renames (only non-empty values) + renames: list[tuple[str, str]] = [] + for old_id, field in self._rename_fields.items(): + new_name = (field.value or "").strip() + if new_name and new_name != old_id: + renames.append((old_id, new_name)) + + if not renames: + return + + # Apply renames + total_updated = 0 + errors: list[str] = [] + + for old_id, new_name in renames: + try: + result = client.rename_speaker(meeting_id, old_id, new_name) + if result and result.success: + total_updated += result.segments_updated + else: + errors.append(f"{old_id}: rename failed") + except Exception as exc: + logger.error("Error renaming speaker %s: %s", old_id, exc) + errors.append(f"{old_id}: {exc}") + + # Show result + if errors: + self._show_rename_errors(errors) + else: + self._show_rename_success(total_updated, len(renames)) + + def _show_rename_success(self, segments_updated: int, speakers_renamed: int) -> None: + """Show rename success message. + + Args: + segments_updated: Total number of segments updated. + speakers_renamed: Number of speakers renamed. + """ + success_dialog = ft.AlertDialog( + title=ft.Text("Rename Complete"), + content=ft.Column( + [ + ft.Text(f"Renamed {speakers_renamed} speaker(s)"), + ft.Text(f"Updated {segments_updated} segment(s)"), + ft.Text( + "Reload the meeting to see the new speaker names.", + size=12, + color=ft.Colors.GREY_600, + italic=True, + ), + ], + spacing=5, + tight=True, + ), + actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))], + ) + + if self._state._page: + self._state._page.dialog = success_dialog + success_dialog.open = True + self._state.request_update() + + def _show_rename_errors(self, errors: list[str]) -> None: + """Show rename errors. + + Args: + errors: List of error messages. + """ + self._show_simple_dialog("Rename Errors", ft.Text("\n".join(errors))) + + @staticmethod + def _can_refine_speakers(meeting: MeetingInfo) -> bool: + """Return True when meeting is stopped/completed and safe to refine/rename.""" + return meeting.state in {"stopped", "completed", "error"} diff --git a/src/noteflow/client/components/playback_controls.py b/src/noteflow/client/components/playback_controls.py index 684cc1f..f1de798 100644 --- a/src/noteflow/client/components/playback_controls.py +++ b/src/noteflow/client/components/playback_controls.py @@ -1,19 +1,17 @@ """Playback controls component with play/pause/stop and timeline. Uses SoundDevicePlayback from infrastructure.audio and format_timestamp from _formatting. -Does not recreate any types - imports and uses existing ones. +Receives position updates via callback from SoundDevicePlayback. 
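Both playback components now depend on `add_position_callback` / `remove_position_callback` on `SoundDevicePlayback`; that side of the change is not in this hunk. A hedged sketch of what such a registry needs, in particular snapshot iteration so a callback can unregister itself mid-dispatch, as `_on_position_update` does:

```python
# Sketch of a position-callback registry (assumed SoundDevicePlayback API;
# its actual implementation is not part of this hunk).
import threading
from collections.abc import Callable

PositionCallback = Callable[[float], None]


class PositionCallbackRegistry:
    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._callbacks: list[PositionCallback] = []

    def add_position_callback(self, cb: PositionCallback) -> None:
        with self._lock:
            if cb not in self._callbacks:
                self._callbacks.append(cb)

    def remove_position_callback(self, cb: PositionCallback) -> None:
        with self._lock:
            if cb in self._callbacks:
                self._callbacks.remove(cb)

    def _dispatch(self, position: float) -> None:
        # Iterate over a snapshot: callbacks may remove themselves while running.
        with self._lock:
            snapshot = tuple(self._callbacks)
        for cb in snapshot:
            cb(position)
```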
""" from __future__ import annotations import logging from collections.abc import Callable -from typing import TYPE_CHECKING, Final +from typing import TYPE_CHECKING import flet as ft -from noteflow.client.components._thread_mixin import BackgroundWorkerMixin - # REUSE existing types - do not recreate from noteflow.infrastructure.audio import PlaybackState from noteflow.infrastructure.export._formatting import format_timestamp @@ -23,13 +21,12 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -POSITION_POLL_INTERVAL: Final[float] = 0.1 # 100ms for smooth timeline updates - -class PlaybackControlsComponent(BackgroundWorkerMixin): +class PlaybackControlsComponent: """Audio playback controls with play/pause/stop and timeline. Uses SoundDevicePlayback from state and format_timestamp from _formatting. + Receives position updates via callback from SoundDevicePlayback. """ def __init__( @@ -45,7 +42,7 @@ class PlaybackControlsComponent(BackgroundWorkerMixin): """ self._state = state self._on_position_change = on_position_change - self._init_worker() + self._active = False # UI elements self._play_btn: ft.IconButton | None = None @@ -159,20 +156,20 @@ class PlaybackControlsComponent(BackgroundWorkerMixin): if playback.state == PlaybackState.PLAYING: playback.pause() - self._stop_polling() + self._stop_position_updates() self._update_play_button(playing=False) elif playback.state == PlaybackState.PAUSED: playback.resume() - self._start_polling() + self._start_position_updates() self._update_play_button(playing=True) elif buffer := self._state.session_audio_buffer: playback.play(buffer) - self._start_polling() + self._start_position_updates() self._update_play_button(playing=True) def _on_stop_click(self, e: ft.ControlEvent) -> None: """Handle stop button click.""" - self._stop_polling() + self._stop_position_updates() self._state.playback.stop() self._state.playback_position = 0.0 self._update_play_button(playing=False) @@ -195,37 +192,47 @@ class PlaybackControlsComponent(BackgroundWorkerMixin): self._play_btn.tooltip = "Play" self._state.request_update() - def _start_polling(self) -> None: - """Start position polling thread.""" - self._start_worker(self._poll_loop, "PlaybackPositionPoll") + def _start_position_updates(self) -> None: + """Start receiving position updates via callback.""" + if self._active: + return + self._active = True + self._state.playback.add_position_callback(self._on_position_update) - def _stop_polling(self) -> None: - """Stop position polling thread.""" - self._stop_worker() + def _stop_position_updates(self) -> None: + """Stop receiving position updates.""" + if not self._active: + return + self._active = False + self._state.playback.remove_position_callback(self._on_position_update) - def _poll_loop(self) -> None: - """Background polling loop for position updates.""" - while self._should_run(): - playback = self._state.playback + def _on_position_update(self, position: float) -> None: + """Handle position update from playback callback. - if playback.state == PlaybackState.PLAYING: - position = playback.current_position - self._state.playback_position = position - self._state.run_on_ui_thread(self._update_position_display) + Called from audio thread - schedules UI work on UI thread. 
+ """ + if not self._active: + return - # Notify callback - if self._on_position_change: - try: - self._on_position_change(position) - except Exception as e: - logger.error("Position change callback error: %s", e) + playback = self._state.playback - elif playback.state == PlaybackState.STOPPED: - # Playback finished - update UI and stop polling - self._state.run_on_ui_thread(self._on_playback_finished) - break + # Check if playback stopped + if playback.state == PlaybackState.STOPPED: + self._active = False + self._state.playback.remove_position_callback(self._on_position_update) + self._state.run_on_ui_thread(self._on_playback_finished) + return - self._wait_interval(POSITION_POLL_INTERVAL) + # Update position state + self._state.playback_position = position + self._state.run_on_ui_thread(self._update_position_display) + + # Notify external callback + if self._on_position_change: + try: + self._on_position_change(position) + except Exception as e: + logger.error("Position change callback error: %s", e) def _update_position_display(self) -> None: """Update position display elements (UI thread only).""" diff --git a/src/noteflow/client/components/playback_sync.py b/src/noteflow/client/components/playback_sync.py index ea32168..325da9b 100644 --- a/src/noteflow/client/components/playback_sync.py +++ b/src/noteflow/client/components/playback_sync.py @@ -1,15 +1,14 @@ """Playback-transcript synchronization controller. -Polls playback position and updates transcript highlight state. -Follows RecordingTimerComponent pattern for background threading. +Uses playback position callbacks to update transcript highlight state. +No polling thread - receives position updates directly from SoundDevicePlayback. """ from __future__ import annotations import logging -import threading from collections.abc import Callable -from typing import TYPE_CHECKING, Final +from typing import TYPE_CHECKING from noteflow.infrastructure.audio import PlaybackState @@ -18,14 +17,12 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -POSITION_POLL_INTERVAL: Final[float] = 0.1 # 100ms for smooth highlighting - class PlaybackSyncController: """Synchronize playback position with transcript highlighting. - Polls playback position and updates state.highlighted_segment_index. - Triggers UI updates via state.run_on_ui_thread(). + Receives position updates via callback from SoundDevicePlayback. + Updates state.highlighted_segment_index and triggers UI updates. 
""" def __init__( @@ -41,46 +38,46 @@ class PlaybackSyncController: """ self._state = state self._on_highlight_change = on_highlight_change - self._sync_thread: threading.Thread | None = None - self._stop_event = threading.Event() + self._active = False def start(self) -> None: - """Start position sync polling.""" - if self._sync_thread and self._sync_thread.is_alive(): + """Start position sync by registering callback with playback.""" + if self._active: return - self._stop_event.clear() - self._sync_thread = threading.Thread( - target=self._sync_loop, - daemon=True, - name="PlaybackSyncController", - ) - self._sync_thread.start() + self._active = True + self._state.playback.add_position_callback(self._on_position_update) logger.debug("Started playback sync controller") def stop(self) -> None: - """Stop position sync polling.""" - self._stop_event.set() - if self._sync_thread: - self._sync_thread.join(timeout=2.0) - self._sync_thread = None + """Stop position sync by unregistering callback.""" + if not self._active: + return + + self._active = False + self._state.playback.remove_position_callback(self._on_position_update) + + # Clear highlight when stopped + if self._state.highlighted_segment_index is not None: + self._state.highlighted_segment_index = None + self._state.run_on_ui_thread(self._notify_highlight_change) + logger.debug("Stopped playback sync controller") - def _sync_loop(self) -> None: - """Background sync loop - polls position and updates highlight.""" - while not self._stop_event.is_set(): - playback = self._state.playback + def _on_position_update(self, position: float) -> None: + """Handle position update from playback callback. - if playback.state == PlaybackState.PLAYING: - position = playback.current_position - self._update_position(position) - elif playback.state == PlaybackState.STOPPED: - # Clear highlight when stopped - if self._state.highlighted_segment_index is not None: - self._state.highlighted_segment_index = None - self._state.run_on_ui_thread(self._notify_highlight_change) + Called from audio thread - schedules UI work on UI thread. + """ + if not self._active: + return - self._stop_event.wait(POSITION_POLL_INTERVAL) + # Check if playback stopped + if self._state.playback.state == PlaybackState.STOPPED: + self.stop() + return + + self._update_position(position) def _update_position(self, position: float) -> None: """Update state with current position and find matching segment.""" diff --git a/src/noteflow/client/components/transcript.py b/src/noteflow/client/components/transcript.py index 6d18604..37b3b4b 100644 --- a/src/noteflow/client/components/transcript.py +++ b/src/noteflow/client/components/transcript.py @@ -6,8 +6,8 @@ Does not recreate any types - imports and uses existing ones. 
from __future__ import annotations -from collections.abc import Callable import hashlib +from collections.abc import Callable from typing import TYPE_CHECKING import flet as ft diff --git a/src/noteflow/config/__init__.py b/src/noteflow/config/__init__.py index 6881586..9a0f2cc 100644 --- a/src/noteflow/config/__init__.py +++ b/src/noteflow/config/__init__.py @@ -1,5 +1,14 @@ """NoteFlow configuration module.""" +from .constants import DEFAULT_GRPC_PORT, DEFAULT_SAMPLE_RATE, MAX_GRPC_MESSAGE_SIZE from .settings import Settings, TriggerSettings, get_settings, get_trigger_settings -__all__ = ["Settings", "TriggerSettings", "get_settings", "get_trigger_settings"] +__all__ = [ + "DEFAULT_GRPC_PORT", + "DEFAULT_SAMPLE_RATE", + "MAX_GRPC_MESSAGE_SIZE", + "Settings", + "TriggerSettings", + "get_settings", + "get_trigger_settings", +] diff --git a/src/noteflow/config/constants.py b/src/noteflow/config/constants.py new file mode 100644 index 0000000..3d0f4a5 --- /dev/null +++ b/src/noteflow/config/constants.py @@ -0,0 +1,23 @@ +"""Centralized constants for NoteFlow. + +This module provides shared constants used across the codebase to avoid +magic numbers and ensure consistency. +""" + +from __future__ import annotations + +from typing import Final + +# Audio constants +DEFAULT_SAMPLE_RATE: Final[int] = 16000 +"""Default audio sample rate in Hz (16 kHz).""" + +POSITION_UPDATE_INTERVAL: Final[float] = 0.1 +"""Playback position update interval in seconds (100ms).""" + +# gRPC constants +DEFAULT_GRPC_PORT: Final[int] = 50051 +"""Default gRPC server port.""" + +MAX_GRPC_MESSAGE_SIZE: Final[int] = 100 * 1024 * 1024 +"""Maximum gRPC message size in bytes (100 MB).""" diff --git a/src/noteflow/config/settings.py b/src/noteflow/config/settings.py index 7f130ac..02c7e53 100644 --- a/src/noteflow/config/settings.py +++ b/src/noteflow/config/settings.py @@ -57,10 +57,10 @@ class TriggerSettings(BaseSettings): Field(default=0.80, ge=0.0, le=1.0, description="Confidence to auto-start recording"), ] - # Audio trigger tuning + # App audio trigger tuning (system output from whitelisted apps) trigger_audio_enabled: Annotated[ bool, - Field(default=True, description="Enable audio activity detection"), + Field(default=True, description="Enable app audio activity detection"), ] trigger_audio_threshold_db: Annotated[ float, @@ -83,6 +83,27 @@ class TriggerSettings(BaseSettings): Field(default=50, ge=10, le=1000, description="Max audio activity samples to retain"), ] + # Calendar trigger tuning (optional integration) + trigger_calendar_enabled: Annotated[ + bool, + Field(default=False, description="Enable calendar-based trigger detection"), + ] + trigger_calendar_lookahead_minutes: Annotated[ + int, + Field(default=5, ge=0, le=60, description="Minutes before event start to trigger"), + ] + trigger_calendar_lookbehind_minutes: Annotated[ + int, + Field(default=5, ge=0, le=60, description="Minutes after event start to keep triggering"), + ] + trigger_calendar_events: Annotated[ + list[dict[str, object]], + Field( + default_factory=list, + description="Calendar events as JSON list of {start, end, title}", + ), + ] + # Foreground app trigger tuning trigger_foreground_enabled: Annotated[ bool, @@ -148,6 +169,28 @@ class TriggerSettings(BaseSettings): return [str(item).strip() for item in parsed if str(item).strip()] return [item.strip() for item in value.split(",") if item.strip()] + @field_validator("trigger_calendar_events", mode="before") + @classmethod + def _parse_calendar_events(cls, value: object) -> list[dict[str, 
object]]: + if value is None: + return [] + if isinstance(value, str): + stripped = value.strip() + if not stripped: + return [] + try: + parsed = json.loads(stripped) + except json.JSONDecodeError: + return [] + if isinstance(parsed, list): + return [item for item in parsed if isinstance(item, dict)] + return [parsed] if isinstance(parsed, dict) else [] + if isinstance(value, dict): + return [value] + if isinstance(value, list): + return [item for item in value if isinstance(item, dict)] + return [] + class Settings(TriggerSettings): """Application settings loaded from environment variables. diff --git a/src/noteflow/domain/triggers/entities.py b/src/noteflow/domain/triggers/entities.py index e800b5b..f52770c 100644 --- a/src/noteflow/domain/triggers/entities.py +++ b/src/noteflow/domain/triggers/entities.py @@ -72,12 +72,5 @@ class TriggerDecision: @property def detected_app(self) -> str | None: - """Get the detected app name from foreground signal if present.""" - return next( - ( - signal.app_name - for signal in self.signals - if signal.source == TriggerSource.FOREGROUND_APP and signal.app_name - ), - None, - ) + """Get the detected app name from any signal if present.""" + return next((signal.app_name for signal in self.signals if signal.app_name), None) diff --git a/src/noteflow/grpc/__init__.py b/src/noteflow/grpc/__init__.py index ccd4be3..63d2dee 100644 --- a/src/noteflow/grpc/__init__.py +++ b/src/noteflow/grpc/__init__.py @@ -4,9 +4,11 @@ from noteflow.domain.value_objects import MeetingState from .client import ( AnnotationInfo, + DiarizationResult, ExportResult, MeetingInfo, NoteFlowClient, + RenameSpeakerResult, ServerInfo, TranscriptSegment, ) @@ -15,12 +17,14 @@ from .service import NoteFlowServicer __all__ = [ "AnnotationInfo", + "DiarizationResult", "ExportResult", "MeetingInfo", "MeetingState", "MeetingStore", "NoteFlowClient", "NoteFlowServicer", + "RenameSpeakerResult", "ServerInfo", "TranscriptSegment", ] diff --git a/src/noteflow/grpc/_mixins/__init__.py b/src/noteflow/grpc/_mixins/__init__.py new file mode 100644 index 0000000..90854d2 --- /dev/null +++ b/src/noteflow/grpc/_mixins/__init__.py @@ -0,0 +1,17 @@ +"""gRPC service mixins for NoteFlowServicer.""" + +from .annotation import AnnotationMixin +from .diarization import DiarizationMixin +from .export import ExportMixin +from .meeting import MeetingMixin +from .streaming import StreamingMixin +from .summarization import SummarizationMixin + +__all__ = [ + "AnnotationMixin", + "DiarizationMixin", + "ExportMixin", + "MeetingMixin", + "StreamingMixin", + "SummarizationMixin", +] diff --git a/src/noteflow/grpc/_mixins/annotation.py b/src/noteflow/grpc/_mixins/annotation.py new file mode 100644 index 0000000..7e7685a --- /dev/null +++ b/src/noteflow/grpc/_mixins/annotation.py @@ -0,0 +1,161 @@ +"""Annotation management mixin for gRPC service.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING +from uuid import UUID, uuid4 + +import grpc.aio + +from noteflow.domain.entities import Annotation +from noteflow.domain.value_objects import AnnotationId, MeetingId + +from ..proto import noteflow_pb2 +from .converters import annotation_to_proto, proto_to_annotation_type + +if TYPE_CHECKING: + from .protocols import ServicerHost + + +class AnnotationMixin: + """Mixin providing annotation CRUD functionality. + + Requires host to implement ServicerHost protocol. + Annotations require database persistence. 
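Every handler in this mixin follows the same unit-of-work shape: open, read or mutate via a repository, commit on success. A hedged sketch of the async contract `_create_uow()` is assumed to satisfy (the real implementation lives under `infrastructure/persistence`):

```python
# Sketch of the async unit-of-work contract assumed by the mixins; the
# concrete SQLAlchemy-backed implementation is elsewhere in the tree.
from types import TracebackType
from typing import Protocol, Self


class UnitOfWork(Protocol):
    async def __aenter__(self) -> Self: ...
    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None: ...
    async def commit(self) -> None: ...
    async def rollback(self) -> None: ...
```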
+ """ + + async def AddAnnotation( + self: ServicerHost, + request: noteflow_pb2.AddAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + """Add an annotation to a meeting.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + annotation_type = proto_to_annotation_type(request.annotation_type) + + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=MeetingId(UUID(request.meeting_id)), + annotation_type=annotation_type, + text=request.text, + start_time=request.start_time, + end_time=request.end_time, + segment_ids=list(request.segment_ids), + ) + + async with self._create_uow() as uow: + saved = await uow.annotations.add(annotation) + await uow.commit() + return annotation_to_proto(saved) + + async def GetAnnotation( + self: ServicerHost, + request: noteflow_pb2.GetAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + """Get an annotation by ID.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + async with self._create_uow() as uow: + annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) + if annotation is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) + return annotation_to_proto(annotation) + + async def ListAnnotations( + self: ServicerHost, + request: noteflow_pb2.ListAnnotationsRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ListAnnotationsResponse: + """List annotations for a meeting.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + async with self._create_uow() as uow: + meeting_id = MeetingId(UUID(request.meeting_id)) + # Check if time range filter is specified + if request.start_time > 0 or request.end_time > 0: + annotations = await uow.annotations.get_by_time_range( + meeting_id, + request.start_time, + request.end_time, + ) + else: + annotations = await uow.annotations.get_by_meeting(meeting_id) + + return noteflow_pb2.ListAnnotationsResponse( + annotations=[annotation_to_proto(a) for a in annotations] + ) + + async def UpdateAnnotation( + self: ServicerHost, + request: noteflow_pb2.UpdateAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + """Update an existing annotation.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + async with self._create_uow() as uow: + annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) + if annotation is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) + + # Update fields if provided + if request.annotation_type != noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED: + annotation.annotation_type = proto_to_annotation_type(request.annotation_type) + if request.text: + annotation.text = request.text + if request.start_time > 0: + annotation.start_time = request.start_time + if request.end_time > 0: + annotation.end_time = request.end_time + if request.segment_ids: + annotation.segment_ids = list(request.segment_ids) + + updated = await uow.annotations.update(annotation) + await uow.commit() + return annotation_to_proto(updated) + + async def DeleteAnnotation( + self: 
ServicerHost, + request: noteflow_pb2.DeleteAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DeleteAnnotationResponse: + """Delete an annotation.""" + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + + async with self._create_uow() as uow: + success = await uow.annotations.delete(AnnotationId(UUID(request.annotation_id))) + if success: + await uow.commit() + return noteflow_pb2.DeleteAnnotationResponse(success=True) + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) diff --git a/src/noteflow/grpc/_mixins/converters.py b/src/noteflow/grpc/_mixins/converters.py new file mode 100644 index 0000000..889b805 --- /dev/null +++ b/src/noteflow/grpc/_mixins/converters.py @@ -0,0 +1,227 @@ +"""Standalone proto ↔ domain conversion functions for gRPC service.""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING + +from noteflow.application.services.export_service import ExportFormat +from noteflow.domain.entities import Annotation, Meeting, Segment, Summary +from noteflow.domain.value_objects import AnnotationType, MeetingId +from noteflow.infrastructure.converters import AsrConverter + +from ..proto import noteflow_pb2 + +if TYPE_CHECKING: + from noteflow.infrastructure.asr.dto import AsrResult + + +def meeting_to_proto( + meeting: Meeting, + include_segments: bool = True, + include_summary: bool = True, +) -> noteflow_pb2.Meeting: + """Convert domain Meeting to protobuf.""" + segments = [] + if include_segments: + for seg in meeting.segments: + words = [ + noteflow_pb2.WordTiming( + word=w.word, + start_time=w.start_time, + end_time=w.end_time, + probability=w.probability, + ) + for w in seg.words + ] + segments.append( + noteflow_pb2.FinalSegment( + segment_id=seg.segment_id, + text=seg.text, + start_time=seg.start_time, + end_time=seg.end_time, + words=words, + language=seg.language, + language_confidence=seg.language_confidence, + avg_logprob=seg.avg_logprob, + no_speech_prob=seg.no_speech_prob, + speaker_id=seg.speaker_id or "", + speaker_confidence=seg.speaker_confidence, + ) + ) + + summary = None + if include_summary and meeting.summary: + summary = summary_to_proto(meeting.summary) + + return noteflow_pb2.Meeting( + id=str(meeting.id), + title=meeting.title, + state=meeting.state.value, + created_at=meeting.created_at.timestamp(), + started_at=meeting.started_at.timestamp() if meeting.started_at else 0, + ended_at=meeting.ended_at.timestamp() if meeting.ended_at else 0, + duration_seconds=meeting.duration_seconds, + segments=segments, + summary=summary, + metadata=meeting.metadata, + ) + + +def summary_to_proto(summary: Summary) -> noteflow_pb2.Summary: + """Convert domain Summary to protobuf.""" + key_points = [ + noteflow_pb2.KeyPoint( + text=kp.text, + segment_ids=kp.segment_ids, + start_time=kp.start_time, + end_time=kp.end_time, + ) + for kp in summary.key_points + ] + action_items = [ + noteflow_pb2.ActionItem( + text=ai.text, + assignee=ai.assignee, + due_date=ai.due_date.timestamp() if ai.due_date is not None else 0, + priority=ai.priority, + segment_ids=ai.segment_ids, + ) + for ai in summary.action_items + ] + return noteflow_pb2.Summary( + meeting_id=str(summary.meeting_id), + executive_summary=summary.executive_summary, + key_points=key_points, + action_items=action_items, + generated_at=(summary.generated_at.timestamp() if summary.generated_at is not None else 0), + 
model_version=summary.model_version, + ) + + +def segment_to_proto_update( + meeting_id: str, + segment: Segment, +) -> noteflow_pb2.TranscriptUpdate: + """Convert domain Segment to protobuf TranscriptUpdate.""" + words = [ + noteflow_pb2.WordTiming( + word=w.word, + start_time=w.start_time, + end_time=w.end_time, + probability=w.probability, + ) + for w in segment.words + ] + final_segment = noteflow_pb2.FinalSegment( + segment_id=segment.segment_id, + text=segment.text, + start_time=segment.start_time, + end_time=segment.end_time, + words=words, + language=segment.language, + language_confidence=segment.language_confidence, + avg_logprob=segment.avg_logprob, + no_speech_prob=segment.no_speech_prob, + speaker_id=segment.speaker_id or "", + speaker_confidence=segment.speaker_confidence, + ) + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=noteflow_pb2.UPDATE_TYPE_FINAL, + segment=final_segment, + server_timestamp=time.time(), + ) + + +def annotation_to_proto(annotation: Annotation) -> noteflow_pb2.Annotation: + """Convert domain Annotation to protobuf.""" + return noteflow_pb2.Annotation( + id=str(annotation.id), + meeting_id=str(annotation.meeting_id), + annotation_type=annotation_type_to_proto(annotation.annotation_type), + text=annotation.text, + start_time=annotation.start_time, + end_time=annotation.end_time, + segment_ids=annotation.segment_ids, + created_at=annotation.created_at.timestamp(), + ) + + +def annotation_type_to_proto(annotation_type: AnnotationType) -> int: + """Convert domain AnnotationType to protobuf enum.""" + mapping = { + AnnotationType.ACTION_ITEM: noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM, + AnnotationType.DECISION: noteflow_pb2.ANNOTATION_TYPE_DECISION, + AnnotationType.NOTE: noteflow_pb2.ANNOTATION_TYPE_NOTE, + AnnotationType.RISK: noteflow_pb2.ANNOTATION_TYPE_RISK, + } + return mapping.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED) + + +def proto_to_annotation_type(proto_type: int) -> AnnotationType: + """Convert protobuf enum to domain AnnotationType.""" + mapping: dict[int, AnnotationType] = { + int(noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM): AnnotationType.ACTION_ITEM, + int(noteflow_pb2.ANNOTATION_TYPE_DECISION): AnnotationType.DECISION, + int(noteflow_pb2.ANNOTATION_TYPE_NOTE): AnnotationType.NOTE, + int(noteflow_pb2.ANNOTATION_TYPE_RISK): AnnotationType.RISK, + } + return mapping.get(proto_type, AnnotationType.NOTE) + + +def create_vad_update( + meeting_id: str, + update_type: int, +) -> noteflow_pb2.TranscriptUpdate: + """Create a VAD event update. + + Args: + meeting_id: Meeting identifier. + update_type: VAD_START or VAD_END. + + Returns: + TranscriptUpdate with VAD event. + """ + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=update_type, + server_timestamp=time.time(), + ) + + +def create_segment_from_asr( + meeting_id: MeetingId, + segment_id: int, + result: AsrResult, + segment_start_time: float, +) -> Segment: + """Create a Segment from ASR result. + + Use converters to transform ASR DTO to domain entities. 
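Worked numbers for the `segment_start_time` offset applied in the body below: ASR times are chunk-relative and get shifted into meeting time.

```python
# Worked example of the timing offset in create_segment_from_asr.
segment_start_time = 42.0        # where this audio chunk sits in the meeting
word_rel = (0.5, 0.9)            # word start/end relative to the chunk
word_abs = (word_rel[0] + segment_start_time, word_rel[1] + segment_start_time)
assert word_abs == (42.5, 42.9)  # meeting-relative timing stored on the Segment
```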
+ """ + words = AsrConverter.result_to_domain_words(result) + if segment_start_time: + for word in words: + word.start_time += segment_start_time + word.end_time += segment_start_time + + return Segment( + segment_id=segment_id, + text=result.text, + start_time=result.start + segment_start_time, + end_time=result.end + segment_start_time, + meeting_id=meeting_id, + words=words, + language=result.language, + language_confidence=result.language_probability, + avg_logprob=result.avg_logprob, + no_speech_prob=result.no_speech_prob, + ) + + +def proto_to_export_format(proto_format: int) -> ExportFormat: + """Convert protobuf ExportFormat to domain ExportFormat.""" + if proto_format == noteflow_pb2.EXPORT_FORMAT_HTML: + return ExportFormat.HTML + return ExportFormat.MARKDOWN # Default to Markdown diff --git a/src/noteflow/grpc/_mixins/diarization.py b/src/noteflow/grpc/_mixins/diarization.py new file mode 100644 index 0000000..32bca5c --- /dev/null +++ b/src/noteflow/grpc/_mixins/diarization.py @@ -0,0 +1,486 @@ +"""Speaker diarization mixin for gRPC service.""" + +from __future__ import annotations + +import asyncio +import logging +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING +from uuid import UUID, uuid4 + +import grpc.aio +import numpy as np +from numpy.typing import NDArray + +from noteflow.domain.entities import Segment +from noteflow.domain.value_objects import MeetingId, MeetingState +from noteflow.infrastructure.audio.reader import MeetingAudioReader +from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker + +from ..proto import noteflow_pb2 + +if TYPE_CHECKING: + from .protocols import ServicerHost + +logger = logging.getLogger(__name__) + + +@dataclass +class _DiarizationJob: + """Track background diarization job state.""" + + job_id: str + meeting_id: str + status: int + segments_updated: int = 0 + speaker_ids: list[str] = field(default_factory=list) + error_message: str = "" + created_at: float = field(default_factory=time.time) + updated_at: float = field(default_factory=time.time) + task: asyncio.Task[None] | None = None + + def to_proto(self) -> noteflow_pb2.DiarizationJobStatus: + """Convert to protobuf message.""" + return noteflow_pb2.DiarizationJobStatus( + job_id=self.job_id, + status=self.status, + segments_updated=self.segments_updated, + speaker_ids=self.speaker_ids, + error_message=self.error_message, + ) + + +class DiarizationMixin: + """Mixin providing speaker diarization functionality. + + Requires host to implement ServicerHost protocol. 
+ """ + + # Job retention constant + DIARIZATION_JOB_TTL_SECONDS: float = 60 * 60 # 1 hour + + def _process_streaming_diarization( + self: ServicerHost, + meeting_id: str, + audio: NDArray[np.float32], + ) -> None: + """Process an audio chunk for streaming diarization (best-effort).""" + if self._diarization_engine is None: + return + if meeting_id in self._diarization_streaming_failed: + return + if audio.size == 0: + return + + if not self._diarization_engine.is_streaming_loaded: + try: + self._diarization_engine.load_streaming_model() + except (RuntimeError, ValueError) as exc: + logger.warning( + "Streaming diarization disabled for meeting %s: %s", + meeting_id, + exc, + ) + self._diarization_streaming_failed.add(meeting_id) + return + + stream_time = self._diarization_stream_time.get(meeting_id, 0.0) + duration = len(audio) / self.DEFAULT_SAMPLE_RATE + + try: + turns = self._diarization_engine.process_chunk( + audio, + sample_rate=self.DEFAULT_SAMPLE_RATE, + ) + except Exception as exc: + logger.warning( + "Streaming diarization failed for meeting %s: %s", + meeting_id, + exc, + ) + self._diarization_streaming_failed.add(meeting_id) + return + + diarization_turns = self._diarization_turns.setdefault(meeting_id, []) + for turn in turns: + diarization_turns.append( + SpeakerTurn( + speaker=turn.speaker, + start=turn.start + stream_time, + end=turn.end + stream_time, + confidence=turn.confidence, + ) + ) + + self._diarization_stream_time[meeting_id] = stream_time + duration + + def _maybe_assign_speaker( + self: ServicerHost, + meeting_id: str, + segment: Segment, + ) -> None: + """Assign speaker to a segment using streaming diarization turns (best-effort).""" + if self._diarization_engine is None: + return + if meeting_id in self._diarization_streaming_failed: + return + turns = self._diarization_turns.get(meeting_id) + if not turns: + return + + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + return + + segment.speaker_id = speaker_id + segment.speaker_confidence = confidence + + def _prune_diarization_jobs(self: ServicerHost) -> None: + """Remove completed diarization jobs older than retention window.""" + if not self._diarization_jobs: + return + now = time.time() + terminal_statuses = { + noteflow_pb2.JOB_STATUS_COMPLETED, + noteflow_pb2.JOB_STATUS_FAILED, + } + expired = [ + job_id + for job_id, job in self._diarization_jobs.items() + if job.status in terminal_statuses + and now - job.updated_at > self.DIARIZATION_JOB_TTL_SECONDS + ] + for job_id in expired: + self._diarization_jobs.pop(job_id, None) + + async def RefineSpeakerDiarization( + self: ServicerHost, + request: noteflow_pb2.RefineSpeakerDiarizationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.RefineSpeakerDiarizationResponse: + """Run post-meeting speaker diarization refinement. + + Load the full meeting audio, run offline diarization, and update + segment speaker assignments. 
+        """
+        self._prune_diarization_jobs()
+
+        if not self._diarization_refinement_enabled:
+            return noteflow_pb2.RefineSpeakerDiarizationResponse(
+                error_message="Diarization refinement disabled on server",
+                status=noteflow_pb2.JOB_STATUS_FAILED,
+            )
+
+        if self._diarization_engine is None:
+            return noteflow_pb2.RefineSpeakerDiarizationResponse(
+                error_message="Diarization not enabled on server",
+                status=noteflow_pb2.JOB_STATUS_FAILED,
+            )
+
+        try:
+            meeting_uuid = UUID(request.meeting_id)
+        except ValueError:
+            return noteflow_pb2.RefineSpeakerDiarizationResponse(
+                error_message="Invalid meeting_id",
+                status=noteflow_pb2.JOB_STATUS_FAILED,
+            )
+
+        if self._use_database():
+            async with self._create_uow() as uow:
+                meeting = await uow.meetings.get(MeetingId(meeting_uuid))
+        else:
+            store = self._get_memory_store()
+            meeting = store.get(request.meeting_id)
+        if meeting is None:
+            return noteflow_pb2.RefineSpeakerDiarizationResponse(
+                error_message="Meeting not found",
+                status=noteflow_pb2.JOB_STATUS_FAILED,
+            )
+        meeting_state = meeting.state
+        if meeting_state in (
+            MeetingState.UNSPECIFIED,
+            MeetingState.CREATED,
+            MeetingState.RECORDING,
+            MeetingState.STOPPING,
+        ):
+            return noteflow_pb2.RefineSpeakerDiarizationResponse(
+                error_message=(
+                    f"Meeting must be stopped before refinement (state: {meeting_state.name.lower()})"
+                ),
+                status=noteflow_pb2.JOB_STATUS_FAILED,
+            )
+
+        num_speakers = request.num_speakers if request.num_speakers > 0 else None
+
+        job_id = str(uuid4())
+        job = _DiarizationJob(
+            job_id=job_id,
+            meeting_id=request.meeting_id,
+            status=noteflow_pb2.JOB_STATUS_QUEUED,
+        )
+        self._diarization_jobs[job_id] = job
+
+        # Task runs in background, no need to await
+        task = asyncio.create_task(self._run_diarization_job(job_id, num_speakers))
+        job.task = task
+
+        return noteflow_pb2.RefineSpeakerDiarizationResponse(
+            job_id=job_id,
+            status=noteflow_pb2.JOB_STATUS_QUEUED,
+        )
+
+    async def _run_diarization_job(
+        self: ServicerHost,
+        job_id: str,
+        num_speakers: int | None,
+    ) -> None:
+        """Run background diarization job."""
+        job = self._diarization_jobs.get(job_id)
+        if job is None:
+            return
+
+        job.status = noteflow_pb2.JOB_STATUS_RUNNING
+        job.updated_at = time.time()
+
+        try:
+            updated_count = await self.refine_speaker_diarization(
+                meeting_id=job.meeting_id,
+                num_speakers=num_speakers,
+            )
+            speaker_ids = await self._collect_speaker_ids(job.meeting_id)
+            job.segments_updated = updated_count
+            job.speaker_ids = speaker_ids
+            job.status = noteflow_pb2.JOB_STATUS_COMPLETED
+        except Exception as exc:
+            logger.exception("Diarization failed for meeting %s", job.meeting_id)
+            job.error_message = str(exc)
+            job.status = noteflow_pb2.JOB_STATUS_FAILED
+        finally:
+            job.updated_at = time.time()
+
+    async def 
refine_speaker_diarization( + self: ServicerHost, + meeting_id: str, + num_speakers: int | None = None, + ) -> int: + """Run post-meeting speaker diarization refinement. + + Load the full meeting audio, run offline diarization, and update + segment speaker assignments. This provides higher quality speaker + labels than streaming diarization. + + Args: + meeting_id: Meeting UUID string. + num_speakers: Known number of speakers (None for auto-detect). + + Returns: + Number of segments updated with speaker labels. + + Raises: + RuntimeError: If diarization engine not available or meeting not found. + """ + turns = await asyncio.to_thread( + self._run_diarization_inference, + meeting_id, + num_speakers, + ) + + updated_count = await self._apply_diarization_turns(meeting_id, turns) + + logger.info( + "Updated %d segments with speaker labels for meeting %s", + updated_count, + meeting_id, + ) + + return updated_count + + def _run_diarization_inference( + self: ServicerHost, + meeting_id: str, + num_speakers: int | None, + ) -> list[SpeakerTurn]: + """Run offline diarization and return speaker turns (blocking).""" + if self._diarization_engine is None: + raise RuntimeError("Diarization engine not configured") + + if not self._diarization_engine.is_offline_loaded: + logger.info("Loading offline diarization model for refinement...") + self._diarization_engine.load_offline_model() + + audio_reader = MeetingAudioReader(self._crypto, self._meetings_dir) + if not audio_reader.audio_exists(meeting_id): + raise RuntimeError("No audio file found for meeting") + + logger.info("Loading audio for meeting %s", meeting_id) + try: + audio_chunks = audio_reader.load_meeting_audio(meeting_id) + except (FileNotFoundError, ValueError) as exc: + raise RuntimeError(f"Failed to load audio: {exc}") from exc + + if not audio_chunks: + raise RuntimeError("No audio chunks loaded for meeting") + + sample_rate = audio_reader.sample_rate + all_audio = np.concatenate([chunk.frames for chunk in audio_chunks]) + + logger.info( + "Running offline diarization on %.2f seconds of audio", + len(all_audio) / sample_rate, + ) + + turns = self._diarization_engine.diarize_full( + all_audio, + sample_rate=sample_rate, + num_speakers=num_speakers, + ) + + logger.info("Diarization found %d speaker turns", len(turns)) + return list(turns) + + async def _apply_diarization_turns( + self: ServicerHost, + meeting_id: str, + turns: list[SpeakerTurn], + ) -> int: + """Apply diarization turns to segments and return updated count.""" + updated_count = 0 + + if self._use_database(): + async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) + for segment in segments: + if segment.db_id is None: + continue + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + continue + await uow.segments.update_speaker( + segment.db_id, + speaker_id, + confidence, + ) + updated_count += 1 + await uow.commit() + else: + store = self._get_memory_store() + if meeting := store.get(meeting_id): + for segment in meeting.segments: + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + continue + segment.speaker_id = speaker_id + segment.speaker_confidence = confidence + updated_count += 1 + + return updated_count + + async def _collect_speaker_ids(self: ServicerHost, meeting_id: str) -> list[str]: + """Collect distinct speaker IDs for a meeting.""" + if self._use_database(): + 
async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) + return sorted({s.speaker_id for s in segments if s.speaker_id}) + store = self._get_memory_store() + if meeting := store.get(meeting_id): + return sorted({s.speaker_id for s in meeting.segments if s.speaker_id}) + return [] + + async def RenameSpeaker( + self: ServicerHost, + request: noteflow_pb2.RenameSpeakerRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.RenameSpeakerResponse: + """Rename a speaker ID in all segments of a meeting. + + Update all segments where speaker_id matches old_speaker_id + to use new_speaker_name instead. + """ + if not request.old_speaker_id or not request.new_speaker_name: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "old_speaker_id and new_speaker_name are required", + ) + + try: + meeting_uuid = UUID(request.meeting_id) + except ValueError: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "Invalid meeting_id", + ) + + updated_count = 0 + + if self._use_database(): + async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(meeting_uuid)) + + for segment in segments: + if segment.speaker_id == request.old_speaker_id and segment.db_id: + await uow.segments.update_speaker( + segment.db_id, + request.new_speaker_name, + segment.speaker_confidence, + ) + updated_count += 1 + + await uow.commit() + else: + store = self._get_memory_store() + if meeting := store.get(request.meeting_id): + for segment in meeting.segments: + if segment.speaker_id == request.old_speaker_id: + segment.speaker_id = request.new_speaker_name + updated_count += 1 + + return noteflow_pb2.RenameSpeakerResponse( + segments_updated=updated_count, + success=updated_count > 0, + ) + + async def GetDiarizationJobStatus( + self: ServicerHost, + request: noteflow_pb2.GetDiarizationJobStatusRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DiarizationJobStatus: + """Return current status for a diarization job.""" + self._prune_diarization_jobs() + job = self._diarization_jobs.get(request.job_id) + if job is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + "Diarization job not found", + ) + return job.to_proto() diff --git a/src/noteflow/grpc/_mixins/export.py b/src/noteflow/grpc/_mixins/export.py new file mode 100644 index 0000000..4902aae --- /dev/null +++ b/src/noteflow/grpc/_mixins/export.py @@ -0,0 +1,68 @@ +"""Export mixin for gRPC service.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING +from uuid import UUID + +import grpc.aio + +from noteflow.application.services.export_service import ExportFormat, ExportService +from noteflow.domain.value_objects import MeetingId + +from ..proto import noteflow_pb2 +from .converters import proto_to_export_format + +if TYPE_CHECKING: + from .protocols import ServicerHost + + +class ExportMixin: + """Mixin providing export functionality. + + Requires host to implement ServicerHost protocol. + Export requires database persistence. 
+    """
+
+    async def ExportTranscript(
+        self: ServicerHost,
+        request: noteflow_pb2.ExportTranscriptRequest,
+        context: grpc.aio.ServicerContext,
+    ) -> noteflow_pb2.ExportTranscriptResponse:
+        """Export meeting transcript to specified format."""
+        if not self._use_database():
+            await context.abort(
+                grpc.StatusCode.UNIMPLEMENTED,
+                "Export requires database persistence",
+            )
+
+        # Map proto format to ExportFormat
+        fmt = proto_to_export_format(request.format)
+
+        export_service = ExportService(self._create_uow())
+        try:
+            content = await export_service.export_transcript(
+                MeetingId(UUID(request.meeting_id)),
+                fmt,
+            )
+            # Pick the exporter whose extension matches the requested format
+            target_ext = ".html" if fmt == ExportFormat.HTML else ".md"
+            fmt_name, fmt_ext = next(
+                (
+                    (name, ext)
+                    for name, ext in export_service.get_supported_formats()
+                    if ext == target_ext
+                ),
+                ("", ""),
+            )
+
+            return noteflow_pb2.ExportTranscriptResponse(
+                content=content,
+                format_name=fmt_name,
+                file_extension=fmt_ext,
+            )
+        except ValueError as e:
+            await context.abort(
+                grpc.StatusCode.NOT_FOUND,
+                str(e),
+            )
diff --git a/src/noteflow/grpc/_mixins/meeting.py b/src/noteflow/grpc/_mixins/meeting.py
new file mode 100644
index 0000000..e2403b7
--- /dev/null
+++ b/src/noteflow/grpc/_mixins/meeting.py
@@ -0,0 +1,190 @@
+"""Meeting management mixin for gRPC service."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from uuid import UUID
+
+import grpc.aio
+
+from noteflow.domain.entities import Meeting
+from noteflow.domain.value_objects import MeetingId, MeetingState
+
+from ..proto import noteflow_pb2
+from .converters import meeting_to_proto
+
+if TYPE_CHECKING:
+    from .protocols import ServicerHost
+
+
+class MeetingMixin:
+    """Mixin providing meeting CRUD functionality.
+
+    Requires host to implement ServicerHost protocol.
+ """ + + async def CreateMeeting( + self: ServicerHost, + request: noteflow_pb2.CreateMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + """Create a new meeting.""" + metadata = dict(request.metadata) if request.metadata else {} + + if self._use_database(): + async with self._create_uow() as uow: + meeting = Meeting.create(title=request.title, metadata=metadata) + saved = await uow.meetings.create(meeting) + await uow.commit() + return meeting_to_proto(saved) + else: + store = self._get_memory_store() + meeting = store.create(title=request.title, metadata=metadata) + return meeting_to_proto(meeting) + + async def StopMeeting( + self: ServicerHost, + request: noteflow_pb2.StopMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + """Stop a meeting using graceful STOPPING -> STOPPED transition.""" + meeting_id = request.meeting_id + + # Close audio writer if open + if meeting_id in self._audio_writers: + self._close_audio_writer(meeting_id) + + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {meeting_id} not found", + ) + try: + # Graceful shutdown: RECORDING -> STOPPING -> STOPPED + meeting.begin_stopping() + meeting.stop_recording() + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + await uow.meetings.update(meeting) + await uow.commit() + return meeting_to_proto(meeting) + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {meeting_id} not found", + ) + try: + # Graceful shutdown: RECORDING -> STOPPING -> STOPPED + meeting.begin_stopping() + meeting.stop_recording() + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + store.update(meeting) + return meeting_to_proto(meeting) + + async def ListMeetings( + self: ServicerHost, + request: noteflow_pb2.ListMeetingsRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ListMeetingsResponse: + """List meetings.""" + limit = request.limit or 100 + offset = request.offset or 0 + sort_desc = request.sort_order != noteflow_pb2.SORT_ORDER_CREATED_ASC + + if self._use_database(): + states = [MeetingState(s) for s in request.states] if request.states else None + async with self._create_uow() as uow: + meetings, total = await uow.meetings.list_all( + states=states, + limit=limit, + offset=offset, + sort_desc=sort_desc, + ) + return noteflow_pb2.ListMeetingsResponse( + meetings=[meeting_to_proto(m, include_segments=False) for m in meetings], + total_count=total, + ) + else: + store = self._get_memory_store() + states = [MeetingState(s) for s in request.states] if request.states else None + meetings, total = store.list_all( + states=states, + limit=limit, + offset=offset, + sort_desc=sort_desc, + ) + return noteflow_pb2.ListMeetingsResponse( + meetings=[meeting_to_proto(m, include_segments=False) for m in meetings], + total_count=total, + ) + + async def GetMeeting( + self: ServicerHost, + request: noteflow_pb2.GetMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + """Get meeting details.""" + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(request.meeting_id))) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + 
f"Meeting {request.meeting_id} not found", + ) + # Load segments if requested + if request.include_segments: + segments = await uow.segments.get_by_meeting(meeting.id) + meeting.segments = list(segments) + # Load summary if requested + if request.include_summary: + summary = await uow.summaries.get_by_meeting(meeting.id) + meeting.summary = summary + return meeting_to_proto( + meeting, + include_segments=request.include_segments, + include_summary=request.include_summary, + ) + store = self._get_memory_store() + meeting = store.get(request.meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + return meeting_to_proto( + meeting, + include_segments=request.include_segments, + include_summary=request.include_summary, + ) + + async def DeleteMeeting( + self: ServicerHost, + request: noteflow_pb2.DeleteMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DeleteMeetingResponse: + """Delete a meeting.""" + if self._use_database(): + async with self._create_uow() as uow: + success = await uow.meetings.delete(MeetingId(UUID(request.meeting_id))) + if success: + await uow.commit() + return noteflow_pb2.DeleteMeetingResponse(success=True) + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + store = self._get_memory_store() + success = store.delete(request.meeting_id) + if not success: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + return noteflow_pb2.DeleteMeetingResponse(success=True) diff --git a/src/noteflow/grpc/_mixins/protocols.py b/src/noteflow/grpc/_mixins/protocols.py new file mode 100644 index 0000000..b1e4f2f --- /dev/null +++ b/src/noteflow/grpc/_mixins/protocols.py @@ -0,0 +1,114 @@ +"""Protocol contracts for gRPC service mixins.""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Protocol + +import numpy as np +from numpy.typing import NDArray + +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + + from noteflow.domain.entities import Meeting + from noteflow.infrastructure.asr import FasterWhisperEngine, Segmenter, StreamingVad + from noteflow.infrastructure.audio.writer import MeetingAudioWriter + from noteflow.infrastructure.diarization import DiarizationEngine, SpeakerTurn + from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork + from noteflow.infrastructure.security.crypto import AesGcmCryptoBox + + from ..meeting_store import MeetingStore + + +class ServicerHost(Protocol): + """Protocol defining shared state and methods for service mixins. + + All mixins should type-hint `self` as `ServicerHost` to access these + attributes and methods from the host NoteFlowServicer class. 
+ """ + + # Configuration + _session_factory: async_sessionmaker[AsyncSession] | None + _memory_store: MeetingStore | None + _meetings_dir: Path + _crypto: AesGcmCryptoBox + + # Engines + _asr_engine: FasterWhisperEngine | None + _diarization_engine: DiarizationEngine | None + _summarization_service: object | None + _diarization_refinement_enabled: bool + + # Audio writers + _audio_writers: dict[str, MeetingAudioWriter] + _audio_write_failed: set[str] + + # VAD and segmentation state per meeting + _vad_instances: dict[str, StreamingVad] + _segmenters: dict[str, Segmenter] + _was_speaking: dict[str, bool] + _segment_counters: dict[str, int] + _stream_formats: dict[str, tuple[int, int]] + _active_streams: set[str] + + # Partial transcription state per meeting + _partial_buffers: dict[str, list[NDArray[np.float32]]] + _last_partial_time: dict[str, float] + _last_partial_text: dict[str, str] + + # Streaming diarization state per meeting + _diarization_turns: dict[str, list[SpeakerTurn]] + _diarization_stream_time: dict[str, float] + _diarization_streaming_failed: set[str] + + # Constants + DEFAULT_SAMPLE_RATE: int + SUPPORTED_SAMPLE_RATES: list[int] + PARTIAL_CADENCE_SECONDS: float + MIN_PARTIAL_AUDIO_SECONDS: float + + def _use_database(self) -> bool: + """Check if database persistence is configured.""" + ... + + def _get_memory_store(self) -> MeetingStore: + """Get the in-memory store, raising if not configured.""" + ... + + def _create_uow(self) -> SqlAlchemyUnitOfWork: + """Create a new Unit of Work.""" + ... + + def _next_segment_id(self, meeting_id: str, fallback: int = 0) -> int: + """Get and increment the next segment id for a meeting.""" + ... + + def _init_streaming_state(self, meeting_id: str, next_segment_id: int) -> None: + """Initialize VAD, Segmenter, speaking state, and partial buffers.""" + ... + + def _cleanup_streaming_state(self, meeting_id: str) -> None: + """Clean up streaming state for a meeting.""" + ... + + def _ensure_meeting_dek(self, meeting: Meeting) -> tuple[bytes, bytes, bool]: + """Ensure meeting has a DEK, generating one if needed.""" + ... + + def _start_meeting_if_needed(self, meeting: Meeting) -> tuple[bool, str | None]: + """Start recording on meeting if not already recording.""" + ... + + def _open_meeting_audio_writer( + self, + meeting_id: str, + dek: bytes, + wrapped_dek: bytes, + ) -> None: + """Open audio writer for a meeting.""" + ... + + def _close_audio_writer(self, meeting_id: str) -> None: + """Close and remove the audio writer for a meeting.""" + ... 
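The `ServicerHost` protocol above is the seam that keeps each mixin independent of the concrete servicer while remaining strictly typed. A minimal sketch of the `self`-annotation pattern it enables (illustrative only; `GreetingMixin` and `_describe_meeting` are hypothetical names, not part of this patch):

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only for type checking, mirroring the mixins in this patch.
    from noteflow.grpc._mixins.protocols import ServicerHost


class GreetingMixin:
    """Hypothetical mixin showing the `self: ServicerHost` annotation."""

    def _describe_meeting(self: ServicerHost, meeting_id: str) -> str:
        # Annotating `self` as the Protocol gives type-checked access to
        # host state without inheriting from a concrete servicer class.
        if self._use_database():
            return f"meeting {meeting_id} (database-backed)"
        store = self._get_memory_store()
        found = store.get(meeting_id) is not None
        return f"meeting {meeting_id} ({'known' if found else 'unknown'})"
```

Because `ServicerHost` is a `typing.Protocol`, the host `NoteFlowServicer` satisfies it structurally as long as it defines these attributes and methods; the mixins never import the servicer itself, which helps avoid circular imports.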
diff --git a/src/noteflow/grpc/_mixins/streaming.py b/src/noteflow/grpc/_mixins/streaming.py
new file mode 100644
index 0000000..7238007
--- /dev/null
+++ b/src/noteflow/grpc/_mixins/streaming.py
@@ -0,0 +1,576 @@
+"""Streaming audio transcription mixin for gRPC service."""
+
+from __future__ import annotations
+
+import logging
+import struct
+import time
+from collections.abc import AsyncIterator
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from uuid import UUID
+
+import grpc.aio
+import numpy as np
+from numpy.typing import NDArray
+
+from noteflow.domain.value_objects import MeetingId
+
+from ..proto import noteflow_pb2
+from .converters import create_segment_from_asr, create_vad_update, segment_to_proto_update
+
+if TYPE_CHECKING:
+    from .protocols import ServicerHost
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class _StreamSessionInit:
+    """Result of stream session initialization."""
+
+    next_segment_id: int
+    error_code: grpc.StatusCode | None = None
+    error_message: str | None = None
+
+    @property
+    def success(self) -> bool:
+        """Check if initialization succeeded."""
+        return self.error_code is None
+
+
+class StreamingMixin:
+    """Mixin providing streaming transcription functionality.
+
+    Requires host to implement ServicerHost protocol.
+    """
+
+    async def StreamTranscription(
+        self: ServicerHost,
+        request_iterator: AsyncIterator[noteflow_pb2.AudioChunk],
+        context: grpc.aio.ServicerContext,
+    ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]:
+        """Handle bidirectional audio streaming with persistence.
+
+        Receive audio chunks from client, process through ASR,
+        persist segments, and yield transcript updates.
+        """
+        if self._asr_engine is None or not self._asr_engine.is_loaded:
+            await context.abort(
+                grpc.StatusCode.FAILED_PRECONDITION,
+                "ASR engine not loaded",
+            )
+
+        current_meeting_id: str | None = None
+
+        try:
+            async for chunk in request_iterator:
+                meeting_id = chunk.meeting_id
+                if not meeting_id:
+                    await context.abort(
+                        grpc.StatusCode.INVALID_ARGUMENT,
+                        "meeting_id required",
+                    )
+
+                # Initialize stream on first chunk
+                if current_meeting_id is None:
+                    init_result = await self._init_stream_for_meeting(meeting_id, context)
+                    if init_result is None:
+                        return  # Error already sent via context.abort
+                    current_meeting_id = meeting_id
+                elif meeting_id != current_meeting_id:
+                    await context.abort(
+                        grpc.StatusCode.INVALID_ARGUMENT,
+                        "Stream may only contain a single meeting_id",
+                    )
+
+                # Process audio chunk
+                async for update in self._process_stream_chunk(current_meeting_id, chunk, context):
+                    yield update
+
+            # Flush any remaining audio from segmenter
+            if current_meeting_id and current_meeting_id in self._segmenters:
+                async for update in self._flush_segmenter(current_meeting_id):
+                    yield update
+        finally:
+            if current_meeting_id:
+                self._cleanup_streaming_state(current_meeting_id)
+                self._close_audio_writer(current_meeting_id)
+                self._active_streams.discard(current_meeting_id)
+
+    async def _init_stream_for_meeting(
+        self: ServicerHost,
+        meeting_id: str,
+        context: grpc.aio.ServicerContext,
+    ) -> _StreamSessionInit | None:
+        """Initialize streaming for a meeting.
+
+        Args:
+            meeting_id: Meeting ID string.
+            context: gRPC context for error handling.
+
+        Returns:
+            Initialization result, or None if error was sent.
+ """ + if meeting_id in self._active_streams: + await context.abort( + grpc.StatusCode.FAILED_PRECONDITION, + f"Meeting {meeting_id} already streaming", + ) + + self._active_streams.add(meeting_id) + + if self._use_database(): + init_result = await self._init_stream_session_db(meeting_id) + else: + init_result = self._init_stream_session_memory(meeting_id) + + if not init_result.success: + self._active_streams.discard(meeting_id) + await context.abort(init_result.error_code, init_result.error_message or "") + + return init_result + + async def _init_stream_session_db( + self: ServicerHost, + meeting_id: str, + ) -> _StreamSessionInit: + """Initialize stream session using database persistence. + + Args: + meeting_id: Meeting ID string. + + Returns: + Stream session initialization result. + """ + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.NOT_FOUND, + error_message=f"Meeting {meeting_id} not found", + ) + + dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) + recording_updated, error_msg = self._start_meeting_if_needed(meeting) + + if error_msg: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.INVALID_ARGUMENT, + error_message=error_msg, + ) + + if dek_updated or recording_updated: + await uow.meetings.update(meeting) + await uow.commit() + + next_segment_id = await uow.segments.get_next_segment_id(meeting.id) + self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) + self._init_streaming_state(meeting_id, next_segment_id) + + return _StreamSessionInit(next_segment_id=next_segment_id) + + def _init_stream_session_memory( + self: ServicerHost, + meeting_id: str, + ) -> _StreamSessionInit: + """Initialize stream session using in-memory store. + + Args: + meeting_id: Meeting ID string. + + Returns: + Stream session initialization result. + """ + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.NOT_FOUND, + error_message=f"Meeting {meeting_id} not found", + ) + + dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) + recording_updated, error_msg = self._start_meeting_if_needed(meeting) + + if error_msg: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.INVALID_ARGUMENT, + error_message=error_msg, + ) + + if dek_updated or recording_updated: + store.update(meeting) + + next_segment_id = meeting.next_segment_id + self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) + self._init_streaming_state(meeting_id, next_segment_id) + + return _StreamSessionInit(next_segment_id=next_segment_id) + + async def _process_stream_chunk( + self: ServicerHost, + meeting_id: str, + chunk: noteflow_pb2.AudioChunk, + context: grpc.aio.ServicerContext, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Process a single audio chunk from the stream. + + Args: + meeting_id: Meeting ID string. + chunk: Audio chunk from client. + context: gRPC context for error handling. + + Yields: + Transcript updates from processing. 
+ """ + try: + sample_rate, channels = self._normalize_stream_format( + meeting_id, + chunk.sample_rate, + chunk.channels, + ) + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + + audio = self._decode_audio_chunk(chunk) + if audio is None: + return + + try: + audio = self._convert_audio_format(audio, sample_rate, channels) + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + + # Write to encrypted audio file + self._write_audio_chunk_safe(meeting_id, audio) + + # VAD-driven segmentation + async for update in self._process_audio_with_vad(meeting_id, audio): + yield update + + def _normalize_stream_format( + self: ServicerHost, + meeting_id: str, + sample_rate: int, + channels: int, + ) -> tuple[int, int]: + """Validate and persist stream audio format for a meeting.""" + normalized_rate = sample_rate or self.DEFAULT_SAMPLE_RATE + normalized_channels = channels or 1 + + if normalized_rate not in self.SUPPORTED_SAMPLE_RATES: + raise ValueError( + "Unsupported sample_rate " + f"{normalized_rate}; supported: {self.SUPPORTED_SAMPLE_RATES}" + ) + if normalized_channels < 1: + raise ValueError("channels must be >= 1") + + existing = self._stream_formats.get(meeting_id) + if existing and existing != (normalized_rate, normalized_channels): + raise ValueError("Stream audio format cannot change mid-stream") + + self._stream_formats.setdefault(meeting_id, (normalized_rate, normalized_channels)) + return normalized_rate, normalized_channels + + def _convert_audio_format( + self: ServicerHost, + audio: NDArray[np.float32], + sample_rate: int, + channels: int, + ) -> NDArray[np.float32]: + """Downmix/resample audio to the server's expected format.""" + if channels > 1: + if audio.size % channels != 0: + raise ValueError("Audio buffer size is not divisible by channel count") + audio = audio.reshape(-1, channels).mean(axis=1) + + if sample_rate != self.DEFAULT_SAMPLE_RATE: + audio = self._resample_audio(audio, sample_rate, self.DEFAULT_SAMPLE_RATE) + + return audio + + @staticmethod + def _resample_audio( + audio: NDArray[np.float32], + src_rate: int, + dst_rate: int, + ) -> NDArray[np.float32]: + """Resample audio using linear interpolation.""" + if src_rate == dst_rate or audio.size == 0: + return audio + + ratio = dst_rate / src_rate + new_length = round(audio.shape[0] * ratio) + if new_length <= 0: + return np.array([], dtype=np.float32) + + old_indices = np.arange(audio.shape[0]) + new_indices = np.arange(new_length) / ratio + return np.interp(new_indices, old_indices, audio).astype(np.float32) + + def _decode_audio_chunk( + self: ServicerHost, + chunk: noteflow_pb2.AudioChunk, + ) -> NDArray[np.float32] | None: + """Decode audio chunk from protobuf to numpy array.""" + if not chunk.audio_data: + return None + try: + return np.frombuffer(chunk.audio_data, dtype=np.float32) + except (ValueError, struct.error) as e: + logger.warning("Failed to decode audio chunk: %s", e) + return None + + def _write_audio_chunk_safe( + self: ServicerHost, + meeting_id: str, + audio: NDArray[np.float32], + ) -> None: + """Write audio chunk to encrypted file, logging errors without raising. + + Args: + meeting_id: Meeting ID string. + audio: Audio samples to write. 
+ """ + if meeting_id not in self._audio_writers: + return + if meeting_id in self._audio_write_failed: + return # Already failed, skip to avoid log spam + try: + self._audio_writers[meeting_id].write_chunk(audio) + except Exception as e: + logger.error( + "Audio write failed for meeting %s: %s. Recording may be incomplete.", + meeting_id, + e, + ) + self._audio_write_failed.add(meeting_id) + + async def _process_audio_with_vad( + self: ServicerHost, + meeting_id: str, + audio: NDArray[np.float32], + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Process audio chunk through VAD and Segmenter. + + Args: + meeting_id: Meeting identifier. + audio: Audio samples (float32, mono). + + Yields: + TranscriptUpdates for VAD events, partials, and finals. + """ + vad = self._vad_instances.get(meeting_id) + segmenter = self._segmenters.get(meeting_id) + + if vad is None or segmenter is None: + return + + # Get VAD decision + is_speech = vad.process_chunk(audio) + + # Streaming diarization (optional) - call mixin method if available + if hasattr(self, "_process_streaming_diarization"): + self._process_streaming_diarization(meeting_id, audio) + + # Emit VAD state change events + was_speaking = self._was_speaking.get(meeting_id, False) + if is_speech and not was_speaking: + # Speech started + yield create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_START) + self._was_speaking[meeting_id] = True + elif not is_speech and was_speaking: + # Speech ended + yield create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_END) + self._was_speaking[meeting_id] = False + + # Buffer audio for partial transcription + if is_speech: + if meeting_id in self._partial_buffers: + self._partial_buffers[meeting_id].append(audio.copy()) + + # Check if we should emit a partial + partial_update = await self._maybe_emit_partial(meeting_id) + if partial_update is not None: + yield partial_update + + # Process through segmenter + for audio_segment in segmenter.process_audio(audio, is_speech): + # Clear partial buffer when we get a final segment + self._clear_partial_buffer(meeting_id) + async for update in self._process_audio_segment( + meeting_id, + audio_segment.audio, + audio_segment.start_time, + ): + yield update + + async def _maybe_emit_partial( + self: ServicerHost, + meeting_id: str, + ) -> noteflow_pb2.TranscriptUpdate | None: + """Check if it's time to emit a partial and generate if so. + + Args: + meeting_id: Meeting identifier. + + Returns: + TranscriptUpdate with partial text, or None if not time yet. 
+ """ + if self._asr_engine is None or not self._asr_engine.is_loaded: + return None + + last_time = self._last_partial_time.get(meeting_id, 0) + now = time.time() + + # Check if enough time has passed since last partial + if now - last_time < self.PARTIAL_CADENCE_SECONDS: + return None + + # Check if we have enough audio + buffer = self._partial_buffers.get(meeting_id, []) + if not buffer: + return None + + # Concatenate buffered audio + combined = np.concatenate(buffer) + audio_seconds = len(combined) / self.DEFAULT_SAMPLE_RATE + + if audio_seconds < self.MIN_PARTIAL_AUDIO_SECONDS: + return None + + # Run inference on buffered audio (async to avoid blocking event loop) + results = await self._asr_engine.transcribe_async(combined) + partial_text = " ".join(result.text for result in results) + + # Clear buffer after inference to keep partials incremental and bounded + self._partial_buffers[meeting_id] = [] + + # Only emit if text changed (debounce) + last_text = self._last_partial_text.get(meeting_id, "") + if partial_text and partial_text != last_text: + self._last_partial_time[meeting_id] = now + self._last_partial_text[meeting_id] = partial_text + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=noteflow_pb2.UPDATE_TYPE_PARTIAL, + partial_text=partial_text, + server_timestamp=now, + ) + + self._last_partial_time[meeting_id] = now + return None + + def _clear_partial_buffer(self: ServicerHost, meeting_id: str) -> None: + """Clear the partial buffer and reset state after a final is emitted. + + Args: + meeting_id: Meeting identifier. + """ + if meeting_id in self._partial_buffers: + self._partial_buffers[meeting_id] = [] + if meeting_id in self._last_partial_text: + self._last_partial_text[meeting_id] = "" + if meeting_id in self._last_partial_time: + self._last_partial_time[meeting_id] = time.time() + + async def _flush_segmenter( + self: ServicerHost, + meeting_id: str, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Flush remaining audio from segmenter at stream end. + + Args: + meeting_id: Meeting identifier. + + Yields: + TranscriptUpdates for final segment. + """ + segmenter = self._segmenters.get(meeting_id) + if segmenter is None: + return + + # Clear partial buffer since we're flushing to final + self._clear_partial_buffer(meeting_id) + + final_segment = segmenter.flush() + if final_segment is not None: + async for update in self._process_audio_segment( + meeting_id, + final_segment.audio, + final_segment.start_time, + ): + yield update + + async def _process_audio_segment( + self: ServicerHost, + meeting_id: str, + audio: NDArray[np.float32], + segment_start_time: float, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + """Process a complete audio segment through ASR. + + Args: + meeting_id: Meeting identifier. + audio: Complete audio segment. + segment_start_time: Segment start time in stream seconds. + + Yields: + TranscriptUpdates for transcribed segments. 
+ """ + if len(audio) == 0 or self._asr_engine is None: + return + + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + return + + results = await self._asr_engine.transcribe_async(audio) + for result in results: + segment_id = self._next_segment_id( + meeting_id, + fallback=meeting.next_segment_id, + ) + segment = create_segment_from_asr( + meeting.id, + segment_id, + result, + segment_start_time, + ) + # Call diarization mixin method if available + if hasattr(self, "_maybe_assign_speaker"): + self._maybe_assign_speaker(meeting_id, segment) + meeting.add_segment(segment) + await uow.segments.add(meeting.id, segment) + await uow.commit() + yield segment_to_proto_update(meeting_id, segment) + else: + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + return + results = await self._asr_engine.transcribe_async(audio) + for result in results: + segment_id = self._next_segment_id( + meeting_id, + fallback=meeting.next_segment_id, + ) + segment = create_segment_from_asr( + meeting.id, + segment_id, + result, + segment_start_time, + ) + # Call diarization mixin method if available + if hasattr(self, "_maybe_assign_speaker"): + self._maybe_assign_speaker(meeting_id, segment) + store.add_segment(meeting_id, segment) + yield segment_to_proto_update(meeting_id, segment) diff --git a/src/noteflow/grpc/_mixins/summarization.py b/src/noteflow/grpc/_mixins/summarization.py new file mode 100644 index 0000000..71c20fb --- /dev/null +++ b/src/noteflow/grpc/_mixins/summarization.py @@ -0,0 +1,149 @@ +"""Summarization mixin for gRPC service.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING +from uuid import UUID + +import grpc.aio + +from noteflow.domain.entities import Segment, Summary +from noteflow.domain.summarization import ProviderUnavailableError +from noteflow.domain.value_objects import MeetingId + +from ..proto import noteflow_pb2 +from .converters import summary_to_proto + +if TYPE_CHECKING: + from noteflow.application.services.summarization_service import SummarizationService + + from .protocols import ServicerHost + +logger = logging.getLogger(__name__) + + +class SummarizationMixin: + """Mixin providing summarization functionality. + + Requires host to implement ServicerHost protocol. + """ + + _summarization_service: SummarizationService | None + + async def GenerateSummary( + self: ServicerHost, + request: noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + """Generate meeting summary using SummarizationService with fallback.""" + if self._use_database(): + return await self._generate_summary_db(request, context) + + return await self._generate_summary_memory(request, context) + + async def _generate_summary_db( + self: ServicerHost, + request: noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + """Generate summary for a meeting stored in the database. + + The potentially slow summarization step is executed outside the UoW to + avoid holding database connections while waiting on LLMs. 
+ """ + meeting_id = MeetingId(UUID(request.meeting_id)) + + # 1) Load meeting, existing summary, and segments inside a short UoW + async with self._create_uow() as uow: + meeting = await uow.meetings.get(meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + + existing = await uow.summaries.get_by_meeting(meeting.id) + if existing and not request.force_regenerate: + return summary_to_proto(existing) + + segments = list(await uow.segments.get_by_meeting(meeting.id)) + + # 2) Run summarization outside DB transaction + summary = await self._summarize_or_placeholder(meeting_id, segments) + + # 3) Persist in a fresh UoW + async with self._create_uow() as uow: + saved = await uow.summaries.save(summary) + await uow.commit() + + return summary_to_proto(saved) + + async def _generate_summary_memory( + self: ServicerHost, + request: noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + """Generate summary for meetings held in the in-memory store.""" + store = self._get_memory_store() + meeting = store.get(request.meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + + if meeting.summary and not request.force_regenerate: + return summary_to_proto(meeting.summary) + + summary = await self._summarize_or_placeholder(meeting.id, meeting.segments) + store.set_summary(request.meeting_id, summary) + return summary_to_proto(summary) + + async def _summarize_or_placeholder( + self: ServicerHost, + meeting_id: MeetingId, + segments: list[Segment], + ) -> Summary: + """Try to summarize via service, fallback to placeholder on failure.""" + if self._summarization_service is None: + logger.warning("SummarizationService not configured; using placeholder summary") + return self._generate_placeholder_summary(meeting_id, segments) + + try: + result = await self._summarization_service.summarize( + meeting_id=meeting_id, + segments=segments, + ) + logger.info( + "Generated summary using %s (fallback=%s)", + result.provider_used, + result.fallback_used, + ) + return result.summary + except ProviderUnavailableError as exc: + logger.warning("Summarization provider unavailable; using placeholder: %s", exc) + except (TimeoutError, RuntimeError, ValueError) as exc: + logger.exception( + "Summarization failed (%s); using placeholder summary", type(exc).__name__ + ) + + return self._generate_placeholder_summary(meeting_id, segments) + + def _generate_placeholder_summary( + self: ServicerHost, + meeting_id: MeetingId, + segments: list[Segment], + ) -> Summary: + """Generate a lightweight placeholder summary when summarization fails.""" + full_text = " ".join(s.text for s in segments) + executive = f"{full_text[:200]}..." if len(full_text) > 200 else full_text + executive = executive or "No transcript available." 
+
+        return Summary(
+            meeting_id=meeting_id,
+            executive_summary=executive,
+            key_points=[],
+            action_items=[],
+            model_version="placeholder-v0",
+        )
diff --git a/src/noteflow/grpc/client.py b/src/noteflow/grpc/client.py
index 706cb1a..8489042 100644
--- a/src/noteflow/grpc/client.py
+++ b/src/noteflow/grpc/client.py
@@ -12,6 +12,8 @@ from typing import TYPE_CHECKING, Final
 
 import grpc
 
+from noteflow.config.constants import DEFAULT_SAMPLE_RATE
+
 from .proto import noteflow_pb2, noteflow_pb2_grpc
 
 if TYPE_CHECKING:
@@ -88,6 +90,35 @@ class ExportResult:
     file_extension: str
 
 
+@dataclass
+class DiarizationResult:
+    """Result of speaker diarization refinement."""
+
+    job_id: str
+    status: str
+    segments_updated: int
+    speaker_ids: list[str]
+    error_message: str = ""
+
+    @property
+    def success(self) -> bool:
+        """Check if diarization succeeded."""
+        return self.status == "completed" and not self.error_message
+
+    @property
+    def is_terminal(self) -> bool:
+        """Check if job reached a terminal state."""
+        return self.status in {"completed", "failed"}
+
+
+@dataclass
+class RenameSpeakerResult:
+    """Result of speaker rename operation."""
+
+    segments_updated: int
+    success: bool
+
+
 # Callback types
 TranscriptCallback = Callable[[TranscriptSegment], None]
 ConnectionCallback = Callable[[bool, str], None]
@@ -146,25 +177,7 @@ class NoteFlowClient:
             True if connected successfully.
         """
         try:
-            self._channel = grpc.insecure_channel(
-                self._server_address,
-                options=[
-                    ("grpc.max_send_message_length", 100 * 1024 * 1024),
-                    ("grpc.max_receive_message_length", 100 * 1024 * 1024),
-                ],
-            )
-
-            # Wait for channel to be ready
-            grpc.channel_ready_future(self._channel).result(timeout=timeout)
-
-            self._stub = noteflow_pb2_grpc.NoteFlowServiceStub(self._channel)
-            self._connected = True
-
-            logger.info("Connected to server at %s", self._server_address)
-            self._notify_connection(True, "Connected")
-
-            return True
-
+            return self._establish_connection(timeout)
         except grpc.FutureTimeoutError:
             logger.error("Connection timeout: %s", self._server_address)
             self._notify_connection(False, "Connection timeout")
@@ -174,6 +187,27 @@ class NoteFlowClient:
             self._notify_connection(False, str(e))
             return False
 
+    def _establish_connection(self, timeout: float) -> bool:
+        """Open the channel, wait for readiness, and create the service stub."""
+        self._channel = grpc.insecure_channel(
+            self._server_address,
+            options=[
+                ("grpc.max_send_message_length", 100 * 1024 * 1024),
+                ("grpc.max_receive_message_length", 100 * 1024 * 1024),
+            ],
+        )
+
+        # Wait for channel to be ready
+        grpc.channel_ready_future(self._channel).result(timeout=timeout)
+
+        self._stub = noteflow_pb2_grpc.NoteFlowServiceStub(self._channel)
+        self._connected = True
+
+        logger.info("Connected to server at %s", self._server_address)
+        self._notify_connection(True, "Connected")
+
+        return True
+
     def disconnect(self) -> None:
         """Disconnect from the server."""
         self.stop_streaming()
@@ -427,7 +461,7 @@ class NoteFlowClient:
                         meeting_id=meeting_id,
                         audio_data=audio.tobytes(),
                         timestamp=timestamp,
-                        sample_rate=16000,
+                        sample_rate=DEFAULT_SAMPLE_RATE,
                         channels=1,
                     )
                 except queue.Empty:
@@ -788,3 +822,110 @@ class NoteFlowClient:
             "html": noteflow_pb2.EXPORT_FORMAT_HTML,
         }
         return format_map.get(format_name.lower(), noteflow_pb2.EXPORT_FORMAT_MARKDOWN)
+
+    @staticmethod
+    def _job_status_to_str(status: int) -> str:
+        """Convert job status enum to string."""
+        # JobStatus enum values extend int, so they work as dictionary keys
+        status_map = {
+            noteflow_pb2.JOB_STATUS_UNSPECIFIED: "unspecified",
+
noteflow_pb2.JOB_STATUS_QUEUED: "queued", + noteflow_pb2.JOB_STATUS_RUNNING: "running", + noteflow_pb2.JOB_STATUS_COMPLETED: "completed", + noteflow_pb2.JOB_STATUS_FAILED: "failed", + } + return status_map.get(status, "unspecified") # type: ignore[arg-type] + + # ========================================================================= + # Speaker Diarization Methods + # ========================================================================= + + def refine_speaker_diarization( + self, + meeting_id: str, + num_speakers: int | None = None, + ) -> DiarizationResult | None: + """Run post-meeting speaker diarization refinement. + + Requests the server to run offline diarization on the meeting audio + as a background job and update segment speaker assignments. + + Args: + meeting_id: Meeting ID. + num_speakers: Optional known number of speakers (auto-detect if None). + + Returns: + DiarizationResult with job status or None if request fails. + """ + if not self._stub: + return None + + try: + request = noteflow_pb2.RefineSpeakerDiarizationRequest( + meeting_id=meeting_id, + num_speakers=num_speakers or 0, + ) + response = self._stub.RefineSpeakerDiarization(request) + return DiarizationResult( + job_id=response.job_id, + status=self._job_status_to_str(response.status), + segments_updated=response.segments_updated, + speaker_ids=list(response.speaker_ids), + error_message=response.error_message, + ) + except grpc.RpcError as e: + logger.error("Failed to refine speaker diarization: %s", e) + return None + + def get_diarization_job_status(self, job_id: str) -> DiarizationResult | None: + """Get status for a diarization background job.""" + if not self._stub: + return None + + try: + request = noteflow_pb2.GetDiarizationJobStatusRequest(job_id=job_id) + response = self._stub.GetDiarizationJobStatus(request) + return DiarizationResult( + job_id=response.job_id, + status=self._job_status_to_str(response.status), + segments_updated=response.segments_updated, + speaker_ids=list(response.speaker_ids), + error_message=response.error_message, + ) + except grpc.RpcError as e: + logger.error("Failed to get diarization job status: %s", e) + return None + + def rename_speaker( + self, + meeting_id: str, + old_speaker_id: str, + new_speaker_name: str, + ) -> RenameSpeakerResult | None: + """Rename a speaker in all segments of a meeting. + + Args: + meeting_id: Meeting ID. + old_speaker_id: Current speaker ID (e.g., "SPEAKER_00"). + new_speaker_name: New speaker name (e.g., "Alice"). + + Returns: + RenameSpeakerResult or None if request fails. 
+ """ + if not self._stub: + return None + + try: + request = noteflow_pb2.RenameSpeakerRequest( + meeting_id=meeting_id, + old_speaker_id=old_speaker_id, + new_speaker_name=new_speaker_name, + ) + response = self._stub.RenameSpeaker(request) + return RenameSpeakerResult( + segments_updated=response.segments_updated, + success=response.success, + ) + except grpc.RpcError as e: + logger.error("Failed to rename speaker: %s", e) + return None diff --git a/src/noteflow/grpc/meeting_store.py b/src/noteflow/grpc/meeting_store.py index 97e98f1..99e39d2 100644 --- a/src/noteflow/grpc/meeting_store.py +++ b/src/noteflow/grpc/meeting_store.py @@ -14,6 +14,7 @@ from noteflow.domain.value_objects import MeetingState if TYPE_CHECKING: from collections.abc import Sequence + from datetime import datetime class MeetingStore: @@ -142,6 +143,57 @@ class MeetingStore: meeting.summary = summary return meeting + def update_state(self, meeting_id: str, state: MeetingState) -> bool: + """Atomically update meeting state. + + Args: + meeting_id: Meeting ID. + state: New state. + + Returns: + True if updated, False if meeting not found. + """ + with self._lock: + meeting = self._meetings.get(meeting_id) + if meeting is None: + return False + meeting.state = state + return True + + def update_title(self, meeting_id: str, title: str) -> bool: + """Atomically update meeting title. + + Args: + meeting_id: Meeting ID. + title: New title. + + Returns: + True if updated, False if meeting not found. + """ + with self._lock: + meeting = self._meetings.get(meeting_id) + if meeting is None: + return False + meeting.title = title + return True + + def update_end_time(self, meeting_id: str, end_time: datetime) -> bool: + """Atomically update meeting end time. + + Args: + meeting_id: Meeting ID. + end_time: New end time. + + Returns: + True if updated, False if meeting not found. + """ + with self._lock: + meeting = self._meetings.get(meeting_id) + if meeting is None: + return False + meeting.end_time = end_time + return True + def delete(self, meeting_id: str) -> bool: """Delete a meeting. 
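Taken together, the new client methods support a simple submit-then-poll workflow against the background job API. A usage sketch under this patch's own API (`refine_and_wait` and the one-second poll interval are hypothetical, not part of the patch):

```python
import time

from noteflow.grpc.client import DiarizationResult, NoteFlowClient


def refine_and_wait(
    client: NoteFlowClient,
    meeting_id: str,
    num_speakers: int | None = None,
) -> DiarizationResult | None:
    """Submit a diarization job and poll until it reaches a terminal state."""
    result = client.refine_speaker_diarization(meeting_id, num_speakers)
    while result is not None and not result.is_terminal:
        time.sleep(1.0)  # job is queued or running; poll again (interval is arbitrary)
        result = client.get_diarization_job_status(result.job_id)
    return result
```

Once the job completes, `rename_speaker` can map raw labels such as "SPEAKER_00" to human names. Note that finished jobs are pruned after `DIARIZATION_JOB_TTL_SECONDS` (one hour), after which `GetDiarizationJobStatus` aborts with `NOT_FOUND` and the client helper returns `None`, so callers should not hold job IDs for long.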
diff --git a/src/noteflow/grpc/proto/noteflow.proto b/src/noteflow/grpc/proto/noteflow.proto index 73a50a9..a1bead6 100644 --- a/src/noteflow/grpc/proto/noteflow.proto +++ b/src/noteflow/grpc/proto/noteflow.proto @@ -33,6 +33,11 @@ service NoteFlowService { // Export functionality rpc ExportTranscript(ExportTranscriptRequest) returns (ExportTranscriptResponse); + // Speaker diarization + rpc RefineSpeakerDiarization(RefineSpeakerDiarizationRequest) returns (RefineSpeakerDiarizationResponse); + rpc RenameSpeaker(RenameSpeakerRequest) returns (RenameSpeakerResponse); + rpc GetDiarizationJobStatus(GetDiarizationJobStatusRequest) returns (DiarizationJobStatus); + // Server health and capabilities rpc GetServerInfo(ServerInfoRequest) returns (ServerInfo); } @@ -438,6 +443,14 @@ enum ExportFormat { EXPORT_FORMAT_HTML = 2; } +enum JobStatus { + JOB_STATUS_UNSPECIFIED = 0; + JOB_STATUS_QUEUED = 1; + JOB_STATUS_RUNNING = 2; + JOB_STATUS_COMPLETED = 3; + JOB_STATUS_FAILED = 4; +} + message ExportTranscriptRequest { // Meeting ID to export string meeting_id = 1; @@ -456,3 +469,73 @@ message ExportTranscriptResponse { // Suggested file extension string file_extension = 3; } + +// ============================================================================= +// Speaker Diarization Messages +// ============================================================================= + +message RefineSpeakerDiarizationRequest { + // Meeting ID to run diarization on + string meeting_id = 1; + + // Optional known number of speakers (auto-detect if not set or 0) + int32 num_speakers = 2; +} + +message RefineSpeakerDiarizationResponse { + // Number of segments updated with speaker labels + int32 segments_updated = 1; + + // Distinct speaker IDs found + repeated string speaker_ids = 2; + + // Error message if diarization failed + string error_message = 3; + + // Background job identifier (empty if request failed) + string job_id = 4; + + // Current job status + JobStatus status = 5; +} + +message RenameSpeakerRequest { + // Meeting ID + string meeting_id = 1; + + // Original speaker ID (e.g., "SPEAKER_00") + string old_speaker_id = 2; + + // New speaker name (e.g., "Alice") + string new_speaker_name = 3; +} + +message RenameSpeakerResponse { + // Number of segments updated + int32 segments_updated = 1; + + // Success flag + bool success = 2; +} + +message GetDiarizationJobStatusRequest { + // Job ID returned by RefineSpeakerDiarization + string job_id = 1; +} + +message DiarizationJobStatus { + // Job ID + string job_id = 1; + + // Current status + JobStatus status = 2; + + // Number of segments updated (when completed) + int32 segments_updated = 3; + + // Distinct speaker IDs found (when completed) + repeated string speaker_ids = 4; + + // Error message if failed + string error_message = 5; +} diff --git a/src/noteflow/grpc/proto/noteflow_pb2.py b/src/noteflow/grpc/proto/noteflow_pb2.py index 633f47d..a5f8f3e 100644 --- a/src/noteflow/grpc/proto/noteflow_pb2.py +++ b/src/noteflow/grpc/proto/noteflow_pb2.py @@ -24,7 +24,7 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0enoteflow.proto\x12\x08noteflow\"n\n\nAudioChunk\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\naudio_data\x18\x02 \x01(\x0c\x12\x11\n\ttimestamp\x18\x03 \x01(\x01\x12\x13\n\x0bsample_rate\x18\x04 \x01(\x05\x12\x10\n\x08\x63hannels\x18\x05 \x01(\x05\"\xaa\x01\n\x10TranscriptUpdate\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12)\n\x0bupdate_type\x18\x02 
\x01(\x0e\x32\x14.noteflow.UpdateType\x12\x14\n\x0cpartial_text\x18\x03 \x01(\t\x12\'\n\x07segment\x18\x04 \x01(\x0b\x32\x16.noteflow.FinalSegment\x12\x18\n\x10server_timestamp\x18\x05 \x01(\x01\"\x87\x02\n\x0c\x46inalSegment\x12\x12\n\nsegment_id\x18\x01 \x01(\x05\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\x12#\n\x05words\x18\x05 \x03(\x0b\x32\x14.noteflow.WordTiming\x12\x10\n\x08language\x18\x06 \x01(\t\x12\x1b\n\x13language_confidence\x18\x07 \x01(\x02\x12\x13\n\x0b\x61vg_logprob\x18\x08 \x01(\x02\x12\x16\n\x0eno_speech_prob\x18\t \x01(\x02\x12\x12\n\nspeaker_id\x18\n \x01(\t\x12\x1a\n\x12speaker_confidence\x18\x0b \x01(\x02\"U\n\nWordTiming\x12\x0c\n\x04word\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\x12\x13\n\x0bprobability\x18\x04 \x01(\x02\"\xd1\x02\n\x07Meeting\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12%\n\x05state\x18\x03 \x01(\x0e\x32\x16.noteflow.MeetingState\x12\x12\n\ncreated_at\x18\x04 \x01(\x01\x12\x12\n\nstarted_at\x18\x05 \x01(\x01\x12\x10\n\x08\x65nded_at\x18\x06 \x01(\x01\x12\x18\n\x10\x64uration_seconds\x18\x07 \x01(\x01\x12(\n\x08segments\x18\x08 \x03(\x0b\x32\x16.noteflow.FinalSegment\x12\"\n\x07summary\x18\t \x01(\x0b\x32\x11.noteflow.Summary\x12\x31\n\x08metadata\x18\n \x03(\x0b\x32\x1f.noteflow.Meeting.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x96\x01\n\x14\x43reateMeetingRequest\x12\r\n\x05title\x18\x01 \x01(\t\x12>\n\x08metadata\x18\x02 \x03(\x0b\x32,.noteflow.CreateMeetingRequest.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"(\n\x12StopMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"\x85\x01\n\x13ListMeetingsRequest\x12&\n\x06states\x18\x01 \x03(\x0e\x32\x16.noteflow.MeetingState\x12\r\n\x05limit\x18\x02 \x01(\x05\x12\x0e\n\x06offset\x18\x03 \x01(\x05\x12\'\n\nsort_order\x18\x04 \x01(\x0e\x32\x13.noteflow.SortOrder\"P\n\x14ListMeetingsResponse\x12#\n\x08meetings\x18\x01 \x03(\x0b\x32\x11.noteflow.Meeting\x12\x13\n\x0btotal_count\x18\x02 \x01(\x05\"Z\n\x11GetMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10include_segments\x18\x02 \x01(\x08\x12\x17\n\x0finclude_summary\x18\x03 \x01(\x08\"*\n\x14\x44\x65leteMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"(\n\x15\x44\x65leteMeetingResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\xb9\x01\n\x07Summary\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x19\n\x11\x65xecutive_summary\x18\x02 \x01(\t\x12&\n\nkey_points\x18\x03 \x03(\x0b\x32\x12.noteflow.KeyPoint\x12*\n\x0c\x61\x63tion_items\x18\x04 \x03(\x0b\x32\x14.noteflow.ActionItem\x12\x14\n\x0cgenerated_at\x18\x05 \x01(\x01\x12\x15\n\rmodel_version\x18\x06 \x01(\t\"S\n\x08KeyPoint\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x13\n\x0bsegment_ids\x18\x02 \x03(\x05\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\"y\n\nActionItem\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x10\n\x08\x61ssignee\x18\x02 \x01(\t\x12\x10\n\x08\x64ue_date\x18\x03 \x01(\x01\x12$\n\x08priority\x18\x04 \x01(\x0e\x32\x12.noteflow.Priority\x12\x13\n\x0bsegment_ids\x18\x05 \x03(\x05\"F\n\x16GenerateSummaryRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10\x66orce_regenerate\x18\x02 \x01(\x08\"\x13\n\x11ServerInfoRequest\"\xe4\x01\n\nServerInfo\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\x11\n\tasr_model\x18\x02 \x01(\t\x12\x11\n\tasr_ready\x18\x03 
\x01(\x08\x12\x1e\n\x16supported_sample_rates\x18\x04 \x03(\x05\x12\x16\n\x0emax_chunk_size\x18\x05 \x01(\x05\x12\x16\n\x0euptime_seconds\x18\x06 \x01(\x01\x12\x17\n\x0f\x61\x63tive_meetings\x18\x07 \x01(\x05\x12\x1b\n\x13\x64iarization_enabled\x18\x08 \x01(\x08\x12\x19\n\x11\x64iarization_ready\x18\t \x01(\x08\"\xbc\x01\n\nAnnotation\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\nmeeting_id\x18\x02 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x03 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x12\n\nstart_time\x18\x05 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x06 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x07 \x03(\x05\x12\x12\n\ncreated_at\x18\x08 \x01(\x01\"\xa6\x01\n\x14\x41\x64\x64\x41nnotationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"-\n\x14GetAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"R\n\x16ListAnnotationsRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\"D\n\x17ListAnnotationsResponse\x12)\n\x0b\x61nnotations\x18\x01 \x03(\x0b\x32\x14.noteflow.Annotation\"\xac\x01\n\x17UpdateAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"0\n\x17\x44\x65leteAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"+\n\x18\x44\x65leteAnnotationResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"U\n\x17\x45xportTranscriptRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12&\n\x06\x66ormat\x18\x02 \x01(\x0e\x32\x16.noteflow.ExportFormat\"X\n\x18\x45xportTranscriptResponse\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x13\n\x0b\x66ormat_name\x18\x02 \x01(\t\x12\x16\n\x0e\x66ile_extension\x18\x03 
\x01(\t*\x8d\x01\n\nUpdateType\x12\x1b\n\x17UPDATE_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13UPDATE_TYPE_PARTIAL\x10\x01\x12\x15\n\x11UPDATE_TYPE_FINAL\x10\x02\x12\x19\n\x15UPDATE_TYPE_VAD_START\x10\x03\x12\x17\n\x13UPDATE_TYPE_VAD_END\x10\x04*\xb6\x01\n\x0cMeetingState\x12\x1d\n\x19MEETING_STATE_UNSPECIFIED\x10\x00\x12\x19\n\x15MEETING_STATE_CREATED\x10\x01\x12\x1b\n\x17MEETING_STATE_RECORDING\x10\x02\x12\x19\n\x15MEETING_STATE_STOPPED\x10\x03\x12\x1b\n\x17MEETING_STATE_COMPLETED\x10\x04\x12\x17\n\x13MEETING_STATE_ERROR\x10\x05*`\n\tSortOrder\x12\x1a\n\x16SORT_ORDER_UNSPECIFIED\x10\x00\x12\x1b\n\x17SORT_ORDER_CREATED_DESC\x10\x01\x12\x1a\n\x16SORT_ORDER_CREATED_ASC\x10\x02*^\n\x08Priority\x12\x18\n\x14PRIORITY_UNSPECIFIED\x10\x00\x12\x10\n\x0cPRIORITY_LOW\x10\x01\x12\x13\n\x0fPRIORITY_MEDIUM\x10\x02\x12\x11\n\rPRIORITY_HIGH\x10\x03*\xa4\x01\n\x0e\x41nnotationType\x12\x1f\n\x1b\x41NNOTATION_TYPE_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x41NNOTATION_TYPE_ACTION_ITEM\x10\x01\x12\x1c\n\x18\x41NNOTATION_TYPE_DECISION\x10\x02\x12\x18\n\x14\x41NNOTATION_TYPE_NOTE\x10\x03\x12\x18\n\x14\x41NNOTATION_TYPE_RISK\x10\x04*a\n\x0c\x45xportFormat\x12\x1d\n\x19\x45XPORT_FORMAT_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x45XPORT_FORMAT_MARKDOWN\x10\x01\x12\x16\n\x12\x45XPORT_FORMAT_HTML\x10\x02\x32\xb6\x08\n\x0fNoteFlowService\x12K\n\x13StreamTranscription\x12\x14.noteflow.AudioChunk\x1a\x1a.noteflow.TranscriptUpdate(\x01\x30\x01\x12\x42\n\rCreateMeeting\x12\x1e.noteflow.CreateMeetingRequest\x1a\x11.noteflow.Meeting\x12>\n\x0bStopMeeting\x12\x1c.noteflow.StopMeetingRequest\x1a\x11.noteflow.Meeting\x12M\n\x0cListMeetings\x12\x1d.noteflow.ListMeetingsRequest\x1a\x1e.noteflow.ListMeetingsResponse\x12<\n\nGetMeeting\x12\x1b.noteflow.GetMeetingRequest\x1a\x11.noteflow.Meeting\x12P\n\rDeleteMeeting\x12\x1e.noteflow.DeleteMeetingRequest\x1a\x1f.noteflow.DeleteMeetingResponse\x12\x46\n\x0fGenerateSummary\x12 .noteflow.GenerateSummaryRequest\x1a\x11.noteflow.Summary\x12\x45\n\rAddAnnotation\x12\x1e.noteflow.AddAnnotationRequest\x1a\x14.noteflow.Annotation\x12\x45\n\rGetAnnotation\x12\x1e.noteflow.GetAnnotationRequest\x1a\x14.noteflow.Annotation\x12V\n\x0fListAnnotations\x12 .noteflow.ListAnnotationsRequest\x1a!.noteflow.ListAnnotationsResponse\x12K\n\x10UpdateAnnotation\x12!.noteflow.UpdateAnnotationRequest\x1a\x14.noteflow.Annotation\x12Y\n\x10\x44\x65leteAnnotation\x12!.noteflow.DeleteAnnotationRequest\x1a\".noteflow.DeleteAnnotationResponse\x12Y\n\x10\x45xportTranscript\x12!.noteflow.ExportTranscriptRequest\x1a\".noteflow.ExportTranscriptResponse\x12\x42\n\rGetServerInfo\x12\x1b.noteflow.ServerInfoRequest\x1a\x14.noteflow.ServerInfob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0enoteflow.proto\x12\x08noteflow\"n\n\nAudioChunk\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\naudio_data\x18\x02 \x01(\x0c\x12\x11\n\ttimestamp\x18\x03 \x01(\x01\x12\x13\n\x0bsample_rate\x18\x04 \x01(\x05\x12\x10\n\x08\x63hannels\x18\x05 \x01(\x05\"\xaa\x01\n\x10TranscriptUpdate\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12)\n\x0bupdate_type\x18\x02 \x01(\x0e\x32\x14.noteflow.UpdateType\x12\x14\n\x0cpartial_text\x18\x03 \x01(\t\x12\'\n\x07segment\x18\x04 \x01(\x0b\x32\x16.noteflow.FinalSegment\x12\x18\n\x10server_timestamp\x18\x05 \x01(\x01\"\x87\x02\n\x0c\x46inalSegment\x12\x12\n\nsegment_id\x18\x01 \x01(\x05\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\x12#\n\x05words\x18\x05 \x03(\x0b\x32\x14.noteflow.WordTiming\x12\x10\n\x08language\x18\x06 
\x01(\t\x12\x1b\n\x13language_confidence\x18\x07 \x01(\x02\x12\x13\n\x0b\x61vg_logprob\x18\x08 \x01(\x02\x12\x16\n\x0eno_speech_prob\x18\t \x01(\x02\x12\x12\n\nspeaker_id\x18\n \x01(\t\x12\x1a\n\x12speaker_confidence\x18\x0b \x01(\x02\"U\n\nWordTiming\x12\x0c\n\x04word\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\x12\x13\n\x0bprobability\x18\x04 \x01(\x02\"\xd1\x02\n\x07Meeting\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12%\n\x05state\x18\x03 \x01(\x0e\x32\x16.noteflow.MeetingState\x12\x12\n\ncreated_at\x18\x04 \x01(\x01\x12\x12\n\nstarted_at\x18\x05 \x01(\x01\x12\x10\n\x08\x65nded_at\x18\x06 \x01(\x01\x12\x18\n\x10\x64uration_seconds\x18\x07 \x01(\x01\x12(\n\x08segments\x18\x08 \x03(\x0b\x32\x16.noteflow.FinalSegment\x12\"\n\x07summary\x18\t \x01(\x0b\x32\x11.noteflow.Summary\x12\x31\n\x08metadata\x18\n \x03(\x0b\x32\x1f.noteflow.Meeting.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x96\x01\n\x14\x43reateMeetingRequest\x12\r\n\x05title\x18\x01 \x01(\t\x12>\n\x08metadata\x18\x02 \x03(\x0b\x32,.noteflow.CreateMeetingRequest.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"(\n\x12StopMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"\x85\x01\n\x13ListMeetingsRequest\x12&\n\x06states\x18\x01 \x03(\x0e\x32\x16.noteflow.MeetingState\x12\r\n\x05limit\x18\x02 \x01(\x05\x12\x0e\n\x06offset\x18\x03 \x01(\x05\x12\'\n\nsort_order\x18\x04 \x01(\x0e\x32\x13.noteflow.SortOrder\"P\n\x14ListMeetingsResponse\x12#\n\x08meetings\x18\x01 \x03(\x0b\x32\x11.noteflow.Meeting\x12\x13\n\x0btotal_count\x18\x02 \x01(\x05\"Z\n\x11GetMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10include_segments\x18\x02 \x01(\x08\x12\x17\n\x0finclude_summary\x18\x03 \x01(\x08\"*\n\x14\x44\x65leteMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"(\n\x15\x44\x65leteMeetingResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\xb9\x01\n\x07Summary\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x19\n\x11\x65xecutive_summary\x18\x02 \x01(\t\x12&\n\nkey_points\x18\x03 \x03(\x0b\x32\x12.noteflow.KeyPoint\x12*\n\x0c\x61\x63tion_items\x18\x04 \x03(\x0b\x32\x14.noteflow.ActionItem\x12\x14\n\x0cgenerated_at\x18\x05 \x01(\x01\x12\x15\n\rmodel_version\x18\x06 \x01(\t\"S\n\x08KeyPoint\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x13\n\x0bsegment_ids\x18\x02 \x03(\x05\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\"y\n\nActionItem\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x10\n\x08\x61ssignee\x18\x02 \x01(\t\x12\x10\n\x08\x64ue_date\x18\x03 \x01(\x01\x12$\n\x08priority\x18\x04 \x01(\x0e\x32\x12.noteflow.Priority\x12\x13\n\x0bsegment_ids\x18\x05 \x03(\x05\"F\n\x16GenerateSummaryRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10\x66orce_regenerate\x18\x02 \x01(\x08\"\x13\n\x11ServerInfoRequest\"\xe4\x01\n\nServerInfo\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\x11\n\tasr_model\x18\x02 \x01(\t\x12\x11\n\tasr_ready\x18\x03 \x01(\x08\x12\x1e\n\x16supported_sample_rates\x18\x04 \x03(\x05\x12\x16\n\x0emax_chunk_size\x18\x05 \x01(\x05\x12\x16\n\x0euptime_seconds\x18\x06 \x01(\x01\x12\x17\n\x0f\x61\x63tive_meetings\x18\x07 \x01(\x05\x12\x1b\n\x13\x64iarization_enabled\x18\x08 \x01(\x08\x12\x19\n\x11\x64iarization_ready\x18\t \x01(\x08\"\xbc\x01\n\nAnnotation\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\nmeeting_id\x18\x02 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x03 
\x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x12\n\nstart_time\x18\x05 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x06 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x07 \x03(\x05\x12\x12\n\ncreated_at\x18\x08 \x01(\x01\"\xa6\x01\n\x14\x41\x64\x64\x41nnotationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"-\n\x14GetAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"R\n\x16ListAnnotationsRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\"D\n\x17ListAnnotationsResponse\x12)\n\x0b\x61nnotations\x18\x01 \x03(\x0b\x32\x14.noteflow.Annotation\"\xac\x01\n\x17UpdateAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"0\n\x17\x44\x65leteAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"+\n\x18\x44\x65leteAnnotationResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"U\n\x17\x45xportTranscriptRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12&\n\x06\x66ormat\x18\x02 \x01(\x0e\x32\x16.noteflow.ExportFormat\"X\n\x18\x45xportTranscriptResponse\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x13\n\x0b\x66ormat_name\x18\x02 \x01(\t\x12\x16\n\x0e\x66ile_extension\x18\x03 \x01(\t\"K\n\x1fRefineSpeakerDiarizationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x14\n\x0cnum_speakers\x18\x02 \x01(\x05\"\x9d\x01\n RefineSpeakerDiarizationResponse\x12\x18\n\x10segments_updated\x18\x01 \x01(\x05\x12\x13\n\x0bspeaker_ids\x18\x02 \x03(\t\x12\x15\n\rerror_message\x18\x03 \x01(\t\x12\x0e\n\x06job_id\x18\x04 \x01(\t\x12#\n\x06status\x18\x05 \x01(\x0e\x32\x13.noteflow.JobStatus\"\\\n\x14RenameSpeakerRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x16\n\x0eold_speaker_id\x18\x02 \x01(\t\x12\x18\n\x10new_speaker_name\x18\x03 \x01(\t\"B\n\x15RenameSpeakerResponse\x12\x18\n\x10segments_updated\x18\x01 \x01(\x05\x12\x0f\n\x07success\x18\x02 \x01(\x08\"0\n\x1eGetDiarizationJobStatusRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x91\x01\n\x14\x44iarizationJobStatus\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12#\n\x06status\x18\x02 \x01(\x0e\x32\x13.noteflow.JobStatus\x12\x18\n\x10segments_updated\x18\x03 \x01(\x05\x12\x13\n\x0bspeaker_ids\x18\x04 \x03(\t\x12\x15\n\rerror_message\x18\x05 
\x01(\t*\x8d\x01\n\nUpdateType\x12\x1b\n\x17UPDATE_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13UPDATE_TYPE_PARTIAL\x10\x01\x12\x15\n\x11UPDATE_TYPE_FINAL\x10\x02\x12\x19\n\x15UPDATE_TYPE_VAD_START\x10\x03\x12\x17\n\x13UPDATE_TYPE_VAD_END\x10\x04*\xb6\x01\n\x0cMeetingState\x12\x1d\n\x19MEETING_STATE_UNSPECIFIED\x10\x00\x12\x19\n\x15MEETING_STATE_CREATED\x10\x01\x12\x1b\n\x17MEETING_STATE_RECORDING\x10\x02\x12\x19\n\x15MEETING_STATE_STOPPED\x10\x03\x12\x1b\n\x17MEETING_STATE_COMPLETED\x10\x04\x12\x17\n\x13MEETING_STATE_ERROR\x10\x05*`\n\tSortOrder\x12\x1a\n\x16SORT_ORDER_UNSPECIFIED\x10\x00\x12\x1b\n\x17SORT_ORDER_CREATED_DESC\x10\x01\x12\x1a\n\x16SORT_ORDER_CREATED_ASC\x10\x02*^\n\x08Priority\x12\x18\n\x14PRIORITY_UNSPECIFIED\x10\x00\x12\x10\n\x0cPRIORITY_LOW\x10\x01\x12\x13\n\x0fPRIORITY_MEDIUM\x10\x02\x12\x11\n\rPRIORITY_HIGH\x10\x03*\xa4\x01\n\x0e\x41nnotationType\x12\x1f\n\x1b\x41NNOTATION_TYPE_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x41NNOTATION_TYPE_ACTION_ITEM\x10\x01\x12\x1c\n\x18\x41NNOTATION_TYPE_DECISION\x10\x02\x12\x18\n\x14\x41NNOTATION_TYPE_NOTE\x10\x03\x12\x18\n\x14\x41NNOTATION_TYPE_RISK\x10\x04*a\n\x0c\x45xportFormat\x12\x1d\n\x19\x45XPORT_FORMAT_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x45XPORT_FORMAT_MARKDOWN\x10\x01\x12\x16\n\x12\x45XPORT_FORMAT_HTML\x10\x02*\x87\x01\n\tJobStatus\x12\x1a\n\x16JOB_STATUS_UNSPECIFIED\x10\x00\x12\x15\n\x11JOB_STATUS_QUEUED\x10\x01\x12\x16\n\x12JOB_STATUS_RUNNING\x10\x02\x12\x18\n\x14JOB_STATUS_COMPLETED\x10\x03\x12\x15\n\x11JOB_STATUS_FAILED\x10\x04\x32\xe0\n\n\x0fNoteFlowService\x12K\n\x13StreamTranscription\x12\x14.noteflow.AudioChunk\x1a\x1a.noteflow.TranscriptUpdate(\x01\x30\x01\x12\x42\n\rCreateMeeting\x12\x1e.noteflow.CreateMeetingRequest\x1a\x11.noteflow.Meeting\x12>\n\x0bStopMeeting\x12\x1c.noteflow.StopMeetingRequest\x1a\x11.noteflow.Meeting\x12M\n\x0cListMeetings\x12\x1d.noteflow.ListMeetingsRequest\x1a\x1e.noteflow.ListMeetingsResponse\x12<\n\nGetMeeting\x12\x1b.noteflow.GetMeetingRequest\x1a\x11.noteflow.Meeting\x12P\n\rDeleteMeeting\x12\x1e.noteflow.DeleteMeetingRequest\x1a\x1f.noteflow.DeleteMeetingResponse\x12\x46\n\x0fGenerateSummary\x12 .noteflow.GenerateSummaryRequest\x1a\x11.noteflow.Summary\x12\x45\n\rAddAnnotation\x12\x1e.noteflow.AddAnnotationRequest\x1a\x14.noteflow.Annotation\x12\x45\n\rGetAnnotation\x12\x1e.noteflow.GetAnnotationRequest\x1a\x14.noteflow.Annotation\x12V\n\x0fListAnnotations\x12 .noteflow.ListAnnotationsRequest\x1a!.noteflow.ListAnnotationsResponse\x12K\n\x10UpdateAnnotation\x12!.noteflow.UpdateAnnotationRequest\x1a\x14.noteflow.Annotation\x12Y\n\x10\x44\x65leteAnnotation\x12!.noteflow.DeleteAnnotationRequest\x1a\".noteflow.DeleteAnnotationResponse\x12Y\n\x10\x45xportTranscript\x12!.noteflow.ExportTranscriptRequest\x1a\".noteflow.ExportTranscriptResponse\x12q\n\x18RefineSpeakerDiarization\x12).noteflow.RefineSpeakerDiarizationRequest\x1a*.noteflow.RefineSpeakerDiarizationResponse\x12P\n\rRenameSpeaker\x12\x1e.noteflow.RenameSpeakerRequest\x1a\x1f.noteflow.RenameSpeakerResponse\x12\x63\n\x17GetDiarizationJobStatus\x12(.noteflow.GetDiarizationJobStatusRequest\x1a\x1e.noteflow.DiarizationJobStatus\x12\x42\n\rGetServerInfo\x12\x1b.noteflow.ServerInfoRequest\x1a\x14.noteflow.ServerInfob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -35,18 +35,20 @@ if not _descriptor._USE_C_DESCRIPTORS: _globals['_MEETING_METADATAENTRY']._serialized_options = b'8\001' _globals['_CREATEMEETINGREQUEST_METADATAENTRY']._loaded_options = None 
_globals['_CREATEMEETINGREQUEST_METADATAENTRY']._serialized_options = b'8\001' - _globals['_UPDATETYPE']._serialized_start=3326 - _globals['_UPDATETYPE']._serialized_end=3467 - _globals['_MEETINGSTATE']._serialized_start=3470 - _globals['_MEETINGSTATE']._serialized_end=3652 - _globals['_SORTORDER']._serialized_start=3654 - _globals['_SORTORDER']._serialized_end=3750 - _globals['_PRIORITY']._serialized_start=3752 - _globals['_PRIORITY']._serialized_end=3846 - _globals['_ANNOTATIONTYPE']._serialized_start=3849 - _globals['_ANNOTATIONTYPE']._serialized_end=4013 - _globals['_EXPORTFORMAT']._serialized_start=4015 - _globals['_EXPORTFORMAT']._serialized_end=4112 + _globals['_UPDATETYPE']._serialized_start=3923 + _globals['_UPDATETYPE']._serialized_end=4064 + _globals['_MEETINGSTATE']._serialized_start=4067 + _globals['_MEETINGSTATE']._serialized_end=4249 + _globals['_SORTORDER']._serialized_start=4251 + _globals['_SORTORDER']._serialized_end=4347 + _globals['_PRIORITY']._serialized_start=4349 + _globals['_PRIORITY']._serialized_end=4443 + _globals['_ANNOTATIONTYPE']._serialized_start=4446 + _globals['_ANNOTATIONTYPE']._serialized_end=4610 + _globals['_EXPORTFORMAT']._serialized_start=4612 + _globals['_EXPORTFORMAT']._serialized_end=4709 + _globals['_JOBSTATUS']._serialized_start=4712 + _globals['_JOBSTATUS']._serialized_end=4847 _globals['_AUDIOCHUNK']._serialized_start=28 _globals['_AUDIOCHUNK']._serialized_end=138 _globals['_TRANSCRIPTUPDATE']._serialized_start=141 @@ -107,6 +109,18 @@ if not _descriptor._USE_C_DESCRIPTORS: _globals['_EXPORTTRANSCRIPTREQUEST']._serialized_end=3233 _globals['_EXPORTTRANSCRIPTRESPONSE']._serialized_start=3235 _globals['_EXPORTTRANSCRIPTRESPONSE']._serialized_end=3323 - _globals['_NOTEFLOWSERVICE']._serialized_start=4115 - _globals['_NOTEFLOWSERVICE']._serialized_end=5193 + _globals['_REFINESPEAKERDIARIZATIONREQUEST']._serialized_start=3325 + _globals['_REFINESPEAKERDIARIZATIONREQUEST']._serialized_end=3400 + _globals['_REFINESPEAKERDIARIZATIONRESPONSE']._serialized_start=3403 + _globals['_REFINESPEAKERDIARIZATIONRESPONSE']._serialized_end=3560 + _globals['_RENAMESPEAKERREQUEST']._serialized_start=3562 + _globals['_RENAMESPEAKERREQUEST']._serialized_end=3654 + _globals['_RENAMESPEAKERRESPONSE']._serialized_start=3656 + _globals['_RENAMESPEAKERRESPONSE']._serialized_end=3722 + _globals['_GETDIARIZATIONJOBSTATUSREQUEST']._serialized_start=3724 + _globals['_GETDIARIZATIONJOBSTATUSREQUEST']._serialized_end=3772 + _globals['_DIARIZATIONJOBSTATUS']._serialized_start=3775 + _globals['_DIARIZATIONJOBSTATUS']._serialized_end=3920 + _globals['_NOTEFLOWSERVICE']._serialized_start=4850 + _globals['_NOTEFLOWSERVICE']._serialized_end=6226 # @@protoc_insertion_point(module_scope) diff --git a/src/noteflow/grpc/proto/noteflow_pb2.pyi b/src/noteflow/grpc/proto/noteflow_pb2.pyi index ddd7293..4b93b37 100644 --- a/src/noteflow/grpc/proto/noteflow_pb2.pyi +++ b/src/noteflow/grpc/proto/noteflow_pb2.pyi @@ -36,6 +36,28 @@ class Priority(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): PRIORITY_LOW: _ClassVar[Priority] PRIORITY_MEDIUM: _ClassVar[Priority] PRIORITY_HIGH: _ClassVar[Priority] + +class AnnotationType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + ANNOTATION_TYPE_UNSPECIFIED: _ClassVar[AnnotationType] + ANNOTATION_TYPE_ACTION_ITEM: _ClassVar[AnnotationType] + ANNOTATION_TYPE_DECISION: _ClassVar[AnnotationType] + ANNOTATION_TYPE_NOTE: _ClassVar[AnnotationType] + ANNOTATION_TYPE_RISK: _ClassVar[AnnotationType] + +class ExportFormat(int, 
metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + EXPORT_FORMAT_UNSPECIFIED: _ClassVar[ExportFormat] + EXPORT_FORMAT_MARKDOWN: _ClassVar[ExportFormat] + EXPORT_FORMAT_HTML: _ClassVar[ExportFormat] + +class JobStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + JOB_STATUS_UNSPECIFIED: _ClassVar[JobStatus] + JOB_STATUS_QUEUED: _ClassVar[JobStatus] + JOB_STATUS_RUNNING: _ClassVar[JobStatus] + JOB_STATUS_COMPLETED: _ClassVar[JobStatus] + JOB_STATUS_FAILED: _ClassVar[JobStatus] UPDATE_TYPE_UNSPECIFIED: UpdateType UPDATE_TYPE_PARTIAL: UpdateType UPDATE_TYPE_FINAL: UpdateType @@ -54,6 +76,19 @@ PRIORITY_UNSPECIFIED: Priority PRIORITY_LOW: Priority PRIORITY_MEDIUM: Priority PRIORITY_HIGH: Priority +ANNOTATION_TYPE_UNSPECIFIED: AnnotationType +ANNOTATION_TYPE_ACTION_ITEM: AnnotationType +ANNOTATION_TYPE_DECISION: AnnotationType +ANNOTATION_TYPE_NOTE: AnnotationType +ANNOTATION_TYPE_RISK: AnnotationType +EXPORT_FORMAT_UNSPECIFIED: ExportFormat +EXPORT_FORMAT_MARKDOWN: ExportFormat +EXPORT_FORMAT_HTML: ExportFormat +JOB_STATUS_UNSPECIFIED: JobStatus +JOB_STATUS_QUEUED: JobStatus +JOB_STATUS_RUNNING: JobStatus +JOB_STATUS_COMPLETED: JobStatus +JOB_STATUS_FAILED: JobStatus class AudioChunk(_message.Message): __slots__ = ("meeting_id", "audio_data", "timestamp", "sample_rate", "channels") @@ -290,3 +325,167 @@ class ServerInfo(_message.Message): diarization_enabled: bool diarization_ready: bool def __init__(self, version: _Optional[str] = ..., asr_model: _Optional[str] = ..., asr_ready: bool = ..., supported_sample_rates: _Optional[_Iterable[int]] = ..., max_chunk_size: _Optional[int] = ..., uptime_seconds: _Optional[float] = ..., active_meetings: _Optional[int] = ..., diarization_enabled: bool = ..., diarization_ready: bool = ...) -> None: ... + +class Annotation(_message.Message): + __slots__ = ("id", "meeting_id", "annotation_type", "text", "start_time", "end_time", "segment_ids", "created_at") + ID_FIELD_NUMBER: _ClassVar[int] + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int] + CREATED_AT_FIELD_NUMBER: _ClassVar[int] + id: str + meeting_id: str + annotation_type: AnnotationType + text: str + start_time: float + end_time: float + segment_ids: _containers.RepeatedScalarFieldContainer[int] + created_at: float + def __init__(self, id: _Optional[str] = ..., meeting_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ..., created_at: _Optional[float] = ...) -> None: ... 
+ +class AddAnnotationRequest(_message.Message): + __slots__ = ("meeting_id", "annotation_type", "text", "start_time", "end_time", "segment_ids") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + annotation_type: AnnotationType + text: str + start_time: float + end_time: float + segment_ids: _containers.RepeatedScalarFieldContainer[int] + def __init__(self, meeting_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ...) -> None: ... + +class GetAnnotationRequest(_message.Message): + __slots__ = ("annotation_id",) + ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int] + annotation_id: str + def __init__(self, annotation_id: _Optional[str] = ...) -> None: ... + +class ListAnnotationsRequest(_message.Message): + __slots__ = ("meeting_id", "start_time", "end_time") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + start_time: float + end_time: float + def __init__(self, meeting_id: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ...) -> None: ... + +class ListAnnotationsResponse(_message.Message): + __slots__ = ("annotations",) + ANNOTATIONS_FIELD_NUMBER: _ClassVar[int] + annotations: _containers.RepeatedCompositeFieldContainer[Annotation] + def __init__(self, annotations: _Optional[_Iterable[_Union[Annotation, _Mapping]]] = ...) -> None: ... + +class UpdateAnnotationRequest(_message.Message): + __slots__ = ("annotation_id", "annotation_type", "text", "start_time", "end_time", "segment_ids") + ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int] + ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int] + TEXT_FIELD_NUMBER: _ClassVar[int] + START_TIME_FIELD_NUMBER: _ClassVar[int] + END_TIME_FIELD_NUMBER: _ClassVar[int] + SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int] + annotation_id: str + annotation_type: AnnotationType + text: str + start_time: float + end_time: float + segment_ids: _containers.RepeatedScalarFieldContainer[int] + def __init__(self, annotation_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ...) -> None: ... + +class DeleteAnnotationRequest(_message.Message): + __slots__ = ("annotation_id",) + ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int] + annotation_id: str + def __init__(self, annotation_id: _Optional[str] = ...) -> None: ... + +class DeleteAnnotationResponse(_message.Message): + __slots__ = ("success",) + SUCCESS_FIELD_NUMBER: _ClassVar[int] + success: bool + def __init__(self, success: bool = ...) -> None: ... + +class ExportTranscriptRequest(_message.Message): + __slots__ = ("meeting_id", "format") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + FORMAT_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + format: ExportFormat + def __init__(self, meeting_id: _Optional[str] = ..., format: _Optional[_Union[ExportFormat, str]] = ...) -> None: ... 
+ +class ExportTranscriptResponse(_message.Message): + __slots__ = ("content", "format_name", "file_extension") + CONTENT_FIELD_NUMBER: _ClassVar[int] + FORMAT_NAME_FIELD_NUMBER: _ClassVar[int] + FILE_EXTENSION_FIELD_NUMBER: _ClassVar[int] + content: str + format_name: str + file_extension: str + def __init__(self, content: _Optional[str] = ..., format_name: _Optional[str] = ..., file_extension: _Optional[str] = ...) -> None: ... + +class RefineSpeakerDiarizationRequest(_message.Message): + __slots__ = ("meeting_id", "num_speakers") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + NUM_SPEAKERS_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + num_speakers: int + def __init__(self, meeting_id: _Optional[str] = ..., num_speakers: _Optional[int] = ...) -> None: ... + +class RefineSpeakerDiarizationResponse(_message.Message): + __slots__ = ("segments_updated", "speaker_ids", "error_message", "job_id", "status") + SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int] + SPEAKER_IDS_FIELD_NUMBER: _ClassVar[int] + ERROR_MESSAGE_FIELD_NUMBER: _ClassVar[int] + JOB_ID_FIELD_NUMBER: _ClassVar[int] + STATUS_FIELD_NUMBER: _ClassVar[int] + segments_updated: int + speaker_ids: _containers.RepeatedScalarFieldContainer[str] + error_message: str + job_id: str + status: JobStatus + def __init__(self, segments_updated: _Optional[int] = ..., speaker_ids: _Optional[_Iterable[str]] = ..., error_message: _Optional[str] = ..., job_id: _Optional[str] = ..., status: _Optional[JobStatus] = ...) -> None: ... + +class RenameSpeakerRequest(_message.Message): + __slots__ = ("meeting_id", "old_speaker_id", "new_speaker_name") + MEETING_ID_FIELD_NUMBER: _ClassVar[int] + OLD_SPEAKER_ID_FIELD_NUMBER: _ClassVar[int] + NEW_SPEAKER_NAME_FIELD_NUMBER: _ClassVar[int] + meeting_id: str + old_speaker_id: str + new_speaker_name: str + def __init__(self, meeting_id: _Optional[str] = ..., old_speaker_id: _Optional[str] = ..., new_speaker_name: _Optional[str] = ...) -> None: ... + +class RenameSpeakerResponse(_message.Message): + __slots__ = ("segments_updated", "success") + SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int] + SUCCESS_FIELD_NUMBER: _ClassVar[int] + segments_updated: int + success: bool + def __init__(self, segments_updated: _Optional[int] = ..., success: bool = ...) -> None: ... + +class GetDiarizationJobStatusRequest(_message.Message): + __slots__ = ("job_id",) + JOB_ID_FIELD_NUMBER: _ClassVar[int] + job_id: str + def __init__(self, job_id: _Optional[str] = ...) -> None: ... + +class DiarizationJobStatus(_message.Message): + __slots__ = ("job_id", "status", "segments_updated", "speaker_ids", "error_message") + JOB_ID_FIELD_NUMBER: _ClassVar[int] + STATUS_FIELD_NUMBER: _ClassVar[int] + SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int] + SPEAKER_IDS_FIELD_NUMBER: _ClassVar[int] + ERROR_MESSAGE_FIELD_NUMBER: _ClassVar[int] + job_id: str + status: JobStatus + segments_updated: int + speaker_ids: _containers.RepeatedScalarFieldContainer[str] + error_message: str + def __init__(self, job_id: _Optional[str] = ..., status: _Optional[JobStatus] = ..., segments_updated: _Optional[int] = ..., speaker_ids: _Optional[_Iterable[str]] = ..., error_message: _Optional[str] = ...) -> None: ... 
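A minimal client-side sketch of the new diarization RPCs exposed by the regenerated stubs: start refinement, then poll GetDiarizationJobStatus until a terminal JobStatus. The server address, helper name, and poll interval are assumptions for illustration. Note that the first call returns a RefineSpeakerDiarizationResponse while polling returns DiarizationJobStatus; both carry segments_updated, speaker_ids, and error_message.

    import time

    import grpc

    from noteflow.grpc.proto import noteflow_pb2, noteflow_pb2_grpc

    _TERMINAL = (noteflow_pb2.JOB_STATUS_COMPLETED, noteflow_pb2.JOB_STATUS_FAILED)


    def refine_and_wait(address: str, meeting_id: str, poll_seconds: float = 1.0):
        # Kick off refinement, then poll the background job to completion.
        with grpc.insecure_channel(address) as channel:
            stub = noteflow_pb2_grpc.NoteFlowServiceStub(channel)
            first = stub.RefineSpeakerDiarization(
                noteflow_pb2.RefineSpeakerDiarizationRequest(meeting_id=meeting_id)
            )
            if not first.job_id:
                # An empty job_id means the request itself failed; see error_message.
                return first
            request = noteflow_pb2.GetDiarizationJobStatusRequest(job_id=first.job_id)
            status = stub.GetDiarizationJobStatus(request)
            while status.status not in _TERMINAL:
                time.sleep(poll_seconds)
                status = stub.GetDiarizationJobStatus(request)
            return status
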
diff --git a/src/noteflow/grpc/proto/noteflow_pb2_grpc.py b/src/noteflow/grpc/proto/noteflow_pb2_grpc.py index 78a0f10..8e9f5dc 100644 --- a/src/noteflow/grpc/proto/noteflow_pb2_grpc.py +++ b/src/noteflow/grpc/proto/noteflow_pb2_grpc.py @@ -103,6 +103,21 @@ class NoteFlowServiceStub(object): request_serializer=noteflow__pb2.ExportTranscriptRequest.SerializeToString, response_deserializer=noteflow__pb2.ExportTranscriptResponse.FromString, _registered_method=True) + self.RefineSpeakerDiarization = channel.unary_unary( + '/noteflow.NoteFlowService/RefineSpeakerDiarization', + request_serializer=noteflow__pb2.RefineSpeakerDiarizationRequest.SerializeToString, + response_deserializer=noteflow__pb2.RefineSpeakerDiarizationResponse.FromString, + _registered_method=True) + self.RenameSpeaker = channel.unary_unary( + '/noteflow.NoteFlowService/RenameSpeaker', + request_serializer=noteflow__pb2.RenameSpeakerRequest.SerializeToString, + response_deserializer=noteflow__pb2.RenameSpeakerResponse.FromString, + _registered_method=True) + self.GetDiarizationJobStatus = channel.unary_unary( + '/noteflow.NoteFlowService/GetDiarizationJobStatus', + request_serializer=noteflow__pb2.GetDiarizationJobStatusRequest.SerializeToString, + response_deserializer=noteflow__pb2.DiarizationJobStatus.FromString, + _registered_method=True) self.GetServerInfo = channel.unary_unary( '/noteflow.NoteFlowService/GetServerInfo', request_serializer=noteflow__pb2.ServerInfoRequest.SerializeToString, @@ -200,6 +215,25 @@ class NoteFlowServiceServicer(object): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def RefineSpeakerDiarization(self, request, context): + """Speaker diarization + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def RenameSpeaker(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetDiarizationJobStatus(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def GetServerInfo(self, request, context): """Server health and capabilities """ @@ -275,6 +309,21 @@ def add_NoteFlowServiceServicer_to_server(servicer, server): request_deserializer=noteflow__pb2.ExportTranscriptRequest.FromString, response_serializer=noteflow__pb2.ExportTranscriptResponse.SerializeToString, ), + 'RefineSpeakerDiarization': grpc.unary_unary_rpc_method_handler( + servicer.RefineSpeakerDiarization, + request_deserializer=noteflow__pb2.RefineSpeakerDiarizationRequest.FromString, + response_serializer=noteflow__pb2.RefineSpeakerDiarizationResponse.SerializeToString, + ), + 'RenameSpeaker': grpc.unary_unary_rpc_method_handler( + servicer.RenameSpeaker, + request_deserializer=noteflow__pb2.RenameSpeakerRequest.FromString, + response_serializer=noteflow__pb2.RenameSpeakerResponse.SerializeToString, + ), + 'GetDiarizationJobStatus': grpc.unary_unary_rpc_method_handler( + servicer.GetDiarizationJobStatus, + request_deserializer=noteflow__pb2.GetDiarizationJobStatusRequest.FromString, + response_serializer=noteflow__pb2.DiarizationJobStatus.SerializeToString, + ), 'GetServerInfo': 
grpc.unary_unary_rpc_method_handler( servicer.GetServerInfo, request_deserializer=noteflow__pb2.ServerInfoRequest.FromString, @@ -646,6 +695,87 @@ class NoteFlowService(object): metadata, _registered_method=True) + @staticmethod + def RefineSpeakerDiarization(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/RefineSpeakerDiarization', + noteflow__pb2.RefineSpeakerDiarizationRequest.SerializeToString, + noteflow__pb2.RefineSpeakerDiarizationResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def RenameSpeaker(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/RenameSpeaker', + noteflow__pb2.RenameSpeakerRequest.SerializeToString, + noteflow__pb2.RenameSpeakerResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def GetDiarizationJobStatus(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/noteflow.NoteFlowService/GetDiarizationJobStatus', + noteflow__pb2.GetDiarizationJobStatusRequest.SerializeToString, + noteflow__pb2.DiarizationJobStatus.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + @staticmethod def GetServerInfo(request, target, diff --git a/src/noteflow/grpc/server.py b/src/noteflow/grpc/server.py index dffda97..14706cb 100644 --- a/src/noteflow/grpc/server.py +++ b/src/noteflow/grpc/server.py @@ -7,15 +7,17 @@ import asyncio import logging import signal import time -from typing import TYPE_CHECKING, Final +from typing import TYPE_CHECKING, Any, Final import grpc.aio +from pydantic import ValidationError from noteflow.application.services import RecoveryService from noteflow.application.services.summarization_service import SummarizationService from noteflow.config.settings import get_settings from noteflow.infrastructure.asr import FasterWhisperEngine from noteflow.infrastructure.asr.engine import VALID_MODEL_SIZES +from noteflow.infrastructure.diarization import DiarizationEngine from noteflow.infrastructure.persistence.database import create_async_session_factory from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork from noteflow.infrastructure.summarization import create_summarization_service @@ -43,6 +45,8 @@ class NoteFlowServer: asr_compute_type: str = "int8", session_factory: async_sessionmaker[AsyncSession] | None = None, summarization_service: SummarizationService | None = None, + diarization_engine: DiarizationEngine | None = None, + diarization_refinement_enabled: bool = True, ) -> None: """Initialize the server. @@ -53,6 +57,8 @@ class NoteFlowServer: asr_compute_type: ASR compute type. 
session_factory: Optional async session factory for database. summarization_service: Optional summarization service for generating summaries. + diarization_engine: Optional diarization engine for speaker identification. + diarization_refinement_enabled: Whether to allow diarization refinement RPCs. """ self._port = port self._asr_model = asr_model @@ -60,6 +66,8 @@ class NoteFlowServer: self._asr_compute_type = asr_compute_type self._session_factory = session_factory self._summarization_service = summarization_service + self._diarization_engine = diarization_engine + self._diarization_refinement_enabled = diarization_refinement_enabled self._server: grpc.aio.Server | None = None self._servicer: NoteFlowServicer | None = None @@ -90,11 +98,13 @@ class NoteFlowServer: self._summarization_service = create_summarization_service() logger.info("Summarization service initialized (default factory)") - # Create servicer with session factory and summarization service + # Create servicer with session factory, summarization, and diarization self._servicer = NoteFlowServicer( asr_engine=asr_engine, session_factory=self._session_factory, summarization_service=self._summarization_service, + diarization_engine=self._diarization_engine, + diarization_refinement_enabled=self._diarization_refinement_enabled, ) # Create async gRPC server @@ -142,6 +152,13 @@ async def run_server( asr_device: str, asr_compute_type: str, database_url: str | None = None, + diarization_enabled: bool = False, + diarization_hf_token: str | None = None, + diarization_device: str = "auto", + diarization_streaming_latency: float | None = None, + diarization_min_speakers: int | None = None, + diarization_max_speakers: int | None = None, + diarization_refinement_enabled: bool = True, ) -> None: """Run the async gRPC server. @@ -151,6 +168,13 @@ async def run_server( asr_device: Device for ASR. asr_compute_type: ASR compute type. database_url: Optional database URL for persistence. + diarization_enabled: Whether to enable speaker diarization. + diarization_hf_token: HuggingFace token for pyannote models. + diarization_device: Device for diarization ("auto", "cpu", "cuda", "mps"). + diarization_streaming_latency: Streaming diarization latency in seconds. + diarization_min_speakers: Minimum expected speakers for offline diarization. + diarization_max_speakers: Maximum expected speakers for offline diarization. + diarization_refinement_enabled: Whether to allow diarization refinement RPCs. """ # Create session factory if database URL provided session_factory = None @@ -173,6 +197,29 @@ async def run_server( summarization_service = create_summarization_service() logger.info("Summarization service initialized") + # Create diarization engine if enabled + diarization_engine: DiarizationEngine | None = None + if diarization_enabled: + if not diarization_hf_token: + logger.warning( + "Diarization enabled but no HuggingFace token provided. " + "Set NOTEFLOW_DIARIZATION_HF_TOKEN or --diarization-hf-token." 
+ ) + else: + logger.info("Initializing diarization engine on %s...", diarization_device) + diarization_kwargs: dict[str, Any] = { + "device": diarization_device, + "hf_token": diarization_hf_token, + } + if diarization_streaming_latency is not None: + diarization_kwargs["streaming_latency"] = diarization_streaming_latency + if diarization_min_speakers is not None: + diarization_kwargs["min_speakers"] = diarization_min_speakers + if diarization_max_speakers is not None: + diarization_kwargs["max_speakers"] = diarization_max_speakers + diarization_engine = DiarizationEngine(**diarization_kwargs) + logger.info("Diarization engine initialized (models loaded on demand)") + server = NoteFlowServer( port=port, asr_model=asr_model, @@ -180,6 +227,8 @@ async def run_server( asr_compute_type=asr_compute_type, session_factory=session_factory, summarization_service=summarization_service, + diarization_engine=diarization_engine, + diarization_refinement_enabled=diarization_refinement_enabled, ) # Set up graceful shutdown @@ -201,6 +250,10 @@ async def run_server( print("Database: Connected") else: print("Database: Not configured (in-memory mode)") + if diarization_engine: + print(f"Diarization: Enabled ({diarization_device})") + else: + print("Diarization: Disabled") print("Press Ctrl+C to stop\n") # Wait for shutdown signal or server termination @@ -255,6 +308,24 @@ def main() -> None: action="store_true", help="Enable verbose logging", ) + parser.add_argument( + "--diarization", + action="store_true", + help="Enable speaker diarization (requires pyannote.audio)", + ) + parser.add_argument( + "--diarization-hf-token", + type=str, + default=None, + help="HuggingFace token for pyannote models (overrides NOTEFLOW_DIARIZATION_HF_TOKEN)", + ) + parser.add_argument( + "--diarization-device", + type=str, + default="auto", + choices=["auto", "cpu", "cuda", "mps"], + help="Device for diarization (default: auto)", + ) args = parser.parse_args() # Configure logging @@ -264,14 +335,39 @@ def main() -> None: format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", ) + # Get settings + try: + settings = get_settings() + except (OSError, ValueError, ValidationError) as exc: + logger.warning("Failed to load settings: %s", exc) + settings = None + # Get database URL from args or settings database_url = args.database_url + if not database_url and settings: + database_url = str(settings.database_url) if not database_url: - try: - settings = get_settings() - database_url = str(settings.database_url) - except Exception: - logger.warning("No database URL configured, running in-memory mode") + logger.warning("No database URL configured, running in-memory mode") + + # Get diarization config from args or settings + diarization_enabled = args.diarization + diarization_hf_token = args.diarization_hf_token + diarization_device = args.diarization_device + diarization_streaming_latency: float | None = None + diarization_min_speakers: int | None = None + diarization_max_speakers: int | None = None + diarization_refinement_enabled = True + if settings and not diarization_enabled: + diarization_enabled = settings.diarization_enabled + if settings and not diarization_hf_token: + diarization_hf_token = settings.diarization_hf_token + if settings and diarization_device == "auto": + diarization_device = settings.diarization_device + if settings: + diarization_streaming_latency = settings.diarization_streaming_latency + diarization_min_speakers = settings.diarization_min_speakers + diarization_max_speakers = 
settings.diarization_max_speakers + diarization_refinement_enabled = settings.diarization_refinement_enabled # Run server asyncio.run( @@ -281,6 +377,13 @@ def main() -> None: asr_device=args.device, asr_compute_type=args.compute_type, database_url=database_url, + diarization_enabled=diarization_enabled, + diarization_hf_token=diarization_hf_token, + diarization_device=diarization_device, + diarization_streaming_latency=diarization_streaming_latency, + diarization_min_speakers=diarization_min_speakers, + diarization_max_speakers=diarization_max_speakers, + diarization_refinement_enabled=diarization_refinement_enabled, ) ) diff --git a/src/noteflow/grpc/service.py b/src/noteflow/grpc/service.py index 72c9b53..4a54bcc 100644 --- a/src/noteflow/grpc/service.py +++ b/src/noteflow/grpc/service.py @@ -3,64 +3,58 @@ from __future__ import annotations import logging -import struct import time -from collections.abc import AsyncIterator -from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING, ClassVar, Final -from uuid import UUID import grpc.aio import numpy as np -from numpy.typing import NDArray -from noteflow.application.services.export_service import ExportFormat, ExportService -from noteflow.application.services.summarization_service import SummarizationService -from noteflow.domain.entities import Annotation, Meeting, Segment, Summary -from noteflow.domain.summarization import ProviderUnavailableError -from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState +from noteflow.config.constants import DEFAULT_SAMPLE_RATE as _DEFAULT_SAMPLE_RATE +from noteflow.domain.entities import Meeting +from noteflow.domain.value_objects import MeetingState from noteflow.infrastructure.asr import Segmenter, SegmenterConfig, StreamingVad -from noteflow.infrastructure.audio.reader import MeetingAudioReader from noteflow.infrastructure.audio.writer import MeetingAudioWriter -from noteflow.infrastructure.converters import AsrConverter from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork from noteflow.infrastructure.security.crypto import AesGcmCryptoBox from noteflow.infrastructure.security.keystore import KeyringKeyStore +from ._mixins import ( + AnnotationMixin, + DiarizationMixin, + ExportMixin, + MeetingMixin, + StreamingMixin, + SummarizationMixin, +) from .meeting_store import MeetingStore from .proto import noteflow_pb2, noteflow_pb2_grpc if TYPE_CHECKING: + from numpy.typing import NDArray from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + from noteflow.application.services.summarization_service import SummarizationService from noteflow.infrastructure.asr import FasterWhisperEngine - from noteflow.infrastructure.asr.dto import AsrResult - from noteflow.infrastructure.diarization import DiarizationEngine + from noteflow.infrastructure.diarization import DiarizationEngine, SpeakerTurn logger = logging.getLogger(__name__) -@dataclass -class _StreamSessionInit: - """Result of stream session initialization.""" - - next_segment_id: int - error_code: int | None = None - error_message: str | None = None - - @property - def success(self) -> bool: - """Check if initialization succeeded.""" - return self.error_code is None - - -class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): +class NoteFlowServicer( + StreamingMixin, + DiarizationMixin, + MeetingMixin, + SummarizationMixin, + AnnotationMixin, + ExportMixin, + noteflow_pb2_grpc.NoteFlowServiceServicer, +): """Async gRPC service 
implementation for NoteFlow with PostgreSQL persistence.""" VERSION: Final[str] = "0.2.0" MAX_CHUNK_SIZE: Final[int] = 1024 * 1024 # 1MB - DEFAULT_SAMPLE_RATE: Final[int] = 16000 + DEFAULT_SAMPLE_RATE: Final[int] = _DEFAULT_SAMPLE_RATE SUPPORTED_SAMPLE_RATES: ClassVar[list[int]] = [16000, 44100, 48000] PARTIAL_CADENCE_SECONDS: Final[float] = 2.0 # Emit partials every 2 seconds MIN_PARTIAL_AUDIO_SECONDS: Final[float] = 0.5 # Minimum audio for partial inference @@ -72,6 +66,7 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): meetings_dir: Path | None = None, summarization_service: SummarizationService | None = None, diarization_engine: DiarizationEngine | None = None, + diarization_refinement_enabled: bool = True, ) -> None: """Initialize the service. @@ -83,11 +78,13 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): Defaults to ~/.noteflow/meetings. summarization_service: Optional summarization service for generating summaries. diarization_engine: Optional diarization engine for speaker identification. + diarization_refinement_enabled: Whether to allow post-meeting diarization refinement. """ self._asr_engine = asr_engine self._session_factory = session_factory self._summarization_service = summarization_service self._diarization_engine = diarization_engine + self._diarization_refinement_enabled = diarization_refinement_enabled self._start_time = time.time() # Fallback to in-memory store if no database configured self._memory_store: MeetingStore | None = ( @@ -113,6 +110,17 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): self._last_partial_time: dict[str, float] = {} self._last_partial_text: dict[str, str] = {} + # Streaming diarization state per meeting + self._diarization_turns: dict[str, list[SpeakerTurn]] = {} + self._diarization_stream_time: dict[str, float] = {} + self._diarization_streaming_failed: set[str] = set() + + # Track audio write failures to avoid log spam + self._audio_write_failed: set[str] = set() + + # Background diarization jobs + self._diarization_jobs: dict[str, object] = {} + @property def asr_engine(self) -> FasterWhisperEngine | None: """Get the ASR engine.""" @@ -158,6 +166,11 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): self._partial_buffers[meeting_id] = [] self._last_partial_time[meeting_id] = time.time() self._last_partial_text[meeting_id] = "" + self._diarization_turns[meeting_id] = [] + self._diarization_stream_time[meeting_id] = 0.0 + self._diarization_streaming_failed.discard(meeting_id) + if self._diarization_engine is not None: + self._diarization_engine.reset_streaming() def _cleanup_streaming_state(self, meeting_id: str) -> None: """Clean up VAD, Segmenter, speaking state, and partial buffers for a meeting.""" @@ -169,6 +182,9 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): self._partial_buffers.pop(meeting_id, None) self._last_partial_time.pop(meeting_id, None) self._last_partial_text.pop(meeting_id, None) + self._diarization_turns.pop(meeting_id, None) + self._diarization_stream_time.pop(meeting_id, None) + self._diarization_streaming_failed.discard(meeting_id) def _ensure_meeting_dek(self, meeting: Meeting) -> tuple[bytes, bytes, bool]: """Ensure meeting has a DEK, generating one if needed. 
@@ -228,152 +244,11 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): self._audio_writers[meeting_id] = writer logger.info("Audio writer opened for meeting %s", meeting_id) - async def _init_stream_session_db(self, meeting_id: str) -> _StreamSessionInit: - """Initialize stream session using database persistence. - - Args: - meeting_id: Meeting ID string. - - Returns: - Stream session initialization result. - """ - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) - if meeting is None: - return _StreamSessionInit( - next_segment_id=0, - error_code=grpc.StatusCode.NOT_FOUND, - error_message=f"Meeting {meeting_id} not found", - ) - - dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) - recording_updated, error_msg = self._start_meeting_if_needed(meeting) - - if error_msg: - return _StreamSessionInit( - next_segment_id=0, - error_code=grpc.StatusCode.INVALID_ARGUMENT, - error_message=error_msg, - ) - - if dek_updated or recording_updated: - await uow.meetings.update(meeting) - await uow.commit() - - next_segment_id = await uow.segments.get_next_segment_id(meeting.id) - self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) - self._init_streaming_state(meeting_id, next_segment_id) - - return _StreamSessionInit(next_segment_id=next_segment_id) - - def _init_stream_session_memory(self, meeting_id: str) -> _StreamSessionInit: - """Initialize stream session using in-memory store. - - Args: - meeting_id: Meeting ID string. - - Returns: - Stream session initialization result. - """ - store = self._get_memory_store() - meeting = store.get(meeting_id) - if meeting is None: - return _StreamSessionInit( - next_segment_id=0, - error_code=grpc.StatusCode.NOT_FOUND, - error_message=f"Meeting {meeting_id} not found", - ) - - dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) - recording_updated, error_msg = self._start_meeting_if_needed(meeting) - - if error_msg: - return _StreamSessionInit( - next_segment_id=0, - error_code=grpc.StatusCode.INVALID_ARGUMENT, - error_message=error_msg, - ) - - if dek_updated or recording_updated: - store.update(meeting) - - next_segment_id = meeting.next_segment_id - self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) - self._init_streaming_state(meeting_id, next_segment_id) - - return _StreamSessionInit(next_segment_id=next_segment_id) - - def _next_segment_id(self, meeting_id: str, fallback: int = 0) -> int: - """Get and increment the next segment id for a meeting.""" - next_id = self._segment_counters.get(meeting_id) - if next_id is None: - next_id = fallback - self._segment_counters[meeting_id] = next_id + 1 - return next_id - - def _normalize_stream_format( - self, - meeting_id: str, - sample_rate: int, - channels: int, - ) -> tuple[int, int]: - """Validate and persist stream audio format for a meeting.""" - normalized_rate = sample_rate or self.DEFAULT_SAMPLE_RATE - normalized_channels = channels or 1 - - if normalized_rate not in self.SUPPORTED_SAMPLE_RATES: - raise ValueError( - "Unsupported sample_rate " - f"{normalized_rate}; supported: {self.SUPPORTED_SAMPLE_RATES}" - ) - if normalized_channels < 1: - raise ValueError("channels must be >= 1") - - existing = self._stream_formats.get(meeting_id) - if existing and existing != (normalized_rate, normalized_channels): - raise ValueError("Stream audio format cannot change mid-stream") - - self._stream_formats.setdefault(meeting_id, (normalized_rate, normalized_channels)) - return normalized_rate, 
normalized_channels - - def _convert_audio_format( - self, - audio: NDArray[np.float32], - sample_rate: int, - channels: int, - ) -> NDArray[np.float32]: - """Downmix/resample audio to the server's expected format.""" - if channels > 1: - if audio.size % channels != 0: - raise ValueError("Audio buffer size is not divisible by channel count") - audio = audio.reshape(-1, channels).mean(axis=1) - - if sample_rate != self.DEFAULT_SAMPLE_RATE: - audio = self._resample_audio(audio, sample_rate, self.DEFAULT_SAMPLE_RATE) - - return audio - - @staticmethod - def _resample_audio( - audio: NDArray[np.float32], - src_rate: int, - dst_rate: int, - ) -> NDArray[np.float32]: - """Resample audio using linear interpolation.""" - if src_rate == dst_rate or audio.size == 0: - return audio - - ratio = dst_rate / src_rate - new_length = round(audio.shape[0] * ratio) - if new_length <= 0: - return np.array([], dtype=np.float32) - - old_indices = np.arange(audio.shape[0]) - new_indices = np.arange(new_length) / ratio - return np.interp(new_indices, old_indices, audio).astype(np.float32) - def _close_audio_writer(self, meeting_id: str) -> None: """Close and remove the audio writer for a meeting.""" + # Clean up write failure tracking + self._audio_write_failed.discard(meeting_id) + if meeting_id not in self._audio_writers: return @@ -392,6 +267,14 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): e, ) + def _next_segment_id(self, meeting_id: str, fallback: int = 0) -> int: + """Get and increment the next segment id for a meeting.""" + next_id = self._segment_counters.get(meeting_id) + if next_id is None: + next_id = fallback + self._segment_counters[meeting_id] = next_id + 1 + return next_id + async def _count_active_meetings_db(self) -> int: """Count active meetings using database state.""" async with self._create_uow() as uow: @@ -400,832 +283,6 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): total += await uow.meetings.count_by_state(state) return total - async def StreamTranscription( - self, - request_iterator: AsyncIterator[noteflow_pb2.AudioChunk], - context: grpc.aio.ServicerContext, - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Handle bidirectional audio streaming with persistence. - - Receives audio chunks from client, processes through ASR, - persists segments, and yields transcript updates. 
- """ - if self._asr_engine is None or not self._asr_engine.is_loaded: - await context.abort( - grpc.StatusCode.FAILED_PRECONDITION, - "ASR engine not loaded", - ) - - current_meeting_id: str | None = None - - try: - async for chunk in request_iterator: - meeting_id = chunk.meeting_id - if not meeting_id: - await context.abort( - grpc.StatusCode.INVALID_ARGUMENT, - "meeting_id required", - ) - - # Initialize stream on first chunk - if current_meeting_id is None: - init_result = await self._init_stream_for_meeting(meeting_id, context) - if init_result is None: - return # Error already sent via context.abort - current_meeting_id = meeting_id - elif meeting_id != current_meeting_id: - await context.abort( - grpc.StatusCode.INVALID_ARGUMENT, - "Stream may only contain a single meeting_id", - ) - - # Process audio chunk - async for update in self._process_stream_chunk(current_meeting_id, chunk, context): - yield update - - # Flush any remaining audio from segmenter - if current_meeting_id and current_meeting_id in self._segmenters: - async for update in self._flush_segmenter(current_meeting_id): - yield update - finally: - if current_meeting_id: - self._cleanup_streaming_state(current_meeting_id) - self._close_audio_writer(current_meeting_id) - self._active_streams.discard(current_meeting_id) - - async def _init_stream_for_meeting( - self, - meeting_id: str, - context: grpc.aio.ServicerContext, - ) -> _StreamSessionInit | None: - """Initialize streaming for a meeting. - - Args: - meeting_id: Meeting ID string. - context: gRPC context for error handling. - - Returns: - Initialization result, or None if error was sent. - """ - if meeting_id in self._active_streams: - await context.abort( - grpc.StatusCode.FAILED_PRECONDITION, - f"Meeting {meeting_id} already streaming", - ) - - self._active_streams.add(meeting_id) - - if self._use_database(): - init_result = await self._init_stream_session_db(meeting_id) - else: - init_result = self._init_stream_session_memory(meeting_id) - - if not init_result.success: - self._active_streams.discard(meeting_id) - await context.abort(init_result.error_code, init_result.error_message or "") - - return init_result - - async def _process_stream_chunk( - self, - meeting_id: str, - chunk: noteflow_pb2.AudioChunk, - context: grpc.aio.ServicerContext, - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Process a single audio chunk from the stream. - - Args: - meeting_id: Meeting ID string. - chunk: Audio chunk from client. - context: gRPC context for error handling. - - Yields: - Transcript updates from processing. - """ - try: - sample_rate, channels = self._normalize_stream_format( - meeting_id, - chunk.sample_rate, - chunk.channels, - ) - except ValueError as e: - await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) - - audio = self._decode_audio_chunk(chunk) - if audio is None: - return - - try: - audio = self._convert_audio_format(audio, sample_rate, channels) - except ValueError as e: - await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) - - # Write to encrypted audio file - self._write_audio_chunk_safe(meeting_id, audio) - - # VAD-driven segmentation - async for update in self._process_audio_with_vad(meeting_id, audio): - yield update - - def _write_audio_chunk_safe( - self, - meeting_id: str, - audio: NDArray[np.float32], - ) -> None: - """Write audio chunk to encrypted file, logging errors without raising. - - Args: - meeting_id: Meeting ID string. - audio: Audio samples to write. 
- """ - if meeting_id not in self._audio_writers: - return - try: - self._audio_writers[meeting_id].write_chunk(audio) - except Exception as e: - logger.error("Failed to write audio chunk: %s", e) - - def _decode_audio_chunk( - self, - chunk: noteflow_pb2.AudioChunk, - ) -> NDArray[np.float32] | None: - """Decode audio chunk from protobuf to numpy array.""" - if not chunk.audio_data: - return None - try: - return np.frombuffer(chunk.audio_data, dtype=np.float32) - except (ValueError, struct.error) as e: - logger.warning("Failed to decode audio chunk: %s", e) - return None - - async def _process_audio_with_vad( - self, - meeting_id: str, - audio: NDArray[np.float32], - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Process audio chunk through VAD and Segmenter. - - Args: - meeting_id: Meeting identifier. - audio: Audio samples (float32, mono). - - Yields: - TranscriptUpdates for VAD events, partials, and finals. - """ - vad = self._vad_instances.get(meeting_id) - segmenter = self._segmenters.get(meeting_id) - - if vad is None or segmenter is None: - return - - # Get VAD decision - is_speech = vad.process_chunk(audio) - - # Emit VAD state change events - was_speaking = self._was_speaking.get(meeting_id, False) - if is_speech and not was_speaking: - # Speech started - yield self._create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_START) - self._was_speaking[meeting_id] = True - elif not is_speech and was_speaking: - # Speech ended - yield self._create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_END) - self._was_speaking[meeting_id] = False - - # Buffer audio for partial transcription - if is_speech: - if meeting_id in self._partial_buffers: - self._partial_buffers[meeting_id].append(audio.copy()) - - # Check if we should emit a partial - partial_update = await self._maybe_emit_partial(meeting_id) - if partial_update is not None: - yield partial_update - - # Process through segmenter - for audio_segment in segmenter.process_audio(audio, is_speech): - # Clear partial buffer when we get a final segment - self._clear_partial_buffer(meeting_id) - async for update in self._process_audio_segment(meeting_id, audio_segment.audio): - yield update - - async def _maybe_emit_partial( - self, - meeting_id: str, - ) -> noteflow_pb2.TranscriptUpdate | None: - """Check if it's time to emit a partial and generate if so. - - Args: - meeting_id: Meeting identifier. - - Returns: - TranscriptUpdate with partial text, or None if not time yet. 
- """ - if self._asr_engine is None or not self._asr_engine.is_loaded: - return None - - last_time = self._last_partial_time.get(meeting_id, 0) - now = time.time() - - # Check if enough time has passed since last partial - if now - last_time < self.PARTIAL_CADENCE_SECONDS: - return None - - # Check if we have enough audio - buffer = self._partial_buffers.get(meeting_id, []) - if not buffer: - return None - - # Concatenate buffered audio - combined = np.concatenate(buffer) - audio_seconds = len(combined) / self.DEFAULT_SAMPLE_RATE - - if audio_seconds < self.MIN_PARTIAL_AUDIO_SECONDS: - return None - - # Run inference on buffered audio - partial_text = " ".join(result.text for result in self._asr_engine.transcribe(combined)) - - # Clear buffer after inference to keep partials incremental and bounded - self._partial_buffers[meeting_id] = [] - - # Only emit if text changed (debounce) - last_text = self._last_partial_text.get(meeting_id, "") - if partial_text and partial_text != last_text: - self._last_partial_time[meeting_id] = now - self._last_partial_text[meeting_id] = partial_text - return noteflow_pb2.TranscriptUpdate( - meeting_id=meeting_id, - update_type=noteflow_pb2.UPDATE_TYPE_PARTIAL, - partial_text=partial_text, - server_timestamp=now, - ) - - self._last_partial_time[meeting_id] = now - return None - - def _clear_partial_buffer(self, meeting_id: str) -> None: - """Clear the partial buffer and reset state after a final is emitted. - - Args: - meeting_id: Meeting identifier. - """ - if meeting_id in self._partial_buffers: - self._partial_buffers[meeting_id] = [] - if meeting_id in self._last_partial_text: - self._last_partial_text[meeting_id] = "" - if meeting_id in self._last_partial_time: - self._last_partial_time[meeting_id] = time.time() - - async def _flush_segmenter( - self, - meeting_id: str, - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Flush remaining audio from segmenter at stream end. - - Args: - meeting_id: Meeting identifier. - - Yields: - TranscriptUpdates for final segment. - """ - segmenter = self._segmenters.get(meeting_id) - if segmenter is None: - return - - # Clear partial buffer since we're flushing to final - self._clear_partial_buffer(meeting_id) - - final_segment = segmenter.flush() - if final_segment is not None: - async for update in self._process_audio_segment(meeting_id, final_segment.audio): - yield update - - async def _process_audio_segment( - self, - meeting_id: str, - audio: NDArray[np.float32], - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Process a complete audio segment through ASR. - - Args: - meeting_id: Meeting identifier. - audio: Complete audio segment. - - Yields: - TranscriptUpdates for transcribed segments. 
- """ - if len(audio) == 0 or self._asr_engine is None: - return - - if self._use_database(): - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) - if meeting is None: - return - - for result in self._asr_engine.transcribe(audio): - segment_id = self._next_segment_id( - meeting_id, - fallback=meeting.next_segment_id, - ) - segment = self._create_segment_from_asr(meeting.id, segment_id, result) - meeting.add_segment(segment) - await uow.segments.add(meeting.id, segment) - await uow.commit() - yield self._segment_to_proto_update(meeting_id, segment) - else: - store = self._get_memory_store() - meeting = store.get(meeting_id) - if meeting is None: - return - for result in self._asr_engine.transcribe(audio): - segment_id = self._next_segment_id( - meeting_id, - fallback=meeting.next_segment_id, - ) - segment = self._create_segment_from_asr(meeting.id, segment_id, result) - store.add_segment(meeting_id, segment) - yield self._segment_to_proto_update(meeting_id, segment) - - def _create_vad_update( - self, - meeting_id: str, - update_type: int, - ) -> noteflow_pb2.TranscriptUpdate: - """Create a VAD event update. - - Args: - meeting_id: Meeting identifier. - update_type: VAD_START or VAD_END. - - Returns: - TranscriptUpdate with VAD event. - """ - return noteflow_pb2.TranscriptUpdate( - meeting_id=meeting_id, - update_type=update_type, - server_timestamp=time.time(), - ) - - def _create_segment_from_asr( - self, - meeting_id: MeetingId, - segment_id: int, - result: AsrResult, - ) -> Segment: - """Create a Segment from ASR result. - - Use converters to transform ASR DTO to domain entities. - """ - words = AsrConverter.result_to_domain_words(result) - - return Segment( - segment_id=segment_id, - text=result.text, - start_time=result.start, - end_time=result.end, - meeting_id=meeting_id, - words=words, - language=result.language, - language_confidence=result.language_probability, - avg_logprob=result.avg_logprob, - no_speech_prob=result.no_speech_prob, - ) - - def _segment_to_proto_update( - self, - meeting_id: str, - segment: Segment, - ) -> noteflow_pb2.TranscriptUpdate: - """Convert domain Segment to protobuf TranscriptUpdate.""" - words = [ - noteflow_pb2.WordTiming( - word=w.word, - start_time=w.start_time, - end_time=w.end_time, - probability=w.probability, - ) - for w in segment.words - ] - final_segment = noteflow_pb2.FinalSegment( - segment_id=segment.segment_id, - text=segment.text, - start_time=segment.start_time, - end_time=segment.end_time, - words=words, - language=segment.language, - language_confidence=segment.language_confidence, - avg_logprob=segment.avg_logprob, - no_speech_prob=segment.no_speech_prob, - speaker_id=segment.speaker_id or "", - speaker_confidence=segment.speaker_confidence, - ) - return noteflow_pb2.TranscriptUpdate( - meeting_id=meeting_id, - update_type=noteflow_pb2.UPDATE_TYPE_FINAL, - segment=final_segment, - server_timestamp=time.time(), - ) - - async def CreateMeeting( - self, - request: noteflow_pb2.CreateMeetingRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Meeting: - """Create a new meeting.""" - metadata = dict(request.metadata) if request.metadata else {} - - if self._use_database(): - async with self._create_uow() as uow: - meeting = Meeting.create(title=request.title, metadata=metadata) - saved = await uow.meetings.create(meeting) - await uow.commit() - return self._meeting_to_proto(saved) - else: - store = self._get_memory_store() - meeting = store.create(title=request.title, 
metadata=metadata) - return self._meeting_to_proto(meeting) - - async def StopMeeting( - self, - request: noteflow_pb2.StopMeetingRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Meeting: - """Stop a meeting using graceful STOPPING -> STOPPED transition.""" - meeting_id = request.meeting_id - - # Close audio writer if open - if meeting_id in self._audio_writers: - self._close_audio_writer(meeting_id) - - if self._use_database(): - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {meeting_id} not found", - ) - try: - # Graceful shutdown: RECORDING -> STOPPING -> STOPPED - meeting.begin_stopping() - meeting.stop_recording() - except ValueError as e: - await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) - await uow.meetings.update(meeting) - await uow.commit() - return self._meeting_to_proto(meeting) - store = self._get_memory_store() - meeting = store.get(meeting_id) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {meeting_id} not found", - ) - try: - # Graceful shutdown: RECORDING -> STOPPING -> STOPPED - meeting.begin_stopping() - meeting.stop_recording() - except ValueError as e: - await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) - store.update(meeting) - return self._meeting_to_proto(meeting) - - async def refine_speaker_diarization( - self, - meeting_id: str, - num_speakers: int | None = None, - ) -> int: - """Run post-meeting speaker diarization refinement. - - Loads the full meeting audio, runs offline diarization, and updates - segment speaker assignments. This provides higher quality speaker - labels than streaming diarization. - - Args: - meeting_id: Meeting UUID string. - num_speakers: Known number of speakers (None for auto-detect). - - Returns: - Number of segments updated with speaker labels. - - Raises: - RuntimeError: If diarization engine not available or meeting not found. 
- """ - if self._diarization_engine is None: - raise RuntimeError("Diarization engine not configured") - - if not self._diarization_engine.is_offline_loaded: - logger.info("Loading offline diarization model for refinement...") - self._diarization_engine.load_offline_model() - - # Load meeting audio - audio_reader = MeetingAudioReader(self._crypto, self._meetings_dir) - - if not audio_reader.audio_exists(meeting_id): - logger.warning("No audio file found for meeting %s", meeting_id) - return 0 - - logger.info("Loading audio for meeting %s", meeting_id) - try: - audio_chunks = audio_reader.load_meeting_audio(meeting_id) - except (FileNotFoundError, ValueError) as exc: - logger.warning("Failed to load audio for meeting %s: %s", meeting_id, exc) - return 0 - - if not audio_chunks: - logger.warning("No audio chunks loaded for meeting %s", meeting_id) - return 0 - - sample_rate = audio_reader.sample_rate - - # Concatenate all audio chunks - all_audio = np.concatenate([chunk.frames for chunk in audio_chunks]) - - logger.info( - "Running offline diarization on %.2f seconds of audio", - len(all_audio) / sample_rate, - ) - - # Run full diarization - from noteflow.infrastructure.diarization import assign_speaker - - turns = self._diarization_engine.diarize_full( - all_audio, - sample_rate=sample_rate, - num_speakers=num_speakers, - ) - - logger.info("Diarization found %d speaker turns", len(turns)) - - # Update segments with speaker assignments - updated_count = 0 - - if self._use_database(): - async with self._create_uow() as uow: - segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) - - for segment in segments: - if segment.db_id is None: - continue - - speaker_id, confidence = assign_speaker( - segment.start_time, - segment.end_time, - turns, - ) - - if speaker_id is not None: - await uow.segments.update_speaker( - segment.db_id, - speaker_id, - confidence, - ) - updated_count += 1 - - await uow.commit() - else: - store = self._get_memory_store() - if meeting := store.get(meeting_id): - for segment in meeting.segments: - speaker_id, confidence = assign_speaker( - segment.start_time, - segment.end_time, - turns, - ) - if speaker_id is not None: - segment.speaker_id = speaker_id - segment.speaker_confidence = confidence - updated_count += 1 - - logger.info( - "Updated %d segments with speaker labels for meeting %s", - updated_count, - meeting_id, - ) - - return updated_count - - async def ListMeetings( - self, - request: noteflow_pb2.ListMeetingsRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.ListMeetingsResponse: - """List meetings.""" - limit = request.limit or 100 - offset = request.offset or 0 - sort_desc = request.sort_order != noteflow_pb2.SORT_ORDER_CREATED_ASC - - if self._use_database(): - states = [MeetingState(s) for s in request.states] if request.states else None - async with self._create_uow() as uow: - meetings, total = await uow.meetings.list_all( - states=states, - limit=limit, - offset=offset, - sort_desc=sort_desc, - ) - return noteflow_pb2.ListMeetingsResponse( - meetings=[self._meeting_to_proto(m, include_segments=False) for m in meetings], - total_count=total, - ) - else: - store = self._get_memory_store() - states = [MeetingState(s) for s in request.states] if request.states else None - meetings, total = store.list_all( - states=states, - limit=limit, - offset=offset, - sort_desc=sort_desc, - ) - return noteflow_pb2.ListMeetingsResponse( - meetings=[self._meeting_to_proto(m, include_segments=False) for m in meetings], - 
total_count=total, - ) - - async def GetMeeting( - self, - request: noteflow_pb2.GetMeetingRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Meeting: - """Get meeting details.""" - if self._use_database(): - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(UUID(request.meeting_id))) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - # Load segments if requested - if request.include_segments: - segments = await uow.segments.get_by_meeting(meeting.id) - meeting.segments = list(segments) - # Load summary if requested - if request.include_summary: - summary = await uow.summaries.get_by_meeting(meeting.id) - meeting.summary = summary - return self._meeting_to_proto( - meeting, - include_segments=request.include_segments, - include_summary=request.include_summary, - ) - store = self._get_memory_store() - meeting = store.get(request.meeting_id) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - return self._meeting_to_proto( - meeting, - include_segments=request.include_segments, - include_summary=request.include_summary, - ) - - async def DeleteMeeting( - self, - request: noteflow_pb2.DeleteMeetingRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.DeleteMeetingResponse: - """Delete a meeting.""" - if self._use_database(): - async with self._create_uow() as uow: - success = await uow.meetings.delete(MeetingId(UUID(request.meeting_id))) - if success: - await uow.commit() - return noteflow_pb2.DeleteMeetingResponse(success=True) - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - store = self._get_memory_store() - success = store.delete(request.meeting_id) - if not success: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - return noteflow_pb2.DeleteMeetingResponse(success=True) - - async def GenerateSummary( - self, - request: noteflow_pb2.GenerateSummaryRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Summary: - """Generate meeting summary using SummarizationService with fallback.""" - if self._use_database(): - return await self._generate_summary_db(request, context) - - return await self._generate_summary_memory(request, context) - - async def _generate_summary_db( - self, - request: noteflow_pb2.GenerateSummaryRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Summary: - """Generate summary for a meeting stored in the database. - - The potentially slow summarization step is executed outside the UoW to - avoid holding database connections while waiting on LLMs. 
- """ - - meeting_id = MeetingId(UUID(request.meeting_id)) - - # 1) Load meeting, existing summary, and segments inside a short UoW - async with self._create_uow() as uow: - meeting = await uow.meetings.get(meeting_id) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - - existing = await uow.summaries.get_by_meeting(meeting.id) - if existing and not request.force_regenerate: - return self._summary_to_proto(existing) - - segments = list(await uow.segments.get_by_meeting(meeting.id)) - - # 2) Run summarization outside DB transaction - summary = await self._summarize_or_placeholder(meeting_id, segments) - - # 3) Persist in a fresh UoW - async with self._create_uow() as uow: - saved = await uow.summaries.save(summary) - await uow.commit() - - return self._summary_to_proto(saved) - - async def _generate_summary_memory( - self, - request: noteflow_pb2.GenerateSummaryRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Summary: - """Generate summary for meetings held in the in-memory store.""" - - store = self._get_memory_store() - meeting = store.get(request.meeting_id) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - - if meeting.summary and not request.force_regenerate: - return self._summary_to_proto(meeting.summary) - - summary = await self._summarize_or_placeholder(meeting.id, meeting.segments) - store.set_summary(request.meeting_id, summary) - return self._summary_to_proto(summary) - - async def _summarize_or_placeholder( - self, - meeting_id: MeetingId, - segments: list[Segment], - ) -> Summary: - """Try to summarize via service, fallback to placeholder on failure.""" - if self._summarization_service is None: - logger.warning("SummarizationService not configured; using placeholder summary") - return self._generate_placeholder_summary(meeting_id, segments) - - try: - result = await self._summarization_service.summarize( - meeting_id=meeting_id, - segments=segments, - ) - logger.info( - "Generated summary using %s (fallback=%s)", - result.provider_used, - result.fallback_used, - ) - return result.summary - except ProviderUnavailableError as exc: - logger.warning("Summarization provider unavailable; using placeholder: %s", exc) - except Exception: - logger.exception("Summarization failed; using placeholder summary") - - return self._generate_placeholder_summary(meeting_id, segments) - - def _generate_placeholder_summary( - self, - meeting_id: MeetingId, - segments: list[Segment], - ) -> Summary: - """Generate a lightweight placeholder summary when summarization fails.""" - full_text = " ".join(s.text for s in segments) - executive = f"{full_text[:200]}..." if len(full_text) > 200 else full_text - executive = executive or "No transcript available." 
- - return Summary( - meeting_id=meeting_id, - executive_summary=executive, - key_points=[], - action_items=[], - model_version="placeholder-v0", - ) - async def GetServerInfo( self, request: noteflow_pb2.ServerInfoRequest, @@ -1260,323 +317,3 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): diarization_enabled=diarization_enabled, diarization_ready=diarization_ready, ) - - def _meeting_to_proto( - self, - meeting: Meeting, - include_segments: bool = True, - include_summary: bool = True, - ) -> noteflow_pb2.Meeting: - """Convert domain Meeting to protobuf.""" - segments = [] - if include_segments: - for seg in meeting.segments: - words = [ - noteflow_pb2.WordTiming( - word=w.word, - start_time=w.start_time, - end_time=w.end_time, - probability=w.probability, - ) - for w in seg.words - ] - segments.append( - noteflow_pb2.FinalSegment( - segment_id=seg.segment_id, - text=seg.text, - start_time=seg.start_time, - end_time=seg.end_time, - words=words, - language=seg.language, - language_confidence=seg.language_confidence, - avg_logprob=seg.avg_logprob, - no_speech_prob=seg.no_speech_prob, - speaker_id=seg.speaker_id or "", - speaker_confidence=seg.speaker_confidence, - ) - ) - - summary = None - if include_summary and meeting.summary: - summary = self._summary_to_proto(meeting.summary) - - return noteflow_pb2.Meeting( - id=str(meeting.id), - title=meeting.title, - state=meeting.state.value, - created_at=meeting.created_at.timestamp(), - started_at=meeting.started_at.timestamp() if meeting.started_at else 0, - ended_at=meeting.ended_at.timestamp() if meeting.ended_at else 0, - duration_seconds=meeting.duration_seconds, - segments=segments, - summary=summary, - metadata=meeting.metadata, - ) - - def _summary_to_proto(self, summary: Summary) -> noteflow_pb2.Summary: - """Convert domain Summary to protobuf.""" - key_points = [ - noteflow_pb2.KeyPoint( - text=kp.text, - segment_ids=kp.segment_ids, - start_time=kp.start_time, - end_time=kp.end_time, - ) - for kp in summary.key_points - ] - action_items = [ - noteflow_pb2.ActionItem( - text=ai.text, - assignee=ai.assignee, - due_date=ai.due_date.timestamp() if ai.due_date is not None else 0, - priority=ai.priority, - segment_ids=ai.segment_ids, - ) - for ai in summary.action_items - ] - return noteflow_pb2.Summary( - meeting_id=str(summary.meeting_id), - executive_summary=summary.executive_summary, - key_points=key_points, - action_items=action_items, - generated_at=( - summary.generated_at.timestamp() if summary.generated_at is not None else 0 - ), - model_version=summary.model_version, - ) - - # ========================================================================= - # Annotation Methods - # ========================================================================= - - async def AddAnnotation( - self, - request: noteflow_pb2.AddAnnotationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Annotation: - """Add an annotation to a meeting.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - annotation_type = self._proto_to_annotation_type(request.annotation_type) - from uuid import uuid4 - - annotation = Annotation( - id=AnnotationId(uuid4()), - meeting_id=MeetingId(UUID(request.meeting_id)), - annotation_type=annotation_type, - text=request.text, - start_time=request.start_time, - end_time=request.end_time, - segment_ids=list(request.segment_ids), - ) - - async with self._create_uow() as uow: - saved = await 
uow.annotations.add(annotation) - await uow.commit() - return self._annotation_to_proto(saved) - - async def GetAnnotation( - self, - request: noteflow_pb2.GetAnnotationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Annotation: - """Get an annotation by ID.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - async with self._create_uow() as uow: - annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) - if annotation is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Annotation {request.annotation_id} not found", - ) - return self._annotation_to_proto(annotation) - - async def ListAnnotations( - self, - request: noteflow_pb2.ListAnnotationsRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.ListAnnotationsResponse: - """List annotations for a meeting.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - async with self._create_uow() as uow: - meeting_id = MeetingId(UUID(request.meeting_id)) - # Check if time range filter is specified - if request.start_time > 0 or request.end_time > 0: - annotations = await uow.annotations.get_by_time_range( - meeting_id, - request.start_time, - request.end_time, - ) - else: - annotations = await uow.annotations.get_by_meeting(meeting_id) - - return noteflow_pb2.ListAnnotationsResponse( - annotations=[self._annotation_to_proto(a) for a in annotations] - ) - - async def UpdateAnnotation( - self, - request: noteflow_pb2.UpdateAnnotationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Annotation: - """Update an existing annotation.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - async with self._create_uow() as uow: - annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) - if annotation is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Annotation {request.annotation_id} not found", - ) - - # Update fields if provided - if request.annotation_type != noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED: - annotation.annotation_type = self._proto_to_annotation_type(request.annotation_type) - if request.text: - annotation.text = request.text - if request.start_time > 0: - annotation.start_time = request.start_time - if request.end_time > 0: - annotation.end_time = request.end_time - if request.segment_ids: - annotation.segment_ids = list(request.segment_ids) - - updated = await uow.annotations.update(annotation) - await uow.commit() - return self._annotation_to_proto(updated) - - async def DeleteAnnotation( - self, - request: noteflow_pb2.DeleteAnnotationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.DeleteAnnotationResponse: - """Delete an annotation.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - async with self._create_uow() as uow: - success = await uow.annotations.delete(AnnotationId(UUID(request.annotation_id))) - if success: - await uow.commit() - return noteflow_pb2.DeleteAnnotationResponse(success=True) - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Annotation {request.annotation_id} not found", - ) - - def _annotation_to_proto( - self, - annotation: Annotation, - ) -> noteflow_pb2.Annotation: - """Convert domain Annotation to 
protobuf.""" - return noteflow_pb2.Annotation( - id=str(annotation.id), - meeting_id=str(annotation.meeting_id), - annotation_type=self._annotation_type_to_proto(annotation.annotation_type), - text=annotation.text, - start_time=annotation.start_time, - end_time=annotation.end_time, - segment_ids=annotation.segment_ids, - created_at=annotation.created_at.timestamp(), - ) - - def _annotation_type_to_proto( - self, - annotation_type: AnnotationType, - ) -> int: - """Convert domain AnnotationType to protobuf enum.""" - mapping = { - AnnotationType.ACTION_ITEM: noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM, - AnnotationType.DECISION: noteflow_pb2.ANNOTATION_TYPE_DECISION, - AnnotationType.NOTE: noteflow_pb2.ANNOTATION_TYPE_NOTE, - AnnotationType.RISK: noteflow_pb2.ANNOTATION_TYPE_RISK, - } - return mapping.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED) - - def _proto_to_annotation_type( - self, - proto_type: int, - ) -> AnnotationType: - """Convert protobuf enum to domain AnnotationType.""" - mapping = { - noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM: AnnotationType.ACTION_ITEM, - noteflow_pb2.ANNOTATION_TYPE_DECISION: AnnotationType.DECISION, - noteflow_pb2.ANNOTATION_TYPE_NOTE: AnnotationType.NOTE, - noteflow_pb2.ANNOTATION_TYPE_RISK: AnnotationType.RISK, - } - return mapping.get(proto_type, AnnotationType.NOTE) - - # ========================================================================= - # Export Methods - # ========================================================================= - - async def ExportTranscript( - self, - request: noteflow_pb2.ExportTranscriptRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.ExportTranscriptResponse: - """Export meeting transcript to specified format.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Export requires database persistence", - ) - - # Map proto format to ExportFormat - fmt = self._proto_to_export_format(request.format) - - export_service = ExportService(self._create_uow()) - try: - content = await export_service.export_transcript( - MeetingId(UUID(request.meeting_id)), - fmt, - ) - exporter_info = export_service.get_supported_formats() - fmt_name = "" - fmt_ext = "" - for name, ext in exporter_info: - if fmt == ExportFormat.MARKDOWN and ext == ".md": - fmt_name, fmt_ext = name, ext - break - if fmt == ExportFormat.HTML and ext == ".html": - fmt_name, fmt_ext = name, ext - break - - return noteflow_pb2.ExportTranscriptResponse( - content=content, - format_name=fmt_name, - file_extension=fmt_ext, - ) - except ValueError as e: - await context.abort( - grpc.StatusCode.NOT_FOUND, - str(e), - ) - - def _proto_to_export_format(self, proto_format: int) -> ExportFormat: - """Convert protobuf ExportFormat to domain ExportFormat.""" - if proto_format == noteflow_pb2.EXPORT_FORMAT_HTML: - return ExportFormat.HTML - return ExportFormat.MARKDOWN # Default to Markdown diff --git a/src/noteflow/infrastructure/asr/engine.py b/src/noteflow/infrastructure/asr/engine.py index 9d8b8e5..23bb375 100644 --- a/src/noteflow/infrastructure/asr/engine.py +++ b/src/noteflow/infrastructure/asr/engine.py @@ -5,8 +5,10 @@ Provides Whisper-based transcription with word-level timestamps. 
 from __future__ import annotations
 
+import asyncio
 import logging
 from collections.abc import Iterator
+from functools import partial
 from typing import TYPE_CHECKING, Final
 
 if TYPE_CHECKING:
@@ -151,6 +153,28 @@ class FasterWhisperEngine:
             no_speech_prob=segment.no_speech_prob,
         )
 
+    async def transcribe_async(
+        self,
+        audio: NDArray[np.float32],
+        language: str | None = None,
+    ) -> list[AsrResult]:
+        """Transcribe audio asynchronously using executor.
+
+        Offloads blocking transcription to a thread pool executor to avoid
+        blocking the asyncio event loop.
+
+        Args:
+            audio: Audio samples as float32 array (16kHz mono, normalized).
+            language: Optional language code (e.g., "en").
+
+        Returns:
+            List of AsrResult segments with word-level timestamps.
+        """
+        loop = asyncio.get_running_loop()
+        # Bind args eagerly with partial; the lambda just materializes the iterator.
+        transcribe = partial(self.transcribe, audio, language)
+        return await loop.run_in_executor(None, lambda: list(transcribe()))
+
     @property
     def is_loaded(self) -> bool:
         """Return True if model is loaded."""
diff --git a/src/noteflow/infrastructure/asr/segmenter.py b/src/noteflow/infrastructure/asr/segmenter.py
index eb59f1c..69b1476 100644
--- a/src/noteflow/infrastructure/asr/segmenter.py
+++ b/src/noteflow/infrastructure/asr/segmenter.py
@@ -12,6 +12,8 @@ from typing import TYPE_CHECKING
 import numpy as np
 from numpy.typing import NDArray
 
+from noteflow.config.constants import DEFAULT_SAMPLE_RATE
+
 if TYPE_CHECKING:
     from collections.abc import Iterator
 
@@ -37,7 +39,7 @@ class SegmenterConfig:
     # Leading audio to include before speech starts (seconds)
     leading_buffer: float = 0.2
     # Sample rate for audio processing
-    sample_rate: int = 16000
+    sample_rate: int = DEFAULT_SAMPLE_RATE
 
 
 @dataclass
diff --git a/src/noteflow/infrastructure/asr/streaming_vad.py b/src/noteflow/infrastructure/asr/streaming_vad.py
index e51a8c8..7981489 100644
--- a/src/noteflow/infrastructure/asr/streaming_vad.py
+++ b/src/noteflow/infrastructure/asr/streaming_vad.py
@@ -8,6 +8,7 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Protocol
 
+from noteflow.config.constants import DEFAULT_SAMPLE_RATE
 from noteflow.infrastructure.audio import compute_rms
 
 if TYPE_CHECKING:
@@ -111,7 +112,7 @@ class StreamingVad:
     """
 
     engine: VadEngine = field(default_factory=EnergyVad)
-    sample_rate: int = 16000
+    sample_rate: int = DEFAULT_SAMPLE_RATE
 
     def process_chunk(self, audio: NDArray[np.float32]) -> bool:
         """Process audio chunk through VAD engine.
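The `transcribe_async` method added to `FasterWhisperEngine` above offloads the blocking Whisper call to the default thread-pool executor. A minimal caller sketch, assuming an engine that has already been constructed and loaded elsewhere (the function name and the `language="en"` choice are illustrative, not part of the diff):

```python
import numpy as np
from numpy.typing import NDArray

from noteflow.infrastructure.asr.engine import FasterWhisperEngine


async def transcribe_segment(
    engine: FasterWhisperEngine,
    audio: NDArray[np.float32],
) -> str:
    """Join segment texts without blocking the event loop."""
    if not engine.is_loaded:
        return ""
    # Inference runs in a worker thread; the event loop stays free to
    # service other gRPC streams while we await the result.
    results = await engine.transcribe_async(audio, language="en")
    return " ".join(result.text for result in results)
```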
diff --git a/src/noteflow/infrastructure/audio/capture.py b/src/noteflow/infrastructure/audio/capture.py index 8a2406e..cae518a 100644 --- a/src/noteflow/infrastructure/audio/capture.py +++ b/src/noteflow/infrastructure/audio/capture.py @@ -12,6 +12,7 @@ from typing import TYPE_CHECKING import numpy as np import sounddevice as sd +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.infrastructure.audio.dto import AudioDeviceInfo, AudioFrameCallback if TYPE_CHECKING: @@ -32,7 +33,7 @@ class SoundDeviceCapture: self._stream: sd.InputStream | None = None self._callback: AudioFrameCallback | None = None self._device_id: int | None = None - self._sample_rate: int = 16000 + self._sample_rate: int = DEFAULT_SAMPLE_RATE self._channels: int = 1 def list_devices(self) -> list[AudioDeviceInfo]: @@ -79,7 +80,7 @@ class SoundDeviceCapture: self, device_id: int | None, on_frames: AudioFrameCallback, - sample_rate: int = 16000, + sample_rate: int = DEFAULT_SAMPLE_RATE, channels: int = 1, chunk_duration_ms: int = 100, ) -> None: diff --git a/src/noteflow/infrastructure/audio/playback.py b/src/noteflow/infrastructure/audio/playback.py index e168a37..b86c069 100644 --- a/src/noteflow/infrastructure/audio/playback.py +++ b/src/noteflow/infrastructure/audio/playback.py @@ -7,6 +7,7 @@ from __future__ import annotations import logging import threading +from collections.abc import Callable from enum import Enum, auto from typing import TYPE_CHECKING @@ -14,6 +15,8 @@ import numpy as np import sounddevice as sd from numpy.typing import NDArray +from noteflow.config.constants import DEFAULT_SAMPLE_RATE, POSITION_UPDATE_INTERVAL + if TYPE_CHECKING: from noteflow.infrastructure.audio.dto import TimestampedAudio @@ -35,16 +38,29 @@ class SoundDevicePlayback: Thread-safe for UI callbacks. """ - def __init__(self, sample_rate: int = 16000, channels: int = 1) -> None: + def __init__( + self, + sample_rate: int = DEFAULT_SAMPLE_RATE, + channels: int = 1, + on_position_update: Callable[[float], None] | None = None, + ) -> None: """Initialize the playback instance. Args: sample_rate: Sample rate in Hz (default 16kHz for ASR audio). channels: Number of channels (default 1 for mono). + on_position_update: Optional callback for position updates during playback. + Called at ~100ms intervals with current position in seconds. + Runs in the audio thread, so keep the callback minimal. """ self._sample_rate = sample_rate self._channels = channels + # Position update callbacks (can have multiple subscribers) + self._position_callbacks: list[Callable[[float], None]] = [] + if on_position_update is not None: + self._position_callbacks.append(on_position_update) + # Playback state self._state = PlaybackState.STOPPED self._lock = threading.Lock() @@ -54,6 +70,10 @@ class SoundDevicePlayback: self._total_samples: int = 0 self._current_sample: int = 0 + # Position callback tracking + self._callback_interval_samples = int(sample_rate * POSITION_UPDATE_INTERVAL) + self._last_callback_sample: int = 0 + # Stream self._stream: sd.OutputStream | None = None @@ -76,6 +96,7 @@ class SoundDevicePlayback: self._audio_data = np.concatenate(frames).astype(np.float32) self._total_samples = len(self._audio_data) self._current_sample = 0 + self._last_callback_sample = 0 # Create and start stream self._start_stream() @@ -114,9 +135,15 @@ class SoundDevicePlayback: Safe to call even if not playing. 
""" + position = 0.0 with self._lock: + if self._audio_data is not None: + position = self._current_sample / self._sample_rate self._stop_internal() + # Notify callbacks so UI can react to stop even if no final tick fired. + self._notify_position_callbacks(position) + def _stop_internal(self) -> None: """Internal stop without lock (caller must hold lock).""" if self._stream is not None: @@ -132,6 +159,7 @@ class SoundDevicePlayback: self._current_sample = 0 self._audio_data = None self._total_samples = 0 + self._last_callback_sample = 0 logger.debug("Stopped playback") def _start_stream(self) -> None: @@ -149,6 +177,9 @@ class SoundDevicePlayback: if status: logger.warning("Playback stream status: %s", status) + fire_callback = False + position = 0.0 + with self._lock: if self._audio_data is None or self._state != PlaybackState.PLAYING: # Output silence @@ -170,11 +201,22 @@ class SoundDevicePlayback: if to_copy < frames: outdata[to_copy:] = 0 + # Check if we should fire position update callback + elapsed = self._current_sample - self._last_callback_sample + if elapsed >= self._callback_interval_samples: + fire_callback = True + position = self._current_sample / self._sample_rate + self._last_callback_sample = self._current_sample + # Check if playback is complete if self._current_sample >= self._total_samples: # Schedule stop on another thread to avoid deadlock threading.Thread(target=self._on_playback_complete, daemon=True).start() + # Fire callbacks outside lock to avoid potential deadlocks + if fire_callback: + self._notify_position_callbacks(position) + try: self._stream = sd.OutputStream( channels=self._channels, @@ -214,13 +256,19 @@ class SoundDevicePlayback: # Convert to sample position self._current_sample = int(clamped_position * self._sample_rate) + # Reset callback sample so updates resume immediately after seek + self._last_callback_sample = self._current_sample logger.debug( "Seeked to %.2f seconds (sample %d)", clamped_position, self._current_sample, ) - return True + position_seconds = clamped_position + + # Notify callbacks to update UI/highlights immediately after seek + self._notify_position_callbacks(position_seconds) + return True def is_playing(self) -> bool: """Check if currently playing audio. @@ -258,3 +306,58 @@ class SoundDevicePlayback: def channels(self) -> int: """Number of channels.""" return self._channels + + def add_position_callback( + self, + callback: Callable[[float], None], + ) -> None: + """Add a position update callback. + + Multiple callbacks can be registered. Each receives the current + position in seconds during playback. + + Args: + callback: Callback receiving current position in seconds. + """ + if callback not in self._position_callbacks: + self._position_callbacks.append(callback) + + def _notify_position_callbacks(self, position: float) -> None: + """Notify all registered position callbacks. + + Runs without holding the playback lock to avoid deadlocks. + """ + for callback in list(self._position_callbacks): + try: + callback(position) + except Exception as e: + logger.debug("Position update callback error: %s", e) + + def remove_position_callback( + self, + callback: Callable[[float], None], + ) -> None: + """Remove a position update callback. + + Args: + callback: Previously registered callback to remove. 
+ """ + if callback in self._position_callbacks: + self._position_callbacks.remove(callback) + + def set_position_callback( + self, + callback: Callable[[float], None] | None, + ) -> None: + """Set or clear the position update callback (replaces all callbacks). + + For backwards compatibility. Use add_position_callback/remove_position_callback + for multiple subscribers. + + Args: + callback: Callback receiving current position in seconds, + or None to clear all callbacks. + """ + self._position_callbacks.clear() + if callback is not None: + self._position_callbacks.append(callback) diff --git a/src/noteflow/infrastructure/audio/protocols.py b/src/noteflow/infrastructure/audio/protocols.py index 9e266ae..7724991 100644 --- a/src/noteflow/infrastructure/audio/protocols.py +++ b/src/noteflow/infrastructure/audio/protocols.py @@ -7,6 +7,8 @@ from __future__ import annotations from typing import TYPE_CHECKING, Protocol +from noteflow.config.constants import DEFAULT_SAMPLE_RATE + if TYPE_CHECKING: import numpy as np from numpy.typing import NDArray @@ -37,7 +39,7 @@ class AudioCapture(Protocol): self, device_id: int | None, on_frames: AudioFrameCallback, - sample_rate: int = 16000, + sample_rate: int = DEFAULT_SAMPLE_RATE, channels: int = 1, chunk_duration_ms: int = 100, ) -> None: diff --git a/src/noteflow/infrastructure/audio/reader.py b/src/noteflow/infrastructure/audio/reader.py index 1d2794f..5f5f7d8 100644 --- a/src/noteflow/infrastructure/audio/reader.py +++ b/src/noteflow/infrastructure/audio/reader.py @@ -13,6 +13,7 @@ from typing import TYPE_CHECKING import numpy as np +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.infrastructure.audio.dto import TimestampedAudio from noteflow.infrastructure.security.crypto import ChunkedAssetReader @@ -48,7 +49,7 @@ class MeetingAudioReader: self._crypto = crypto self._meetings_dir = meetings_dir self._meeting_dir: Path | None = None - self._sample_rate: int = 16000 + self._sample_rate: int = DEFAULT_SAMPLE_RATE def load_meeting_audio( self, @@ -77,7 +78,7 @@ class MeetingAudioReader: raise FileNotFoundError(f"Manifest not found: {manifest_path}") manifest = json.loads(manifest_path.read_text()) - self._sample_rate = manifest.get("sample_rate", 16000) + self._sample_rate = manifest.get("sample_rate", DEFAULT_SAMPLE_RATE) wrapped_dek_hex = manifest.get("wrapped_dek") if not wrapped_dek_hex: diff --git a/src/noteflow/infrastructure/audio/writer.py b/src/noteflow/infrastructure/audio/writer.py index 051d89b..f1f151c 100644 --- a/src/noteflow/infrastructure/audio/writer.py +++ b/src/noteflow/infrastructure/audio/writer.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING import numpy as np +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.infrastructure.security.crypto import ChunkedAssetWriter if TYPE_CHECKING: @@ -47,7 +48,7 @@ class MeetingAudioWriter: self._meetings_dir = meetings_dir self._asset_writer: ChunkedAssetWriter | None = None self._meeting_dir: Path | None = None - self._sample_rate: int = 16000 + self._sample_rate: int = DEFAULT_SAMPLE_RATE self._chunk_count: int = 0 def open( @@ -55,7 +56,7 @@ class MeetingAudioWriter: meeting_id: str, dek: bytes, wrapped_dek: bytes, - sample_rate: int = 16000, + sample_rate: int = DEFAULT_SAMPLE_RATE, ) -> None: """Open meeting for audio writing. 
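Taken together, the playback hunks above turn `SoundDevicePlayback` into a push-based position source: registered callbacks fire from the audio callback thread roughly every `POSITION_UPDATE_INTERVAL` seconds, plus once on stop and after a seek. A subscription sketch under those assumptions (the queue wiring and UI-tick comments are illustrative, not part of the diff):

```python
import queue

from noteflow.infrastructure.audio.playback import SoundDevicePlayback

# Callbacks run on the audio thread, so do the bare minimum there:
# enqueue the position and let the UI thread drain at its own pace.
positions: queue.Queue[float] = queue.Queue()

player = SoundDevicePlayback(sample_rate=16_000, channels=1)
player.add_position_callback(positions.put_nowait)

# On each UI tick, drain whatever has accumulated:
#     while not positions.empty():
#         seconds = positions.get_nowait()  # drive slider/highlight updates
# When the view closes:
#     player.remove_position_callback(positions.put_nowait)
```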
diff --git a/src/noteflow/infrastructure/diarization/engine.py b/src/noteflow/infrastructure/diarization/engine.py index 798905a..95858d8 100644 --- a/src/noteflow/infrastructure/diarization/engine.py +++ b/src/noteflow/infrastructure/diarization/engine.py @@ -11,6 +11,7 @@ from __future__ import annotations import logging from typing import TYPE_CHECKING +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.infrastructure.diarization.dto import SpeakerTurn if TYPE_CHECKING: @@ -170,7 +171,7 @@ class DiarizationEngine: def process_chunk( self, audio: NDArray[np.float32], - sample_rate: int = 16000, + sample_rate: int = DEFAULT_SAMPLE_RATE, ) -> Sequence[SpeakerTurn]: """Process an audio chunk for streaming diarization. @@ -212,7 +213,7 @@ class DiarizationEngine: def diarize_full( self, audio: NDArray[np.float32], - sample_rate: int = 16000, + sample_rate: int = DEFAULT_SAMPLE_RATE, num_speakers: int | None = None, ) -> Sequence[SpeakerTurn]: """Diarize a complete audio recording. diff --git a/src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py b/src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py index f57c8fe..d2729f4 100644 --- a/src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py +++ b/src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py @@ -28,7 +28,14 @@ def upgrade() -> None: op.execute("CREATE SCHEMA IF NOT EXISTS noteflow") # Enable pgvector extension - op.execute("CREATE EXTENSION IF NOT EXISTS vector") + try: + op.execute("CREATE EXTENSION IF NOT EXISTS vector") + except sa.exc.ProgrammingError as e: + raise RuntimeError( + f"Failed to create pgvector extension: {e}. " + "Ensure the database user has CREATE EXTENSION privileges, or " + "install pgvector manually: CREATE EXTENSION vector;" + ) from e # Create meetings table op.create_table( diff --git a/src/noteflow/infrastructure/security/keystore.py b/src/noteflow/infrastructure/security/keystore.py index f53e100..9a1bbbd 100644 --- a/src/noteflow/infrastructure/security/keystore.py +++ b/src/noteflow/infrastructure/security/keystore.py @@ -6,7 +6,9 @@ Provides secure master key storage using OS credential stores. from __future__ import annotations import base64 +import binascii import logging +import os import secrets from typing import Final @@ -18,6 +20,7 @@ logger = logging.getLogger(__name__) KEY_SIZE: Final[int] = 32 # 256-bit key SERVICE_NAME: Final[str] = "noteflow" KEY_NAME: Final[str] = "master_key" +ENV_VAR_NAME: Final[str] = "NOTEFLOW_MASTER_KEY" class KeyringKeyStore: @@ -46,17 +49,35 @@ class KeyringKeyStore: def get_or_create_master_key(self) -> bytes: """Retrieve or generate the master encryption key. + Checks for an environment variable first (for headless/container deployments), + then falls back to the OS keyring. + Returns: 32-byte master key. Raises: - RuntimeError: If keychain is unavailable. + RuntimeError: If keychain is unavailable and no env var is set. 
""" + # Check environment variable first (for headless/container deployments) + if env_key := os.environ.get(ENV_VAR_NAME): + logger.debug("Using master key from environment variable") + try: + decoded = base64.b64decode(env_key, validate=True) + except (binascii.Error, ValueError) as exc: + raise RuntimeError( + f"{ENV_VAR_NAME} must be base64-encoded {KEY_SIZE}-byte key" + ) from exc + if len(decoded) != KEY_SIZE: + raise RuntimeError( + f"{ENV_VAR_NAME} must decode to {KEY_SIZE} bytes, got {len(decoded)}" + ) + return decoded + try: - # Try to retrieve existing key + # Try to retrieve existing key from keyring stored = keyring.get_password(self._service_name, self._key_name) if stored is not None: - logger.debug("Retrieved existing master key") + logger.debug("Retrieved existing master key from keyring") return base64.b64decode(stored) # Generate new key @@ -65,11 +86,14 @@ class KeyringKeyStore: # Store in keyring keyring.set_password(self._service_name, self._key_name, encoded) - logger.info("Generated and stored new master key") + logger.info("Generated and stored new master key in keyring") return new_key except keyring.errors.KeyringError as e: - raise RuntimeError(f"Keyring unavailable: {e}") from e + raise RuntimeError( + f"Keyring unavailable: {e}. " + f"Set {ENV_VAR_NAME} environment variable for headless mode." + ) from e def delete_master_key(self) -> None: """Delete the master key from the keychain. diff --git a/src/noteflow/infrastructure/summarization/ollama_provider.py b/src/noteflow/infrastructure/summarization/ollama_provider.py index e07e604..0cd88e0 100644 --- a/src/noteflow/infrastructure/summarization/ollama_provider.py +++ b/src/noteflow/infrastructure/summarization/ollama_provider.py @@ -77,7 +77,7 @@ class OllamaSummarizer: # Try to list models to verify connectivity client.list() return True - except Exception: + except (ConnectionError, TimeoutError, RuntimeError, OSError): return False @property diff --git a/src/noteflow/infrastructure/triggers/__init__.py b/src/noteflow/infrastructure/triggers/__init__.py index 8f4cc6d..515a9ef 100644 --- a/src/noteflow/infrastructure/triggers/__init__.py +++ b/src/noteflow/infrastructure/triggers/__init__.py @@ -3,18 +3,24 @@ Provide signal providers for meeting detection triggers. """ +from noteflow.infrastructure.triggers.app_audio import AppAudioProvider, AppAudioSettings from noteflow.infrastructure.triggers.audio_activity import ( AudioActivityProvider, AudioActivitySettings, ) +from noteflow.infrastructure.triggers.calendar import CalendarProvider, CalendarSettings from noteflow.infrastructure.triggers.foreground_app import ( ForegroundAppProvider, ForegroundAppSettings, ) __all__ = [ + "AppAudioProvider", + "AppAudioSettings", "AudioActivityProvider", "AudioActivitySettings", + "CalendarProvider", + "CalendarSettings", "ForegroundAppProvider", "ForegroundAppSettings", ] diff --git a/src/noteflow/infrastructure/triggers/app_audio.py b/src/noteflow/infrastructure/triggers/app_audio.py new file mode 100644 index 0000000..1c94483 --- /dev/null +++ b/src/noteflow/infrastructure/triggers/app_audio.py @@ -0,0 +1,280 @@ +"""App audio activity provider. + +Detects audio activity from system output while whitelisted meeting apps are active. +This is a best-effort heuristic: it combines (a) system output activity and +(b) presence of whitelisted app windows to infer a likely meeting. 
+""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +from noteflow.config.constants import DEFAULT_SAMPLE_RATE +from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource +from noteflow.infrastructure.audio.levels import RmsLevelProvider +from noteflow.infrastructure.triggers.audio_activity import ( + AudioActivityProvider, + AudioActivitySettings, +) + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + +logger = logging.getLogger(__name__) + + +@dataclass +class AppAudioSettings: + """Configuration for app audio detection. + + Attributes: + enabled: Whether app audio detection is enabled. + threshold_db: Minimum dB level to consider as activity. + window_seconds: Time window for sustained activity detection. + min_active_ratio: Minimum ratio of active samples in window. + min_samples: Minimum samples required before evaluation. + max_history: Maximum samples retained in history. + weight: Confidence weight contributed by this provider. + meeting_apps: Set of app name substrings to match (lowercase). + suppressed_apps: App substrings to ignore even if matched. + sample_rate: Sample rate for system output capture. + sample_duration_seconds: Duration of each sampling read. + chunk_duration_seconds: Duration of sub-chunks for activity history updates. + """ + + enabled: bool + threshold_db: float + window_seconds: float + min_active_ratio: float + min_samples: int + max_history: int + weight: float + meeting_apps: set[str] = field(default_factory=set) + suppressed_apps: set[str] = field(default_factory=set) + sample_rate: int = DEFAULT_SAMPLE_RATE + sample_duration_seconds: float = 0.5 + chunk_duration_seconds: float = 0.1 + + def __post_init__(self) -> None: + self.meeting_apps = {app.lower() for app in self.meeting_apps} + self.suppressed_apps = {app.lower() for app in self.suppressed_apps} + + +class _SystemOutputSampler: + """Best-effort system output sampler using sounddevice.""" + + def __init__(self, sample_rate: int, channels: int = 1) -> None: + self._sample_rate = sample_rate + self._channels = channels + self._stream = None + self._extra_settings = None + self._device = None + self._available: bool | None = None + + def _select_device(self) -> None: + try: + import sounddevice as sd + except ImportError: + return self._extracted_from__select_device_5( + "sounddevice not available - app audio detection disabled" + ) + # Default to output device and WASAPI loopback when available (Windows) + try: + default_output = sd.default.device[1] + except (TypeError, IndexError): + default_output = None + + try: + hostapi_index = sd.default.hostapi + hostapi = sd.query_hostapis(hostapi_index) if hostapi_index is not None else None + except Exception: + hostapi = None + + if hostapi and hostapi.get("type") == "Windows WASAPI" and default_output is not None: + # On WASAPI, loopback devices appear as separate input devices + # Fall through to monitor/loopback device detection below + pass + + # Fallback: look for monitor/loopback devices (Linux/PulseAudio) + try: + devices = sd.query_devices() + except Exception: + return self._extracted_from__select_device_5( + "Failed to query audio devices for app audio detection" + ) + for idx, dev in enumerate(devices): + name = str(dev.get("name", "")).lower() + if int(dev.get("max_input_channels", 0)) <= 0: + continue + if "monitor" in name or "loopback" in name: + return self._extracted_from__select_device_24(idx) + 
self._available = False + logger.warning("No loopback audio device found - app audio detection disabled") + + # TODO Rename this here and in `_select_device` + def _extracted_from__select_device_24(self, arg0): + self._device = arg0 + self._available = True + return + + # TODO Rename this here and in `_select_device` + def _extracted_from__select_device_5(self, arg0): + self._available = False + logger.warning(arg0) + return + + def _ensure_stream(self) -> bool: + if self._available is False: + return False + + if self._available is None: + self._select_device() + if self._available is False: + return False + + if self._stream is not None: + return True + + try: + import sounddevice as sd + + self._stream = sd.InputStream( + device=self._device, + channels=self._channels, + samplerate=self._sample_rate, + dtype="float32", + extra_settings=self._extra_settings, + ) + self._stream.start() + return True + except Exception as exc: + logger.warning("Failed to start system output capture: %s", exc) + self._stream = None + self._available = False + return False + + def read_frames(self, duration_seconds: float) -> NDArray[np.float32] | None: + if not self._ensure_stream(): + return None + + if self._stream is None: + return None + + frames = max(1, int(self._sample_rate * duration_seconds)) + try: + data, _ = self._stream.read(frames) + except Exception as exc: + logger.debug("System output read failed: %s", exc) + return None + + return data.reshape(-1).astype("float32") + + def close(self) -> None: + if self._stream is None: + return + try: + self._stream.stop() + self._stream.close() + except Exception: + logger.debug("Failed to close system output stream", exc_info=True) + finally: + self._stream = None + + +class AppAudioProvider: + """Detect app audio activity from whitelisted meeting apps.""" + + def __init__(self, settings: AppAudioSettings) -> None: + self._settings = settings + self._sampler = _SystemOutputSampler(sample_rate=settings.sample_rate) + self._level_provider = RmsLevelProvider() + self._audio_activity = AudioActivityProvider( + self._level_provider, + AudioActivitySettings( + enabled=settings.enabled, + threshold_db=settings.threshold_db, + window_seconds=settings.window_seconds, + min_active_ratio=settings.min_active_ratio, + min_samples=settings.min_samples, + max_history=settings.max_history, + weight=settings.weight, + ), + ) + + @property + def source(self) -> TriggerSource: + return TriggerSource.AUDIO_ACTIVITY + + @property + def max_weight(self) -> float: + return self._settings.weight + + def is_enabled(self) -> bool: + return self._settings.enabled + + def get_signal(self) -> TriggerSignal | None: + if not self.is_enabled(): + return None + if not self._settings.meeting_apps: + return None + + app_title = self._detect_meeting_app() + if not app_title: + return None + + frames = self._sampler.read_frames(self._settings.sample_duration_seconds) + if frames is None or frames.size == 0: + return None + + self._update_activity_history(frames) + if self._audio_activity.get_signal() is None: + return None + + return TriggerSignal( + source=self.source, + weight=self.max_weight, + app_name=app_title, + ) + + def _update_activity_history(self, frames: NDArray[np.float32]) -> None: + chunk_size = max(1, int(self._settings.sample_rate * self._settings.chunk_duration_seconds)) + now = time.monotonic() + for offset in range(0, len(frames), chunk_size): + chunk = frames[offset : offset + chunk_size] + if chunk.size == 0: + continue + self._audio_activity.update(chunk, now) + + 
def _detect_meeting_app(self) -> str | None: + try: + import pywinctl + except ImportError: + return None + + titles: list[str] = [] + try: + if hasattr(pywinctl, "getAllWindows"): + windows = pywinctl.getAllWindows() + titles = [w.title for w in windows if getattr(w, "title", None)] + elif hasattr(pywinctl, "getAllTitles"): + titles = [t for t in pywinctl.getAllTitles() if t] + except Exception as exc: + logger.debug("Failed to list windows for app detection: %s", exc) + return None + + for title in titles: + title_lower = title.lower() + if any(suppressed in title_lower for suppressed in self._settings.suppressed_apps): + continue + if any(app in title_lower for app in self._settings.meeting_apps): + return title + + return None + + def close(self) -> None: + """Release system audio resources.""" + self._sampler.close() diff --git a/src/noteflow/infrastructure/triggers/calendar.py b/src/noteflow/infrastructure/triggers/calendar.py new file mode 100644 index 0000000..8554133 --- /dev/null +++ b/src/noteflow/infrastructure/triggers/calendar.py @@ -0,0 +1,150 @@ +"""Calendar trigger provider. + +Best-effort calendar integration using configured event windows. +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import TYPE_CHECKING + +from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource + +if TYPE_CHECKING: + from collections.abc import Iterable + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class CalendarEvent: + """Simple calendar event window.""" + + start: datetime + end: datetime + title: str | None = None + + +@dataclass +class CalendarSettings: + """Configuration for calendar trigger detection.""" + + enabled: bool + weight: float + lookahead_minutes: int + lookbehind_minutes: int + events: list[CalendarEvent] + + +class CalendarProvider: + """Provide trigger signal based on calendar proximity.""" + + def __init__(self, settings: CalendarSettings) -> None: + self._settings = settings + + @property + def source(self) -> TriggerSource: + return TriggerSource.CALENDAR + + @property + def max_weight(self) -> float: + return self._settings.weight + + def is_enabled(self) -> bool: + return self._settings.enabled + + def get_signal(self) -> TriggerSignal | None: + if not self.is_enabled(): + return None + + if not self._settings.events: + return None + + now = datetime.now(timezone.utc) + window_start = now - timedelta(minutes=self._settings.lookbehind_minutes) + window_end = now + timedelta(minutes=self._settings.lookahead_minutes) + + return next( + ( + TriggerSignal( + source=self.source, + weight=self.max_weight, + app_name=event.title, + ) + for event in self._settings.events + if self._event_overlaps_window(event, window_start, window_end) + ), + None, + ) + + @staticmethod + def _event_overlaps_window( + event: CalendarEvent, + window_start: datetime, + window_end: datetime, + ) -> bool: + event_start = _ensure_tz(event.start) + event_end = _ensure_tz(event.end) + return event_start <= window_end and event_end >= window_start + + +def parse_calendar_events(raw_events: object) -> list[CalendarEvent]: + """Parse calendar events from config/env payloads.""" + if raw_events is None: + return [] + + if isinstance(raw_events, str): + raw_events = _load_events_from_json(raw_events) + + if isinstance(raw_events, dict): + raw_events = [raw_events] + + if not isinstance(raw_events, Iterable): + return [] + + events: 
list[CalendarEvent] = [] + for item in raw_events: + if isinstance(item, CalendarEvent): + events.append(item) + continue + if isinstance(item, dict): + start = _parse_datetime(item.get("start")) + end = _parse_datetime(item.get("end")) + if start and end: + events.append(CalendarEvent(start=start, end=end, title=item.get("title"))) + return events + + +def _load_events_from_json(raw: str) -> list[dict[str, object]]: + try: + parsed = json.loads(raw) + except json.JSONDecodeError: + logger.debug("Failed to parse calendar events JSON") + return [] + if isinstance(parsed, list): + return [item for item in parsed if isinstance(item, dict)] + return [parsed] if isinstance(parsed, dict) else [] + + +def _parse_datetime(value: object) -> datetime | None: + if isinstance(value, datetime): + return value + if not isinstance(value, str) or not value: + return None + cleaned = value.strip() + if cleaned.endswith("Z"): + cleaned = f"{cleaned[:-1]}+00:00" + try: + return datetime.fromisoformat(cleaned) + except ValueError: + return None + + +def _ensure_tz(value: datetime) -> datetime: + if value.tzinfo is None: + return value.replace(tzinfo=timezone.utc) + return value.astimezone(timezone.utc) + diff --git a/tests/application/test_retention_service.py b/tests/application/test_retention_service.py index 2f8a6e2..68fcdd4 100644 --- a/tests/application/test_retention_service.py +++ b/tests/application/test_retention_service.py @@ -26,7 +26,10 @@ class TestRetentionServiceProperties: def test_is_enabled_reflects_init(self) -> None: """is_enabled should reflect constructor parameter.""" uow = MagicMock() - factory = lambda: uow + + def factory() -> MagicMock: + return uow + enabled_service = RetentionService(factory, retention_days=30, enabled=True) disabled_service = RetentionService(factory, retention_days=30, enabled=False) diff --git a/tests/conftest.py b/tests/conftest.py index 3a9f773..f6db095 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,11 +72,25 @@ def mock_optional_extras() -> None: ) sys.modules["ollama"] = ollama_module + # pywinctl depends on pymonctl, which may fail in headless environments + # Mock both if not already present + if "pymonctl" not in sys.modules: + try: + import pymonctl as _pymonctl # noqa: F401 + except Exception: + # Mock pymonctl for headless environments (Xlib.error.DisplayNameError, etc.) 
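+            # Only getAllMonitors is stubbed; that is assumed to be the sole
+            # pymonctl attribute touched when pywinctl is imported headless.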
+ pymonctl_module = types.ModuleType("pymonctl") + pymonctl_module.getAllMonitors = lambda: [] + sys.modules["pymonctl"] = pymonctl_module + if "pywinctl" not in sys.modules: try: import pywinctl as _pywinctl # noqa: F401 except Exception: - # In headless environments pywinctl import may fail (e.g., missing DISPLAY) + # ImportError: package not installed + # OSError/Xlib errors: pywinctl may fail in headless environments pywinctl_module = types.ModuleType("pywinctl") pywinctl_module.getActiveWindow = lambda: None + pywinctl_module.getAllWindows = lambda: [] + pywinctl_module.getAllTitles = lambda: [] sys.modules["pywinctl"] = pywinctl_module diff --git a/tests/grpc/test_diarization_refine.py b/tests/grpc/test_diarization_refine.py new file mode 100644 index 0000000..dff34af --- /dev/null +++ b/tests/grpc/test_diarization_refine.py @@ -0,0 +1,35 @@ +"""Tests for RefineSpeakerDiarization RPC guards.""" + +from __future__ import annotations + +import pytest + +from noteflow.grpc.proto import noteflow_pb2 +from noteflow.grpc.service import NoteFlowServicer + + +class _DummyContext: + """Minimal gRPC context that raises if abort is invoked.""" + + async def abort(self, code, details): # type: ignore[override] + raise AssertionError(f"abort called: {code} - {details}") + + +@pytest.mark.asyncio +async def test_refine_speaker_diarization_rejects_active_meeting() -> None: + """Refinement should be blocked while a meeting is still recording.""" + servicer = NoteFlowServicer(diarization_engine=object()) + store = servicer._get_memory_store() + + meeting = store.create("Active meeting") + meeting.start_recording() + store.update(meeting) + + response = await servicer.RefineSpeakerDiarization( + noteflow_pb2.RefineSpeakerDiarizationRequest(meeting_id=str(meeting.id)), + _DummyContext(), + ) + + assert response.segments_updated == 0 + assert response.error_message + assert "stopped" in response.error_message.lower() diff --git a/tests/grpc/test_partial_transcription.py b/tests/grpc/test_partial_transcription.py index 2b108d3..3e518f0 100644 --- a/tests/grpc/test_partial_transcription.py +++ b/tests/grpc/test_partial_transcription.py @@ -37,7 +37,14 @@ def _create_mock_asr_engine(transcribe_results: list[str] | None = None) -> Magi def _transcribe(_audio: NDArray[np.float32]) -> list[MockAsrResult]: return [MockAsrResult(text=text) for text in results] + async def _transcribe_async( + _audio: NDArray[np.float32], + _language: str | None = None, + ) -> list[MockAsrResult]: + return [MockAsrResult(text=text) for text in results] + engine.transcribe = _transcribe + engine.transcribe_async = _transcribe_async return engine diff --git a/tests/infrastructure/test_diarization.py b/tests/infrastructure/test_diarization.py new file mode 100644 index 0000000..c7eb1ce --- /dev/null +++ b/tests/infrastructure/test_diarization.py @@ -0,0 +1,196 @@ +"""Tests for speaker diarization infrastructure. + +Tests the SpeakerTurn DTO and speaker assignment utilities. 
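+
+Illustrative example of the overlap semantics these tests assume: given turns
+SPEAKER_00 over 0.0-3.0s and SPEAKER_01 over 3.0-10.0s,
+
+    assign_speaker(0.0, 10.0, turns)  # -> ("SPEAKER_01", 0.7)
+
+i.e. the speaker with the most overlap wins, and confidence is that overlap
+divided by the segment duration.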
+""" + +from __future__ import annotations + +import pytest + +from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker, assign_speakers_batch + + +class TestSpeakerTurn: + """Tests for the SpeakerTurn dataclass.""" + + def test_create_valid_turn(self) -> None: + """Create a valid speaker turn.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0) + assert turn.speaker == "SPEAKER_00" + assert turn.start == 0.0 + assert turn.end == 5.0 + assert turn.confidence == 1.0 + + def test_create_turn_with_confidence(self) -> None: + """Create a turn with custom confidence.""" + turn = SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0, confidence=0.85) + assert turn.confidence == 0.85 + + def test_invalid_end_before_start_raises(self) -> None: + """End time before start time raises ValueError.""" + with pytest.raises(ValueError, match=r"end.*<.*start"): + SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=5.0) + + def test_invalid_confidence_negative_raises(self) -> None: + """Negative confidence raises ValueError.""" + with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"): + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=-0.1) + + def test_invalid_confidence_above_one_raises(self) -> None: + """Confidence above 1.0 raises ValueError.""" + with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"): + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=1.5) + + def test_duration_property(self) -> None: + """Duration property calculates correctly.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=2.5, end=7.5) + assert turn.duration == 5.0 + + def test_overlaps_returns_true_for_overlap(self) -> None: + """overlaps() returns True when ranges overlap.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlaps(3.0, 7.0) + assert turn.overlaps(7.0, 12.0) + assert turn.overlaps(5.0, 10.0) + assert turn.overlaps(0.0, 15.0) + + def test_overlaps_returns_false_for_no_overlap(self) -> None: + """overlaps() returns False when ranges don't overlap.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert not turn.overlaps(0.0, 5.0) + assert not turn.overlaps(10.0, 15.0) + assert not turn.overlaps(0.0, 3.0) + assert not turn.overlaps(12.0, 20.0) + + def test_overlap_duration_full_overlap(self) -> None: + """overlap_duration() for full overlap returns turn duration.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlap_duration(0.0, 15.0) == 5.0 + + def test_overlap_duration_partial_overlap_left(self) -> None: + """overlap_duration() for partial overlap on left side.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlap_duration(3.0, 7.0) == 2.0 + + def test_overlap_duration_partial_overlap_right(self) -> None: + """overlap_duration() for partial overlap on right side.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlap_duration(8.0, 15.0) == 2.0 + + def test_overlap_duration_contained(self) -> None: + """overlap_duration() when range is contained within turn.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=20.0) + assert turn.overlap_duration(5.0, 10.0) == 5.0 + + def test_overlap_duration_no_overlap(self) -> None: + """overlap_duration() returns 0.0 when no overlap.""" + turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) + assert turn.overlap_duration(0.0, 3.0) == 0.0 + assert turn.overlap_duration(12.0, 20.0) == 0.0 + + +class 
TestAssignSpeaker: + """Tests for the assign_speaker function.""" + + def test_empty_turns_returns_none(self) -> None: + """Empty turns list returns None with 0 confidence.""" + speaker, confidence = assign_speaker(0.0, 5.0, []) + assert speaker is None + assert confidence == 0.0 + + def test_zero_duration_segment_returns_none(self) -> None: + """Zero duration segment returns None.""" + turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] + speaker, confidence = assign_speaker(5.0, 5.0, turns) + assert speaker is None + assert confidence == 0.0 + + def test_single_turn_full_overlap(self) -> None: + """Single turn with full overlap returns high confidence.""" + turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] + speaker, confidence = assign_speaker(2.0, 8.0, turns) + assert speaker == "SPEAKER_00" + assert confidence == 1.0 + + def test_single_turn_partial_overlap(self) -> None: + """Single turn with partial overlap returns proportional confidence.""" + turns = [SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)] + speaker, confidence = assign_speaker(0.0, 10.0, turns) + assert speaker == "SPEAKER_00" + assert confidence == 0.5 + + def test_multiple_turns_chooses_dominant_speaker(self) -> None: + """Multiple turns chooses speaker with most overlap.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0), + SpeakerTurn(speaker="SPEAKER_01", start=3.0, end=10.0), + ] + speaker, confidence = assign_speaker(0.0, 10.0, turns) + assert speaker == "SPEAKER_01" + assert confidence == 0.7 + + def test_no_overlap_returns_none(self) -> None: + """No overlapping turns returns None.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), + SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0), + ] + speaker, confidence = assign_speaker(6.0, 9.0, turns) + assert speaker is None + assert confidence == 0.0 + + def test_equal_overlap_chooses_first_encountered(self) -> None: + """Equal overlap chooses first speaker encountered.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), + SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0), + ] + speaker, confidence = assign_speaker(3.0, 7.0, turns) + # SPEAKER_00: overlap 2.0, SPEAKER_01: overlap 2.0 + # First one wins since > not >= + assert speaker == "SPEAKER_00" + assert confidence == 0.5 + + +class TestAssignSpeakersBatch: + """Tests for the assign_speakers_batch function.""" + + def test_empty_segments(self) -> None: + """Empty segments list returns empty results.""" + turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] + results = assign_speakers_batch([], turns) + assert results == [] + + def test_empty_turns(self) -> None: + """Empty turns returns all None speakers.""" + segments = [(0.0, 5.0), (5.0, 10.0)] + results = assign_speakers_batch(segments, []) + assert len(results) == 2 + assert all(speaker is None for speaker, _ in results) + assert all(conf == 0.0 for _, conf in results) + + def test_batch_assignment(self) -> None: + """Batch assignment processes all segments.""" + turns = [ + SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), + SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0), + SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=15.0), + ] + segments = [(0.0, 5.0), (5.0, 10.0), (10.0, 15.0)] + results = assign_speakers_batch(segments, turns) + assert len(results) == 3 + assert results[0] == ("SPEAKER_00", 1.0) + assert results[1] == ("SPEAKER_01", 1.0) + assert results[2] == ("SPEAKER_00", 1.0) + + def test_batch_with_gaps(self) -> 
None:
+        """Batch assignment handles gaps between turns."""
+        turns = [
+            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0),
+            SpeakerTurn(speaker="SPEAKER_01", start=7.0, end=10.0),
+        ]
+        segments = [(0.0, 3.0), (3.0, 7.0), (7.0, 10.0)]
+        results = assign_speakers_batch(segments, turns)
+        assert results[0] == ("SPEAKER_00", 1.0)
+        assert results[1] == (None, 0.0)
+        assert results[2] == ("SPEAKER_01", 1.0)
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 2d56e96..6e84b94 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -17,6 +17,7 @@
 if TYPE_CHECKING:
     from noteflow.infrastructure.persistence.models import Base
 
+# Store container reference at module level to reuse
 class PgTestContainer:
     """Minimal Postgres testcontainer wrapper with custom readiness wait."""
diff --git a/tests/stress/__init__.py b/tests/stress/__init__.py
new file mode 100644
index 0000000..7358ee3
--- /dev/null
+++ b/tests/stress/__init__.py
@@ -0,0 +1,5 @@
+"""Stress and fuzz tests for NoteFlow.
+
+These tests detect race conditions, infrastructure defects, and logic bugs
+through concurrent execution, file corruption simulation, and state machine fuzzing.
+"""
diff --git a/tests/stress/conftest.py b/tests/stress/conftest.py
new file mode 100644
index 0000000..a3a5e7b
--- /dev/null
+++ b/tests/stress/conftest.py
@@ -0,0 +1,245 @@
+"""Pytest fixtures for stress and fuzz tests."""
+
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass
+from importlib import import_module
+from pathlib import Path
+from typing import TYPE_CHECKING
+from unittest.mock import MagicMock
+from urllib.parse import quote
+
+import numpy as np
+import pytest
+
+from noteflow.grpc.service import NoteFlowServicer
+from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
+from noteflow.infrastructure.security.keystore import InMemoryKeyStore
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncGenerator
+    from typing import Self
+
+    from numpy.typing import NDArray
+    from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+
+@dataclass
+class MockAsrResult:
+    """Mock ASR transcription result."""
+
+    text: str
+    start: float = 0.0
+    end: float = 1.0
+    language: str = "en"
+    language_probability: float = 0.99
+    avg_logprob: float = -0.5
+    no_speech_prob: float = 0.01
+
+
+# Store container reference at module level to reuse in stress tests
+class PgTestContainer:
+    """Minimal Postgres testcontainer wrapper with custom readiness wait."""
+
+    def __init__(
+        self,
+        image: str = "pgvector/pgvector:pg16",
+        username: str = "test",
+        password: str = "test",
+        dbname: str = "noteflow_test",
+        port: int = 5432,
+    ) -> None:
+        self.username = username
+        self.password = password
+        self.dbname = dbname
+        self.port = port
+
+        container_module = import_module("testcontainers.core.container")
+        docker_container_cls = container_module.DockerContainer
+        self._container = (
+            docker_container_cls(image)
+            .with_env("POSTGRES_USER", username)
+            .with_env("POSTGRES_PASSWORD", password)
+            .with_env("POSTGRES_DB", dbname)
+            .with_exposed_ports(port)
+        )
+
+    def start(self) -> Self:
+        """Start the container."""
+        self._container.start()
+        self._wait_until_ready()
+        return self
+
+    def stop(self) -> None:
+        """Stop the container."""
+        self._container.stop()
+
+    def get_connection_url(self) -> str:
+        """Return a SQLAlchemy-style connection URL."""
+        host = self._container.get_container_host_ip()
+        port = 
self._container._get_exposed_port(self.port) + quoted_password = quote(self.password, safe=" +") + return f"postgresql+psycopg2://{self.username}:{quoted_password}@{host}:{port}/{self.dbname}" + + def _wait_until_ready(self, timeout: float = 30.0, interval: float = 0.5) -> None: + """Wait for Postgres to accept connections by running a simple query.""" + start_time = time.time() + escaped_password = self.password.replace("'", "'\"'\"'") + cmd = [ + "sh", + "-c", + ( + f"PGPASSWORD='{escaped_password}' " + f"psql --username {self.username} --dbname {self.dbname} --host 127.0.0.1 " + "-c 'select 1;'" + ), + ] + last_error: str | None = None + + while True: + result = self._container.exec(cmd) + if result.exit_code == 0: + return + if result.output: + last_error = result.output.decode(errors="ignore") + if time.time() - start_time > timeout: + raise TimeoutError( + "Postgres container did not become ready in time" + + (f": {last_error}" if last_error else "") + ) + time.sleep(interval) + + +_container: PgTestContainer | None = None +_database_url: str | None = None + + +def get_or_create_container() -> tuple[PgTestContainer, str]: + """Get or create the PostgreSQL container for stress tests.""" + global _container, _database_url + + if _container is None: + container = PgTestContainer().start() + _container = container + url = container.get_connection_url() + _database_url = url.replace("postgresql+psycopg2://", "postgresql+asyncpg://") + + assert _container is not None, "Container should be initialized" + assert _database_url is not None, "Database URL should be initialized" + return _container, _database_url + + +def create_mock_asr_engine(transcribe_results: list[str] | None = None) -> MagicMock: + """Create mock ASR engine with configurable transcription results. + + Args: + transcribe_results: List of transcription texts to return. + + Returns: + Mock ASR engine with sync and async transcribe methods. + """ + engine = MagicMock() + engine.is_loaded = True + engine.model_size = "base" + + results = transcribe_results or ["Test transcription"] + + def _transcribe(_audio: NDArray[np.float32]) -> list[MockAsrResult]: + return [MockAsrResult(text=text) for text in results] + + async def _transcribe_async( + _audio: NDArray[np.float32], + _language: str | None = None, + ) -> list[MockAsrResult]: + return [MockAsrResult(text=text) for text in results] + + engine.transcribe = _transcribe + engine.transcribe_async = _transcribe_async + return engine + + +@pytest.fixture +def in_memory_keystore() -> InMemoryKeyStore: + """Create an in-memory keystore for testing.""" + return InMemoryKeyStore() + + +@pytest.fixture +def crypto(in_memory_keystore: InMemoryKeyStore) -> AesGcmCryptoBox: + """Create crypto box with in-memory keystore for testing.""" + return AesGcmCryptoBox(in_memory_keystore) + + +@pytest.fixture +def meetings_dir(tmp_path: Path) -> Path: + """Create temporary meetings directory.""" + meetings = tmp_path / "meetings" + meetings.mkdir(parents=True) + return meetings + + +@pytest.fixture +def mock_asr_engine() -> MagicMock: + """Create default mock ASR engine.""" + return create_mock_asr_engine() + + +@pytest.fixture +def memory_servicer(mock_asr_engine: MagicMock, tmp_path: Path) -> NoteFlowServicer: + """Create NoteFlowServicer with in-memory MeetingStore backend. + + Uses memory store (no database) for fast unit testing of + concurrency and state management. 
+    """
+    return NoteFlowServicer(
+        asr_engine=mock_asr_engine,
+        session_factory=None,
+        meetings_dir=tmp_path / "meetings",
+    )
+
+
+# PostgreSQL-backed session factory for stress tests that need a real database.
+# Dependencies are imported lazily so testcontainers is only required when used.
+@pytest.fixture
+async def postgres_session_factory() -> AsyncGenerator[async_sessionmaker[AsyncSession], None]:
+    """Create PostgreSQL session factory using testcontainers.
+
+    Uses a local container helper to avoid importing test modules.
+    """
+    # Import here to avoid requiring testcontainers for all stress tests
+    from sqlalchemy import text
+    from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+
+    from noteflow.infrastructure.persistence.models import Base
+
+    _, database_url = get_or_create_container()
+
+    engine = create_async_engine(database_url, echo=False)
+
+    async with engine.begin() as conn:
+        await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
+        await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE"))
+        await conn.execute(text("CREATE SCHEMA noteflow"))
+        await conn.run_sync(Base.metadata.create_all)
+
+    yield async_sessionmaker(
+        engine,
+        class_=AsyncSession,
+        expire_on_commit=False,
+        autocommit=False,
+        autoflush=False,
+    )
+
+    async with engine.begin() as conn:
+        await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE"))
+
+    await engine.dispose()
+
+
+def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
+    """Cleanup container after stress tests complete."""
+    global _container
+    if _container is not None:
+        _container.stop()
+        _container = None
diff --git a/tests/stress/test_audio_integrity.py b/tests/stress/test_audio_integrity.py
new file mode 100644
index 0000000..91d18e8
--- /dev/null
+++ b/tests/stress/test_audio_integrity.py
@@ -0,0 +1,493 @@
+"""Stress tests for encrypted audio file format (NFAE) resilience.
+
+Tests truncation recovery, missing manifest handling, and corruption detection.
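+
+File layout assumed by the offsets used in these tests:
+
+    [FILE_MAGIC (4 bytes)][FILE_VERSION (1 byte)]
+    [chunk length (4 bytes, big-endian)][nonce (12 bytes)][AES-GCM ciphertext + tag]
+    ... further chunks ...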
+""" + +from __future__ import annotations + +import json +import struct +from pathlib import Path +from uuid import uuid4 + +import numpy as np +import pytest +from numpy.typing import NDArray + +from noteflow.infrastructure.audio.reader import MeetingAudioReader +from noteflow.infrastructure.audio.writer import MeetingAudioWriter +from noteflow.infrastructure.security.crypto import ( + FILE_MAGIC, + FILE_VERSION, + AesGcmCryptoBox, + ChunkedAssetReader, + ChunkedAssetWriter, +) +from noteflow.infrastructure.security.keystore import InMemoryKeyStore + + +@pytest.fixture +def crypto() -> AesGcmCryptoBox: + """Create crypto with in-memory keystore.""" + return AesGcmCryptoBox(InMemoryKeyStore()) + + +@pytest.fixture +def meetings_dir(tmp_path: Path) -> Path: + """Create temporary meetings directory.""" + return tmp_path / "meetings" + + +def make_audio(samples: int = 1600) -> NDArray[np.float32]: + """Create test audio with random values.""" + return np.random.uniform(-0.5, 0.5, samples).astype(np.float32) + + +class TestTruncatedWriteRecovery: + """Test behavior when audio file is truncated (power loss simulation).""" + + @pytest.mark.stress + def test_truncated_header_partial_magic( + self, crypto: AesGcmCryptoBox, meetings_dir: Path + ) -> None: + """Truncated file (only partial magic bytes) raises on read.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + audio_path = meeting_dir / "audio.enc" + audio_path.write_bytes(FILE_MAGIC[:2]) + + reader = ChunkedAssetReader(crypto) + dek = crypto.generate_dek() + + with pytest.raises(ValueError, match="Invalid file format"): + reader.open(audio_path, dek) + + @pytest.mark.stress + def test_truncated_header_missing_version( + self, crypto: AesGcmCryptoBox, meetings_dir: Path + ) -> None: + """File with magic but truncated before version byte.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + audio_path = meeting_dir / "audio.enc" + audio_path.write_bytes(FILE_MAGIC) + + reader = ChunkedAssetReader(crypto) + dek = crypto.generate_dek() + + with pytest.raises((struct.error, ValueError)): + reader.open(audio_path, dek) + + @pytest.mark.stress + def test_truncated_chunk_length_partial( + self, crypto: AesGcmCryptoBox, meetings_dir: Path + ) -> None: + """File with complete header but truncated chunk length.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + audio_path = meeting_dir / "audio.enc" + with audio_path.open("wb") as f: + f.write(FILE_MAGIC) + f.write(struct.pack("B", FILE_VERSION)) + f.write(struct.pack(">I", 1000)[:2]) + + dek = crypto.generate_dek() + reader = ChunkedAssetReader(crypto) + reader.open(audio_path, dek) + + chunks = list(reader.read_chunks()) + assert not chunks + reader.close() + + @pytest.mark.stress + def test_truncated_chunk_data_raises(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """File with chunk length but truncated data raises ValueError.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + audio_path = meeting_dir / "audio.enc" + with audio_path.open("wb") as f: + f.write(FILE_MAGIC) + f.write(struct.pack("B", FILE_VERSION)) + f.write(struct.pack(">I", 100)) + f.write(b"short") + + dek = crypto.generate_dek() + reader = ChunkedAssetReader(crypto) + reader.open(audio_path, dek) + + with pytest.raises(ValueError, match="Truncated chunk"): + 
list(reader.read_chunks())
+        reader.close()
+
+    @pytest.mark.stress
+    def test_valid_chunks_before_truncation_preserved(
+        self, crypto: AesGcmCryptoBox, meetings_dir: Path
+    ) -> None:
+        """Valid chunks before truncation can still be read."""
+        meeting_id = str(uuid4())
+        meeting_dir = meetings_dir / meeting_id
+        meeting_dir.mkdir(parents=True)
+
+        audio_path = meeting_dir / "audio.enc"
+        dek = crypto.generate_dek()
+
+        writer = ChunkedAssetWriter(crypto)
+        writer.open(audio_path, dek)
+        test_data = b"valid audio chunk data 1"
+        writer.write_chunk(test_data)
+        writer.close()
+
+        with audio_path.open("ab") as f:
+            f.write(struct.pack(">I", 500))
+            f.write(b"truncated")
+
+        reader = ChunkedAssetReader(crypto)
+        reader.open(audio_path, dek)
+
+        chunks: list[bytes] = []
+        try:
+            chunks.extend(reader.read_chunks())
+        except ValueError:
+            pass
+        finally:
+            reader.close()
+
+        assert len(chunks) == 1
+        assert chunks[0] == test_data
+
+
+class TestMissingManifest:
+    """Test behavior when manifest.json is missing."""
+
+    @pytest.mark.stress
+    def test_audio_exists_false_without_manifest(
+        self, crypto: AesGcmCryptoBox, meetings_dir: Path
+    ) -> None:
+        """audio_exists returns False when only audio.enc exists."""
+        meeting_id = str(uuid4())
+        meeting_dir = meetings_dir / meeting_id
+        meeting_dir.mkdir(parents=True)
+
+        (meeting_dir / "audio.enc").write_bytes(FILE_MAGIC + bytes([FILE_VERSION]))
+
+        reader = MeetingAudioReader(crypto, meetings_dir)
+        assert reader.audio_exists(meeting_id) is False
+
+    @pytest.mark.stress
+    def test_audio_exists_false_without_audio(
+        self, crypto: AesGcmCryptoBox, meetings_dir: Path
+    ) -> None:
+        """audio_exists returns False when only manifest exists."""
+        meeting_id = str(uuid4())
+        meeting_dir = meetings_dir / meeting_id
+        meeting_dir.mkdir(parents=True)
+
+        dek = crypto.generate_dek()
+        wrapped_dek = crypto.wrap_dek(dek)
+        manifest = {
+            "meeting_id": meeting_id,
+            "sample_rate": 16000,
+            "wrapped_dek": wrapped_dek.hex(),
+        }
+        (meeting_dir / "manifest.json").write_text(json.dumps(manifest))
+
+        reader = MeetingAudioReader(crypto, meetings_dir)
+        assert reader.audio_exists(meeting_id) is False
+
+    @pytest.mark.stress
+    def test_audio_exists_true_when_both_exist(
+        self, crypto: AesGcmCryptoBox, meetings_dir: Path
+    ) -> None:
+        """audio_exists returns True when both manifest and audio exist."""
+        meeting_id = str(uuid4())
+        dek = crypto.generate_dek()
+        wrapped_dek = crypto.wrap_dek(dek)
+
+        writer = MeetingAudioWriter(crypto, meetings_dir)
+        writer.open(meeting_id, dek, wrapped_dek)
+        writer.write_chunk(make_audio())
+        writer.close()
+
+        reader = MeetingAudioReader(crypto, meetings_dir)
+        assert reader.audio_exists(meeting_id) is True
+
+    @pytest.mark.stress
+    def test_load_audio_raises_without_manifest(
+        self, crypto: AesGcmCryptoBox, meetings_dir: Path
+    ) -> None:
+        """load_meeting_audio raises FileNotFoundError without manifest."""
+        meeting_id = str(uuid4())
+        meeting_dir = meetings_dir / meeting_id
+        meeting_dir.mkdir(parents=True)
+
+        (meeting_dir / "audio.enc").write_bytes(FILE_MAGIC + bytes([FILE_VERSION]))
+
+        reader = MeetingAudioReader(crypto, meetings_dir)
+        with pytest.raises(FileNotFoundError, match="Manifest not found"):
+            reader.load_meeting_audio(meeting_id)
+
+
+class TestCorruptedCiphertextDetection:
+    """Test corrupted ciphertext/tag detection."""
+
+    @pytest.mark.stress
+    def test_bit_flip_in_ciphertext_detected(
+        self, crypto: AesGcmCryptoBox, meetings_dir: Path
+    ) -> None:
+        """Single bit flip in ciphertext causes decryption failure."""
+        
meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer = MeetingAudioWriter(crypto, meetings_dir) + writer.open(meeting_id, dek, wrapped_dek) + writer.write_chunk(make_audio(1600)) + writer.close() + + audio_path = meetings_dir / meeting_id / "audio.enc" + data = bytearray(audio_path.read_bytes()) + + header_size = 5 + length_size = 4 + nonce_size = 12 + corrupt_offset = header_size + length_size + nonce_size + 5 + + if len(data) > corrupt_offset: + data[corrupt_offset] ^= 0x01 + audio_path.write_bytes(bytes(data)) + + reader = ChunkedAssetReader(crypto) + reader.open(audio_path, dek) + + with pytest.raises(ValueError, match="Chunk decryption failed"): + list(reader.read_chunks()) + reader.close() + + @pytest.mark.stress + def test_bit_flip_in_tag_detected(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """Bit flip in authentication tag causes decryption failure.""" + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer = MeetingAudioWriter(crypto, meetings_dir) + writer.open(meeting_id, dek, wrapped_dek) + writer.write_chunk(make_audio(1600)) + writer.close() + + audio_path = meetings_dir / meeting_id / "audio.enc" + data = bytearray(audio_path.read_bytes()) + + data[-5] ^= 0x01 + audio_path.write_bytes(bytes(data)) + + reader = ChunkedAssetReader(crypto) + reader.open(audio_path, dek) + + with pytest.raises(ValueError, match="Chunk decryption failed"): + list(reader.read_chunks()) + reader.close() + + @pytest.mark.stress + def test_wrong_dek_detected(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """Using wrong DEK fails decryption.""" + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrong_dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer = MeetingAudioWriter(crypto, meetings_dir) + writer.open(meeting_id, dek, wrapped_dek) + writer.write_chunk(make_audio(1600)) + writer.close() + + audio_path = meetings_dir / meeting_id / "audio.enc" + reader = ChunkedAssetReader(crypto) + reader.open(audio_path, wrong_dek) + + with pytest.raises(ValueError, match="Chunk decryption failed"): + list(reader.read_chunks()) + reader.close() + + +class TestInvalidManifest: + """Test handling of invalid manifest.json content.""" + + @pytest.mark.stress + def test_missing_wrapped_dek_raises(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """Manifest without wrapped_dek raises ValueError.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + manifest = {"meeting_id": meeting_id, "sample_rate": 16000} + (meeting_dir / "manifest.json").write_text(json.dumps(manifest)) + (meeting_dir / "audio.enc").write_bytes(FILE_MAGIC + bytes([FILE_VERSION])) + + reader = MeetingAudioReader(crypto, meetings_dir) + with pytest.raises(ValueError, match="missing wrapped_dek"): + reader.load_meeting_audio(meeting_id) + + @pytest.mark.stress + def test_invalid_wrapped_dek_hex_raises( + self, crypto: AesGcmCryptoBox, meetings_dir: Path + ) -> None: + """Invalid hex string in wrapped_dek raises ValueError.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + manifest = { + "meeting_id": meeting_id, + "sample_rate": 16000, + "wrapped_dek": "not_valid_hex_!!!", + } + (meeting_dir / "manifest.json").write_text(json.dumps(manifest)) + (meeting_dir / "audio.enc").write_bytes(FILE_MAGIC + bytes([FILE_VERSION])) + + reader = MeetingAudioReader(crypto, 
meetings_dir) + with pytest.raises(ValueError): + reader.load_meeting_audio(meeting_id) + + @pytest.mark.stress + def test_corrupted_wrapped_dek_raises( + self, crypto: AesGcmCryptoBox, meetings_dir: Path + ) -> None: + """Corrupted wrapped_dek (valid hex but invalid content) raises.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + corrupted = bytearray(wrapped_dek) + corrupted[10] ^= 0xFF + + manifest = { + "meeting_id": meeting_id, + "sample_rate": 16000, + "wrapped_dek": bytes(corrupted).hex(), + } + (meeting_dir / "manifest.json").write_text(json.dumps(manifest)) + (meeting_dir / "audio.enc").write_bytes(FILE_MAGIC + bytes([FILE_VERSION])) + + reader = MeetingAudioReader(crypto, meetings_dir) + with pytest.raises(ValueError, match="unwrap failed"): + reader.load_meeting_audio(meeting_id) + + +class TestWriterReaderRoundTrip: + """Test write-read round trip integrity.""" + + @pytest.mark.stress + def test_single_chunk_roundtrip(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """Single chunk write and read preserves data.""" + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + original_audio = make_audio(1600) + + writer = MeetingAudioWriter(crypto, meetings_dir) + writer.open(meeting_id, dek, wrapped_dek) + writer.write_chunk(original_audio) + writer.close() + + reader = MeetingAudioReader(crypto, meetings_dir) + chunks = reader.load_meeting_audio(meeting_id) + + assert len(chunks) == 1 + np.testing.assert_array_almost_equal(chunks[0].frames, original_audio, decimal=4) + + @pytest.mark.stress + def test_multiple_chunks_roundtrip(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """Multiple chunk write and read preserves data.""" + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + original_chunks = [make_audio(1600) for _ in range(10)] + + writer = MeetingAudioWriter(crypto, meetings_dir) + writer.open(meeting_id, dek, wrapped_dek) + for chunk in original_chunks: + writer.write_chunk(chunk) + writer.close() + + reader = MeetingAudioReader(crypto, meetings_dir) + loaded_chunks = reader.load_meeting_audio(meeting_id) + + assert len(loaded_chunks) == len(original_chunks) + for original, loaded in zip(original_chunks, loaded_chunks, strict=True): + np.testing.assert_array_almost_equal(loaded.frames, original, decimal=4) + + @pytest.mark.stress + @pytest.mark.slow + def test_large_audio_roundtrip(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """Large audio file (1000 chunks) write and read succeeds.""" + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer = MeetingAudioWriter(crypto, meetings_dir) + writer.open(meeting_id, dek, wrapped_dek) + + np.random.seed(42) + chunk_count = 1000 + for _ in range(chunk_count): + writer.write_chunk(make_audio(1600)) + writer.close() + + reader = MeetingAudioReader(crypto, meetings_dir) + chunks = reader.load_meeting_audio(meeting_id) + + assert len(chunks) == chunk_count + total_duration = sum(c.duration for c in chunks) + expected_duration = chunk_count * (1600 / 16000) + assert abs(total_duration - expected_duration) < 0.01 + + +class TestFileVersionHandling: + """Test file version validation.""" + + @pytest.mark.stress + def test_unsupported_version_raises(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """Unsupported file 
version raises ValueError.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + audio_path = meeting_dir / "audio.enc" + with audio_path.open("wb") as f: + f.write(FILE_MAGIC) + f.write(struct.pack("B", 99)) + + dek = crypto.generate_dek() + reader = ChunkedAssetReader(crypto) + + with pytest.raises(ValueError, match="Unsupported file version"): + reader.open(audio_path, dek) + + @pytest.mark.stress + def test_wrong_magic_raises(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: + """Wrong magic bytes raises ValueError.""" + meeting_id = str(uuid4()) + meeting_dir = meetings_dir / meeting_id + meeting_dir.mkdir(parents=True) + + audio_path = meeting_dir / "audio.enc" + audio_path.write_bytes(b"XXXX" + bytes([FILE_VERSION])) + + dek = crypto.generate_dek() + reader = ChunkedAssetReader(crypto) + + with pytest.raises(ValueError, match="Invalid file format"): + reader.open(audio_path, dek) diff --git a/tests/stress/test_concurrency_stress.py b/tests/stress/test_concurrency_stress.py new file mode 100644 index 0000000..95b91fd --- /dev/null +++ b/tests/stress/test_concurrency_stress.py @@ -0,0 +1,333 @@ +"""Stress tests for NoteFlowServicer concurrent stream handling. + +Detects race conditions when multiple clients stream simultaneously. +Verifies _cleanup_streaming_state prevents memory leaks. +""" + +from __future__ import annotations + +import asyncio +from uuid import uuid4 + +import pytest + +from noteflow.grpc.service import NoteFlowServicer + + +class TestStreamingStateInitialization: + """Test streaming state initialization correctness.""" + + @pytest.mark.stress + def test_init_streaming_state_creates_all_state( + self, memory_servicer: NoteFlowServicer + ) -> None: + """Initialize streaming state creates entries in all state dictionaries.""" + meeting_id = str(uuid4()) + + memory_servicer._init_streaming_state(meeting_id, next_segment_id=0) + + assert meeting_id in memory_servicer._partial_buffers + assert meeting_id in memory_servicer._vad_instances + assert meeting_id in memory_servicer._segmenters + assert meeting_id in memory_servicer._was_speaking + assert meeting_id in memory_servicer._segment_counters + assert meeting_id in memory_servicer._last_partial_time + assert meeting_id in memory_servicer._last_partial_text + assert meeting_id in memory_servicer._diarization_turns + assert meeting_id in memory_servicer._diarization_stream_time + + memory_servicer._cleanup_streaming_state(meeting_id) + + @pytest.mark.stress + def test_init_with_different_segment_ids(self, memory_servicer: NoteFlowServicer) -> None: + """Initialize with different segment IDs sets counter correctly.""" + meeting_id1 = str(uuid4()) + meeting_id2 = str(uuid4()) + + memory_servicer._init_streaming_state(meeting_id1, next_segment_id=0) + memory_servicer._init_streaming_state(meeting_id2, next_segment_id=42) + + assert memory_servicer._segment_counters[meeting_id1] == 0 + assert memory_servicer._segment_counters[meeting_id2] == 42 + + memory_servicer._cleanup_streaming_state(meeting_id1) + memory_servicer._cleanup_streaming_state(meeting_id2) + + +class TestCleanupStreamingState: + """Test _cleanup_streaming_state removes all per-meeting resources.""" + + @pytest.mark.stress + def test_cleanup_removes_all_state_dictionaries( + self, memory_servicer: NoteFlowServicer + ) -> None: + """Verify cleanup removes entries from all state dictionaries.""" + meeting_id = str(uuid4()) + + memory_servicer._init_streaming_state(meeting_id, 
next_segment_id=0) + memory_servicer._active_streams.add(meeting_id) + + assert meeting_id in memory_servicer._partial_buffers + assert meeting_id in memory_servicer._vad_instances + assert meeting_id in memory_servicer._segmenters + + memory_servicer._cleanup_streaming_state(meeting_id) + memory_servicer._active_streams.discard(meeting_id) + + assert meeting_id not in memory_servicer._partial_buffers + assert meeting_id not in memory_servicer._vad_instances + assert meeting_id not in memory_servicer._segmenters + assert meeting_id not in memory_servicer._was_speaking + assert meeting_id not in memory_servicer._segment_counters + assert meeting_id not in memory_servicer._stream_formats + assert meeting_id not in memory_servicer._last_partial_time + assert meeting_id not in memory_servicer._last_partial_text + assert meeting_id not in memory_servicer._diarization_turns + assert meeting_id not in memory_servicer._diarization_stream_time + assert meeting_id not in memory_servicer._diarization_streaming_failed + assert meeting_id not in memory_servicer._active_streams + + @pytest.mark.stress + def test_cleanup_idempotent(self, memory_servicer: NoteFlowServicer) -> None: + """Cleanup is idempotent - multiple calls don't raise.""" + meeting_id = str(uuid4()) + + memory_servicer._init_streaming_state(meeting_id, next_segment_id=0) + + memory_servicer._cleanup_streaming_state(meeting_id) + memory_servicer._cleanup_streaming_state(meeting_id) + memory_servicer._cleanup_streaming_state(meeting_id) + + @pytest.mark.stress + def test_cleanup_nonexistent_meeting_no_error(self, memory_servicer: NoteFlowServicer) -> None: + """Cleanup of non-existent meeting ID doesn't raise.""" + nonexistent_id = str(uuid4()) + + memory_servicer._cleanup_streaming_state(nonexistent_id) + + +class TestConcurrentStreamInitialization: + """Test concurrent stream initialization.""" + + @pytest.mark.stress + @pytest.mark.asyncio + async def test_concurrent_init_different_meetings( + self, memory_servicer: NoteFlowServicer + ) -> None: + """Multiple concurrent init calls for different meetings succeed.""" + meeting_ids = [str(uuid4()) for _ in range(20)] + + async def init_meeting(meeting_id: str, segment_id: int) -> None: + await asyncio.sleep(0.001) + memory_servicer._init_streaming_state(meeting_id, segment_id) + + tasks = [asyncio.create_task(init_meeting(mid, idx)) for idx, mid in enumerate(meeting_ids)] + await asyncio.gather(*tasks) + + assert len(memory_servicer._vad_instances) == len(meeting_ids) + assert len(memory_servicer._segmenters) == len(meeting_ids) + assert len(memory_servicer._partial_buffers) == len(meeting_ids) + + for mid in meeting_ids: + memory_servicer._cleanup_streaming_state(mid) + + @pytest.mark.stress + @pytest.mark.asyncio + async def test_concurrent_cleanup_different_meetings( + self, memory_servicer: NoteFlowServicer + ) -> None: + """Multiple concurrent cleanup calls for different meetings succeed.""" + meeting_ids = [str(uuid4()) for _ in range(20)] + + for idx, mid in enumerate(meeting_ids): + memory_servicer._init_streaming_state(mid, idx) + + async def cleanup_meeting(meeting_id: str) -> None: + await asyncio.sleep(0.001) + memory_servicer._cleanup_streaming_state(meeting_id) + + tasks = [asyncio.create_task(cleanup_meeting(mid)) for mid in meeting_ids] + await asyncio.gather(*tasks) + + assert len(memory_servicer._vad_instances) == 0 + assert len(memory_servicer._segmenters) == 0 + assert len(memory_servicer._partial_buffers) == 0 + + +class TestNoMemoryLeaksUnderLoad: + """Test no memory 
leaks after many stream cycles.""" + + @pytest.mark.stress + def test_stream_cycles_cleanup_completely(self, memory_servicer: NoteFlowServicer) -> None: + """Many init/cleanup cycles leave no leaked state.""" + for _ in range(100): + meeting_id = str(uuid4()) + memory_servicer._init_streaming_state(meeting_id, next_segment_id=0) + memory_servicer._active_streams.add(meeting_id) + memory_servicer._cleanup_streaming_state(meeting_id) + memory_servicer._active_streams.discard(meeting_id) + + assert len(memory_servicer._active_streams) == 0 + assert len(memory_servicer._partial_buffers) == 0 + assert len(memory_servicer._vad_instances) == 0 + assert len(memory_servicer._segmenters) == 0 + assert len(memory_servicer._was_speaking) == 0 + assert len(memory_servicer._segment_counters) == 0 + assert len(memory_servicer._last_partial_time) == 0 + assert len(memory_servicer._last_partial_text) == 0 + assert len(memory_servicer._diarization_turns) == 0 + assert len(memory_servicer._diarization_stream_time) == 0 + assert len(memory_servicer._diarization_streaming_failed) == 0 + + @pytest.mark.stress + @pytest.mark.slow + def test_many_concurrent_meetings_no_leak(self, memory_servicer: NoteFlowServicer) -> None: + """Many meetings initialized then cleaned up leave no state.""" + meeting_ids = [str(uuid4()) for _ in range(500)] + + for idx, mid in enumerate(meeting_ids): + memory_servicer._init_streaming_state(mid, idx) + memory_servicer._active_streams.add(mid) + + assert len(memory_servicer._vad_instances) == 500 + assert len(memory_servicer._segmenters) == 500 + + for mid in meeting_ids: + memory_servicer._cleanup_streaming_state(mid) + memory_servicer._active_streams.discard(mid) + + assert len(memory_servicer._active_streams) == 0 + assert len(memory_servicer._vad_instances) == 0 + assert len(memory_servicer._segmenters) == 0 + + @pytest.mark.stress + @pytest.mark.asyncio + async def test_interleaved_init_cleanup(self, memory_servicer: NoteFlowServicer) -> None: + """Interleaved init and cleanup doesn't leak or corrupt.""" + for _ in range(50): + meeting_ids = [str(uuid4()) for _ in range(10)] + + for idx, mid in enumerate(meeting_ids): + memory_servicer._init_streaming_state(mid, idx) + + for mid in meeting_ids[:5]: + memory_servicer._cleanup_streaming_state(mid) + + for mid in meeting_ids[5:]: + memory_servicer._cleanup_streaming_state(mid) + + assert len(memory_servicer._vad_instances) == 0 + assert len(memory_servicer._segmenters) == 0 + + +class TestActiveStreamsTracking: + """Test _active_streams set behavior.""" + + @pytest.mark.stress + def test_active_streams_tracks_active_meetings(self, memory_servicer: NoteFlowServicer) -> None: + """_active_streams correctly tracks active meeting IDs.""" + meeting_ids = [str(uuid4()) for _ in range(5)] + + for mid in meeting_ids: + memory_servicer._active_streams.add(mid) + + assert len(memory_servicer._active_streams) == 5 + for mid in meeting_ids: + assert mid in memory_servicer._active_streams + + for mid in meeting_ids: + memory_servicer._active_streams.discard(mid) + + assert len(memory_servicer._active_streams) == 0 + + @pytest.mark.stress + def test_discard_nonexistent_no_error(self, memory_servicer: NoteFlowServicer) -> None: + """Discarding non-existent meeting ID doesn't raise.""" + nonexistent = str(uuid4()) + memory_servicer._active_streams.discard(nonexistent) + + +class TestDiarizationStateCleanup: + """Test diarization-related state cleanup.""" + + @pytest.mark.stress + def test_diarization_failed_set_cleaned(self, memory_servicer: 
NoteFlowServicer) -> None: + """_diarization_streaming_failed set is cleaned on cleanup.""" + meeting_id = str(uuid4()) + + memory_servicer._init_streaming_state(meeting_id, 0) + memory_servicer._diarization_streaming_failed.add(meeting_id) + + assert meeting_id in memory_servicer._diarization_streaming_failed + + memory_servicer._cleanup_streaming_state(meeting_id) + + assert meeting_id not in memory_servicer._diarization_streaming_failed + + @pytest.mark.stress + def test_diarization_turns_cleaned(self, memory_servicer: NoteFlowServicer) -> None: + """_diarization_turns dict is cleaned on cleanup.""" + meeting_id = str(uuid4()) + + memory_servicer._init_streaming_state(meeting_id, 0) + + assert meeting_id in memory_servicer._diarization_turns + assert memory_servicer._diarization_turns[meeting_id] == [] + + memory_servicer._cleanup_streaming_state(meeting_id) + + assert meeting_id not in memory_servicer._diarization_turns + + +class TestServicerInstantiation: + """Test NoteFlowServicer instantiation patterns.""" + + @pytest.mark.stress + def test_servicer_starts_with_empty_state(self) -> None: + """New servicer has empty state dictionaries.""" + servicer = NoteFlowServicer() + + assert len(servicer._active_streams) == 0 + assert len(servicer._partial_buffers) == 0 + assert len(servicer._vad_instances) == 0 + assert len(servicer._segmenters) == 0 + assert len(servicer._was_speaking) == 0 + assert len(servicer._segment_counters) == 0 + assert len(servicer._audio_writers) == 0 + + @pytest.mark.stress + def test_multiple_servicers_independent(self) -> None: + """Multiple servicer instances have independent state.""" + servicer1 = NoteFlowServicer() + servicer2 = NoteFlowServicer() + + meeting_id = str(uuid4()) + servicer1._init_streaming_state(meeting_id, 0) + + assert meeting_id in servicer1._vad_instances + assert meeting_id not in servicer2._vad_instances + + servicer1._cleanup_streaming_state(meeting_id) + + +class TestMemoryStoreAccess: + """Test memory store access patterns.""" + + @pytest.mark.stress + def test_get_memory_store_returns_store(self, memory_servicer: NoteFlowServicer) -> None: + """_get_memory_store returns MeetingStore when configured.""" + store = memory_servicer._get_memory_store() + assert store is not None + + @pytest.mark.stress + def test_memory_store_create_meeting(self, memory_servicer: NoteFlowServicer) -> None: + """Memory store can create and retrieve meetings.""" + store = memory_servicer._get_memory_store() + + meeting = store.create(title="Test Meeting") + assert meeting is not None + assert meeting.title == "Test Meeting" + + retrieved = store.get(str(meeting.id)) + assert retrieved is not None + assert retrieved.title == "Test Meeting" diff --git a/tests/stress/test_segmenter_fuzz.py b/tests/stress/test_segmenter_fuzz.py new file mode 100644 index 0000000..c46b7ae --- /dev/null +++ b/tests/stress/test_segmenter_fuzz.py @@ -0,0 +1,538 @@ +"""Fuzz tests for Segmenter state machine. + +Tests edge cases with rapid VAD transitions and random input sequences. +Verifies invariants hold under stress conditions. 
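+
+State machine assumed by these tests (see TestStateTransitions):
+
+    IDLE -> SPEECH            on speech
+    SPEECH -> TRAILING        on silence
+    TRAILING -> SPEECH        when speech resumes within trailing_silence
+    TRAILING -> IDLE          once trailing_silence elapses (segment emitted)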
+""" + +from __future__ import annotations + +import random + +import numpy as np +import pytest +from numpy.typing import NDArray + +from noteflow.infrastructure.asr.segmenter import ( + AudioSegment, + Segmenter, + SegmenterConfig, + SegmenterState, +) + + +def make_audio(duration: float, sample_rate: int = 16000) -> NDArray[np.float32]: + """Create test audio of specified duration with random values.""" + samples = int(duration * sample_rate) + return np.random.uniform(-1.0, 1.0, samples).astype(np.float32) + + +def make_silence(duration: float, sample_rate: int = 16000) -> NDArray[np.float32]: + """Create silent audio of specified duration.""" + samples = int(duration * sample_rate) + return np.zeros(samples, dtype=np.float32) + + +class TestSegmenterInvariants: + """Verify segmenter invariants hold under various inputs.""" + + @pytest.mark.stress + @pytest.mark.parametrize("sample_rate", [16000, 44100, 48000]) + def test_segment_duration_positive(self, sample_rate: int) -> None: + """All emitted segments have positive duration.""" + config = SegmenterConfig( + sample_rate=sample_rate, + min_speech_duration=0.0, + trailing_silence=0.1, + ) + segmenter = Segmenter(config=config) + + random.seed(42) + segments: list[AudioSegment] = [] + + for _ in range(100): + audio = make_audio(0.1, sample_rate) + is_speech = random.random() > 0.5 + segments.extend(segmenter.process_audio(audio, is_speech)) + + if final := segmenter.flush(): + segments.append(final) + + for seg in segments: + assert seg.duration > 0, f"Segment duration must be positive: {seg.duration}" + assert seg.end_time > seg.start_time + + @pytest.mark.stress + def test_segment_audio_length_matches_duration(self) -> None: + """Segment audio length matches (end_time - start_time) * sample_rate.""" + sample_rate = 16000 + config = SegmenterConfig( + sample_rate=sample_rate, + min_speech_duration=0.0, + trailing_silence=0.1, + leading_buffer=0.0, + ) + segmenter = Segmenter(config=config) + + speech = make_audio(0.5, sample_rate) + silence = make_silence(0.2, sample_rate) + + list(segmenter.process_audio(speech, is_speech=True)) + segments = list(segmenter.process_audio(silence, is_speech=False)) + + for seg in segments: + expected_samples = int(seg.duration * sample_rate) + actual_samples = len(seg.audio) + assert abs(actual_samples - expected_samples) <= 1, ( + f"Audio length {actual_samples} != expected {expected_samples}" + ) + + @pytest.mark.stress + def test_segments_strictly_sequential(self) -> None: + """Emitted segments have non-overlapping, sequential time ranges.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.05, + ) + segmenter = Segmenter(config=config) + + random.seed(123) + segments: list[AudioSegment] = [] + + for _ in range(50): + audio = make_audio(0.05) + is_speech = random.random() > 0.3 + segments.extend(segmenter.process_audio(audio, is_speech)) + + if final := segmenter.flush(): + segments.append(final) + + for i in range(1, len(segments)): + prev_end = segments[i - 1].end_time + curr_start = segments[i].start_time + assert curr_start >= prev_end, ( + f"Segment overlap: prev_end={prev_end}, curr_start={curr_start}" + ) + + @pytest.mark.stress + def test_all_segments_have_audio(self) -> None: + """All emitted segments contain audio data.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.1, + ) + segmenter = Segmenter(config=config) + + random.seed(456) + segments: list[AudioSegment] = [] + + for _ in 
range(100): + audio = make_audio(0.05) + is_speech = random.random() > 0.4 + segments.extend(segmenter.process_audio(audio, is_speech)) + + if final := segmenter.flush(): + segments.append(final) + + for seg in segments: + assert len(seg.audio) > 0, "Segment must contain audio data" + + +class TestRapidVadTransitions: + """Test rapid VAD state transitions (chattering).""" + + @pytest.mark.stress + def test_rapid_speech_silence_alternation(self) -> None: + """Rapid alternation between speech and silence doesn't crash.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.05, + ) + segmenter = Segmenter(config=config) + + for i in range(1000): + audio = make_silence(0.01) # 10ms at 16kHz + is_speech = i % 2 == 0 + list(segmenter.process_audio(audio, is_speech)) + + assert segmenter.state in ( + SegmenterState.IDLE, + SegmenterState.SPEECH, + SegmenterState.TRAILING, + ) + + @pytest.mark.stress + def test_single_sample_chunks(self) -> None: + """Processing single-sample chunks doesn't crash or produce invalid segments.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.01, + ) + segmenter = Segmenter(config=config) + + random.seed(789) + segments: list[AudioSegment] = [] + + for i in range(1000): + audio = np.array([random.uniform(-1, 1)], dtype=np.float32) + is_speech = i % 10 < 5 + segments.extend(segmenter.process_audio(audio, is_speech)) + + for seg in segments: + assert seg.duration >= 0 + assert len(seg.audio) > 0 + + @pytest.mark.stress + def test_very_short_speech_bursts(self) -> None: + """Very short speech bursts are handled correctly.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.02, + ) + segmenter = Segmenter(config=config) + + segments: list[AudioSegment] = [] + + for _ in range(100): + speech = make_audio(0.01) + silence = make_silence(0.05) + + segments.extend(segmenter.process_audio(speech, is_speech=True)) + segments.extend(segmenter.process_audio(silence, is_speech=False)) + + if final := segmenter.flush(): + segments.append(final) + + for seg in segments: + assert seg.duration > 0 + assert seg.end_time > seg.start_time + + +class TestEdgeCaseConfigurations: + """Test edge case segmenter configurations.""" + + @pytest.mark.stress + @pytest.mark.parametrize("min_speech", [0.0, 0.001, 0.01, 0.1, 1.0]) + def test_various_min_speech_durations(self, min_speech: float) -> None: + """Various min_speech_duration values work correctly.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=min_speech, + trailing_silence=0.1, + ) + segmenter = Segmenter(config=config) + + speech = make_audio(1.0) + silence = make_silence(0.2) + + segments_speech = list(segmenter.process_audio(speech, is_speech=True)) + segments_silence = list(segmenter.process_audio(silence, is_speech=False)) + + all_segments = segments_speech + segments_silence + + for seg in all_segments: + assert seg.duration > 0, f"Segment duration must be positive: {seg.duration}" + + @pytest.mark.stress + def test_zero_trailing_silence(self) -> None: + """Zero trailing_silence immediately emits on silence.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.0, + ) + segmenter = Segmenter(config=config) + + speech = make_audio(0.1) + silence = make_silence(0.01) + + list(segmenter.process_audio(speech, is_speech=True)) + segments = list(segmenter.process_audio(silence, is_speech=False)) + + assert len(segments) == 1 
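+        # One segment is expected immediately: with trailing_silence=0.0 the
+        # first silent chunk closes the in-progress speech segment.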
+ + @pytest.mark.stress + def test_max_duration_forced_split(self) -> None: + """Segments are force-split at max_segment_duration.""" + config = SegmenterConfig( + sample_rate=16000, + max_segment_duration=0.5, + min_speech_duration=0.0, + ) + segmenter = Segmenter(config=config) + + segments: list[AudioSegment] = [] + + for _ in range(20): + audio = make_audio(0.1) + segments.extend(segmenter.process_audio(audio, is_speech=True)) + + assert len(segments) >= 3, f"Expected at least 3 segments, got {len(segments)}" + + for seg in segments: + assert seg.duration <= config.max_segment_duration + 0.2 + + @pytest.mark.stress + def test_zero_leading_buffer(self) -> None: + """Zero leading_buffer doesn't include pre-speech audio.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.1, + leading_buffer=0.0, + ) + segmenter = Segmenter(config=config) + + silence = make_silence(0.5) + speech = make_audio(0.3) + more_silence = make_silence(0.2) + + list(segmenter.process_audio(silence, is_speech=False)) + list(segmenter.process_audio(speech, is_speech=True)) + segments = list(segmenter.process_audio(more_silence, is_speech=False)) + + assert len(segments) == 1 + seg = segments[0] + expected_duration = 0.3 + 0.2 # speech plus the full trailing-silence chunk + assert abs(seg.duration - expected_duration) < 0.05 + + @pytest.mark.stress + @pytest.mark.parametrize("leading_buffer", [0.0, 0.1, 0.2, 0.5, 1.0]) + def test_various_leading_buffers(self, leading_buffer: float) -> None: + """Various leading_buffer values work correctly.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.1, + leading_buffer=leading_buffer, + ) + segmenter = Segmenter(config=config) + + silence = make_silence(0.5) + speech = make_audio(0.3) + more_silence = make_silence(0.2) + + list(segmenter.process_audio(silence, is_speech=False)) + list(segmenter.process_audio(speech, is_speech=True)) + if segments := list(segmenter.process_audio(more_silence, is_speech=False)): + seg = segments[0] + assert seg.duration > 0 + + +class TestStateTransitions: + """Test specific state transition scenarios.""" + + @pytest.mark.stress + def test_idle_to_speech_to_idle(self) -> None: + """IDLE -> SPEECH -> IDLE transition.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.1, + ) + segmenter = Segmenter(config=config) + + assert segmenter.state == SegmenterState.IDLE + + speech = make_audio(0.2) + list(segmenter.process_audio(speech, is_speech=True)) + assert segmenter.state == SegmenterState.SPEECH + + silence = make_silence(0.2) + list(segmenter.process_audio(silence, is_speech=False)) + assert segmenter.state == SegmenterState.IDLE + + @pytest.mark.stress + def test_trailing_back_to_speech(self) -> None: + """TRAILING -> SPEECH transition when speech resumes.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.5, + ) + segmenter = Segmenter(config=config) + + speech = make_audio(0.2) + list(segmenter.process_audio(speech, is_speech=True)) + + short_silence = make_silence(0.1) + list(segmenter.process_audio(short_silence, is_speech=False)) + assert segmenter.state == SegmenterState.TRAILING + + more_speech = make_audio(0.2) + list(segmenter.process_audio(more_speech, is_speech=True)) + assert segmenter.state == SegmenterState.SPEECH + + @pytest.mark.stress + def test_flush_from_speech_state(self) -> None: + """Flush from SPEECH state emits segment.""" + config = SegmenterConfig( + sample_rate=16000, +
min_speech_duration=0.0, + ) + segmenter = Segmenter(config=config) + + speech = make_audio(0.3) + list(segmenter.process_audio(speech, is_speech=True)) + assert segmenter.state == SegmenterState.SPEECH + + segment = segmenter.flush() + assert segment is not None + assert segment.duration > 0 + assert segmenter.state == SegmenterState.IDLE + + @pytest.mark.stress + def test_flush_from_trailing_state(self) -> None: + """Flush from TRAILING state emits segment.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=1.0, + ) + segmenter = Segmenter(config=config) + + speech = make_audio(0.3) + list(segmenter.process_audio(speech, is_speech=True)) + + silence = make_silence(0.1) + list(segmenter.process_audio(silence, is_speech=False)) + assert segmenter.state == SegmenterState.TRAILING + + segment = segmenter.flush() + assert segment is not None + assert segment.duration > 0 + assert segmenter.state == SegmenterState.IDLE + + @pytest.mark.stress + def test_flush_from_idle_returns_none(self) -> None: + """Flush from IDLE state returns None.""" + config = SegmenterConfig(sample_rate=16000) + segmenter = Segmenter(config=config) + + assert segmenter.state == SegmenterState.IDLE + segment = segmenter.flush() + assert segment is None + + +class TestFuzzRandomPatterns: + """Fuzz testing with random VAD patterns.""" + + @pytest.mark.stress + @pytest.mark.slow + def test_random_vad_patterns_1000_iterations(self) -> None: + """Run 1000 random VAD pattern iterations.""" + for seed in range(1000): + random.seed(seed) + np.random.seed(seed) + + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=random.uniform(0, 0.5), + max_segment_duration=random.uniform(1, 10), + trailing_silence=random.uniform(0.05, 0.5), + leading_buffer=random.uniform(0, 0.3), + ) + segmenter = Segmenter(config=config) + + segments: list[AudioSegment] = [] + + for _ in range(random.randint(10, 100)): + duration = random.uniform(0.01, 0.5) + audio = make_audio(duration) + is_speech = random.random() > 0.4 + segments.extend(segmenter.process_audio(audio, is_speech)) + + if final := segmenter.flush(): + segments.append(final) + + for seg in segments: + assert seg.duration > 0, f"Seed {seed}: duration must be positive" + assert seg.end_time > seg.start_time, f"Seed {seed}: end > start" + assert len(seg.audio) > 0, f"Seed {seed}: audio must exist" + + @pytest.mark.stress + def test_deterministic_with_same_seed(self) -> None: + """Same random seed produces same segments.""" + + def run_with_seed(seed: int) -> list[tuple[float, float]]: + random.seed(seed) + np.random.seed(seed) + + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.1, + ) + segmenter = Segmenter(config=config) + segments: list[AudioSegment] = [] + + for _ in range(50): + duration = random.uniform(0.05, 0.2) + audio = make_audio(duration) + is_speech = random.random() > 0.5 + segments.extend(segmenter.process_audio(audio, is_speech)) + + if final := segmenter.flush(): + segments.append(final) + + return [(s.start_time, s.end_time) for s in segments] + + result1 = run_with_seed(999) + result2 = run_with_seed(999) + + assert result1 == result2 + + +class TestResetBehavior: + """Test reset functionality.""" + + @pytest.mark.stress + def test_reset_clears_all_state(self) -> None: + """Reset clears all internal state.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.5, + ) + segmenter = Segmenter(config=config) + + speech = 
make_audio(0.5) + list(segmenter.process_audio(speech, is_speech=True)) + + silence = make_silence(0.1) + list(segmenter.process_audio(silence, is_speech=False)) + + segmenter.reset() + + assert segmenter.state == SegmenterState.IDLE + + @pytest.mark.stress + def test_reset_allows_fresh_processing(self) -> None: + """After reset, segmenter works from fresh state.""" + config = SegmenterConfig( + sample_rate=16000, + min_speech_duration=0.0, + trailing_silence=0.1, + ) + segmenter = Segmenter(config=config) + + speech1 = make_audio(0.3) + list(segmenter.process_audio(speech1, is_speech=True)) + silence1 = make_silence(0.2) + segments1 = list(segmenter.process_audio(silence1, is_speech=False)) + + segmenter.reset() + + speech2 = make_audio(0.3) + list(segmenter.process_audio(speech2, is_speech=True)) + silence2 = make_silence(0.2) + segments2 = list(segmenter.process_audio(silence2, is_speech=False)) + + assert len(segments1) == len(segments2) == 1 + assert segments2[0].start_time == 0.0 diff --git a/tests/stress/test_transaction_boundaries.py b/tests/stress/test_transaction_boundaries.py new file mode 100644 index 0000000..5e0d303 --- /dev/null +++ b/tests/stress/test_transaction_boundaries.py @@ -0,0 +1,376 @@ +"""Tests for SqlAlchemyUnitOfWork transaction boundaries and rollback behavior. + +Verifies rollback works correctly when operations fail mid-transaction. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from noteflow.domain.entities.meeting import Meeting +from noteflow.domain.entities.segment import Segment +from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork + +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + +pytestmark = [pytest.mark.integration, pytest.mark.stress] + + +class TestExceptionRollback: + """Test automatic rollback on exception.""" + + @pytest.mark.asyncio + async def test_exception_during_context_rolls_back( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Exception in context manager triggers automatic rollback.""" + meeting = Meeting.create(title="Rollback Test") + + with pytest.raises(RuntimeError): + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + raise RuntimeError("Simulated failure") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.meetings.get(meeting.id) + assert result is None + + @pytest.mark.asyncio + async def test_rollback_after_multiple_operations( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Rollback after multiple operations reverts all changes.""" + meeting = Meeting.create(title="Multi-op Rollback") + + with pytest.raises(ValueError): + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + + segment = Segment( + segment_id=0, + text="Test segment", + start_time=0.0, + end_time=1.0, + meeting_id=meeting.id, + ) + await uow.segments.add(meeting.id, segment) + + raise ValueError("Simulated batch failure") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.meetings.get(meeting.id) + assert result is None + + @pytest.mark.asyncio + async def test_exception_type_does_not_matter( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Any exception type triggers rollback.""" + meeting = Meeting.create(title="Exception Type Test") + + class 
CustomError(Exception): + pass + + with pytest.raises(CustomError): + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + raise CustomError("Custom error") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.meetings.get(meeting.id) + assert result is None + + +class TestExplicitRollback: + """Test explicit rollback behavior.""" + + @pytest.mark.asyncio + async def test_explicit_rollback_reverts_changes( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Explicit rollback() call reverts uncommitted changes.""" + meeting = Meeting.create(title="Explicit Rollback") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + await uow.rollback() + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.meetings.get(meeting.id) + assert result is None + + @pytest.mark.asyncio + async def test_commit_after_rollback_is_no_op( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Commit after rollback doesn't resurrect rolled-back data.""" + meeting = Meeting.create(title="Commit After Rollback") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + await uow.rollback() + await uow.commit() + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.meetings.get(meeting.id) + assert result is None + + +class TestCommitPersistence: + """Test that committed data persists.""" + + @pytest.mark.asyncio + async def test_committed_data_visible_in_new_uow( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Data committed in one UoW is visible in subsequent UoW.""" + meeting = Meeting.create(title="Visibility Test") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + await uow.commit() + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.meetings.get(meeting.id) + assert result is not None + assert result.title == "Visibility Test" + + @pytest.mark.asyncio + async def test_committed_meeting_and_segment( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Committed meeting and segment both persist.""" + meeting = Meeting.create(title="Meeting With Segment") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + await uow.commit() + + segment = Segment( + segment_id=0, + text="Test segment text", + start_time=0.0, + end_time=1.5, + meeting_id=meeting.id, + ) + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.segments.add(meeting.id, segment) + await uow.commit() + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + segments = await uow.segments.get_by_meeting(meeting.id) + assert len(segments) == 1 + assert segments[0].text == "Test segment text" + + +class TestBatchOperationRollback: + """Test rollback behavior with batch operations.""" + + @pytest.mark.asyncio + async def test_batch_segment_add_rollback( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Batch segment operations are fully rolled back on failure.""" + meeting = Meeting.create(title="Batch Rollback Test") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + await 
uow.commit() + + segments = [ + Segment( + segment_id=i, + text=f"Segment {i}", + start_time=float(i), + end_time=float(i + 1), + meeting_id=meeting.id, + ) + for i in range(10) + ] + + with pytest.raises(RuntimeError): + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.segments.add_batch(meeting.id, segments) + raise RuntimeError("Batch failure") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.segments.get_by_meeting(meeting.id) + assert len(result) == 0 + + @pytest.mark.asyncio + async def test_partial_batch_no_partial_persist( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Failure mid-batch doesn't leave partial data.""" + meeting = Meeting.create(title="Partial Batch Test") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + await uow.commit() + + with pytest.raises(ValueError): + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + for i in range(5): + segment = Segment( + segment_id=i, + text=f"Segment {i}", + start_time=float(i), + end_time=float(i + 1), + meeting_id=meeting.id, + ) + await uow.segments.add(meeting.id, segment) + + raise ValueError("Mid-batch failure") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.segments.get_by_meeting(meeting.id) + assert len(result) == 0 + + +class TestIsolation: + """Test transaction isolation between UoW instances.""" + + @pytest.mark.asyncio + async def test_uncommitted_data_not_visible_externally( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Uncommitted data in one UoW not visible in another.""" + meeting = Meeting.create(title="Isolation Test") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow1: + await uow1.meetings.create(meeting) + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow2: + result = await uow2.meetings.get(meeting.id) + assert result is None + + @pytest.mark.asyncio + async def test_independent_uow_transactions( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Two UoW instances have independent transactions.""" + meeting1 = Meeting.create(title="Meeting 1") + meeting2 = Meeting.create(title="Meeting 2") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow1: + await uow1.meetings.create(meeting1) + await uow1.commit() + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow2: + await uow2.meetings.create(meeting2) + await uow2.rollback() + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result1 = await uow.meetings.get(meeting1.id) + result2 = await uow.meetings.get(meeting2.id) + + assert result1 is not None + assert result2 is None + + +class TestMeetingStateRollback: + """Test rollback on meeting state changes.""" + + @pytest.mark.asyncio + async def test_meeting_state_change_rollback( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Meeting state changes are rolled back on failure.""" + meeting = Meeting.create(title="State Rollback") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + await uow.meetings.create(meeting) + await uow.commit() + + original_state = meeting.state + + with pytest.raises(ValueError): + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + m = await uow.meetings.get(meeting.id) + assert m is not None + m.start_recording() + await 
uow.meetings.update(m) + raise ValueError("Business logic failure") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + result = await uow.meetings.get(meeting.id) + assert result is not None + assert result.state == original_state + + +class TestRepositoryContextRequirement: + """Test that repositories require UoW context.""" + + @pytest.mark.asyncio + async def test_repo_access_outside_context_raises( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Accessing repository outside context raises RuntimeError.""" + uow = SqlAlchemyUnitOfWork(postgres_session_factory) + + with pytest.raises(RuntimeError, match="UnitOfWork not in context"): + _ = uow.meetings + + @pytest.mark.asyncio + async def test_commit_outside_context_raises( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Calling commit outside context raises RuntimeError.""" + uow = SqlAlchemyUnitOfWork(postgres_session_factory) + + with pytest.raises(RuntimeError, match="UnitOfWork not in context"): + await uow.commit() + + @pytest.mark.asyncio + async def test_rollback_outside_context_raises( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Calling rollback outside context raises RuntimeError.""" + uow = SqlAlchemyUnitOfWork(postgres_session_factory) + + with pytest.raises(RuntimeError, match="UnitOfWork not in context"): + await uow.rollback() + + +class TestMultipleMeetingOperations: + """Test transactions spanning multiple meetings.""" + + @pytest.mark.asyncio + async def test_multiple_meetings_atomic( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Multiple meeting creates are atomic.""" + meetings = [Meeting.create(title=f"Meeting {i}") for i in range(5)] + + with pytest.raises(RuntimeError): + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + for meeting in meetings: + await uow.meetings.create(meeting) + raise RuntimeError("All or nothing") + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + for meeting in meetings: + result = await uow.meetings.get(meeting.id) + assert result is None + + @pytest.mark.asyncio + async def test_multiple_meetings_commit_all( + self, postgres_session_factory: async_sessionmaker[AsyncSession] + ) -> None: + """Multiple meetings commit together.""" + meetings = [Meeting.create(title=f"Meeting {i}") for i in range(5)] + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + for meeting in meetings: + await uow.meetings.create(meeting) + await uow.commit() + + async with SqlAlchemyUnitOfWork(postgres_session_factory) as uow: + for meeting in meetings: + result = await uow.meetings.get(meeting.id) + assert result is not None + assert result.title == meeting.title diff --git a/uv.lock b/uv.lock index bb53027..7f5c8d2 100644 --- a/uv.lock +++ b/uv.lock @@ -1890,7 +1890,6 @@ dependencies = [ { name = "alembic" }, { name = "asyncpg" }, { name = "cryptography" }, - { name = "diart" }, { name = "faster-whisper" }, { name = "flet" }, { name = "grpcio" }, @@ -1936,6 +1935,7 @@ triggers = [ [package.dev-dependencies] dev = [ { name = "ruff" }, + { name = "watchfiles" }, ] [package.metadata] @@ -1945,8 +1945,7 @@ requires-dist = [ { name = "asyncpg", specifier = ">=0.29" }, { name = "basedpyright", marker = "extra == 'dev'", specifier = ">=1.18" }, { name = "cryptography", specifier = ">=42.0" }, - { name = "diart", specifier = ">=0.9.2" }, - { name = "diart", marker = "extra == 'diarization'",
specifier = ">=0.9" }, + { name = "diart", marker = "extra == 'diarization'", specifier = ">=0.9.2" }, { name = "faster-whisper", specifier = ">=1.0" }, { name = "flet", specifier = ">=0.21" }, { name = "grpcio", specifier = ">=1.60" }, @@ -1978,7 +1977,10 @@ requires-dist = [ provides-extras = ["dev", "triggers", "summarization", "diarization"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.14.9" }] +dev = [ + { name = "ruff", specifier = ">=0.14.9" }, + { name = "watchfiles", specifier = ">=1.1.1" }, +] [[package]] name = "numpy" @@ -6488,6 +6490,76 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/b9/4095b668ea3678bf6a0af005527f39de12fb026516fb3df17495a733b7f8/urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd", size = 131182, upload-time = "2025-12-11T15:56:38.584Z" }, ] +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" }, + { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" }, + { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" }, + { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816, upload-time = "2025-10-14T15:04:52.031Z" }, + { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186, upload-time = "2025-10-14T15:04:53.064Z" }, + { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = "2025-10-14T15:04:55.174Z" }, + { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" }, + { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" }, + { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" }, + { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = "2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = "2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = "2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = "2025-10-14T15:05:37.63Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, +] + [[package]] name = "websocket-client" version = "1.9.0"