From a1fc7edeea3061ce7e15dee5b1f1a607358df4f2 Mon Sep 17 00:00:00 2001 From: Travis Vasceannie Date: Fri, 19 Dec 2025 10:40:21 +0000 Subject: [PATCH] Enhance recovery and summarization services with asset path management - Added `asset_path` to the `Meeting` entity for audio asset storage. - Implemented `AudioValidationResult` for audio integrity checks during recovery. - Updated `RecoveryService` to validate audio file integrity for crashed meetings. - Enhanced `SummarizationService` to include consent persistence callbacks. - Introduced new database migrations for `diarization_jobs` and `user_preferences` tables. - Refactored various components to support the new asset path and audio validation features. - Improved documentation in `CLAUDE.md` to reflect changes in recovery and summarization functionalities. --- CLAUDE.md | 161 +- docs/triage.md | 363 +- repomix-output.md | 28577 ++++------------ repomix.config.json | 6 +- scripts/dev_watch_server.py | 4 +- .../application/services/recovery_service.py | 183 +- .../services/summarization_service.py | 12 +- src/noteflow/cli/retention.py | 2 - .../client/components/_thread_mixin.py | 2 - .../client/components/meeting_library.py | 5 +- src/noteflow/client/components/vu_meter.py | 14 +- src/noteflow/client/state.py | 2 - src/noteflow/config/constants.py | 9 +- src/noteflow/config/settings.py | 2 - src/noteflow/domain/entities/meeting.py | 5 + src/noteflow/domain/triggers/entities.py | 2 - src/noteflow/grpc/_mixins/diarization.py | 308 +- src/noteflow/grpc/_mixins/meeting.py | 2 + src/noteflow/grpc/_mixins/protocols.py | 8 + src/noteflow/grpc/_mixins/streaming.py | 40 +- src/noteflow/grpc/client.py | 13 +- src/noteflow/grpc/server.py | 42 +- src/noteflow/grpc/service.py | 48 +- src/noteflow/infrastructure/asr/dto.py | 2 - src/noteflow/infrastructure/audio/dto.py | 2 - src/noteflow/infrastructure/audio/levels.py | 2 - src/noteflow/infrastructure/audio/reader.py | 26 +- src/noteflow/infrastructure/audio/writer.py | 156 +- .../converters/orm_converters.py | 1 + .../infrastructure/diarization/assigner.py | 2 - .../infrastructure/diarization/dto.py | 2 - .../infrastructure/export/_formatting.py | 2 - .../infrastructure/persistence/database.py | 16 - .../persistence/migrations/env.py | 2 - ...d8e5f6a7b2c3_add_diarization_jobs_table.py | 80 + ...e9f0a1b2c3d4_add_asset_path_to_meetings.py | 45 + ...f0a1b2c3d4e5_add_user_preferences_table.py | 54 + ...3d4e5f6_add_streaming_diarization_turns.py | 62 + .../infrastructure/persistence/models.py | 92 + .../persistence/repositories/__init__.py | 10 + .../repositories/diarization_job_repo.py | 282 + .../persistence/repositories/meeting_repo.py | 4 +- .../repositories/preferences_repo.py | 85 + .../persistence/repositories/segment_repo.py | 2 - .../persistence/repositories/summary_repo.py | 113 +- .../persistence/unit_of_work.py | 22 + .../infrastructure/security/keystore.py | 131 +- .../infrastructure/security/protocols.py | 2 - .../infrastructure/summarization/factory.py | 2 - .../summarization/mock_provider.py | 2 - .../infrastructure/triggers/app_audio.py | 28 +- .../infrastructure/triggers/calendar.py | 9 +- .../infrastructure/triggers/foreground_app.py | 2 - support/__init__.py | 1 + support/db_utils.py | 186 + tests/application/test_meeting_service.py | 97 - tests/application/test_recovery_service.py | 251 +- tests/application/test_retention_service.py | 19 - .../application/test_summarization_service.py | 12 +- tests/conftest.py | 22 + tests/fixtures/__init__.py | 15 + 
tests/grpc/test_generate_summary.py | 4 +- tests/infrastructure/audio/test_writer.py | 259 +- .../infrastructure/security/test_keystore.py | 94 +- tests/integration/conftest.py | 136 +- tests/stress/conftest.py | 134 +- tests/stress/test_audio_integrity.py | 28 +- tests/stress/test_segmenter_fuzz.py | 4 +- 68 files changed, 8668 insertions(+), 23644 deletions(-) create mode 100644 src/noteflow/infrastructure/persistence/migrations/versions/d8e5f6a7b2c3_add_diarization_jobs_table.py create mode 100644 src/noteflow/infrastructure/persistence/migrations/versions/e9f0a1b2c3d4_add_asset_path_to_meetings.py create mode 100644 src/noteflow/infrastructure/persistence/migrations/versions/f0a1b2c3d4e5_add_user_preferences_table.py create mode 100644 src/noteflow/infrastructure/persistence/migrations/versions/g1b2c3d4e5f6_add_streaming_diarization_turns.py create mode 100644 src/noteflow/infrastructure/persistence/repositories/diarization_job_repo.py create mode 100644 src/noteflow/infrastructure/persistence/repositories/preferences_repo.py create mode 100644 support/__init__.py create mode 100644 support/db_utils.py create mode 100644 tests/fixtures/__init__.py diff --git a/CLAUDE.md b/CLAUDE.md index 9f34532..7ca0870 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,23 +29,46 @@ ruff check . # Lint ruff check --fix . # Autofix mypy src/noteflow # Strict type checks basedpyright # Additional type checks + +# Docker development +docker compose up -d postgres # PostgreSQL with health checks +python scripts/dev_watch_server.py # Auto-reload server (watches src/) ``` +## Docker Development + +```bash +# Start PostgreSQL (with pgvector) +docker compose up -d postgres + +# Dev container (VS Code) - full GUI environment +# .devcontainer/ includes PortAudio, GTK, pystray, pynput support +code . # Open in VS Code, select "Reopen in Container" + +# Development server with auto-reload +python scripts/dev_watch_server.py # Uses watchfiles, monitors src/ and alembic.ini +``` + +Dev container features: dbus-x11, GTK-3, libgl1 for system tray and hotkey support. 
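+
+A minimal sketch of the auto-reload pattern `scripts/dev_watch_server.py` relies on (assuming `watchfiles`; the server entry point below is a placeholder, not the script's actual import):
+
+```python
+from watchfiles import run_process
+
+
+def _serve() -> None:
+    # Placeholder target: import inside the function so each restart
+    # re-imports fresh code from src/.
+    from noteflow.grpc.server import main  # hypothetical entry point
+
+    main()
+
+
+if __name__ == "__main__":
+    # Restart _serve in a fresh process whenever watched paths change.
+    run_process("src", "alembic.ini", target=_serve)
+```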
+ ## Architecture ``` src/noteflow/ -├── domain/ # Entities (meeting, segment, annotation, summary) + ports (repository interfaces) -├── application/ # Use-cases/services (MeetingService, RecoveryService, ExportService) +├── domain/ # Entities (meeting, segment, annotation, summary, triggers) + ports +├── application/ # Use-cases/services (MeetingService, RecoveryService, ExportService, SummarizationService, TriggerService) ├── infrastructure/ # Implementations -│ ├── audio/ # sounddevice capture, ring buffer, VU levels, playback +│ ├── audio/ # sounddevice capture, ring buffer, VU levels, playback, buffered writer │ ├── asr/ # faster-whisper engine, VAD segmenter, streaming +│ ├── diarization/ # Speaker diarization (streaming: diart, offline: pyannote.audio) +│ ├── summarization/# Multi-provider summarization (CloudProvider, OllamaProvider) + citation verification +│ ├── triggers/ # Auto-start signal providers (calendar, audio activity, foreground app) │ ├── persistence/ # SQLAlchemy + asyncpg + pgvector, Alembic migrations │ ├── security/ # keyring keystore, AES-GCM encryption │ ├── export/ # Markdown/HTML export │ └── converters/ # ORM ↔ domain entity converters -├── grpc/ # Proto definitions, server, client, meeting store -├── client/ # Flet UI app + components (transcript, VU meter, playback) +├── grpc/ # Proto definitions, server, client, meeting store, modular mixins +├── client/ # Flet UI app + components (transcript, VU meter, playback, trigger mixin) └── config/ # Pydantic settings (NOTEFLOW_ env vars) ``` @@ -54,6 +77,26 @@ src/noteflow/ - Repository pattern with Unit of Work (`SQLAlchemyUnitOfWork`) - gRPC bidirectional streaming for audio → transcript flow - Protocol-based DI (see `domain/ports/` and infrastructure `protocols.py` files) +- Modular gRPC mixins for separation of concerns (see below) +- `BackgroundWorkerMixin` for standardized thread lifecycle in components + +## gRPC Mixin Architecture + +The gRPC server uses modular mixins for maintainability: + +``` +grpc/_mixins/ +├── streaming.py # ASR streaming, audio processing, partial buffers +├── diarization.py # Speaker diarization jobs (background refinement, job TTL) +├── summarization.py # Summary generation (separates LLM inference from DB transactions) +├── meeting.py # Meeting lifecycle (create, get, list, delete) +├── annotation.py # Segment annotations CRUD +├── export.py # Markdown/HTML document export +├── converters.py # Protobuf ↔ domain entity converters +└── protocols.py # ServicerHost protocol for mixin composition +``` + +Each mixin operates on `ServicerHost` protocol, enabling clean composition in `NoteFlowServicer`. 
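+
+A condensed sketch of that composition idea (illustrative only — the real `ServicerHost` protocol has more members than shown here):
+
+```python
+from typing import Protocol
+
+
+class ServicerHost(Protocol):
+    # Contract a mixin may rely on; this member is hypothetical.
+    meetings: dict[str, str]
+
+
+class MeetingMixin:
+    def _meeting_title(self: ServicerHost, meeting_id: str) -> str:
+        # Annotating `self` as the protocol lets type checkers verify that
+        # the composed class supplies everything the mixin touches.
+        return self.meetings[meeting_id]
+
+
+class NoteFlowServicer(MeetingMixin):
+    def __init__(self) -> None:
+        self.meetings: dict[str, str] = {}
+```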
## Database

@@ -101,3 +144,111 @@ python -m grpc_tools.protoc -I src/noteflow/grpc/proto \
- `spike_02_audio_capture/` - sounddevice + PortAudio
- `spike_03_asr_latency/` - faster-whisper benchmarks (0.05x real-time)
- `spike_04_encryption/` - keyring + AES-GCM (826 MB/s throughput)
+
+## Key Subsystems
+
+### Speaker Diarization
+- **Streaming**: diart for real-time speaker detection during recording
+- **Offline**: pyannote.audio for post-meeting refinement (higher quality)
+- **gRPC**: `RefineSpeakerDiarization` (background job), `GetDiarizationJobStatus` (polling), `RenameSpeaker`
+
+### Summarization
+- **Providers**: CloudProvider (Anthropic/OpenAI), OllamaProvider (local), MockProvider (testing)
+- **Citation verification**: Links summary claims to transcript evidence
+- **Consent**: Cloud providers require explicit user consent (persisted in the `user_preferences` table)
+
+### Trigger Detection
+- **Signals**: Calendar proximity, audio activity, foreground app detection
+- **Actions**: IGNORE, NOTIFY, AUTO_START with confidence thresholds
+- **Client integration**: Background polling with dialog prompts (start/snooze/dismiss)
+
+## Shared Utilities & Factories
+
+### Factories
+
+| Location | Function | Purpose |
+|----------|----------|---------|
+| `infrastructure/summarization/factory.py` | `create_summarization_service()` | Auto-configured service with provider detection |
+| `infrastructure/persistence/database.py` | `create_async_engine()` | SQLAlchemy async engine from settings |
+| `infrastructure/persistence/database.py` | `create_async_session_factory()` | Session factory from DB URL |
+| `config/settings.py` | `get_settings()` | Cached Settings from env vars |
+| `config/settings.py` | `get_trigger_settings()` | Cached TriggerSettings from env vars |
+
+### Converters
+
+| Location | Class/Function | Purpose |
+|----------|----------------|---------|
+| `infrastructure/converters/orm_converters.py` | `OrmConverter` | ORM ↔ domain entities (Meeting, Segment, Summary, etc.)
| +| `infrastructure/converters/asr_converters.py` | `AsrConverter` | ASR DTOs → domain WordTiming | +| `grpc/_mixins/converters.py` | `meeting_to_proto()`, `segment_to_proto_update()` | Domain → protobuf messages | +| `grpc/_mixins/converters.py` | `create_segment_from_asr()` | ASR result → Segment with word timings | + +### Repository Base (`persistence/repositories/_base.py`) + +| Method | Purpose | +|--------|---------| +| `_execute_scalar()` | Single result query (or None) | +| `_execute_scalars()` | All scalar results from query | +| `_add_and_flush()` | Add model and flush to DB | +| `_delete_and_flush()` | Delete model and flush | + +### Security Helpers (`infrastructure/security/keystore.py`) + +| Function | Purpose | +|----------|---------| +| `_decode_and_validate_key()` | Validate base64 key, check size | +| `_generate_key()` | Generate 256-bit key as `(bytes, base64_str)` | + +### Export Helpers (`infrastructure/export/_formatting.py`) + +| Function | Purpose | +|----------|---------| +| `format_timestamp()` | Seconds → `MM:SS` or `HH:MM:SS` | +| `format_datetime()` | Datetime → display string | + +### Summarization (`infrastructure/summarization/`) + +| Location | Function | Purpose | +|----------|----------|---------| +| `_parsing.py` | `build_transcript_prompt()` | Transcript with segment markers for LLM | +| `_parsing.py` | `parse_llm_response()` | JSON → Summary entity | +| `citation_verifier.py` | `verify_citations()` | Validate segment_ids exist | + +### Diarization (`infrastructure/diarization/assigner.py`) + +| Function | Purpose | +|----------|---------| +| `assign_speaker()` | Speaker for time range from turns | +| `assign_speakers_batch()` | Batch speaker assignment | + +### Triggers (`infrastructure/triggers/calendar.py`) + +| Function | Purpose | +|----------|---------| +| `parse_calendar_events()` | Parse events from config/env | + +### Client Mixins (`client/components/`) + +| Class | Purpose | +|-------|---------| +| `BackgroundWorkerMixin` | Thread lifecycle: `_start_worker()`, `_stop_worker()`, `_should_run()` | +| `AsyncOperationMixin[T]` | Async ops with state: `run_async_operation()` | +| `TriggerMixin` | Trigger signal polling | + +### Recovery Service (`application/services/recovery_service.py`) + +| Method | Purpose | +|--------|---------| +| `recover_all()` | Orchestrate meeting + job recovery | +| `RecoveryResult` | Dataclass with recovery counts | + +## Known Issues + +See `docs/triage.md` for tracked technical debt. + +**Resolved:** +- ~~Server-side state volatility~~ → Diarization jobs persisted to DB +- ~~Hardcoded directory paths~~ → `asset_path` column added to meetings +- ~~Synchronous blocking in async gRPC~~ → `run_in_executor` for diarization +- ~~Summarization consent not persisted~~ → Stored in `user_preferences` table +- ~~VU meter update throttling~~ → 20fps throttle implemented diff --git a/docs/triage.md b/docs/triage.md index f502d63..3ee4b21 100644 --- a/docs/triage.md +++ b/docs/triage.md @@ -1,265 +1,196 @@ -This is a comprehensive code review of the `NoteFlow` repository. +# Triage Review (Validated) -Overall, this codebase demonstrates a high level of engineering maturity. It effectively utilizes Clean Architecture concepts (Entities, Use Cases, Ports/Adapters), leveraging strong typing, Pydantic for validation, and SQLAlchemy/Alembic for persistence. The integration test setup using `testcontainers` is particularly robust. 
+Validated: 2025-12-19 +Legend: Status = Confirmed, Partially confirmed, Not observed, Already implemented +Citations use [path:line] format. -However, there are critical performance bottlenecks regarding async/sync bridging in the ASR engine, potential concurrency issues in the UI state management, and specific security considerations regarding the encryption implementation. +## 2. Architecture & State Management -Below is the review categorized into actionable feedback, formatted to be convertible into Git issues. +### Issue 2.1: Server-Side State Volatility ---- +Status: Confirmed +Severity: High +Location: src/noteflow/grpc/service.py, src/noteflow/grpc/_mixins/diarization.py, src/noteflow/grpc/server.py -## 1. Critical Architecture & Performance Issues +Evidence: +- In-memory stream state lives on the servicer (_active_streams, _audio_writers, _partial_buffers, _diarization_jobs). [src/noteflow/grpc/service.py:95] [src/noteflow/grpc/service.py:106] [src/noteflow/grpc/service.py:109] [src/noteflow/grpc/service.py:122] +- Background diarization jobs are stored only in a dict and status reads from it. [src/noteflow/grpc/_mixins/diarization.py:241] [src/noteflow/grpc/_mixins/diarization.py:480] +- Server shutdown only stops gRPC; no servicer cleanup hook is invoked. [src/noteflow/grpc/server.py:132] +- Crash recovery marks meetings ERROR but does not validate audio assets. [src/noteflow/application/services/recovery_service.py:21] [src/noteflow/application/services/recovery_service.py:71] -### Issue 1: Blocking ASR Inference in Async gRPC Server -**Severity:** Critical -**Location:** `src/noteflow/grpc/service.py`, `src/noteflow/infrastructure/asr/engine.py` +Example: +- If job state is lost (for example, after a restart), polling will fail and surface a fetch error in the UI. [src/noteflow/grpc/_mixins/diarization.py:479] [src/noteflow/grpc/client.py:885] [src/noteflow/client/components/meeting_library.py:534] -**The Problem:** -The `NoteFlowServer` uses `grpc.aio` (AsyncIO), but the `FasterWhisperEngine.transcribe` method is blocking (synchronous CPU-bound operation). -In `NoteFlowServicer._maybe_emit_partial` and `_process_audio_segment`, the code calls: -```python -# src/noteflow/grpc/service.py -partial_text = " ".join(result.text for result in self._asr_engine.transcribe(combined)) -``` -Since `transcribe` performs heavy computation, executing it directly within an `async def` method freezes the entire Python AsyncIO event loop. This blocks heartbeats, other RPC calls, and other concurrent meeting streams until inference completes. +Reusable code locations: +- `_close_audio_writer` already centralizes writer cleanup. [src/noteflow/grpc/service.py:247] +- Migration patterns for new tables exist (annotations). [src/noteflow/infrastructure/persistence/migrations/versions/b5c3e8a2d1f0_add_annotations_table.py:22] -**Actionable Solution:** -Offload the transcription to a separate thread pool executor. +Actions: +- Persist diarization jobs (table or cache) and query from `GetDiarizationJobStatus`. +- Add a shutdown hook to close all `_audio_writers` and flush buffers. +- Optional: add asset integrity checks after RecoveryService marks a meeting ERROR. -1. Modify `FasterWhisperEngine` to remain synchronous (it wraps CTranslate2 which releases the GIL often, but it is still blocking from an asyncio perspective). -2. Update `NoteFlowServicer` to run transcription in an executor. 
+### Issue 2.2: Implicit Meeting Asset Paths -```python -# In NoteFlowServicer -from functools import partial +Status: Confirmed +Severity: Medium +Location: src/noteflow/infrastructure/audio/reader.py, src/noteflow/infrastructure/audio/writer.py, src/noteflow/infrastructure/persistence/models.py -# Helper method -async def _run_transcription(self, audio): - loop = asyncio.get_running_loop() - # Use a ThreadPoolExecutor specifically for compute-heavy tasks - return await loop.run_in_executor( - None, - partial(list, self._asr_engine.transcribe(audio)) - ) +Evidence: +- Audio assets are read/written under `meetings_dir / meeting_id`. [src/noteflow/infrastructure/audio/reader.py:72] [src/noteflow/infrastructure/audio/writer.py:94] +- MeetingModel defines meeting fields (id/title/state/metadata/wrapped_dek) but no asset path. [src/noteflow/infrastructure/persistence/models.py:38] [src/noteflow/infrastructure/persistence/models.py:64] +- Delete logic also assumes `meetings_dir / meeting_id`. [src/noteflow/application/services/meeting_service.py:195] -# Usage in _maybe_emit_partial -results = await self._run_transcription(combined) -partial_text = " ".join(r.text for r in results) -``` +Example: +- Record a meeting with `meetings_dir` set to `~/.noteflow/meetings`, then change it to `/mnt/noteflow`. Playback will look in the new base and fail to find older audio. -### Issue 2: Synchronous `sounddevice` Callbacks in Async Client App -**Severity:** High -**Location:** `src/noteflow/infrastructure/audio/capture.py` +Reusable code locations: +- `MeetingAudioWriter.open` and `MeetingAudioReader.load_meeting_audio` are the path entry points. [src/noteflow/infrastructure/audio/writer.py:70] [src/noteflow/infrastructure/audio/reader.py:60] +- Migration templates live in `infrastructure/persistence/migrations/versions`. [src/noteflow/infrastructure/persistence/migrations/versions/b5c3e8a2d1f0_add_annotations_table.py:22] -**The Problem:** -The `sounddevice` library calls the python callback from a C-level background thread. In `SoundDeviceCapture._stream_callback`, you are invoking the user-provided callback: -```python -self._callback(audio_data, timestamp) -``` -In `app.py`, this callback (`_on_audio_frames`) interacts with `self._audio_activity.update` and `self._client.send_audio`. While `queue.put` is thread-safe, any heavy logic or object allocation here happens in the real-time audio thread. If Python garbage collection pauses this thread, audio artifacts (dropouts) will occur. +Actions: +- Add `asset_path` (or `storage_path`) column to meetings. +- Store the relative path at creation time and use it on read/delete. -**Actionable Solution:** -The callback should strictly put bytes into a thread-safe queue and return immediately. A separate consumer thread/task should process the VAD, VU meter logic, and network sending. +## 3. Concurrency & Performance -### Issue 3: Encryption Key Material in Memory -**Severity:** Medium -**Location:** `src/noteflow/infrastructure/security/crypto.py` +### Issue 3.1: Synchronous Blocking in Async gRPC (Streaming Diarization) -**The Problem:** -The `AesGcmCryptoBox` keeps the master key in memory via `_get_master_cipher`. While inevitable for operation, `secrets.token_bytes` creates immutable bytes objects which cannot be zeroed out (wiped) from memory when no longer needed. Python's GC handles cleanup, but the key lingers in RAM. 
+Status: Confirmed +Severity: Medium +Location: src/noteflow/grpc/_mixins/diarization.py, src/noteflow/grpc/_mixins/streaming.py -**Actionable Solution:** -While strict memory zeroing is hard in Python, you should minimize the lifespan of the `dek` (Data Encryption Key). -1. In `MeetingAudioWriter`, the `dek` is stored as an instance attribute: `self._dek`. This keeps the unencrypted key in memory for the duration of the meeting. -2. Consider refactoring `ChunkedAssetWriter` to store the `cipher` object (the `AESGCM` context) rather than the raw bytes of the `dek` if the underlying C-library handles memory better, though strictly speaking, the key is still in RAM. -3. **Critical:** Ensure `writer.close()` sets `self._dek = None` immediately (it currently does, which is good practice). +Evidence: +- `_process_streaming_diarization` calls `process_chunk` synchronously. [src/noteflow/grpc/_mixins/diarization.py:63] [src/noteflow/grpc/_mixins/diarization.py:92] +- It is invoked in the async streaming loop on every chunk. [src/noteflow/grpc/_mixins/streaming.py:379] +- Diarization engine uses pyannote/diart pipelines. [src/noteflow/infrastructure/diarization/engine.py:1] ---- +Example: +- With diarization enabled on CPU, heavy `process_chunk` calls can stall the event loop, delaying transcript updates and heartbeats. -## 2. Domain & Infrastructure Logic +Reusable code locations: +- ASR already offloads blocking work via `run_in_executor`. [src/noteflow/infrastructure/asr/engine.py:156] +- Offline diarization uses `asyncio.to_thread`. [src/noteflow/grpc/_mixins/diarization.py:305] -### Issue 4: Fallback Logic in `SummarizationService` -**Severity:** Low -**Location:** `src/noteflow/application/services/summarization_service.py` +Actions: +- Offload streaming diarization to a thread/process pool similar to ASR. +- Consider a bounded queue so diarization lag does not backpressure streaming. -**The Problem:** -The method `_get_provider_with_fallback` iterates through a hardcoded `fallback_order = [SummarizationMode.LOCAL, SummarizationMode.MOCK]`. This ignores the configuration order or user preference if they added new providers. +### Issue 3.2: VU Meter UI Updates on Every Audio Chunk -**Actionable Solution:** -Allow `SummarizationServiceSettings` to define a `fallback_chain: list[SummarizationMode]`. +Status: Confirmed +Severity: Medium +Location: src/noteflow/client/app.py, src/noteflow/client/components/vu_meter.py -### Issue 5: Race Condition in `MeetingStore` (In-Memory) -**Severity:** Medium -**Location:** `src/noteflow/grpc/meeting_store.py` +Evidence: +- Audio capture uses 100ms chunks. [src/noteflow/client/app.py:307] +- Each chunk triggers `VuMeterComponent.on_audio_frames`, which schedules a UI update. [src/noteflow/client/app.py:552] [src/noteflow/client/components/vu_meter.py:58] -**The Problem:** -The `MeetingStore` uses `threading.RLock`. However, the methods return the actual `Meeting` object reference. -```python -def get(self, meeting_id: str) -> Meeting | None: - with self._lock: - return self._meetings.get(meeting_id) -``` -The caller gets a reference to the mutable `Meeting` entity. If two threads get the meeting and modify it (e.g., `meeting.state = ...`), the `MeetingStore` lock does nothing to protect the entity itself, only the dictionary lookups. +Example: +- At 100ms chunks, the UI updates about 10 times per second. If chunk duration is lowered (e.g., 20ms), that becomes about 50 updates per second and can cause stutter. -**Actionable Solution:** -1. 
Return deep copies of the Meeting object (performance impact). -2. Or, implement specific atomic update methods on the Store (e.g., `update_status(id, status)`), rather than returning the whole object for modification. +Reusable code locations: +- Recording timer throttles updates with a fixed interval and background worker. [src/noteflow/client/components/recording_timer.py:14] -### Issue 6: `pgvector` Dependency Management -**Severity:** Low -**Location:** `src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py` +Actions: +- Throttle VU updates (for example, 20 fps) or update only when delta exceeds a threshold. -**The Problem:** -The migration blindly executes `CREATE EXTENSION IF NOT EXISTS vector`. On managed database services (like RDS or standard Docker Postgres images), the user might not have superuser privileges to install extensions, or the extension binaries might be missing. +## 4. Domain Logic & Reliability -**Actionable Solution:** -Wrap the extension creation in a try/catch block or check capabilities. For the integration tests, ensure the `pgvector/pgvector:pg16` image is strictly pinned (which you have done, good job). +### Issue 4.1: Summarization Consent Persistence ---- +Status: Confirmed +Severity: Low (UX) +Location: src/noteflow/application/services/summarization_service.py -## 3. Client & UI (Flet) +Evidence: +- Consent is stored on `SummarizationServiceSettings` and defaults to False. [src/noteflow/application/services/summarization_service.py:56] +- `grant_cloud_consent` only mutates the in-memory settings. [src/noteflow/application/services/summarization_service.py:150] -### Issue 7: Massive `app.py` File Size -**Severity:** Medium -**Location:** `src/noteflow/client/app.py` +Example: +- Users who grant cloud consent must re-consent after every server/client restart. -**The Problem:** -`app.py` is orchestrating too much. It handles UI layout, audio capture orchestration, gRPC client events, and state updates. It serves as a "God Class" Controller. +Reusable code locations: +- Existing persistence patterns for per-meeting data live in `infrastructure/persistence`. [src/noteflow/infrastructure/persistence/models.py:32] -**Actionable Solution:** -Refactor into a `ClientController` class separate from the UI layout construction. -1. `src/noteflow/client/controller.py`: Handles `NoteFlowClient`, `SoundDeviceCapture`, and updates `AppState`. -2. `src/noteflow/client/views.py`: Accepts `AppState` and renders UI. +Actions: +- Persist consent in a preferences table or config file and hydrate on startup. -### Issue 8: Re-rendering Efficiency in Transcript -**Severity:** Medium -**Location:** `src/noteflow/client/components/transcript.py` +### Issue 4.2: Annotation Validation / Point-in-Time Annotations -**The Problem:** -`_render_final_segment` appends controls to `self._list_view.controls`. In Flet, modifying a large list of controls can become slow as the transcript grows (hundreds of segments). +Status: Not observed (current code supports point annotations) +Severity: Low +Location: src/noteflow/domain/entities/annotation.py, src/noteflow/client/components/annotation_toolbar.py, src/noteflow/client/components/annotation_display.py -**Actionable Solution:** -1. Implement a "virtualized" list or pagination if Flet supports it efficiently. -2. If not, implement a sliding window rendering approach where only the last N segments + visible segments are rendered in the DOM, though this is complex in Flet. -3. 
**Immediate fix:** Ensure `auto_scroll` is handled efficiently. The current implementation clears and re-adds specific rows during search, which is heavy. +Evidence: +- Validation allows `end_time == start_time`. [src/noteflow/domain/entities/annotation.py:37] +- UI creates point annotations by setting `start_time == end_time`. [src/noteflow/client/components/annotation_toolbar.py:189] +- Display uses `start_time` only, not duration. [src/noteflow/client/components/annotation_display.py:164] ---- +Example: +- Clicking a point annotation seeks to the exact timestamp (no duration needed). -## 4. Specific Code Feedback (Nitpicks & Bugs) +Reusable code locations: +- If range annotations are introduced later, `AnnotationDisplayComponent` is where a start-end range would be rendered. [src/noteflow/client/components/annotation_display.py:148] -### 1. Hardcoded Audio Constants -**File:** `src/noteflow/infrastructure/asr/segmenter.py` -The `SegmenterConfig` defaults to `sample_rate=16000`. -The `SoundDeviceCapture` defaults to `16000`. -**Risk:** If the server is configured for 44.1kHz, the client currently defaults to 16kHz hardcoded in several places. -**Fix:** Ensure `DEFAULT_SAMPLE_RATE` from `src/noteflow/config/constants.py` is used everywhere. +Action: +- None required now; revisit if range annotations are added to the UI or exports. -### 2. Exception Swallowing in Audio Writer -**File:** `src/noteflow/grpc/service.py` -> `_write_audio_chunk_safe` -```python -except Exception as e: - logger.error("Failed to write audio chunk: %s", e) -``` -If the disk fills up or permissions change, the audio writer fails silently (just logging), but the meeting continues. The user might lose the audio recording entirely while thinking it's safe. -**Fix:** This error should probably trigger a circuit breaker that stops the recording or notifies the client via a gRPC status update or a metadata stream update. +## 5. Suggested Git Issues (Validated) -### 3. Trigger Service Rate Limiting Logic -**File:** `src/noteflow/application/services/trigger_service.py` -In `_determine_action`: -```python -if self._last_prompt is not None: - elapsed = now - self._last_prompt - if elapsed < self._settings.rate_limit_seconds: - return TriggerAction.IGNORE -``` -This logic ignores *all* triggers if within the rate limit. If a **high confidence** trigger (Auto-start) comes in 10 seconds after a low confidence prompt, it gets ignored. -**Fix:** The rate limit should likely apply to `NOTIFY` actions, but `AUTO_START` might need to bypass the rate limit or have a shorter one. +Issue A: Persist Diarization Jobs to Database +Status: Confirmed +Evidence: Jobs live in-memory and `GetDiarizationJobStatus` reads from the dict. [src/noteflow/grpc/_mixins/diarization.py:241] [src/noteflow/grpc/_mixins/diarization.py:480] +Reusable code locations: +- SQLAlchemy models/migrations patterns. [src/noteflow/infrastructure/persistence/models.py:32] [src/noteflow/infrastructure/persistence/migrations/versions/b5c3e8a2d1f0_add_annotations_table.py:22] +Tasks: +- Add JobModel with status fields. +- Update mixin to persist and query DB. -### 4. Database Session Lifecycle in UoW -**File:** `src/noteflow/infrastructure/persistence/unit_of_work.py` -The `__init__` does not create the session, `__aenter__` does. This is correct. However, `SqlAlchemyUnitOfWork` caches repositories: -```python -self._annotations_repo = SqlAlchemyAnnotationRepository(self._session) -``` -If `__aenter__` is called, `__aexit__` closes the session. 
If the same UoW instance is reused (calling `async with uow:` again), it creates a *new* session but overwrites the repo references. This is generally safe, but verify that `SqlAlchemyUnitOfWork` instances are intended to be reusable or disposable. Currently, they look reusable, which is fine. +Issue B: Implement Audio Writer Buffering +Status: Already implemented (close or re-scope) +Evidence: `MeetingAudioWriter` buffers and flushes based on `buffer_size`. [src/noteflow/infrastructure/audio/writer.py:126] +Reusable code locations: +- `AUDIO_BUFFER_SIZE_BYTES` constant. [src/noteflow/config/constants.py:26] +Tasks: +- None, unless you want to tune buffer size. -### 5. Frontend Polling vs Events -**File:** `src/noteflow/client/components/playback_sync.py` -`POSITION_POLL_INTERVAL = 0.1`. -Using a thread to poll `self._state.playback.current_position` every 100ms is CPU inefficient in Python (due to GIL). -**Suggestion:** Use the `sounddevice` stream callback time info to update the position state only when audio is actually playing, rather than a separate `while True` loop. +Issue C: Fallback for Headless Keyring +Status: Confirmed +Evidence: `KeyringKeyStore` only falls back to env var, not file storage. [src/noteflow/infrastructure/security/keystore.py:49] +Reusable code locations: +- `KeyringKeyStore` and `InMemoryKeyStore` live in the same module. [src/noteflow/infrastructure/security/keystore.py:35] +Tasks: +- Add `FileKeyStore` and wire fallback in server/service initialization. ---- +Issue D: Throttled VU Meter in Client +Status: Confirmed +Evidence: Each chunk schedules a UI update with no throttle. [src/noteflow/client/components/vu_meter.py:58] +Reusable code locations: +- Background worker/throttle pattern in `RecordingTimerComponent`. [src/noteflow/client/components/recording_timer.py:14] +Tasks: +- Add a `last_update_time` and update at fixed cadence. -## 5. Security Review +Issue E: Explicit Asset Path Storage +Status: Confirmed +Evidence: Meeting paths derived from `meetings_dir / meeting_id`. [src/noteflow/infrastructure/audio/reader.py:72] +Reusable code locations: +- Meeting model + migrations. [src/noteflow/infrastructure/persistence/models.py:32] +Tasks: +- Add `asset_path` column and persist at create time. -### 1. Keyring Headless Failure -**File:** `src/noteflow/infrastructure/security/keystore.py` -**Risk:** The app crashes if `keyring` cannot find a backend (common in Docker/Headless Linux servers). -**Fix:** -```python -except keyring.errors.KeyringError: - logger.warning("Keyring unavailable, falling back to environment variable or temporary key") - # Implement a fallback strategy or explicit failure -``` -Currently, it raises `RuntimeError`, which crashes the server startup. +Issue F: PGVector Index Creation +Status: Confirmed (requires product decision) +Evidence: Migration uses `ivfflat` index created immediately. [src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py:95] +Reusable code locations: +- Same migration file for index changes. [src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py:95] +Tasks: +- Consider switching to HNSW or defer index creation until data exists. -### 2. DEK Handling -**Analysis:** You generate a DEK, wrap it, and store `wrapped_dek` in the DB. The `dek` stays in memory during the stream. -**Verdict:** This is standard envelope encryption practice. Acceptable for this application tier. +## 6. Code Quality & Nitpicks (Validated) ---- - -## 6. 
Generated Issues for Git - -### Issue: Asynchronous Transcription Processing -**Title:** Refactor ASR Engine to run in ThreadPoolExecutor -**Description:** -The gRPC server uses `asyncio`, but `FasterWhisperEngine.transcribe` is blocking. This freezes the event loop during transcription segments. -**Task:** -1. Inject `asyncio.get_running_loop()` into `NoteFlowServicer`. -2. Wrap `self._asr_engine.transcribe` calls in `loop.run_in_executor`. - -### Issue: Client Audio Callback Optimization -**Title:** Optimize Audio Capture Callback -**Description:** -`SoundDeviceCapture` callback executes application logic (network sending, VAD updates) in the audio thread. -**Task:** -1. Change callback to only `queue.put_nowait()`. -2. Move logic to a dedicated consumer worker thread. - -### Issue: Handle Write Errors in Audio Stream -**Title:** Critical Error Handling for Audio Writer -**Description:** -`_write_audio_chunk_safe` catches exceptions and logs them, potentially resulting in data loss without user feedback. -**Task:** -1. If writing fails, update the meeting state to `ERROR`. -2. Send an error message back to the client via the Transcript stream if possible, or terminate the connection. - -### Issue: Database Extension Installation Check -**Title:** Graceful degradation for `pgvector` -**Description:** -Migration script `6a9d9f408f40` attempts to create an extension. This fails if the DB user isn't superuser. -**Task:** -1. Check if extension exists or if user has permissions. -2. If not, fail with a clear message about required database setup steps. - -### Issue: Foreground App Window Detection on Linux/Headless -**Title:** Handle `pywinctl` dependencies -**Description:** -`pywinctl` requires X11/display headers on Linux. The server might run headless. -**Task:** -1. Wrap `ForegroundAppProvider` imports in try/except blocks. -2. Ensure the app doesn't crash if `pywinctl` fails to load. - ---- - -## 7. Packaging & Deployment (Future) - -Since you mentioned packaging is a WIP: -1. **Dependencies:** Separating `server` deps (torch, faster-whisper) from `client` deps (flet, sounddevice) is crucial. Use `pyproject.toml` extras: `pip install noteflow[server]` vs `noteflow[client]`. -2. **Model Management:** The Docker image for the server will be huge due to Torch/Whisper. Consider a build stage that pre-downloads the "base" model so the container starts faster. - -## Conclusion - -The code is high quality, well-typed, and structurally sound. Fixing the **Blocking ASR** issue is the only mandatory change before any serious load testing or deployment. The rest are robustness and architectural improvements. \ No newline at end of file +- HTML export template is minimal inline CSS (no external framework). [src/noteflow/infrastructure/export/html.py:33] + Example: optionally add a lightweight stylesheet (with offline fallback) for nicer exports. +- Transcript partial row is updated in place (no remove/re-add), so flicker risk is already mitigated. [src/noteflow/client/components/transcript.py:182] +- `Segment.word_count` uses `text.split()` when no words are present. [src/noteflow/domain/entities/segment.py:72] + Example: for very large transcripts, a streaming count (for example, regex iterator) avoids allocating a full list. 
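+  A sketch of that streaming-count idea (illustrative; not the current implementation):
+
+  ```python
+  import re
+
+  def count_words(text: str) -> int:
+      # Count whitespace-delimited tokens lazily instead of allocating
+      # the full list that text.split() would build.
+      return sum(1 for _ in re.finditer(r"\S+", text))
+  ```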
diff --git a/repomix-output.md b/repomix-output.md index ba72a62..9fdc70d 100644 --- a/repomix-output.md +++ b/repomix-output.md @@ -1,4 +1,5 @@ This file is a merged representation of a subset of the codebase, containing specifically included files, combined into a single document by Repomix. +The content has been processed where comments have been removed, empty lines have been removed, content has been formatted for parsing in markdown style. # File Summary @@ -28,9 +29,12 @@ The content is organized as follows: ## Notes - Some files may have been excluded based on .gitignore rules and Repomix's configuration - Binary files are not included in this packed representation. Please refer to the Repository Structure section for a complete list of file paths, including binary files -- Only files matching these patterns are included: src/, tests/ +- Only files matching these patterns are included: src/ - Files matching patterns in .gitignore are excluded - Files matching default ignore patterns are excluded +- Code comments have been removed from supported file types +- Empty lines have been removed from all files +- Content has been formatted for parsing in markdown style - Files are sorted by Git change count (files with more changes are at the bottom) # Directory Structure @@ -97,6 +101,16 @@ src/ __init__.py value_objects.py grpc/ + _mixins/ + __init__.py + annotation.py + converters.py + diarization.py + export.py + meeting.py + protocols.py + streaming.py + summarization.py proto/ __init__.py noteflow_pb2_grpc.py @@ -184,913 +198,64 @@ src/ __init__.py __init__.py noteflow_pb2.py -tests/ - application/ - __init__.py - test_export_service.py - test_meeting_service.py - test_recovery_service.py - test_retention_service.py - test_summarization_service.py - test_trigger_service.py - client/ - test_async_mixin.py - test_summary_panel.py - test_transcript_component.py - domain/ - __init__.py - test_annotation.py - test_meeting.py - test_segment.py - test_summary.py - test_triggers.py - test_value_objects.py - grpc/ - __init__.py - test_diarization_refine.py - test_generate_summary.py - test_partial_transcription.py - infrastructure/ - asr/ - __init__.py - test_dto.py - test_engine.py - test_segmenter.py - test_streaming_vad.py - audio/ - __init__.py - conftest.py - test_capture.py - test_dto.py - test_levels.py - test_reader.py - test_ring_buffer.py - test_writer.py - export/ - test_formatting.py - test_html.py - test_markdown.py - security/ - test_crypto.py - test_keystore.py - summarization/ - test_citation_verifier.py - test_cloud_provider.py - test_mock_provider.py - test_ollama_provider.py - triggers/ - conftest.py - test_audio_activity.py - test_foreground_app.py - __init__.py - test_converters.py - test_diarization.py - integration/ - __init__.py - conftest.py - test_repositories.py - test_trigger_settings.py - test_unit_of_work.py - __init__.py - conftest.py ``` # Files -## File: src/noteflow/config/constants.py -````python -"""Centralized constants for NoteFlow. - -This module provides shared constants used across the codebase to avoid -magic numbers and ensure consistency. 
-""" - -from __future__ import annotations - -from typing import Final - -# Audio constants -DEFAULT_SAMPLE_RATE: Final[int] = 16000 -"""Default audio sample rate in Hz (16 kHz).""" - -# gRPC constants -DEFAULT_GRPC_PORT: Final[int] = 50051 -"""Default gRPC server port.""" - -MAX_GRPC_MESSAGE_SIZE: Final[int] = 100 * 1024 * 1024 -"""Maximum gRPC message size in bytes (100 MB).""" -```` - -## File: src/noteflow/infrastructure/triggers/app_audio.py -````python -"""App audio activity provider. - -Detects audio activity from system output while whitelisted meeting apps are active. -This is a best-effort heuristic: it combines (a) system output activity and -(b) presence of whitelisted app windows to infer a likely meeting. -""" - -from __future__ import annotations - -import logging -import time -from dataclasses import dataclass, field -from typing import TYPE_CHECKING - -from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource -from noteflow.infrastructure.audio.levels import RmsLevelProvider -from noteflow.infrastructure.triggers.audio_activity import ( - AudioActivityProvider, - AudioActivitySettings, -) - -if TYPE_CHECKING: - import numpy as np - from numpy.typing import NDArray - -logger = logging.getLogger(__name__) - - -@dataclass -class AppAudioSettings: - """Configuration for app audio detection. - - Attributes: - enabled: Whether app audio detection is enabled. - threshold_db: Minimum dB level to consider as activity. - window_seconds: Time window for sustained activity detection. - min_active_ratio: Minimum ratio of active samples in window. - min_samples: Minimum samples required before evaluation. - max_history: Maximum samples retained in history. - weight: Confidence weight contributed by this provider. - meeting_apps: Set of app name substrings to match (lowercase). - suppressed_apps: App substrings to ignore even if matched. - sample_rate: Sample rate for system output capture. - sample_duration_seconds: Duration of each sampling read. - chunk_duration_seconds: Duration of sub-chunks for activity history updates. 
- """ - - enabled: bool - threshold_db: float - window_seconds: float - min_active_ratio: float - min_samples: int - max_history: int - weight: float - meeting_apps: set[str] = field(default_factory=set) - suppressed_apps: set[str] = field(default_factory=set) - sample_rate: int = 16000 - sample_duration_seconds: float = 0.5 - chunk_duration_seconds: float = 0.1 - - def __post_init__(self) -> None: - self.meeting_apps = {app.lower() for app in self.meeting_apps} - self.suppressed_apps = {app.lower() for app in self.suppressed_apps} - - -class _SystemOutputSampler: - """Best-effort system output sampler using sounddevice.""" - - def __init__(self, sample_rate: int, channels: int = 1) -> None: - self._sample_rate = sample_rate - self._channels = channels - self._stream = None - self._extra_settings = None - self._device = None - self._available: bool | None = None - - def _select_device(self) -> None: - try: - import sounddevice as sd - except ImportError: - return self._extracted_from__select_device_5( - "sounddevice not available - app audio detection disabled" - ) - # Default to output device and WASAPI loopback when available (Windows) - try: - default_output = sd.default.device[1] - except (TypeError, IndexError): - default_output = None - - try: - hostapi_index = sd.default.hostapi - hostapi = sd.query_hostapis(hostapi_index) if hostapi_index is not None else None - except Exception: - hostapi = None - - if hostapi and hostapi.get("type") == "Windows WASAPI" and default_output is not None: - # On WASAPI, loopback devices appear as separate input devices - # Fall through to monitor/loopback device detection below - pass - - # Fallback: look for monitor/loopback devices (Linux/PulseAudio) - try: - devices = sd.query_devices() - except Exception: - return self._extracted_from__select_device_5( - "Failed to query audio devices for app audio detection" - ) - for idx, dev in enumerate(devices): - name = str(dev.get("name", "")).lower() - if int(dev.get("max_input_channels", 0)) <= 0: - continue - if "monitor" in name or "loopback" in name: - return self._extracted_from__select_device_24(idx) - self._available = False - logger.warning("No loopback audio device found - app audio detection disabled") - - # TODO Rename this here and in `_select_device` - def _extracted_from__select_device_24(self, arg0): - self._device = arg0 - self._available = True - return - - # TODO Rename this here and in `_select_device` - def _extracted_from__select_device_5(self, arg0): - self._available = False - logger.warning(arg0) - return - - def _ensure_stream(self) -> bool: - if self._available is False: - return False - - if self._available is None: - self._select_device() - if self._available is False: - return False - - if self._stream is not None: - return True - - try: - import sounddevice as sd - - self._stream = sd.InputStream( - device=self._device, - channels=self._channels, - samplerate=self._sample_rate, - dtype="float32", - extra_settings=self._extra_settings, - ) - self._stream.start() - return True - except Exception as exc: - logger.warning("Failed to start system output capture: %s", exc) - self._stream = None - self._available = False - return False - - def read_frames(self, duration_seconds: float) -> NDArray[np.float32] | None: - if not self._ensure_stream(): - return None - - if self._stream is None: - return None - - frames = max(1, int(self._sample_rate * duration_seconds)) - try: - data, _ = self._stream.read(frames) - except Exception as exc: - logger.debug("System output read failed: %s", 
exc) - return None - - return data.reshape(-1).astype("float32") - - def close(self) -> None: - if self._stream is None: - return - try: - self._stream.stop() - self._stream.close() - except Exception: - logger.debug("Failed to close system output stream", exc_info=True) - finally: - self._stream = None - - -class AppAudioProvider: - """Detect app audio activity from whitelisted meeting apps.""" - - def __init__(self, settings: AppAudioSettings) -> None: - self._settings = settings - self._sampler = _SystemOutputSampler(sample_rate=settings.sample_rate) - self._level_provider = RmsLevelProvider() - self._audio_activity = AudioActivityProvider( - self._level_provider, - AudioActivitySettings( - enabled=settings.enabled, - threshold_db=settings.threshold_db, - window_seconds=settings.window_seconds, - min_active_ratio=settings.min_active_ratio, - min_samples=settings.min_samples, - max_history=settings.max_history, - weight=settings.weight, - ), - ) - - @property - def source(self) -> TriggerSource: - return TriggerSource.AUDIO_ACTIVITY - - @property - def max_weight(self) -> float: - return self._settings.weight - - def is_enabled(self) -> bool: - return self._settings.enabled - - def get_signal(self) -> TriggerSignal | None: - if not self.is_enabled(): - return None - if not self._settings.meeting_apps: - return None - - app_title = self._detect_meeting_app() - if not app_title: - return None - - frames = self._sampler.read_frames(self._settings.sample_duration_seconds) - if frames is None or frames.size == 0: - return None - - self._update_activity_history(frames) - if self._audio_activity.get_signal() is None: - return None - - return TriggerSignal( - source=self.source, - weight=self.max_weight, - app_name=app_title, - ) - - def _update_activity_history(self, frames: NDArray[np.float32]) -> None: - chunk_size = max(1, int(self._settings.sample_rate * self._settings.chunk_duration_seconds)) - now = time.monotonic() - for offset in range(0, len(frames), chunk_size): - chunk = frames[offset : offset + chunk_size] - if chunk.size == 0: - continue - self._audio_activity.update(chunk, now) - - def _detect_meeting_app(self) -> str | None: - try: - import pywinctl - except ImportError: - return None - - titles: list[str] = [] - try: - if hasattr(pywinctl, "getAllWindows"): - windows = pywinctl.getAllWindows() - titles = [w.title for w in windows if getattr(w, "title", None)] - elif hasattr(pywinctl, "getAllTitles"): - titles = [t for t in pywinctl.getAllTitles() if t] - except Exception as exc: - logger.debug("Failed to list windows for app detection: %s", exc) - return None - - for title in titles: - title_lower = title.lower() - if any(suppressed in title_lower for suppressed in self._settings.suppressed_apps): - continue - if any(app in title_lower for app in self._settings.meeting_apps): - return title - - return None - - def close(self) -> None: - """Release system audio resources.""" - self._sampler.close() -```` - -## File: src/noteflow/infrastructure/triggers/calendar.py -````python -"""Calendar trigger provider. - -Best-effort calendar integration using configured event windows. 
-""" - -from __future__ import annotations - -import json -import logging -from dataclasses import dataclass -from datetime import datetime, timedelta, timezone -from typing import TYPE_CHECKING - -from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource - -if TYPE_CHECKING: - from collections.abc import Iterable - -logger = logging.getLogger(__name__) - - -@dataclass(frozen=True) -class CalendarEvent: - """Simple calendar event window.""" - - start: datetime - end: datetime - title: str | None = None - - -@dataclass -class CalendarSettings: - """Configuration for calendar trigger detection.""" - - enabled: bool - weight: float - lookahead_minutes: int - lookbehind_minutes: int - events: list[CalendarEvent] - - -class CalendarProvider: - """Provide trigger signal based on calendar proximity.""" - - def __init__(self, settings: CalendarSettings) -> None: - self._settings = settings - - @property - def source(self) -> TriggerSource: - return TriggerSource.CALENDAR - - @property - def max_weight(self) -> float: - return self._settings.weight - - def is_enabled(self) -> bool: - return self._settings.enabled - - def get_signal(self) -> TriggerSignal | None: - if not self.is_enabled(): - return None - - if not self._settings.events: - return None - - now = datetime.now(timezone.utc) - window_start = now - timedelta(minutes=self._settings.lookbehind_minutes) - window_end = now + timedelta(minutes=self._settings.lookahead_minutes) - - return next( - ( - TriggerSignal( - source=self.source, - weight=self.max_weight, - app_name=event.title, - ) - for event in self._settings.events - if self._event_overlaps_window(event, window_start, window_end) - ), - None, - ) - - @staticmethod - def _event_overlaps_window( - event: CalendarEvent, - window_start: datetime, - window_end: datetime, - ) -> bool: - event_start = _ensure_tz(event.start) - event_end = _ensure_tz(event.end) - return event_start <= window_end and event_end >= window_start - - -def parse_calendar_events(raw_events: object) -> list[CalendarEvent]: - """Parse calendar events from config/env payloads.""" - if raw_events is None: - return [] - - if isinstance(raw_events, str): - raw_events = _load_events_from_json(raw_events) - - if isinstance(raw_events, dict): - raw_events = [raw_events] - - if not isinstance(raw_events, Iterable): - return [] - - events: list[CalendarEvent] = [] - for item in raw_events: - if isinstance(item, CalendarEvent): - events.append(item) - continue - if isinstance(item, dict): - start = _parse_datetime(item.get("start")) - end = _parse_datetime(item.get("end")) - if start and end: - events.append(CalendarEvent(start=start, end=end, title=item.get("title"))) - return events - - -def _load_events_from_json(raw: str) -> list[dict[str, object]]: - try: - parsed = json.loads(raw) - except json.JSONDecodeError: - logger.debug("Failed to parse calendar events JSON") - return [] - if isinstance(parsed, list): - return [item for item in parsed if isinstance(item, dict)] - return [parsed] if isinstance(parsed, dict) else [] - - -def _parse_datetime(value: object) -> datetime | None: - if isinstance(value, datetime): - return value - if not isinstance(value, str) or not value: - return None - cleaned = value.strip() - if cleaned.endswith("Z"): - cleaned = f"{cleaned[:-1]}+00:00" - try: - return datetime.fromisoformat(cleaned) - except ValueError: - return None - - -def _ensure_tz(value: datetime) -> datetime: - if value.tzinfo is None: - return value.replace(tzinfo=timezone.utc) - return 
value.astimezone(timezone.utc) -```` - -## File: tests/grpc/test_diarization_refine.py -````python -"""Tests for RefineSpeakerDiarization RPC guards.""" - -from __future__ import annotations - -import pytest - -from noteflow.grpc.proto import noteflow_pb2 -from noteflow.grpc.service import NoteFlowServicer - - -class _DummyContext: - """Minimal gRPC context that raises if abort is invoked.""" - - async def abort(self, code, details): # type: ignore[override] - raise AssertionError(f"abort called: {code} - {details}") - - -@pytest.mark.asyncio -async def test_refine_speaker_diarization_rejects_active_meeting() -> None: - """Refinement should be blocked while a meeting is still recording.""" - servicer = NoteFlowServicer(diarization_engine=object()) - store = servicer._get_memory_store() - - meeting = store.create("Active meeting") - meeting.start_recording() - store.update(meeting) - - response = await servicer.RefineSpeakerDiarization( - noteflow_pb2.RefineSpeakerDiarizationRequest(meeting_id=str(meeting.id)), - _DummyContext(), - ) - - assert response.segments_updated == 0 - assert response.error_message - assert "stopped" in response.error_message.lower() -```` - -## File: tests/infrastructure/test_diarization.py -````python -"""Tests for speaker diarization infrastructure. - -Tests the SpeakerTurn DTO and speaker assignment utilities. -""" - -from __future__ import annotations - -import pytest - -from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker, assign_speakers_batch - - -class TestSpeakerTurn: - """Tests for the SpeakerTurn dataclass.""" - - def test_create_valid_turn(self) -> None: - """Create a valid speaker turn.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0) - assert turn.speaker == "SPEAKER_00" - assert turn.start == 0.0 - assert turn.end == 5.0 - assert turn.confidence == 1.0 - - def test_create_turn_with_confidence(self) -> None: - """Create a turn with custom confidence.""" - turn = SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0, confidence=0.85) - assert turn.confidence == 0.85 - - def test_invalid_end_before_start_raises(self) -> None: - """End time before start time raises ValueError.""" - with pytest.raises(ValueError, match=r"end.*<.*start"): - SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=5.0) - - def test_invalid_confidence_negative_raises(self) -> None: - """Negative confidence raises ValueError.""" - with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"): - SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=-0.1) - - def test_invalid_confidence_above_one_raises(self) -> None: - """Confidence above 1.0 raises ValueError.""" - with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"): - SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=1.5) - - def test_duration_property(self) -> None: - """Duration property calculates correctly.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=2.5, end=7.5) - assert turn.duration == 5.0 - - def test_overlaps_returns_true_for_overlap(self) -> None: - """overlaps() returns True when ranges overlap.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) - assert turn.overlaps(3.0, 7.0) - assert turn.overlaps(7.0, 12.0) - assert turn.overlaps(5.0, 10.0) - assert turn.overlaps(0.0, 15.0) - - def test_overlaps_returns_false_for_no_overlap(self) -> None: - """overlaps() returns False when ranges don't overlap.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) - assert not turn.overlaps(0.0, 5.0) - 
assert not turn.overlaps(10.0, 15.0) - assert not turn.overlaps(0.0, 3.0) - assert not turn.overlaps(12.0, 20.0) - - def test_overlap_duration_full_overlap(self) -> None: - """overlap_duration() for full overlap returns turn duration.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) - assert turn.overlap_duration(0.0, 15.0) == 5.0 - - def test_overlap_duration_partial_overlap_left(self) -> None: - """overlap_duration() for partial overlap on left side.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) - assert turn.overlap_duration(3.0, 7.0) == 2.0 - - def test_overlap_duration_partial_overlap_right(self) -> None: - """overlap_duration() for partial overlap on right side.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) - assert turn.overlap_duration(8.0, 15.0) == 2.0 - - def test_overlap_duration_contained(self) -> None: - """overlap_duration() when range is contained within turn.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=20.0) - assert turn.overlap_duration(5.0, 10.0) == 5.0 - - def test_overlap_duration_no_overlap(self) -> None: - """overlap_duration() returns 0.0 when no overlap.""" - turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0) - assert turn.overlap_duration(0.0, 3.0) == 0.0 - assert turn.overlap_duration(12.0, 20.0) == 0.0 - - -class TestAssignSpeaker: - """Tests for the assign_speaker function.""" - - def test_empty_turns_returns_none(self) -> None: - """Empty turns list returns None with 0 confidence.""" - speaker, confidence = assign_speaker(0.0, 5.0, []) - assert speaker is None - assert confidence == 0.0 - - def test_zero_duration_segment_returns_none(self) -> None: - """Zero duration segment returns None.""" - turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] - speaker, confidence = assign_speaker(5.0, 5.0, turns) - assert speaker is None - assert confidence == 0.0 - - def test_single_turn_full_overlap(self) -> None: - """Single turn with full overlap returns high confidence.""" - turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] - speaker, confidence = assign_speaker(2.0, 8.0, turns) - assert speaker == "SPEAKER_00" - assert confidence == 1.0 - - def test_single_turn_partial_overlap(self) -> None: - """Single turn with partial overlap returns proportional confidence.""" - turns = [SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)] - speaker, confidence = assign_speaker(0.0, 10.0, turns) - assert speaker == "SPEAKER_00" - assert confidence == 0.5 - - def test_multiple_turns_chooses_dominant_speaker(self) -> None: - """Multiple turns chooses speaker with most overlap.""" - turns = [ - SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0), - SpeakerTurn(speaker="SPEAKER_01", start=3.0, end=10.0), - ] - speaker, confidence = assign_speaker(0.0, 10.0, turns) - assert speaker == "SPEAKER_01" - assert confidence == 0.7 - - def test_no_overlap_returns_none(self) -> None: - """No overlapping turns returns None.""" - turns = [ - SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), - SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0), - ] - speaker, confidence = assign_speaker(6.0, 9.0, turns) - assert speaker is None - assert confidence == 0.0 - - def test_equal_overlap_chooses_first_encountered(self) -> None: - """Equal overlap chooses first speaker encountered.""" - turns = [ - SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), - SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0), - ] - speaker, confidence = assign_speaker(3.0, 7.0, turns) - # 
SPEAKER_00: overlap 2.0, SPEAKER_01: overlap 2.0 - # First one wins since > not >= - assert speaker == "SPEAKER_00" - assert confidence == 0.5 - - -class TestAssignSpeakersBatch: - """Tests for the assign_speakers_batch function.""" - - def test_empty_segments(self) -> None: - """Empty segments list returns empty results.""" - turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)] - results = assign_speakers_batch([], turns) - assert results == [] - - def test_empty_turns(self) -> None: - """Empty turns returns all None speakers.""" - segments = [(0.0, 5.0), (5.0, 10.0)] - results = assign_speakers_batch(segments, []) - assert len(results) == 2 - assert all(speaker is None for speaker, _ in results) - assert all(conf == 0.0 for _, conf in results) - - def test_batch_assignment(self) -> None: - """Batch assignment processes all segments.""" - turns = [ - SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0), - SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0), - SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=15.0), - ] - segments = [(0.0, 5.0), (5.0, 10.0), (10.0, 15.0)] - results = assign_speakers_batch(segments, turns) - assert len(results) == 3 - assert results[0] == ("SPEAKER_00", 1.0) - assert results[1] == ("SPEAKER_01", 1.0) - assert results[2] == ("SPEAKER_00", 1.0) - - def test_batch_with_gaps(self) -> None: - """Batch assignment handles gaps between turns.""" - turns = [ - SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0), - SpeakerTurn(speaker="SPEAKER_01", start=7.0, end=10.0), - ] - segments = [(0.0, 3.0), (3.0, 7.0), (7.0, 10.0)] - results = assign_speakers_batch(segments, turns) - assert results[0] == ("SPEAKER_00", 1.0) - assert results[1] == (None, 0.0) - assert results[2] == ("SPEAKER_01", 1.0) -```` - ## File: src/noteflow/application/services/export_service.py ````python -"""Export application service. - -Orchestrates transcript export to various formats. -""" - from __future__ import annotations - from enum import Enum from pathlib import Path from typing import TYPE_CHECKING - from noteflow.infrastructure.export import HtmlExporter, MarkdownExporter, TranscriptExporter - if TYPE_CHECKING: from noteflow.domain.entities import Meeting, Segment from noteflow.domain.ports.unit_of_work import UnitOfWork from noteflow.domain.value_objects import MeetingId - - class ExportFormat(Enum): - """Supported export formats.""" - MARKDOWN = "markdown" HTML = "html" - - class ExportService: - """Application service for transcript export operations. - - Provides use cases for exporting meeting transcripts to various formats. - """ - def __init__(self, uow: UnitOfWork) -> None: - """Initialize the export service. - - Args: - uow: Unit of work for persistence. - """ self._uow = uow self._exporters: dict[ExportFormat, TranscriptExporter] = { ExportFormat.MARKDOWN: MarkdownExporter(), ExportFormat.HTML: HtmlExporter(), } - def _get_exporter(self, fmt: ExportFormat) -> TranscriptExporter: - """Get exporter for format. - - Args: - fmt: Export format. - - Returns: - Exporter instance. - - Raises: - ValueError: If format is not supported. - """ exporter = self._exporters.get(fmt) if exporter is None: raise ValueError(f"Unsupported export format: {fmt}") return exporter - async def export_transcript( self, meeting_id: MeetingId, fmt: ExportFormat = ExportFormat.MARKDOWN, ) -> str: - """Export meeting transcript to string. - - Args: - meeting_id: Meeting identifier. - fmt: Export format. - - Returns: - Formatted transcript string. 
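# A self-contained sketch of the registry-plus-suffix-inference pattern
# ExportService uses: formats map to exporter objects above, and the
# _infer_format_from_extension helper below picks a format from a file
# suffix when none is given. Names here are illustrative stand-ins,
# not the project's API.
from enum import Enum

class _SketchFormat(Enum):
    MARKDOWN = "markdown"
    HTML = "html"

_SUFFIX_TO_FORMAT = {
    ".md": _SketchFormat.MARKDOWN,
    ".markdown": _SketchFormat.MARKDOWN,
    ".html": _SketchFormat.HTML,
}

def _infer_format(suffix: str) -> _SketchFormat:
    fmt = _SUFFIX_TO_FORMAT.get(suffix.lower())
    if fmt is None:
        # Fail fast rather than silently defaulting to one format.
        raise ValueError(f"Unsupported export extension: {suffix}")
    return fmt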
- - Raises: - ValueError: If meeting not found. - """ async with self._uow: meeting = await self._uow.meetings.get(meeting_id) if meeting is None: raise ValueError(f"Meeting {meeting_id} not found") - segments = await self._uow.segments.get_by_meeting(meeting_id) exporter = self._get_exporter(fmt) return exporter.export(meeting, segments) - async def export_to_file( self, meeting_id: MeetingId, output_path: Path, fmt: ExportFormat | None = None, ) -> Path: - """Export meeting transcript to file. - - Args: - meeting_id: Meeting identifier. - output_path: Output file path (extension determines format if not specified). - fmt: Export format (optional, inferred from extension if not provided). - - Returns: - Path to the exported file. - - Raises: - ValueError: If meeting not found or format cannot be determined. - """ - # Determine format from extension if not provided if fmt is None: fmt = self._infer_format_from_extension(output_path.suffix) - content = await self.export_transcript(meeting_id, fmt) - - # Ensure correct extension exporter = self._get_exporter(fmt) if output_path.suffix != exporter.file_extension: output_path = output_path.with_suffix(exporter.file_extension) - output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(content, encoding="utf-8") return output_path - def _infer_format_from_extension(self, extension: str) -> ExportFormat: - """Infer export format from file extension. - - Args: - extension: File extension (e.g., '.md', '.html'). - - Returns: - Inferred export format. - - Raises: - ValueError: If extension is not recognized. - """ extension_map = { ".md": ExportFormat.MARKDOWN, ".markdown": ExportFormat.MARKDOWN, @@ -1104,135 +269,68 @@ class ExportService: f"Supported: {', '.join(extension_map.keys())}" ) return fmt - def get_supported_formats(self) -> list[tuple[str, str]]: - """Get list of supported export formats. - - Returns: - List of (format_name, file_extension) tuples. - """ return [(e.format_name, e.file_extension) for e in self._exporters.values()] - async def preview_export( self, meeting: Meeting, segments: list[Segment], fmt: ExportFormat = ExportFormat.MARKDOWN, ) -> str: - """Preview export without fetching from database. - - Useful for previewing exports with in-memory data. - - Args: - meeting: Meeting entity. - segments: List of segments. - fmt: Export format. - - Returns: - Formatted transcript string. - """ exporter = self._get_exporter(fmt) return exporter.export(meeting, segments) ```` ## File: src/noteflow/application/services/recovery_service.py ````python -"""Recovery service for crash recovery on startup. - -Detect and recover meetings left in active states after server restart. -""" - from __future__ import annotations - import logging from datetime import UTC, datetime from typing import TYPE_CHECKING, ClassVar - from noteflow.domain.value_objects import MeetingState - if TYPE_CHECKING: from noteflow.domain.entities import Meeting from noteflow.domain.ports.unit_of_work import UnitOfWork - logger = logging.getLogger(__name__) - - class RecoveryService: - """Recover meetings from crash states on server startup. - - Find meetings left in RECORDING or STOPPING state and mark them as ERROR. - This handles the case where the server crashed during an active meeting. - """ - ACTIVE_STATES: ClassVar[list[MeetingState]] = [ MeetingState.RECORDING, MeetingState.STOPPING, ] - def __init__(self, uow: UnitOfWork) -> None: - """Initialize recovery service. - - Args: - uow: Unit of work for persistence. 
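# A compact sketch of the recovery policy described above, assuming an
# in-memory list of meeting dicts; the real service does the same walk
# through its unit of work. _ACTIVE mirrors RecoveryService.ACTIVE_STATES.
from datetime import UTC, datetime

_ACTIVE = {"RECORDING", "STOPPING"}

def recover_crashed(meetings: list[dict]) -> list[dict]:
    stamp = datetime.now(UTC).isoformat()
    recovered = []
    for meeting in meetings:
        if meeting["state"] not in _ACTIVE:
            continue
        # Stamp the audit trail before forcing ERROR, so the pre-crash
        # state is still readable afterwards.
        meeting["metadata"] = {
            "crash_recovered": "true",
            "crash_recovery_time": stamp,
            "crash_previous_state": meeting["state"],
        }
        meeting["state"] = "ERROR"
        recovered.append(meeting)
    return recovered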
- """ self._uow = uow - async def recover_crashed_meetings(self) -> list[Meeting]: - """Find and recover meetings left in active states. - - Mark all meetings in RECORDING or STOPPING state as ERROR - with metadata explaining the crash recovery. - - Returns: - List of recovered meetings. - """ async with self._uow: - # Find all meetings in active states meetings, total = await self._uow.meetings.list_all( states=self.ACTIVE_STATES, - limit=1000, # Handle up to 1000 crashed meetings + limit=1000, ) - if total == 0: logger.info("No crashed meetings found during recovery") return [] - logger.warning( "Found %d meetings in active state during startup, marking as ERROR", total, ) - recovered: list[Meeting] = [] recovery_time = datetime.now(UTC).isoformat() - for meeting in meetings: previous_state = meeting.state.name meeting.mark_error() - - # Add crash recovery metadata meeting.metadata["crash_recovered"] = "true" meeting.metadata["crash_recovery_time"] = recovery_time meeting.metadata["crash_previous_state"] = previous_state - await self._uow.meetings.update(meeting) recovered.append(meeting) - logger.info( "Recovered crashed meeting: id=%s, previous_state=%s", meeting.id, previous_state, ) - await self._uow.commit() logger.info("Crash recovery complete: %d meetings recovered", len(recovered)) return recovered - async def count_crashed_meetings(self) -> int: - """Count meetings currently in crash states. - - Returns: - Number of meetings in RECORDING or STOPPING state. - """ async with self._uow: total = 0 for state in self.ACTIVE_STATES: @@ -1240,301 +338,37 @@ class RecoveryService: return total ```` -## File: src/noteflow/application/services/trigger_service.py -````python -"""Trigger evaluation and decision service. - -Orchestrate trigger detection with rate limiting and snooze support. -""" - -from __future__ import annotations - -import logging -import time -from dataclasses import dataclass -from typing import TYPE_CHECKING - -from noteflow.domain.triggers.entities import TriggerAction, TriggerDecision, TriggerSignal - -if TYPE_CHECKING: - from noteflow.domain.triggers.ports import SignalProvider - -logger = logging.getLogger(__name__) - - -@dataclass -class TriggerServiceSettings: - """Configuration for trigger service. - - Attributes: - enabled: Whether trigger detection is enabled. - auto_start_enabled: Whether to auto-start recording at high confidence. - rate_limit_seconds: Minimum seconds between trigger prompts. - snooze_seconds: Default snooze duration. - threshold_ignore: Confidence below which triggers are ignored. - threshold_auto_start: Confidence at or above which auto-start is allowed. - """ - - enabled: bool - auto_start_enabled: bool - rate_limit_seconds: int - snooze_seconds: int - threshold_ignore: float - threshold_auto_start: float - - def __post_init__(self) -> None: - if self.threshold_auto_start < self.threshold_ignore: - msg = "threshold_auto_start must be >= threshold_ignore" - raise ValueError(msg) - - -class TriggerService: - """Orchestrate trigger detection with rate limiting and snooze. - - Evaluates all signal providers and determines the appropriate action - based on combined confidence scores, rate limits, and snooze state. 
- - Threshold behavior is driven by TriggerServiceSettings: - - Confidence < threshold_ignore: IGNORE - - Confidence >= threshold_auto_start: AUTO_START (if enabled, else NOTIFY) - - Otherwise: NOTIFY - """ - - def __init__( - self, - providers: list[SignalProvider], - settings: TriggerServiceSettings, - ) -> None: - """Initialize trigger service. - - Args: - providers: List of signal providers to evaluate. - settings: Configuration settings for trigger behavior. - """ - self._providers = providers - self._settings = settings - self._last_prompt: float | None = None - self._snoozed_until: float | None = None - - @property - def is_enabled(self) -> bool: - """Check if trigger service is enabled.""" - return self._settings.enabled - - @property - def is_snoozed(self) -> bool: - """Check if triggers are currently snoozed.""" - if self._snoozed_until is None: - return False - return time.monotonic() < self._snoozed_until - - @property - def snooze_remaining_seconds(self) -> float: - """Get remaining snooze time in seconds, or 0 if not snoozed.""" - if self._snoozed_until is None: - return 0.0 - remaining = self._snoozed_until - time.monotonic() - return max(0.0, remaining) - - def evaluate(self) -> TriggerDecision: - """Evaluate all providers and determine action. - - Returns: - TriggerDecision with action and confidence details. - """ - now = time.monotonic() - - # Check if disabled - if not self._settings.enabled: - return self._make_decision(TriggerAction.IGNORE, 0.0, ()) - - # Check if snoozed - if self._snoozed_until is not None and now < self._snoozed_until: - return self._make_decision(TriggerAction.IGNORE, 0.0, ()) - - # Collect signals from all enabled providers - signals = [] - for provider in self._providers: - if not provider.is_enabled(): - continue - if signal := provider.get_signal(): - signals.append(signal) - - # Calculate total confidence - confidence = sum(s.weight for s in signals) - - # Determine action - action = self._determine_action(confidence, now) - - # Record prompt time for rate limiting - if action in (TriggerAction.NOTIFY, TriggerAction.AUTO_START): - self._last_prompt = now - logger.info( - "Trigger %s: confidence=%.2f, signals=%d", - action.value, - confidence, - len(signals), - ) - - return self._make_decision(action, confidence, tuple(signals)) - - def _determine_action(self, confidence: float, now: float) -> TriggerAction: - """Determine action based on confidence and rate limits. - - Args: - confidence: Total confidence from all signals. - now: Current monotonic time. - - Returns: - TriggerAction to take. - """ - # Check threshold_ignore first - if confidence < self._settings.threshold_ignore: - return TriggerAction.IGNORE - - # AUTO_START bypasses rate limit (high-confidence trigger should not be delayed) - if confidence >= self._settings.threshold_auto_start and self._settings.auto_start_enabled: - return TriggerAction.AUTO_START - - # Rate limit applies only to NOTIFY actions - if self._last_prompt is not None: - elapsed = now - self._last_prompt - if elapsed < self._settings.rate_limit_seconds: - return TriggerAction.IGNORE - - return TriggerAction.NOTIFY - - def _make_decision( - self, - action: TriggerAction, - confidence: float, - signals: tuple[TriggerSignal, ...], - ) -> TriggerDecision: - """Create a TriggerDecision with the given parameters.""" - return TriggerDecision( - action=action, - confidence=confidence, - signals=signals, - ) - - def snooze(self, seconds: int | None = None) -> None: - """Snooze triggers for the specified duration. 
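# A minimal sketch of the threshold mapping implemented by _determine_action
# above, with the rate limit and auto-start bypass folded in. Plain floats
# stand in for the real settings object; all names here are illustrative.
def _decide(
    confidence: float,
    *,
    ignore_below: float,
    auto_start_at: float,
    auto_start_enabled: bool,
    seconds_since_prompt: float | None,
    rate_limit_seconds: float,
) -> str:
    if confidence < ignore_below:
        return "IGNORE"
    # High confidence bypasses the rate limit entirely.
    if confidence >= auto_start_at and auto_start_enabled:
        return "AUTO_START"
    # The rate limit only throttles NOTIFY prompts.
    if seconds_since_prompt is not None and seconds_since_prompt < rate_limit_seconds:
        return "IGNORE"
    return "NOTIFY"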
- - Args: - seconds: Snooze duration in seconds (uses default if None). - """ - duration = seconds if seconds is not None else self._settings.snooze_seconds - self._snoozed_until = time.monotonic() + duration - logger.info("Triggers snoozed for %d seconds", duration) - - def clear_snooze(self) -> None: - """Clear any active snooze.""" - if self._snoozed_until is not None: - self._snoozed_until = None - logger.info("Trigger snooze cleared") - - def set_enabled(self, enabled: bool) -> None: - """Enable or disable trigger detection. - - Args: - enabled: Whether triggers should be enabled. - """ - self._settings.enabled = enabled - logger.info("Triggers %s", "enabled" if enabled else "disabled") - - def set_auto_start(self, enabled: bool) -> None: - """Enable or disable auto-start on high confidence. - - Args: - enabled: Whether auto-start should be enabled. - """ - self._settings.auto_start_enabled = enabled - logger.info("Auto-start %s", "enabled" if enabled else "disabled") -```` - ## File: src/noteflow/application/__init__.py ````python -"""NoteFlow application layer. -Contains application services that orchestrate use cases. -""" ```` ## File: src/noteflow/cli/__init__.py ````python -"""NoteFlow CLI tools.""" + ```` ## File: src/noteflow/cli/__main__.py ````python -"""Main entry point for NoteFlow CLI.""" - from noteflow.cli.retention import main - if __name__ == "__main__": main() ```` ## File: src/noteflow/client/components/_thread_mixin.py ````python -"""Mixin for background worker thread lifecycle management. - -Provides standardized thread start/stop patterns for UI components -that need background polling or timer threads. -""" - from __future__ import annotations - import threading from collections.abc import Callable - - class BackgroundWorkerMixin: - """Mixin providing background worker thread lifecycle management. - - Manages thread creation, start, stop, and cleanup for components - that need background polling loops. - - Usage: - class MyComponent(BackgroundWorkerMixin): - def __init__(self): - self._init_worker() - - def start_polling(self): - self._start_worker(self._poll_loop, "MyPoller") - - def stop_polling(self): - self._stop_worker() - - def _poll_loop(self): - while self._should_run(): - # Do work - self._wait_interval(0.1) - """ - _worker_thread: threading.Thread | None _stop_event: threading.Event - def _init_worker(self) -> None: - """Initialize worker attributes. - - Call this in __init__ of classes using this mixin. - """ self._worker_thread = None self._stop_event = threading.Event() - def _start_worker(self, target: Callable[[], None], name: str) -> None: - """Start background worker thread. - - No-op if worker is already running. - - Args: - target: Callable to run in background thread. - name: Thread name for debugging. - """ if self._worker_thread and self._worker_thread.is_alive(): return - self._stop_event.clear() self._worker_thread = threading.Thread( target=target, @@ -1542,74 +376,32 @@ class BackgroundWorkerMixin: name=name, ) self._worker_thread.start() - def _stop_worker(self, timeout: float = 1.0) -> None: - """Stop background worker thread. - - Signals stop event and waits for thread to finish. - - Args: - timeout: Maximum seconds to wait for thread join. - """ self._stop_event.set() if self._worker_thread: self._worker_thread.join(timeout=timeout) self._worker_thread = None - def _should_run(self) -> bool: - """Check if worker loop should continue. - - Returns: - True if worker should continue, False if stop requested. 
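# A runnable miniature of the start/stop pattern this mixin provides: a
# polling loop that exits promptly once the stop event is set. Only the
# standard library is used; Poller is an illustrative consumer, not
# project code.
import threading

class Poller:
    def __init__(self) -> None:
        self._stop = threading.Event()
        self._thread: threading.Thread | None = None

    def start(self) -> None:
        if self._thread and self._thread.is_alive():
            return  # already running; mirror the mixin's no-op behavior
        self._stop.clear()
        self._thread = threading.Thread(target=self._loop, daemon=True, name="Poller")
        self._thread.start()

    def stop(self, timeout: float = 1.0) -> None:
        self._stop.set()
        if self._thread:
            self._thread.join(timeout=timeout)
            self._thread = None

    def _loop(self) -> None:
        while not self._stop.is_set():
            # ... do one unit of work here ...
            self._stop.wait(0.1)  # interruptible sleep, not time.sleep()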
- """ return not self._stop_event.is_set() - def _wait_interval(self, seconds: float) -> None: - """Wait for interval, returning early if stop requested. - - Use this instead of time.sleep() in worker loops. - - Args: - seconds: Seconds to wait (returns early if stop signaled). - """ self._stop_event.wait(seconds) ```` ## File: src/noteflow/client/components/connection_panel.py ````python -"""Server connection management panel. - -Uses NoteFlowClient directly (not wrapped) and follows same callback pattern. -Does not recreate any types - imports and uses existing ones. -""" - from __future__ import annotations - import logging import threading from collections.abc import Callable from typing import TYPE_CHECKING, Final - import flet as ft - -# REUSE existing types - do not recreate from noteflow.grpc.client import NoteFlowClient, ServerInfo - if TYPE_CHECKING: from noteflow.client.state import AppState - logger = logging.getLogger(__name__) - RECONNECT_ATTEMPTS: Final[int] = 3 RECONNECT_DELAY_SECONDS: Final[float] = 2.0 - - class ConnectionPanelComponent: - """Server connection management panel. - - Uses NoteFlowClient directly (not wrapped) and follows same callback pattern. - """ - def __init__( self, state: AppState, @@ -1618,15 +410,6 @@ class ConnectionPanelComponent: on_transcript_callback: Callable[..., None] | None = None, on_connection_change_callback: Callable[[bool, str], None] | None = None, ) -> None: - """Initialize connection panel. - - Args: - state: Centralized application state. - on_connected: Callback when connected with client and server info. - on_disconnected: Callback when disconnected. - on_transcript_callback: Callback to pass to NoteFlowClient for transcripts. - on_connection_change_callback: Callback to pass to NoteFlowClient for connection changes. - """ self._state = state self._on_connected = on_connected self._on_disconnected = on_disconnected @@ -1640,23 +423,14 @@ class ConnectionPanelComponent: self._reconnect_lock = threading.Lock() self._reconnect_in_progress = False self._suppress_connection_events = False - self._server_field: ft.TextField | None = None self._connect_btn: ft.ElevatedButton | None = None self._status_text: ft.Text | None = None self._server_info_text: ft.Text | None = None - @property def client(self) -> NoteFlowClient | None: - """Get current gRPC client instance.""" return self._client - def build(self) -> ft.Column: - """Build connection panel UI. - - Returns: - Column containing connection controls and status. 
- """ self._status_text = ft.Text( "Not connected", size=14, @@ -1667,7 +441,6 @@ class ConnectionPanelComponent: size=12, color=ft.Colors.GREY_500, ) - self._server_field = ft.TextField( value=self._state.server_address, label="Server Address", @@ -1679,7 +452,6 @@ class ConnectionPanelComponent: on_click=self._on_connect_click, icon=ft.Icons.CLOUD_OFF, ) - return ft.Column( [ self._status_text, @@ -1688,9 +460,7 @@ class ConnectionPanelComponent: ], spacing=10, ) - def update_button_state(self) -> None: - """Update connect button state based on connection status.""" if self._connect_btn: if self._state.connected: self._connect_btn.text = "Disconnect" @@ -1699,9 +469,7 @@ class ConnectionPanelComponent: self._connect_btn.text = "Connect" self._connect_btn.icon = ft.Icons.CLOUD_OFF self._state.request_update() - def disconnect(self) -> None: - """Disconnect from server.""" self._manual_disconnect = True self._auto_reconnect_enabled = False self._cancel_reconnect() @@ -1712,45 +480,26 @@ class ConnectionPanelComponent: finally: self._suppress_connection_events = False self._client = None - self._state.connected = False self._state.server_info = None - self._update_status("Disconnected", ft.Colors.GREY_600) self.update_button_state() - - # Follow NoteFlowClient callback pattern with error handling if self._on_disconnected: try: self._on_disconnected() except Exception as e: logger.error("on_disconnected callback error: %s", e) - def _on_server_change(self, e: ft.ControlEvent) -> None: - """Handle server address change. - - Args: - e: Control event. - """ self._state.server_address = str(e.control.value) - def _on_connect_click(self, e: ft.ControlEvent) -> None: - """Handle connect/disconnect button click. - - Args: - e: Control event. - """ if self._state.connected: self.disconnect() else: self._manual_disconnect = False self._cancel_reconnect() threading.Thread(target=self._connect, daemon=True).start() - def _connect(self) -> None: - """Connect to server (background thread).""" self._update_status("Connecting...", ft.Colors.ORANGE) - try: if self._client: self._suppress_connection_events = True @@ -1758,14 +507,11 @@ class ConnectionPanelComponent: self._client.disconnect() finally: self._suppress_connection_events = False - - # Create client with callbacks - use NoteFlowClient directly self._client = NoteFlowClient( server_address=self._state.server_address, on_transcript=self._on_transcript_callback, on_connection_change=self._handle_connection_change, ) - if self._client.connect(timeout=10.0): if info := self._client.get_server_info(): self._state.connected = True @@ -1787,19 +533,10 @@ class ConnectionPanelComponent: except Exception as exc: logger.error("Connection error: %s", exc) self._update_status(f"Error: {exc}", ft.Colors.RED) - def _handle_connection_change(self, connected: bool, message: str) -> None: - """Handle connection state change from NoteFlowClient. - - Args: - connected: Connection state. - message: Status message. 
- """ if self._suppress_connection_events: return - self._state.connected = connected - if connected: self._auto_reconnect_enabled = True self._manual_disconnect = False @@ -1814,29 +551,18 @@ class ConnectionPanelComponent: ) elif not self._reconnect_in_progress: self._start_reconnect_loop(message) - self._state.run_on_ui_thread(self.update_button_state) - - # Forward to external callback if provided if (callback := self._on_connection_change_callback) is not None: try: self._state.run_on_ui_thread(lambda: callback(connected, message)) except Exception as e: logger.error("on_connection_change callback error: %s", e) - def _on_connect_success(self, info: ServerInfo) -> None: - """Handle successful connection (UI thread). - - Args: - info: Server info from connection. - """ self._auto_reconnect_enabled = True self._reconnect_stop_event.set() self._reconnect_in_progress = False self.update_button_state() self._update_status("Connected", ft.Colors.GREEN) - - # Update server info display if self._server_info_text: asr_status = "ready" if info.asr_ready else "not ready" self._server_info_text.value = ( @@ -1844,22 +570,16 @@ class ConnectionPanelComponent: f"ASR: {info.asr_model} ({asr_status}) | " f"Active meetings: {info.active_meetings}" ) - self._state.request_update() - - # Follow NoteFlowClient callback pattern with error handling if self._on_connected and self._client: try: self._on_connected(self._client, info) except Exception as e: logger.error("on_connected callback error: %s", e) - def _start_reconnect_loop(self, message: str) -> None: - """Start background reconnect attempts.""" with self._reconnect_lock: if self._reconnect_in_progress: return - self._reconnect_in_progress = True self._reconnect_stop_event.clear() self._reconnect_thread = threading.Thread( @@ -1868,32 +588,23 @@ class ConnectionPanelComponent: daemon=True, ) self._reconnect_thread.start() - def _reconnect_worker(self, message: str) -> None: - """Attempt to reconnect several times before giving up.""" if not self._client: self._reconnect_in_progress = False return - - # Stop streaming here to avoid audio queue growth while reconnecting. self._client.stop_streaming() - for attempt in range(1, RECONNECT_ATTEMPTS + 1): if self._reconnect_stop_event.is_set(): self._reconnect_in_progress = False return - warning = f"Disconnected: {message}. Reconnecting ({attempt}/{RECONNECT_ATTEMPTS})" if self._state.recording: warning += " - recording will stop if not reconnected." self._update_status(warning, ft.Colors.ORANGE) - if self._attempt_reconnect(): self._reconnect_in_progress = False return - self._reconnect_stop_event.wait(RECONNECT_DELAY_SECONDS) - self._reconnect_in_progress = False self._auto_reconnect_enabled = False if self._state.recording: @@ -1901,25 +612,16 @@ class ConnectionPanelComponent: else: final_message = "Reconnection failed." self._finalize_disconnect(final_message) - def _attempt_reconnect(self) -> bool: - """Attempt a single reconnect. - - Returns: - True if reconnected successfully. 
- """ if not self._client: return False - self._suppress_connection_events = True try: self._client.disconnect() finally: self._suppress_connection_events = False - if not self._client.connect(timeout=10.0): return False - info = self._client.get_server_info() if not info: self._suppress_connection_events = True @@ -1928,1017 +630,59 @@ class ConnectionPanelComponent: finally: self._suppress_connection_events = False return False - self._state.connected = True self._state.server_info = info self._state.run_on_ui_thread(lambda: self._on_connect_success(info)) return True - def _finalize_disconnect(self, message: str) -> None: - """Finalize disconnect after failed reconnect attempts.""" self._state.connected = False self._state.server_info = None self._update_status(message, ft.Colors.RED) self._state.run_on_ui_thread(self.update_button_state) - def handle_disconnect() -> None: if self._on_disconnected: try: self._on_disconnected() except Exception as e: logger.error("on_disconnected callback error: %s", e) - if self._client: threading.Thread(target=self._disconnect_client, daemon=True).start() - self._state.run_on_ui_thread(handle_disconnect) - def _disconnect_client(self) -> None: - """Disconnect client without triggering connection callbacks.""" if not self._client: return - self._suppress_connection_events = True try: self._client.disconnect() finally: self._suppress_connection_events = False self._client = None - def _cancel_reconnect(self) -> None: - """Stop any in-progress reconnect attempt.""" self._reconnect_stop_event.set() - def _update_status(self, message: str, color: str) -> None: - """Update status text. - - Args: - message: Status message. - color: Text color. - """ - def update() -> None: if self._status_text: self._status_text.value = message self._status_text.color = color self._state.request_update() - self._state.run_on_ui_thread(update) ```` -## File: src/noteflow/client/components/meeting_library.py -````python -"""Meeting library component for browsing and exporting meetings. - -Uses MeetingInfo, ExportResult from grpc.client and format_datetime from _formatting. -Does not recreate any types - imports and uses existing ones. -""" - -from __future__ import annotations - -import logging -import threading -import time -from collections.abc import Callable -from datetime import datetime -from typing import TYPE_CHECKING - -import flet as ft - -# REUSE existing formatting - do not recreate -from noteflow.infrastructure.export._formatting import format_datetime - -if TYPE_CHECKING: - from noteflow.client.state import AppState - from noteflow.grpc.client import MeetingInfo, NoteFlowClient - -logger = logging.getLogger(__name__) - - -class MeetingLibraryComponent: - """Meeting library for browsing and exporting meetings. - - Uses NoteFlowClient.list_meetings() and export_transcript() for data. - """ - - DIARIZATION_POLL_INTERVAL_SECONDS: float = 2.0 - - def __init__( - self, - state: AppState, - get_client: Callable[[], NoteFlowClient | None], - on_meeting_selected: Callable[[MeetingInfo], None] | None = None, - ) -> None: - """Initialize meeting library. - - Args: - state: Centralized application state. - get_client: Callable that returns current gRPC client or None. - on_meeting_selected: Callback when a meeting is selected. 
- """ - self._state = state - self._get_client = get_client - self._on_meeting_selected = on_meeting_selected - - # UI elements - self._search_field: ft.TextField | None = None - self._list_view: ft.ListView | None = None - self._export_btn: ft.ElevatedButton | None = None - self._analyze_btn: ft.ElevatedButton | None = None - self._rename_btn: ft.ElevatedButton | None = None - self._refresh_btn: ft.IconButton | None = None - self._column: ft.Column | None = None - - # Export dialog - self._export_dialog: ft.AlertDialog | None = None - self._format_dropdown: ft.Dropdown | None = None - - # Analyze speakers dialog - self._analyze_dialog: ft.AlertDialog | None = None - self._num_speakers_field: ft.TextField | None = None - - # Rename speakers dialog - self._rename_dialog: ft.AlertDialog | None = None - self._rename_fields: dict[str, ft.TextField] = {} - - def build(self) -> ft.Column: - """Build meeting library UI. - - Returns: - Column containing search, list, and export controls. - """ - self._search_field = ft.TextField( - label="Search meetings", - prefix_icon=ft.Icons.SEARCH, - on_change=self._on_search_change, - expand=True, - ) - self._refresh_btn = ft.IconButton( - icon=ft.Icons.REFRESH, - tooltip="Refresh meetings", - on_click=self._on_refresh_click, - ) - self._export_btn = ft.ElevatedButton( - "Export", - icon=ft.Icons.DOWNLOAD, - on_click=self._show_export_dialog, - disabled=True, - ) - self._analyze_btn = ft.ElevatedButton( - "Refine Speakers", - icon=ft.Icons.RECORD_VOICE_OVER, - on_click=self._show_analyze_dialog, - disabled=True, - ) - self._rename_btn = ft.ElevatedButton( - "Rename Speakers", - icon=ft.Icons.EDIT, - on_click=self._show_rename_dialog, - disabled=True, - ) - - self._list_view = ft.ListView( - spacing=5, - padding=10, - height=200, - ) - - self._column = ft.Column( - [ - ft.Row([self._search_field, self._refresh_btn]), - ft.Container( - content=self._list_view, - border=ft.border.all(1, ft.Colors.GREY_400), - border_radius=8, - ), - ft.Row( - [self._analyze_btn, self._rename_btn, self._export_btn], - alignment=ft.MainAxisAlignment.END, - spacing=10, - ), - ], - spacing=10, - ) - return self._column - - def refresh_meetings(self) -> None: - """Refresh meeting list from server.""" - client = self._get_client() - if not client: - logger.warning("No gRPC client available") - return - - try: - meetings = client.list_meetings(limit=50) - self._state.meetings = meetings - self._state.run_on_ui_thread(self._render_meetings) - except Exception as exc: - logger.error("Error fetching meetings: %s", exc) - - def _on_search_change(self, e: ft.ControlEvent) -> None: - """Handle search field change.""" - self._render_meetings() - - def _on_refresh_click(self, e: ft.ControlEvent) -> None: - """Handle refresh button click.""" - self.refresh_meetings() - - def _render_meetings(self) -> None: - """Render meeting list (UI thread only).""" - if not self._list_view: - return - - self._list_view.controls.clear() - - # Filter by search query - search_query = (self._search_field.value or "").lower() if self._search_field else "" - filtered_meetings = [m for m in self._state.meetings if search_query in m.title.lower()] - - for meeting in filtered_meetings: - self._list_view.controls.append(self._create_meeting_row(meeting)) - - self._state.request_update() - - def _create_meeting_row(self, meeting: MeetingInfo) -> ft.Container: - """Create a row for a meeting. - - Args: - meeting: Meeting info to display. - - Returns: - Container with meeting details. 
- """ - # Format datetime from timestamp - created_dt = datetime.fromtimestamp(meeting.created_at) if meeting.created_at else None - date_str = format_datetime(created_dt) - - # Format duration - duration = meeting.duration_seconds - duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else "--:--" - - is_selected = self._state.selected_meeting and self._state.selected_meeting.id == meeting.id - - row = ft.Row( - [ - ft.Column( - [ - ft.Text(meeting.title, weight=ft.FontWeight.BOLD, size=14), - ft.Text( - f"{date_str} | {meeting.state} | {meeting.segment_count} segments | {duration_str}", - size=11, - color=ft.Colors.GREY_600, - ), - ], - spacing=2, - expand=True, - ), - ] - ) - - return ft.Container( - content=row, - padding=10, - border_radius=4, - bgcolor=ft.Colors.BLUE_50 if is_selected else None, - on_click=lambda e, m=meeting: self._on_meeting_click(m), - ink=True, - ) - - def _on_meeting_click(self, meeting: MeetingInfo) -> None: - """Handle meeting row click. - - Args: - meeting: Selected meeting. - """ - self._state.selected_meeting = meeting - - # Enable action buttons - if self._export_btn: - self._export_btn.disabled = False - if self._analyze_btn: - self._analyze_btn.disabled = not self._can_refine_speakers(meeting) - if self._rename_btn: - self._rename_btn.disabled = not self._can_refine_speakers(meeting) - - # Re-render to update selection - self._render_meetings() - - # Notify callback - if self._on_meeting_selected: - self._on_meeting_selected(meeting) - - def _show_export_dialog(self, e: ft.ControlEvent) -> None: - """Show export format selection dialog.""" - if not self._state.selected_meeting: - return - - self._format_dropdown = ft.Dropdown( - label="Export Format", - options=[ - ft.dropdown.Option("markdown", "Markdown (.md)"), - ft.dropdown.Option("html", "HTML (.html)"), - ], - value="markdown", - width=200, - ) - - self._export_dialog = ft.AlertDialog( - title=ft.Text("Export Transcript"), - content=ft.Column( - [ - ft.Text(f"Meeting: {self._state.selected_meeting.title}"), - self._format_dropdown, - ], - spacing=10, - tight=True, - ), - actions=[ - ft.TextButton("Cancel", on_click=self._close_export_dialog), - ft.ElevatedButton("Export", on_click=self._do_export), - ], - actions_alignment=ft.MainAxisAlignment.END, - ) - - if self._state._page: - self._state._page.dialog = self._export_dialog - self._export_dialog.open = True - self._state.request_update() - - def _close_export_dialog(self, e: ft.ControlEvent | None = None) -> None: - """Close the export dialog.""" - if self._export_dialog: - self._export_dialog.open = False - self._state.request_update() - - def _do_export(self, e: ft.ControlEvent) -> None: - """Perform the export.""" - if not self._state.selected_meeting or not self._format_dropdown: - return - - format_name = self._format_dropdown.value or "markdown" - meeting_id = self._state.selected_meeting.id - - self._close_export_dialog() - - client = self._get_client() - if not client: - logger.warning("No gRPC client available for export") - return - - try: - if result := client.export_transcript(meeting_id, format_name): - self._save_export(result.content, result.file_extension) - else: - logger.error("Export failed - no result returned") - except Exception as exc: - logger.error("Error exporting transcript: %s", exc) - - def _save_export(self, content: str, extension: str) -> None: - """Save exported content to file. - - Args: - content: Export content. - extension: File extension. 
- """ - if not self._state.selected_meeting: - return - - # Create filename from meeting title - safe_title = "".join( - c if c.isalnum() or c in " -_" else "_" for c in self._state.selected_meeting.title - ) - filename = f"{safe_title}.{extension}" - - # Use FilePicker for save dialog - if self._state._page: - - def on_save(e: ft.FilePickerResultEvent) -> None: - if e.path: - try: - with open(e.path, "w", encoding="utf-8") as f: - f.write(content) - logger.info("Exported to: %s", e.path) - except OSError as exc: - logger.error("Error saving export: %s", exc) - - picker = ft.FilePicker(on_result=on_save) - self._state._page.overlay.append(picker) - self._state._page.update() - picker.save_file( - file_name=filename, - allowed_extensions=[extension], - ) - - # ========================================================================= - # Speaker Refinement Methods - # ========================================================================= - - def _show_analyze_dialog(self, e: ft.ControlEvent) -> None: - """Show speaker refinement dialog.""" - if not self._state.selected_meeting: - return - - if not self._can_refine_speakers(self._state.selected_meeting): - self._show_simple_dialog( - "Meeting still active", - ft.Text("Stop the meeting before refining speakers."), - ) - return - - self._num_speakers_field = ft.TextField( - label="Number of speakers (optional)", - hint_text="Leave empty for auto-detect", - width=200, - keyboard_type=ft.KeyboardType.NUMBER, - ) - - self._analyze_dialog = ft.AlertDialog( - title=ft.Text("Refine Speakers"), - content=ft.Column( - [ - ft.Text(f"Meeting: {self._state.selected_meeting.title}"), - ft.Text( - "Refine speaker labels using offline diarization.", - size=12, - color=ft.Colors.GREY_600, - ), - self._num_speakers_field, - ], - spacing=10, - tight=True, - ), - actions=[ - ft.TextButton("Cancel", on_click=self._close_analyze_dialog), - ft.ElevatedButton("Analyze", on_click=self._do_analyze), - ], - actions_alignment=ft.MainAxisAlignment.END, - ) - - if self._state._page: - self._state._page.dialog = self._analyze_dialog - self._analyze_dialog.open = True - self._state.request_update() - - def _close_analyze_dialog(self, e: ft.ControlEvent | None = None) -> None: - """Close the analyze dialog.""" - if self._analyze_dialog: - self._analyze_dialog.open = False - self._state.request_update() - - def _do_analyze(self, e: ft.ControlEvent) -> None: - """Perform speaker analysis.""" - if not self._state.selected_meeting: - return - - # Parse number of speakers (optional) - num_speakers: int | None = None - if self._num_speakers_field and self._num_speakers_field.value: - try: - num_speakers = int(self._num_speakers_field.value) - if num_speakers < 1: - num_speakers = None - except ValueError: - logger.debug("Invalid speaker count input '%s', using auto-detection", self._num_speakers_field.value) - - meeting_id = self._state.selected_meeting.id - self._close_analyze_dialog() - - client = self._get_client() - if not client: - logger.warning("No gRPC client available for analysis") - return - - # Show progress indicator - self._show_analysis_progress("Starting...") - - try: - result = client.refine_speaker_diarization(meeting_id, num_speakers) - except Exception as exc: - logger.error("Error analyzing speakers: %s", exc) - self._show_analysis_error(str(exc)) - return - - if not result: - self._show_analysis_error("Analysis failed - no response from server") - return - - if result.is_terminal: - if result.success: - self._show_analysis_result(result.segments_updated, 
result.speaker_ids) - else: - self._show_analysis_error(result.error_message or "Analysis failed") - return - - if not result.job_id: - self._show_analysis_error(result.error_message or "Server did not return job ID") - return - - # Job queued/running - poll for completion - self._show_analysis_progress(self._format_job_status(result.status)) - self._start_diarization_poll(result.job_id) - - def _show_analysis_progress(self, status: str = "Refining...") -> None: - """Show refinement in progress indicator.""" - if self._analyze_btn: - self._analyze_btn.disabled = True - self._analyze_btn.text = status - self._state.request_update() - - def _show_analysis_result(self, segments_updated: int, speaker_ids: list[str]) -> None: - """Show refinement success result. - - Args: - segments_updated: Number of segments with speaker labels. - speaker_ids: List of detected speaker IDs. - """ - if self._analyze_btn: - self._analyze_btn.disabled = False - self._analyze_btn.text = "Refine Speakers" - - speaker_list = ", ".join(speaker_ids) if speaker_ids else "None found" - - result_dialog = ft.AlertDialog( - title=ft.Text("Refinement Complete"), - content=ft.Column( - [ - ft.Text(f"Segments updated: {segments_updated}"), - ft.Text(f"Speakers found: {speaker_list}"), - ft.Text( - "Reload the meeting to see speaker labels.", - size=12, - color=ft.Colors.GREY_600, - italic=True, - ), - ], - spacing=5, - tight=True, - ), - actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))], - ) - - if self._state._page: - self._state._page.dialog = result_dialog - result_dialog.open = True - self._state.request_update() - - def _show_analysis_error(self, error_message: str) -> None: - """Show analysis error. - - Args: - error_message: Error description. - """ - if self._analyze_btn: - self._analyze_btn.disabled = False - self._analyze_btn.text = "Refine Speakers" - self._show_simple_dialog("Refinement Failed", ft.Text(error_message)) - - def _close_result_dialog(self, e: ft.ControlEvent) -> None: - """Close any result dialog.""" - if self._state._page and self._state._page.dialog: - self._state._page.dialog.open = False - self._state.request_update() - - def _start_diarization_poll(self, job_id: str) -> None: - """Start polling for diarization job completion.""" - page = self._state._page - if page and hasattr(page, "run_thread"): - page.run_thread(lambda: self._poll_diarization_job(job_id)) - return - - threading.Thread( - target=self._poll_diarization_job, - args=(job_id,), - daemon=True, - name="diarization-poll", - ).start() - - def _poll_diarization_job(self, job_id: str) -> None: - """Poll background diarization job until completion.""" - client = self._get_client() - if not client: - self._state.run_on_ui_thread( - lambda: self._show_analysis_error("No gRPC client available for polling") - ) - return - - while True: - result = client.get_diarization_job_status(job_id) - if not result: - self._state.run_on_ui_thread( - lambda: self._show_analysis_error("Failed to fetch diarization status") - ) - return - - if result.is_terminal: - if result.success: - self._state.run_on_ui_thread( - lambda r=result: self._show_analysis_result( - r.segments_updated, - r.speaker_ids, - ) - ) - else: - self._state.run_on_ui_thread( - lambda r=result: self._show_analysis_error( - r.error_message or "Diarization failed" - ) - ) - return - - # Update status text while running - self._state.run_on_ui_thread( - lambda r=result: self._show_analysis_progress(self._format_job_status(r.status)) - ) - 
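# The enclosing loop polls a job until it reaches a terminal state, sleeping
# a fixed interval between attempts. Separated from the UI wiring it reduces
# to this sketch; fetch_status and is_terminal are illustrative stand-ins,
# and the attempt bound is an added safety measure, not project behavior.
import time
from collections.abc import Callable

def poll_until_terminal(
    fetch_status: Callable[[], object | None],
    is_terminal: Callable[[object], bool],
    interval_seconds: float = 2.0,
    max_attempts: int = 300,
) -> object | None:
    for _ in range(max_attempts):
        status = fetch_status()
        if status is None or is_terminal(status):
            return status
        time.sleep(interval_seconds)
    return None  # gave up; callers should surface a timeout to the user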
time.sleep(self.DIARIZATION_POLL_INTERVAL_SECONDS) - - @staticmethod - def _format_job_status(status: str) -> str: - """Format job status for button label.""" - return { - "queued": "Queued...", - "running": "Refining...", - }.get(status, "Refining...") - - def _show_simple_dialog(self, title: str, content: ft.Control) -> None: - """Show a simple dialog with title, content, and OK button. - - Args: - title: Dialog title. - content: Dialog content control. - """ - dialog = ft.AlertDialog( - title=ft.Text(title), - content=content, - actions=[ft.TextButton("OK", on_click=self._close_result_dialog)], - ) - if self._state._page: - self._state._page.dialog = dialog - dialog.open = True - self._state.request_update() - - # ========================================================================= - # Speaker Rename Methods - # ========================================================================= - - def _show_rename_dialog(self, e: ft.ControlEvent) -> None: - """Show speaker rename dialog with current speaker IDs.""" - if not self._state.selected_meeting: - return - - if not self._can_refine_speakers(self._state.selected_meeting): - self._show_simple_dialog( - "Meeting still active", - ft.Text("Stop the meeting before renaming speakers."), - ) - return - - client = self._get_client() - if not client: - logger.warning("No gRPC client available") - return - - # Get segments to extract distinct speaker IDs - meeting_id = self._state.selected_meeting.id - segments = client.get_meeting_segments(meeting_id) - - # Extract distinct speaker IDs - speaker_ids = sorted({s.speaker_id for s in segments if s.speaker_id}) - - if not speaker_ids: - self._show_no_speakers_message() - return - - # Create text fields for each speaker - self._rename_fields.clear() - speaker_controls: list[ft.Control] = [] - - for speaker_id in speaker_ids: - field = ft.TextField( - label=f"{speaker_id}", - hint_text="Enter new name", - width=200, - ) - self._rename_fields[speaker_id] = field - speaker_controls.append( - ft.Row( - [ - ft.Text(speaker_id, width=120, size=12), - ft.Icon(ft.Icons.ARROW_RIGHT, size=16), - field, - ], - alignment=ft.MainAxisAlignment.START, - ) - ) - - self._rename_dialog = ft.AlertDialog( - title=ft.Text("Rename Speakers"), - content=ft.Column( - [ - ft.Text(f"Meeting: {self._state.selected_meeting.title}"), - ft.Text( - "Enter new names for speakers (leave blank to keep current):", - size=12, - color=ft.Colors.GREY_600, - ), - ft.Divider(), - *speaker_controls, - ], - spacing=10, - scroll=ft.ScrollMode.AUTO, - height=300, - ), - actions=[ - ft.TextButton("Cancel", on_click=self._close_rename_dialog), - ft.ElevatedButton("Apply", on_click=self._do_rename), - ], - actions_alignment=ft.MainAxisAlignment.END, - ) - - if self._state._page: - self._state._page.dialog = self._rename_dialog - self._rename_dialog.open = True - self._state.request_update() - - def _close_rename_dialog(self, e: ft.ControlEvent | None = None) -> None: - """Close the rename dialog.""" - if self._rename_dialog: - self._rename_dialog.open = False - self._state.request_update() - - def _show_no_speakers_message(self) -> None: - """Show message when no speakers found.""" - self._show_simple_dialog( - "No Speakers Found", - ft.Text( - "This meeting has no speaker labels. " - "Run 'Refine Speakers' first to identify speakers." 
- ), - ) - - def _do_rename(self, e: ft.ControlEvent) -> None: - """Apply speaker renames.""" - if not self._state.selected_meeting: - return - - client = self._get_client() - if not client: - logger.warning("No gRPC client available") - return - - meeting_id = self._state.selected_meeting.id - self._close_rename_dialog() - - # Collect renames (only non-empty values) - renames: list[tuple[str, str]] = [] - for old_id, field in self._rename_fields.items(): - new_name = (field.value or "").strip() - if new_name and new_name != old_id: - renames.append((old_id, new_name)) - - if not renames: - return - - # Apply renames - total_updated = 0 - errors: list[str] = [] - - for old_id, new_name in renames: - try: - result = client.rename_speaker(meeting_id, old_id, new_name) - if result and result.success: - total_updated += result.segments_updated - else: - errors.append(f"{old_id}: rename failed") - except Exception as exc: - logger.error("Error renaming speaker %s: %s", old_id, exc) - errors.append(f"{old_id}: {exc}") - - # Show result - if errors: - self._show_rename_errors(errors) - else: - self._show_rename_success(total_updated, len(renames)) - - def _show_rename_success(self, segments_updated: int, speakers_renamed: int) -> None: - """Show rename success message. - - Args: - segments_updated: Total number of segments updated. - speakers_renamed: Number of speakers renamed. - """ - success_dialog = ft.AlertDialog( - title=ft.Text("Rename Complete"), - content=ft.Column( - [ - ft.Text(f"Renamed {speakers_renamed} speaker(s)"), - ft.Text(f"Updated {segments_updated} segment(s)"), - ft.Text( - "Reload the meeting to see the new speaker names.", - size=12, - color=ft.Colors.GREY_600, - italic=True, - ), - ], - spacing=5, - tight=True, - ), - actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))], - ) - - if self._state._page: - self._state._page.dialog = success_dialog - success_dialog.open = True - self._state.request_update() - - def _show_rename_errors(self, errors: list[str]) -> None: - """Show rename errors. - - Args: - errors: List of error messages. - """ - self._show_simple_dialog("Rename Errors", ft.Text("\n".join(errors))) - - @staticmethod - def _can_refine_speakers(meeting: MeetingInfo) -> bool: - """Return True when meeting is stopped/completed and safe to refine/rename.""" - return meeting.state in {"stopped", "completed", "error"} -```` - -## File: src/noteflow/client/components/playback_sync.py -````python -"""Playback-transcript synchronization controller. - -Polls playback position and updates transcript highlight state. -Follows RecordingTimerComponent pattern for background threading. -""" - -from __future__ import annotations - -import logging -import threading -from collections.abc import Callable -from typing import TYPE_CHECKING, Final - -from noteflow.infrastructure.audio import PlaybackState - -if TYPE_CHECKING: - from noteflow.client.state import AppState - -logger = logging.getLogger(__name__) - -POSITION_POLL_INTERVAL: Final[float] = 0.1 # 100ms for smooth highlighting - - -class PlaybackSyncController: - """Synchronize playback position with transcript highlighting. - - Polls playback position and updates state.highlighted_segment_index. - Triggers UI updates via state.run_on_ui_thread(). - """ - - def __init__( - self, - state: AppState, - on_highlight_change: Callable[[int | None], None] | None = None, - ) -> None: - """Initialize sync controller. - - Args: - state: Centralized application state. 
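# The sync loop's core question is "which segment contains this playback
# position?". A bisect-based sketch for segments sorted by start time;
# AppState.find_segment_at_position is assumed to do something similar,
# and the names below are illustrative.
from bisect import bisect_right

def find_segment_index(starts: list[float], ends: list[float], position: float) -> int | None:
    # Rightmost segment whose start is <= position...
    i = bisect_right(starts, position) - 1
    if i < 0:
        return None
    # ...but only if the position falls before that segment's end.
    return i if position < ends[i] else None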
- on_highlight_change: Callback when highlighted segment changes. - """ - self._state = state - self._on_highlight_change = on_highlight_change - self._sync_thread: threading.Thread | None = None - self._stop_event = threading.Event() - - def start(self) -> None: - """Start position sync polling.""" - if self._sync_thread and self._sync_thread.is_alive(): - return - - self._stop_event.clear() - self._sync_thread = threading.Thread( - target=self._sync_loop, - daemon=True, - name="PlaybackSyncController", - ) - self._sync_thread.start() - logger.debug("Started playback sync controller") - - def stop(self) -> None: - """Stop position sync polling.""" - self._stop_event.set() - if self._sync_thread: - self._sync_thread.join(timeout=2.0) - self._sync_thread = None - logger.debug("Stopped playback sync controller") - - def _sync_loop(self) -> None: - """Background sync loop - polls position and updates highlight.""" - while not self._stop_event.is_set(): - playback = self._state.playback - - if playback.state == PlaybackState.PLAYING: - position = playback.current_position - self._update_position(position) - elif playback.state == PlaybackState.STOPPED: - # Clear highlight when stopped - if self._state.highlighted_segment_index is not None: - self._state.highlighted_segment_index = None - self._state.run_on_ui_thread(self._notify_highlight_change) - - self._stop_event.wait(POSITION_POLL_INTERVAL) - - def _update_position(self, position: float) -> None: - """Update state with current position and find matching segment.""" - self._state.playback_position = position - - new_index = self._state.find_segment_at_position(position) - old_index = self._state.highlighted_segment_index - - if new_index != old_index: - self._state.highlighted_segment_index = new_index - self._state.run_on_ui_thread(self._notify_highlight_change) - - def _notify_highlight_change(self) -> None: - """Notify UI of highlight change (UI thread only).""" - if self._on_highlight_change: - try: - self._on_highlight_change(self._state.highlighted_segment_index) - except Exception as e: - logger.error("Highlight change callback error: %s", e) - - self._state.request_update() - - def seek_to_segment(self, segment_index: int) -> bool: - """Seek playback to start of specified segment. - - Args: - segment_index: Index into state.transcript_segments. - - Returns: - True if seek was successful. - """ - segments = self._state.transcript_segments - if not (0 <= segment_index < len(segments)): - logger.warning("Invalid segment index: %d", segment_index) - return False - - playback = self._state.playback - segment = segments[segment_index] - - if playback.seek(segment.start_time): - self._state.highlighted_segment_index = segment_index - self._state.playback_position = segment.start_time - self._state.run_on_ui_thread(self._notify_highlight_change) - return True - - return False -```` - ## File: src/noteflow/client/components/vu_meter.py ````python -"""VU meter component for audio level visualization. - -Uses RmsLevelProvider from AppState (not a new instance). -""" - from __future__ import annotations - from typing import TYPE_CHECKING - import flet as ft import numpy as np from numpy.typing import NDArray - if TYPE_CHECKING: from noteflow.client.state import AppState - - class VuMeterComponent: - """Audio level visualization component. - - Uses RmsLevelProvider from AppState (not a new instance). - """ - def __init__(self, state: AppState) -> None: - """Initialize VU meter component. 
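# The display update later in this component maps a dBFS level onto a 0-1
# progress value via (db + 60) / 60, clamped. A tiny standalone version of
# that arithmetic with spot checks:
def _normalize_db(db: float) -> float:
    return max(0.0, min(1.0, (db + 60.0) / 60.0))

assert _normalize_db(-60.0) == 0.0   # silence pins the bar at empty
assert _normalize_db(-30.0) == 0.5   # midpoint of the -60..0 dB range
assert _normalize_db(0.0) == 1.0     # full scale pins the bar at full
assert _normalize_db(-75.0) == 0.0   # values below -60 dB are clamped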
- - Args: - state: Centralized application state with level_provider. - """ self._state = state - # REUSE level_provider from state - do not create new instance self._progress_bar: ft.ProgressBar | None = None self._label: ft.Text | None = None - def build(self) -> ft.Row: - """Build VU meter UI elements. - - Returns: - Row containing progress bar and level label. - """ self._progress_bar = ft.ProgressBar( value=0, width=300, @@ -2947,7 +691,6 @@ class VuMeterComponent: bgcolor=ft.Colors.GREY_300, ) self._label = ft.Text("-60 dB", size=12, width=60) - return ft.Row( [ ft.Text("Level:", size=12), @@ -2955,255 +698,49 @@ class VuMeterComponent: self._label, ] ) - def on_audio_frames(self, frames: NDArray[np.float32]) -> None: - """Process incoming audio frames for level metering. - - Uses state.level_provider.get_db() - existing RmsLevelProvider method. - - Args: - frames: Audio samples as float32 array. - """ - # REUSE existing RmsLevelProvider from state db_level = self._state.level_provider.get_db(frames) self._state.current_db_level = db_level self._state.run_on_ui_thread(self._update_display) - def _update_display(self) -> None: - """Update VU meter display (UI thread only).""" if not self._progress_bar or not self._label: return - db = self._state.current_db_level - # Convert dB to 0-1 range (-60 to 0 dB) normalized = max(0.0, min(1.0, (db + 60) / 60)) - self._progress_bar.value = normalized self._progress_bar.color = ( ft.Colors.RED if db > -6 else ft.Colors.YELLOW if db > -20 else ft.Colors.GREEN ) self._label.value = f"{db:.0f} dB" - self._state.request_update() ```` ## File: src/noteflow/client/__init__.py ````python -"""NoteFlow client application.""" + ```` -## File: src/noteflow/client/_trigger_mixin.py +## File: src/noteflow/config/constants.py ````python -"""Trigger detection mixin for NoteFlow client. - -Extracts trigger detection logic from app.py to keep file under 750 lines. -Handles meeting detection triggers via app audio activity and calendar proximity. -""" - from __future__ import annotations - -import asyncio -import logging -from typing import TYPE_CHECKING, Protocol - -import flet as ft - -from noteflow.application.services import TriggerService, TriggerServiceSettings -from noteflow.config.settings import TriggerSettings, get_trigger_settings -from noteflow.domain.triggers import TriggerAction, TriggerDecision -from noteflow.infrastructure.triggers import ( - AppAudioProvider, - AppAudioSettings, - CalendarProvider, - CalendarSettings, -) -from noteflow.infrastructure.triggers.calendar import parse_calendar_events - -if TYPE_CHECKING: - from noteflow.client.state import AppState - -logger = logging.getLogger(__name__) - - -class TriggerHost(Protocol): - """Protocol for app hosting trigger mixin.""" - - _state: AppState - _trigger_settings: TriggerSettings | None - _trigger_service: TriggerService | None - _app_audio: AppAudioProvider | None - _calendar_provider: CalendarProvider | None - _trigger_poll_interval: float - _trigger_task: asyncio.Task | None - - def _start_recording(self) -> None: - """Start recording audio.""" - ... - - def _ensure_audio_capture(self) -> bool: - """Ensure audio capture is running.""" - ... - - -class TriggerMixin: - """Mixin providing trigger detection functionality. - - Requires host to implement TriggerHost protocol. 
- """ - - def _initialize_triggers(self: TriggerHost) -> None: - """Initialize trigger settings, providers, and service.""" - self._trigger_settings = get_trigger_settings() - self._state.trigger_enabled = self._trigger_settings.trigger_enabled - self._trigger_poll_interval = self._trigger_settings.trigger_poll_interval_seconds - meeting_apps = {app.lower() for app in self._trigger_settings.trigger_meeting_apps} - suppressed_apps = {app.lower() for app in self._trigger_settings.trigger_suppressed_apps} - - app_audio_settings = AppAudioSettings( - enabled=self._trigger_settings.trigger_audio_enabled, - threshold_db=self._trigger_settings.trigger_audio_threshold_db, - window_seconds=self._trigger_settings.trigger_audio_window_seconds, - min_active_ratio=self._trigger_settings.trigger_audio_min_active_ratio, - min_samples=self._trigger_settings.trigger_audio_min_samples, - max_history=self._trigger_settings.trigger_audio_max_history, - weight=self._trigger_settings.trigger_weight_audio, - meeting_apps=meeting_apps, - suppressed_apps=suppressed_apps, - ) - calendar_settings = CalendarSettings( - enabled=self._trigger_settings.trigger_calendar_enabled, - weight=self._trigger_settings.trigger_weight_calendar, - lookahead_minutes=self._trigger_settings.trigger_calendar_lookahead_minutes, - lookbehind_minutes=self._trigger_settings.trigger_calendar_lookbehind_minutes, - events=parse_calendar_events(self._trigger_settings.trigger_calendar_events), - ) - - self._app_audio = AppAudioProvider(app_audio_settings) - self._calendar_provider = CalendarProvider(calendar_settings) - self._trigger_service = TriggerService( - providers=[self._app_audio, self._calendar_provider], - settings=TriggerServiceSettings( - enabled=self._trigger_settings.trigger_enabled, - auto_start_enabled=self._trigger_settings.trigger_auto_start, - rate_limit_seconds=self._trigger_settings.trigger_rate_limit_minutes * 60, - snooze_seconds=self._trigger_settings.trigger_snooze_minutes * 60, - threshold_ignore=self._trigger_settings.trigger_confidence_ignore, - threshold_auto_start=self._trigger_settings.trigger_confidence_auto, - ), - ) - - def _should_keep_capture_running(self: TriggerHost) -> bool: - """Return True if background audio capture should remain active.""" - return False - - async def _trigger_check_loop(self: TriggerHost) -> None: - """Background loop to check trigger conditions. - - Runs every poll interval while not recording. - """ - check_interval = self._trigger_poll_interval - try: - while True: - await asyncio.sleep(check_interval) - - # Skip if recording or trigger pending - if self._state.recording or self._state.trigger_pending: - continue - - # Skip if triggers disabled - if not self._state.trigger_enabled or not self._trigger_service: - continue - - # Evaluate triggers - decision = self._trigger_service.evaluate() - self._state.trigger_decision = decision - - if decision.action == TriggerAction.IGNORE: - continue - - if decision.action == TriggerAction.AUTO_START: - # Auto-start if connected - if self._state.connected: - logger.info( - "Auto-starting recording (confidence=%.2f)", decision.confidence - ) - self._start_recording() - elif decision.action == TriggerAction.NOTIFY: - # Show prompt to user - self._show_trigger_prompt(decision) - except asyncio.CancelledError: - logger.debug("Trigger loop cancelled") - raise - - def _show_trigger_prompt(self: TriggerHost, decision: TriggerDecision) -> None: - """Show trigger notification prompt to user. 
- - Args: - decision: Trigger decision with confidence and signals. - """ - self._state.trigger_pending = True - - # Build signal description - signal_desc = ", ".join(s.app_name or s.source.value for s in decision.signals) - - def handle_start(_: ft.ControlEvent) -> None: - self._state.trigger_pending = False - if dialog.open: - dialog.open = False - self._state.request_update() - if self._state.connected: - self._start_recording() - - def handle_snooze(_: ft.ControlEvent) -> None: - self._state.trigger_pending = False - if self._trigger_service: - self._trigger_service.snooze() - if dialog.open: - dialog.open = False - self._state.request_update() - - def handle_dismiss(_: ft.ControlEvent) -> None: - self._state.trigger_pending = False - if dialog.open: - dialog.open = False - self._state.request_update() - - dialog = ft.AlertDialog( - title=ft.Text("Meeting Detected"), - content=ft.Text( - "Detected: " - f"{signal_desc}\n" - f"Confidence: {decision.confidence:.0%}\n\n" - "Start recording?" - ), - actions=[ - ft.TextButton("Start", on_click=handle_start), - ft.TextButton("Snooze", on_click=handle_snooze), - ft.TextButton("Dismiss", on_click=handle_dismiss), - ], - actions_alignment=ft.MainAxisAlignment.END, - ) - - if self._state._page: - self._state._page.dialog = dialog - dialog.open = True - self._state.request_update() +from typing import Final +DEFAULT_SAMPLE_RATE: Final[int] = 16000 +POSITION_UPDATE_INTERVAL: Final[float] = 0.1 +DEFAULT_GRPC_PORT: Final[int] = 50051 +MAX_GRPC_MESSAGE_SIZE: Final[int] = 100 * 1024 * 1024 ```` ## File: src/noteflow/core/__init__.py ````python -"""Core types and protocols for NoteFlow.""" + ```` ## File: src/noteflow/domain/entities/__init__.py ````python -"""Domain entities for NoteFlow.""" - from .annotation import Annotation from .meeting import Meeting from .segment import Segment, WordTiming from .summary import ActionItem, KeyPoint, Summary - __all__ = [ "ActionItem", "Annotation", @@ -3217,30 +754,14 @@ __all__ = [ ## File: src/noteflow/domain/entities/annotation.py ````python -"""Annotation entity for user-created annotations during recording. - -Distinct from LLM-extracted ActionItem/KeyPoint in summaries. -""" - from __future__ import annotations - from dataclasses import dataclass, field from datetime import datetime from typing import TYPE_CHECKING - if TYPE_CHECKING: from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId - - @dataclass class Annotation: - """User-created annotation during recording. - - Evidence-linked to specific transcript segments for navigation. - Unlike ActionItem/KeyPoint (LLM-extracted from Summary), annotations - are created in real-time during recording and belong directly to Meeting. 
- """ - id: AnnotationId meeting_id: MeetingId annotation_type: AnnotationType @@ -3249,53 +770,32 @@ class Annotation: end_time: float segment_ids: list[int] = field(default_factory=list) created_at: datetime = field(default_factory=datetime.now) - - # Database primary key (set after persistence) db_id: int | None = None - def __post_init__(self) -> None: - """Validate annotation data.""" if self.end_time < self.start_time: raise ValueError( f"end_time ({self.end_time}) must be >= start_time ({self.start_time})" ) - @property def duration(self) -> float: - """Annotation duration in seconds.""" return self.end_time - self.start_time - def has_segments(self) -> bool: - """Check if annotation is linked to transcript segments.""" return len(self.segment_ids) > 0 ```` ## File: src/noteflow/domain/entities/meeting.py ````python -"""Meeting aggregate root entity.""" - from __future__ import annotations - from dataclasses import dataclass, field from datetime import datetime from typing import TYPE_CHECKING from uuid import UUID, uuid4 - from noteflow.domain.value_objects import MeetingId, MeetingState - if TYPE_CHECKING: from noteflow.domain.entities.segment import Segment from noteflow.domain.entities.summary import Summary - - @dataclass class Meeting: - """Meeting aggregate root. - - The central entity representing a recorded meeting with its - transcript segments and optional summary. - """ - id: MeetingId title: str state: MeetingState = MeetingState.CREATED @@ -3305,29 +805,17 @@ class Meeting: segments: list[Segment] = field(default_factory=list) summary: Summary | None = None metadata: dict[str, str] = field(default_factory=dict) - wrapped_dek: bytes | None = None # Encrypted data encryption key - + wrapped_dek: bytes | None = None @classmethod def create( cls, title: str = "", metadata: dict[str, str] | None = None, ) -> Meeting: - """Factory method to create a new meeting. - - Args: - title: Optional meeting title. - metadata: Optional metadata dictionary. - - Returns: - New Meeting instance. - """ meeting_id = MeetingId(uuid4()) now = datetime.now() - if not title: title = f"Meeting {now.strftime('%Y-%m-%d %H:%M')}" - return cls( id=meeting_id, title=title, @@ -3335,7 +823,6 @@ class Meeting: created_at=now, metadata=metadata or {}, ) - @classmethod def from_uuid_str( cls, @@ -3348,21 +835,6 @@ class Meeting: metadata: dict[str, str] | None = None, wrapped_dek: bytes | None = None, ) -> Meeting: - """Create meeting with existing UUID string. - - Args: - uuid_str: UUID string for meeting ID. - title: Meeting title. - state: Meeting state. - created_at: Creation timestamp. - started_at: Start timestamp. - ended_at: End timestamp. - metadata: Meeting metadata. - wrapped_dek: Encrypted data encryption key. - - Returns: - Meeting instance with specified ID. - """ meeting_id = MeetingId(UUID(uuid_str)) return cls( id=meeting_id, @@ -3374,227 +846,115 @@ class Meeting: metadata=metadata or {}, wrapped_dek=wrapped_dek, ) - def start_recording(self) -> None: - """Transition to recording state. - - Raises: - ValueError: If transition is not valid. - """ if not self.state.can_transition_to(MeetingState.RECORDING): raise ValueError(f"Cannot start recording from state {self.state.name}") self.state = MeetingState.RECORDING self.started_at = datetime.now() - def begin_stopping(self) -> None: - """Transition to stopping state for graceful shutdown. - - This intermediate state allows audio writers and other resources - to flush and close properly before the meeting is fully stopped. 
- - Raises: - ValueError: If transition is not valid. - """ if not self.state.can_transition_to(MeetingState.STOPPING): raise ValueError(f"Cannot begin stopping from state {self.state.name}") self.state = MeetingState.STOPPING - def stop_recording(self) -> None: - """Transition to stopped state (from STOPPING). - - Raises: - ValueError: If transition is not valid. - """ if not self.state.can_transition_to(MeetingState.STOPPED): raise ValueError(f"Cannot stop recording from state {self.state.name}") self.state = MeetingState.STOPPED if self.ended_at is None: self.ended_at = datetime.now() - def complete(self) -> None: - """Transition to completed state. - - Raises: - ValueError: If transition is not valid. - """ if not self.state.can_transition_to(MeetingState.COMPLETED): raise ValueError(f"Cannot complete from state {self.state.name}") self.state = MeetingState.COMPLETED - def mark_error(self) -> None: - """Transition to error state.""" self.state = MeetingState.ERROR - def add_segment(self, segment: Segment) -> None: - """Add a transcript segment. - - Args: - segment: Segment to add. - """ self.segments.append(segment) - def set_summary(self, summary: Summary) -> None: - """Set the meeting summary. - - Args: - summary: Summary to set. - """ self.summary = summary - @property def duration_seconds(self) -> float: - """Calculate meeting duration in seconds.""" if self.ended_at and self.started_at: return (self.ended_at - self.started_at).total_seconds() if self.started_at: return (datetime.now() - self.started_at).total_seconds() return 0.0 - @property def next_segment_id(self) -> int: - """Get the next available segment ID.""" return max(s.segment_id for s in self.segments) + 1 if self.segments else 0 - @property def segment_count(self) -> int: - """Number of transcript segments.""" return len(self.segments) - @property def full_transcript(self) -> str: - """Concatenate all segment text.""" return " ".join(s.text for s in self.segments) - def is_active(self) -> bool: - """Check if meeting is in an active state (created or recording). - - Note: STOPPING is not considered active as it's transitioning to stopped. - """ return self.state in (MeetingState.CREATED, MeetingState.RECORDING) - def has_summary(self) -> bool: - """Check if meeting has a summary.""" return self.summary is not None ```` ## File: src/noteflow/domain/entities/summary.py ````python -"""Summary-related entities for meeting summaries.""" - from __future__ import annotations - from dataclasses import dataclass, field from datetime import datetime from typing import TYPE_CHECKING - if TYPE_CHECKING: from noteflow.domain.value_objects import MeetingId - - @dataclass class KeyPoint: - """A key point extracted from the meeting. - - Evidence-linked to specific transcript segments for verification. - """ - text: str segment_ids: list[int] = field(default_factory=list) start_time: float = 0.0 end_time: float = 0.0 - - # Database primary key (set after persistence) db_id: int | None = None - def has_evidence(self) -> bool: - """Check if key point is backed by transcript evidence.""" return len(self.segment_ids) > 0 - - @dataclass class ActionItem: - """An action item extracted from the meeting. - - Evidence-linked to specific transcript segments for verification. 
- """ - text: str assignee: str = "" due_date: datetime | None = None - priority: int = 0 # 0=unspecified, 1=low, 2=medium, 3=high + priority: int = 0 segment_ids: list[int] = field(default_factory=list) - - # Database primary key (set after persistence) db_id: int | None = None - def has_evidence(self) -> bool: - """Check if action item is backed by transcript evidence.""" return len(self.segment_ids) > 0 - def is_assigned(self) -> bool: - """Check if action item has an assignee.""" return bool(self.assignee) - def has_due_date(self) -> bool: - """Check if action item has a due date.""" return self.due_date is not None - - @dataclass class Summary: - """Meeting summary entity. - - Contains executive summary, key points, and action items, - all evidence-linked to transcript segments. - """ - meeting_id: MeetingId executive_summary: str = "" key_points: list[KeyPoint] = field(default_factory=list) action_items: list[ActionItem] = field(default_factory=list) generated_at: datetime | None = None model_version: str = "" - - # Database primary key (set after persistence) db_id: int | None = None - def all_points_have_evidence(self) -> bool: - """Check if all key points have transcript evidence.""" return all(kp.has_evidence() for kp in self.key_points) - def all_actions_have_evidence(self) -> bool: - """Check if all action items have transcript evidence.""" return all(ai.has_evidence() for ai in self.action_items) - def is_fully_evidenced(self) -> bool: - """Check if entire summary is backed by transcript evidence.""" return self.all_points_have_evidence() and self.all_actions_have_evidence() - @property def key_point_count(self) -> int: - """Number of key points.""" return len(self.key_points) - @property def action_item_count(self) -> int: - """Number of action items.""" return len(self.action_items) - @property def unevidenced_points(self) -> list[KeyPoint]: - """Key points without transcript evidence.""" return [kp for kp in self.key_points if not kp.has_evidence()] - @property def unevidenced_actions(self) -> list[ActionItem]: - """Action items without transcript evidence.""" return [ai for ai in self.action_items if not ai.has_evidence()] ```` ## File: src/noteflow/domain/ports/__init__.py ````python -"""Domain ports (interfaces) for NoteFlow.""" - from .repositories import ( AnnotationRepository, MeetingRepository, @@ -3602,7 +962,6 @@ from .repositories import ( SummaryRepository, ) from .unit_of_work import UnitOfWork - __all__ = [ "AnnotationRepository", "MeetingRepository", @@ -3614,12 +973,8 @@ __all__ = [ ## File: src/noteflow/domain/ports/unit_of_work.py ````python -"""Unit of Work protocol for transaction management.""" - from __future__ import annotations - from typing import TYPE_CHECKING, Protocol, Self - if TYPE_CHECKING: from .repositories import ( AnnotationRepository, @@ -3627,73 +982,28 @@ if TYPE_CHECKING: SegmentRepository, SummaryRepository, ) - - class UnitOfWork(Protocol): - """Unit of Work protocol for managing transactions across repositories. - - Provides transactional consistency when operating on multiple - aggregates. Use as a context manager for automatic commit/rollback. - - Example: - async with uow: - meeting = await uow.meetings.get(meeting_id) - await uow.segments.add(meeting_id, segment) - await uow.commit() - """ - annotations: AnnotationRepository meetings: MeetingRepository segments: SegmentRepository summaries: SummaryRepository - async def __aenter__(self) -> Self: - """Enter the unit of work context. 
- - Returns: - Self for use in async with statement. - """ ... - async def __aexit__( self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: object, ) -> None: - """Exit the unit of work context. - - Rolls back on exception, otherwise commits. - - Args: - exc_type: Exception type if raised. - exc_val: Exception value if raised. - exc_tb: Exception traceback if raised. - """ ... - async def commit(self) -> None: - """Commit the current transaction. - - Persists all changes made within the unit of work. - """ ... - async def rollback(self) -> None: - """Rollback the current transaction. - - Discards all changes made within the unit of work. - """ ... ```` ## File: src/noteflow/domain/summarization/__init__.py ````python -"""Summarization domain module. - -Provides protocols and data transfer objects for meeting summarization. -""" - from noteflow.domain.summarization.ports import ( CitationVerificationResult, CitationVerifier, @@ -3705,7 +1015,6 @@ from noteflow.domain.summarization.ports import ( SummarizationTimeoutError, SummarizerProvider, ) - __all__ = [ "CitationVerificationResult", "CitationVerifier", @@ -3721,177 +1030,80 @@ __all__ = [ ## File: src/noteflow/domain/summarization/ports.py ````python -"""Summarization provider port protocols.""" - from __future__ import annotations - from dataclasses import dataclass, field from typing import TYPE_CHECKING, Protocol - if TYPE_CHECKING: from collections.abc import Sequence - from noteflow.domain.entities import Segment, Summary from noteflow.domain.value_objects import MeetingId - - @dataclass(frozen=True) class SummarizationRequest: - """Request for meeting summarization. - - Contains the meeting context needed for summary generation. - """ - meeting_id: MeetingId segments: Sequence[Segment] max_key_points: int = 5 max_action_items: int = 10 - @property def transcript_text(self) -> str: - """Concatenate all segment text into a single transcript.""" return " ".join(seg.text for seg in self.segments) - @property def segment_count(self) -> int: - """Number of segments in the request.""" return len(self.segments) - @property def total_duration(self) -> float: - """Total duration of all segments in seconds.""" if not self.segments: return 0.0 return self.segments[-1].end_time - self.segments[0].start_time - - @dataclass(frozen=True) class SummarizationResult: - """Result from summarization provider. - - Contains the generated summary along with metadata. - """ - summary: Summary model_name: str provider_name: str tokens_used: int | None = None latency_ms: float = 0.0 - @property def is_success(self) -> bool: - """Check if summarization succeeded with content.""" return bool(self.summary.executive_summary) - - @dataclass(frozen=True) class CitationVerificationResult: - """Result of citation verification. - - Identifies which citations are valid and which are invalid. - """ - is_valid: bool invalid_key_point_indices: tuple[int, ...] = field(default_factory=tuple) invalid_action_item_indices: tuple[int, ...] = field(default_factory=tuple) missing_segment_ids: tuple[int, ...] = field(default_factory=tuple) - @property def invalid_count(self) -> int: - """Total number of invalid citations.""" return len(self.invalid_key_point_indices) + len(self.invalid_action_item_indices) - - class SummarizerProvider(Protocol): - """Protocol for LLM summarization providers. - - Implementations must provide async summarization with evidence linking. 
- """ - @property def provider_name(self) -> str: - """Provider identifier (e.g., 'mock', 'ollama', 'openai').""" ... - @property def is_available(self) -> bool: - """Check if provider is configured and available.""" ... - @property def requires_cloud_consent(self) -> bool: - """Return True if data is sent to external services. - - Cloud providers must return True to ensure explicit user consent. - """ ... - async def summarize(self, request: SummarizationRequest) -> SummarizationResult: - """Generate evidence-linked summary from transcript segments. - - Args: - request: Summarization request with segments and constraints. - - Returns: - SummarizationResult with generated summary and metadata. - - Raises: - SummarizationError: If summarization fails. - """ ... - - class CitationVerifier(Protocol): - """Protocol for verifying evidence citations. - - Validates that segment_ids in summaries reference actual segments. - """ - def verify_citations( self, summary: Summary, segments: Sequence[Segment], ) -> CitationVerificationResult: - """Verify all segment_ids exist in the transcript. - - Args: - summary: Summary with key points and action items to verify. - segments: Available transcript segments. - - Returns: - CitationVerificationResult with validation status and details. - """ ... - - class SummarizationError(Exception): - """Base exception for summarization errors.""" - pass - - class ProviderUnavailableError(SummarizationError): - """Provider is not available or not configured.""" - pass - - class SummarizationTimeoutError(SummarizationError): - """Summarization operation timed out.""" - pass - - class InvalidResponseError(SummarizationError): - """Provider returned an invalid or unparseable response.""" - pass ```` ## File: src/noteflow/domain/triggers/__init__.py ````python -"""Trigger domain package.""" - from noteflow.domain.triggers.entities import ( TriggerAction, TriggerDecision, @@ -3899,7 +1111,6 @@ from noteflow.domain.triggers.entities import ( TriggerSource, ) from noteflow.domain.triggers.ports import SignalProvider - __all__ = [ "SignalProvider", "TriggerAction", @@ -3911,114 +1122,1527 @@ __all__ = [ ## File: src/noteflow/domain/triggers/ports.py ````python -"""Trigger signal provider port protocol. - -Define the interface for signal providers that detect meeting conditions. -""" - from __future__ import annotations - from typing import TYPE_CHECKING, Protocol - if TYPE_CHECKING: from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource - - class SignalProvider(Protocol): - """Protocol for trigger signal providers. - - Signal providers detect specific conditions (audio activity, foreground app, etc.) - and return weighted signals used in trigger evaluation. - - Each provider: - - Has a specific source type - - Has a maximum weight contribution - - Can be enabled/disabled - - Returns a signal when conditions are met, None otherwise - """ - @property def source(self) -> TriggerSource: - """Get the source type for this provider.""" ... - @property def max_weight(self) -> float: - """Get the maximum weight this provider can contribute.""" ... - def get_signal(self) -> TriggerSignal | None: - """Get current signal if conditions are met. - - Returns: - TriggerSignal if provider conditions are satisfied, None otherwise. - """ ... - def is_enabled(self) -> bool: - """Check if this provider is enabled. - - Returns: - True if provider is enabled and can produce signals. - """ ... 
```` ## File: src/noteflow/domain/__init__.py ````python -"""NoteFlow domain layer.""" - from .value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState - __all__ = ["AnnotationId", "AnnotationType", "MeetingId", "MeetingState"] ```` +## File: src/noteflow/grpc/_mixins/__init__.py +````python +from .annotation import AnnotationMixin +from .diarization import DiarizationMixin +from .export import ExportMixin +from .meeting import MeetingMixin +from .streaming import StreamingMixin +from .summarization import SummarizationMixin +__all__ = [ + "AnnotationMixin", + "DiarizationMixin", + "ExportMixin", + "MeetingMixin", + "StreamingMixin", + "SummarizationMixin", +] +```` + +## File: src/noteflow/grpc/_mixins/annotation.py +````python +from __future__ import annotations +from typing import TYPE_CHECKING +from uuid import UUID, uuid4 +import grpc.aio +from noteflow.domain.entities import Annotation +from noteflow.domain.value_objects import AnnotationId, MeetingId +from ..proto import noteflow_pb2 +from .converters import annotation_to_proto, proto_to_annotation_type +if TYPE_CHECKING: + from .protocols import ServicerHost +class AnnotationMixin: + async def AddAnnotation( + self: ServicerHost, + request: noteflow_pb2.AddAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + annotation_type = proto_to_annotation_type(request.annotation_type) + annotation = Annotation( + id=AnnotationId(uuid4()), + meeting_id=MeetingId(UUID(request.meeting_id)), + annotation_type=annotation_type, + text=request.text, + start_time=request.start_time, + end_time=request.end_time, + segment_ids=list(request.segment_ids), + ) + async with self._create_uow() as uow: + saved = await uow.annotations.add(annotation) + await uow.commit() + return annotation_to_proto(saved) + async def GetAnnotation( + self: ServicerHost, + request: noteflow_pb2.GetAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + async with self._create_uow() as uow: + annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) + if annotation is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) + return annotation_to_proto(annotation) + async def ListAnnotations( + self: ServicerHost, + request: noteflow_pb2.ListAnnotationsRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ListAnnotationsResponse: + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + async with self._create_uow() as uow: + meeting_id = MeetingId(UUID(request.meeting_id)) + if request.start_time > 0 or request.end_time > 0: + annotations = await uow.annotations.get_by_time_range( + meeting_id, + request.start_time, + request.end_time, + ) + else: + annotations = await uow.annotations.get_by_meeting(meeting_id) + return noteflow_pb2.ListAnnotationsResponse( + annotations=[annotation_to_proto(a) for a in annotations] + ) + async def UpdateAnnotation( + self: ServicerHost, + request: noteflow_pb2.UpdateAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Annotation: + if not self._use_database(): + await context.abort( + 
grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + async with self._create_uow() as uow: + annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) + if annotation is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) + if request.annotation_type != noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED: + annotation.annotation_type = proto_to_annotation_type(request.annotation_type) + if request.text: + annotation.text = request.text + if request.start_time > 0: + annotation.start_time = request.start_time + if request.end_time > 0: + annotation.end_time = request.end_time + if request.segment_ids: + annotation.segment_ids = list(request.segment_ids) + updated = await uow.annotations.update(annotation) + await uow.commit() + return annotation_to_proto(updated) + async def DeleteAnnotation( + self: ServicerHost, + request: noteflow_pb2.DeleteAnnotationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DeleteAnnotationResponse: + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Annotations require database persistence", + ) + async with self._create_uow() as uow: + success = await uow.annotations.delete(AnnotationId(UUID(request.annotation_id))) + if success: + await uow.commit() + return noteflow_pb2.DeleteAnnotationResponse(success=True) + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Annotation {request.annotation_id} not found", + ) +```` + +## File: src/noteflow/grpc/_mixins/converters.py +````python +from __future__ import annotations +import time +from typing import TYPE_CHECKING +from noteflow.application.services.export_service import ExportFormat +from noteflow.domain.entities import Annotation, Meeting, Segment, Summary +from noteflow.domain.value_objects import AnnotationType, MeetingId +from noteflow.infrastructure.converters import AsrConverter +from ..proto import noteflow_pb2 +if TYPE_CHECKING: + from noteflow.infrastructure.asr.dto import AsrResult +def meeting_to_proto( + meeting: Meeting, + include_segments: bool = True, + include_summary: bool = True, +) -> noteflow_pb2.Meeting: + segments = [] + if include_segments: + for seg in meeting.segments: + words = [ + noteflow_pb2.WordTiming( + word=w.word, + start_time=w.start_time, + end_time=w.end_time, + probability=w.probability, + ) + for w in seg.words + ] + segments.append( + noteflow_pb2.FinalSegment( + segment_id=seg.segment_id, + text=seg.text, + start_time=seg.start_time, + end_time=seg.end_time, + words=words, + language=seg.language, + language_confidence=seg.language_confidence, + avg_logprob=seg.avg_logprob, + no_speech_prob=seg.no_speech_prob, + speaker_id=seg.speaker_id or "", + speaker_confidence=seg.speaker_confidence, + ) + ) + summary = None + if include_summary and meeting.summary: + summary = summary_to_proto(meeting.summary) + return noteflow_pb2.Meeting( + id=str(meeting.id), + title=meeting.title, + state=meeting.state.value, + created_at=meeting.created_at.timestamp(), + started_at=meeting.started_at.timestamp() if meeting.started_at else 0, + ended_at=meeting.ended_at.timestamp() if meeting.ended_at else 0, + duration_seconds=meeting.duration_seconds, + segments=segments, + summary=summary, + metadata=meeting.metadata, + ) +def summary_to_proto(summary: Summary) -> noteflow_pb2.Summary: + key_points = [ + noteflow_pb2.KeyPoint( + text=kp.text, + segment_ids=kp.segment_ids, + start_time=kp.start_time, + end_time=kp.end_time, + ) + for 
kp in summary.key_points + ] + action_items = [ + noteflow_pb2.ActionItem( + text=ai.text, + assignee=ai.assignee, + due_date=ai.due_date.timestamp() if ai.due_date is not None else 0, + priority=ai.priority, + segment_ids=ai.segment_ids, + ) + for ai in summary.action_items + ] + return noteflow_pb2.Summary( + meeting_id=str(summary.meeting_id), + executive_summary=summary.executive_summary, + key_points=key_points, + action_items=action_items, + generated_at=(summary.generated_at.timestamp() if summary.generated_at is not None else 0), + model_version=summary.model_version, + ) +def segment_to_proto_update( + meeting_id: str, + segment: Segment, +) -> noteflow_pb2.TranscriptUpdate: + words = [ + noteflow_pb2.WordTiming( + word=w.word, + start_time=w.start_time, + end_time=w.end_time, + probability=w.probability, + ) + for w in segment.words + ] + final_segment = noteflow_pb2.FinalSegment( + segment_id=segment.segment_id, + text=segment.text, + start_time=segment.start_time, + end_time=segment.end_time, + words=words, + language=segment.language, + language_confidence=segment.language_confidence, + avg_logprob=segment.avg_logprob, + no_speech_prob=segment.no_speech_prob, + speaker_id=segment.speaker_id or "", + speaker_confidence=segment.speaker_confidence, + ) + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=noteflow_pb2.UPDATE_TYPE_FINAL, + segment=final_segment, + server_timestamp=time.time(), + ) +def annotation_to_proto(annotation: Annotation) -> noteflow_pb2.Annotation: + return noteflow_pb2.Annotation( + id=str(annotation.id), + meeting_id=str(annotation.meeting_id), + annotation_type=annotation_type_to_proto(annotation.annotation_type), + text=annotation.text, + start_time=annotation.start_time, + end_time=annotation.end_time, + segment_ids=annotation.segment_ids, + created_at=annotation.created_at.timestamp(), + ) +def annotation_type_to_proto(annotation_type: AnnotationType) -> int: + mapping = { + AnnotationType.ACTION_ITEM: noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM, + AnnotationType.DECISION: noteflow_pb2.ANNOTATION_TYPE_DECISION, + AnnotationType.NOTE: noteflow_pb2.ANNOTATION_TYPE_NOTE, + AnnotationType.RISK: noteflow_pb2.ANNOTATION_TYPE_RISK, + } + return mapping.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED) +def proto_to_annotation_type(proto_type: int) -> AnnotationType: + mapping: dict[int, AnnotationType] = { + int(noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM): AnnotationType.ACTION_ITEM, + int(noteflow_pb2.ANNOTATION_TYPE_DECISION): AnnotationType.DECISION, + int(noteflow_pb2.ANNOTATION_TYPE_NOTE): AnnotationType.NOTE, + int(noteflow_pb2.ANNOTATION_TYPE_RISK): AnnotationType.RISK, + } + return mapping.get(proto_type, AnnotationType.NOTE) +def create_vad_update( + meeting_id: str, + update_type: int, +) -> noteflow_pb2.TranscriptUpdate: + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=update_type, + server_timestamp=time.time(), + ) +def create_segment_from_asr( + meeting_id: MeetingId, + segment_id: int, + result: AsrResult, + segment_start_time: float, +) -> Segment: + words = AsrConverter.result_to_domain_words(result) + if segment_start_time: + for word in words: + word.start_time += segment_start_time + word.end_time += segment_start_time + return Segment( + segment_id=segment_id, + text=result.text, + start_time=result.start + segment_start_time, + end_time=result.end + segment_start_time, + meeting_id=meeting_id, + words=words, + language=result.language, + 
language_confidence=result.language_probability, + avg_logprob=result.avg_logprob, + no_speech_prob=result.no_speech_prob, + ) +def proto_to_export_format(proto_format: int) -> ExportFormat: + if proto_format == noteflow_pb2.EXPORT_FORMAT_HTML: + return ExportFormat.HTML + return ExportFormat.MARKDOWN +```` + +## File: src/noteflow/grpc/_mixins/diarization.py +````python +from __future__ import annotations +import asyncio +import logging +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING +from uuid import UUID, uuid4 +import grpc.aio +import numpy as np +from numpy.typing import NDArray +from noteflow.domain.entities import Segment +from noteflow.domain.value_objects import MeetingId, MeetingState +from noteflow.infrastructure.audio.reader import MeetingAudioReader +from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker +from ..proto import noteflow_pb2 +if TYPE_CHECKING: + from .protocols import ServicerHost +logger = logging.getLogger(__name__) +@dataclass +class _DiarizationJob: + job_id: str + meeting_id: str + status: int + segments_updated: int = 0 + speaker_ids: list[str] = field(default_factory=list) + error_message: str = "" + created_at: float = field(default_factory=time.time) + updated_at: float = field(default_factory=time.time) + task: asyncio.Task[None] | None = None + def to_proto(self) -> noteflow_pb2.DiarizationJobStatus: + return noteflow_pb2.DiarizationJobStatus( + job_id=self.job_id, + status=self.status, + segments_updated=self.segments_updated, + speaker_ids=self.speaker_ids, + error_message=self.error_message, + ) +class DiarizationMixin: + DIARIZATION_JOB_TTL_SECONDS: float = 60 * 60 + def _process_streaming_diarization( + self: ServicerHost, + meeting_id: str, + audio: NDArray[np.float32], + ) -> None: + if self._diarization_engine is None: + return + if meeting_id in self._diarization_streaming_failed: + return + if audio.size == 0: + return + if not self._diarization_engine.is_streaming_loaded: + try: + self._diarization_engine.load_streaming_model() + except (RuntimeError, ValueError) as exc: + logger.warning( + "Streaming diarization disabled for meeting %s: %s", + meeting_id, + exc, + ) + self._diarization_streaming_failed.add(meeting_id) + return + stream_time = self._diarization_stream_time.get(meeting_id, 0.0) + duration = len(audio) / self.DEFAULT_SAMPLE_RATE + try: + turns = self._diarization_engine.process_chunk( + audio, + sample_rate=self.DEFAULT_SAMPLE_RATE, + ) + except Exception as exc: + logger.warning( + "Streaming diarization failed for meeting %s: %s", + meeting_id, + exc, + ) + self._diarization_streaming_failed.add(meeting_id) + return + diarization_turns = self._diarization_turns.setdefault(meeting_id, []) + for turn in turns: + diarization_turns.append( + SpeakerTurn( + speaker=turn.speaker, + start=turn.start + stream_time, + end=turn.end + stream_time, + confidence=turn.confidence, + ) + ) + self._diarization_stream_time[meeting_id] = stream_time + duration + def _maybe_assign_speaker( + self: ServicerHost, + meeting_id: str, + segment: Segment, + ) -> None: + if self._diarization_engine is None: + return + if meeting_id in self._diarization_streaming_failed: + return + turns = self._diarization_turns.get(meeting_id) + if not turns: + return + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + return + segment.speaker_id = speaker_id + segment.speaker_confidence = confidence + def _prune_diarization_jobs(self: 
ServicerHost) -> None: + if not self._diarization_jobs: + return + now = time.time() + terminal_statuses = { + noteflow_pb2.JOB_STATUS_COMPLETED, + noteflow_pb2.JOB_STATUS_FAILED, + } + expired = [ + job_id + for job_id, job in self._diarization_jobs.items() + if job.status in terminal_statuses + and now - job.updated_at > self.DIARIZATION_JOB_TTL_SECONDS + ] + for job_id in expired: + self._diarization_jobs.pop(job_id, None) + async def RefineSpeakerDiarization( + self: ServicerHost, + request: noteflow_pb2.RefineSpeakerDiarizationRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.RefineSpeakerDiarizationResponse: + self._prune_diarization_jobs() + if not self._diarization_refinement_enabled: + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "Diarization refinement disabled on server" + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + if self._diarization_engine is None: + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "Diarization not enabled on server" + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + try: + meeting_uuid = UUID(request.meeting_id) + except ValueError: + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "Invalid meeting_id" + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(meeting_uuid)) + else: + store = self._get_memory_store() + meeting = store.get(request.meeting_id) + if meeting is None: + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "Meeting not found" + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + meeting_state = meeting.state + if meeting_state in ( + MeetingState.UNSPECIFIED, + MeetingState.CREATED, + MeetingState.RECORDING, + MeetingState.STOPPING, + ): + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = ( + f"Meeting must be stopped before refinement (state: {meeting_state.name.lower()})" + ) + response.job_id = "" + response.status = noteflow_pb2.JOB_STATUS_FAILED + return response + num_speakers = request.num_speakers if request.num_speakers > 0 else None + job_id = str(uuid4()) + job = _DiarizationJob( + job_id=job_id, + meeting_id=request.meeting_id, + status=noteflow_pb2.JOB_STATUS_QUEUED, + ) + self._diarization_jobs[job_id] = job + task = asyncio.create_task(self._run_diarization_job(job_id, num_speakers)) + job.task = task + response = noteflow_pb2.RefineSpeakerDiarizationResponse() + response.segments_updated = 0 + response.speaker_ids[:] = [] + response.error_message = "" + response.job_id = job_id + response.status = noteflow_pb2.JOB_STATUS_QUEUED + return response + async def _run_diarization_job( + self: ServicerHost, + job_id: str, + num_speakers: int | None, + ) -> None: + job = self._diarization_jobs.get(job_id) + if job is None: + return + job.status = noteflow_pb2.JOB_STATUS_RUNNING + job.updated_at = time.time() + try: + updated_count = await 
self.refine_speaker_diarization( + meeting_id=job.meeting_id, + num_speakers=num_speakers, + ) + speaker_ids = await self._collect_speaker_ids(job.meeting_id) + job.segments_updated = updated_count + job.speaker_ids = speaker_ids + job.status = noteflow_pb2.JOB_STATUS_COMPLETED + except Exception as exc: + logger.exception("Diarization failed for meeting %s", job.meeting_id) + job.error_message = str(exc) + job.status = noteflow_pb2.JOB_STATUS_FAILED + finally: + job.updated_at = time.time() + async def refine_speaker_diarization( + self: ServicerHost, + meeting_id: str, + num_speakers: int | None = None, + ) -> int: + turns = await asyncio.to_thread( + self._run_diarization_inference, + meeting_id, + num_speakers, + ) + updated_count = await self._apply_diarization_turns(meeting_id, turns) + logger.info( + "Updated %d segments with speaker labels for meeting %s", + updated_count, + meeting_id, + ) + return updated_count + def _run_diarization_inference( + self: ServicerHost, + meeting_id: str, + num_speakers: int | None, + ) -> list[SpeakerTurn]: + if self._diarization_engine is None: + raise RuntimeError("Diarization engine not configured") + if not self._diarization_engine.is_offline_loaded: + logger.info("Loading offline diarization model for refinement...") + self._diarization_engine.load_offline_model() + audio_reader = MeetingAudioReader(self._crypto, self._meetings_dir) + if not audio_reader.audio_exists(meeting_id): + raise RuntimeError("No audio file found for meeting") + logger.info("Loading audio for meeting %s", meeting_id) + try: + audio_chunks = audio_reader.load_meeting_audio(meeting_id) + except (FileNotFoundError, ValueError) as exc: + raise RuntimeError(f"Failed to load audio: {exc}") from exc + if not audio_chunks: + raise RuntimeError("No audio chunks loaded for meeting") + sample_rate = audio_reader.sample_rate + all_audio = np.concatenate([chunk.frames for chunk in audio_chunks]) + logger.info( + "Running offline diarization on %.2f seconds of audio", + len(all_audio) / sample_rate, + ) + turns = self._diarization_engine.diarize_full( + all_audio, + sample_rate=sample_rate, + num_speakers=num_speakers, + ) + logger.info("Diarization found %d speaker turns", len(turns)) + return list(turns) + async def _apply_diarization_turns( + self: ServicerHost, + meeting_id: str, + turns: list[SpeakerTurn], + ) -> int: + updated_count = 0 + if self._use_database(): + async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) + for segment in segments: + if segment.db_id is None: + continue + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + continue + await uow.segments.update_speaker( + segment.db_id, + speaker_id, + confidence, + ) + updated_count += 1 + await uow.commit() + else: + store = self._get_memory_store() + if meeting := store.get(meeting_id): + for segment in meeting.segments: + speaker_id, confidence = assign_speaker( + segment.start_time, + segment.end_time, + turns, + ) + if speaker_id is None: + continue + segment.speaker_id = speaker_id + segment.speaker_confidence = confidence + updated_count += 1 + return updated_count + async def _collect_speaker_ids(self: ServicerHost, meeting_id: str) -> list[str]: + if self._use_database(): + async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) + return sorted({s.speaker_id for s in segments if s.speaker_id}) + store = 
self._get_memory_store() + if meeting := store.get(meeting_id): + return sorted({s.speaker_id for s in meeting.segments if s.speaker_id}) + return [] + async def RenameSpeaker( + self: ServicerHost, + request: noteflow_pb2.RenameSpeakerRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.RenameSpeakerResponse: + if not request.old_speaker_id or not request.new_speaker_name: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "old_speaker_id and new_speaker_name are required", + ) + try: + meeting_uuid = UUID(request.meeting_id) + except ValueError: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "Invalid meeting_id", + ) + updated_count = 0 + if self._use_database(): + async with self._create_uow() as uow: + segments = await uow.segments.get_by_meeting(MeetingId(meeting_uuid)) + for segment in segments: + if segment.speaker_id == request.old_speaker_id and segment.db_id: + await uow.segments.update_speaker( + segment.db_id, + request.new_speaker_name, + segment.speaker_confidence, + ) + updated_count += 1 + await uow.commit() + else: + store = self._get_memory_store() + if meeting := store.get(request.meeting_id): + for segment in meeting.segments: + if segment.speaker_id == request.old_speaker_id: + segment.speaker_id = request.new_speaker_name + updated_count += 1 + return noteflow_pb2.RenameSpeakerResponse( + segments_updated=updated_count, + success=updated_count > 0, + ) + async def GetDiarizationJobStatus( + self: ServicerHost, + request: noteflow_pb2.GetDiarizationJobStatusRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DiarizationJobStatus: + self._prune_diarization_jobs() + job = self._diarization_jobs.get(request.job_id) + if job is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + "Diarization job not found", + ) + return job.to_proto() +```` + +## File: src/noteflow/grpc/_mixins/export.py +````python +from __future__ import annotations +from typing import TYPE_CHECKING +from uuid import UUID +import grpc.aio +from noteflow.application.services.export_service import ExportFormat, ExportService +from noteflow.domain.value_objects import MeetingId +from ..proto import noteflow_pb2 +from .converters import proto_to_export_format +if TYPE_CHECKING: + from .protocols import ServicerHost +class ExportMixin: + async def ExportTranscript( + self: ServicerHost, + request: noteflow_pb2.ExportTranscriptRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ExportTranscriptResponse: + if not self._use_database(): + await context.abort( + grpc.StatusCode.UNIMPLEMENTED, + "Export requires database persistence", + ) + fmt = proto_to_export_format(request.format) + export_service = ExportService(self._create_uow()) + try: + content = await export_service.export_transcript( + MeetingId(UUID(request.meeting_id)), + fmt, + ) + exporter_info = export_service.get_supported_formats() + fmt_name = "" + fmt_ext = "" + for name, ext in exporter_info: + if fmt == ExportFormat.MARKDOWN and ext == ".md": + fmt_name, fmt_ext = name, ext + break + if fmt == ExportFormat.HTML and ext == ".html": + fmt_name, fmt_ext = name, ext + break + return noteflow_pb2.ExportTranscriptResponse( + content=content, + format_name=fmt_name, + file_extension=fmt_ext, + ) + except ValueError as e: + await context.abort( + grpc.StatusCode.NOT_FOUND, + str(e), + ) +```` + +## File: src/noteflow/grpc/_mixins/meeting.py +````python +from __future__ import annotations +from typing import TYPE_CHECKING +from uuid import UUID +import grpc.aio +from 
noteflow.domain.entities import Meeting +from noteflow.domain.value_objects import MeetingId, MeetingState +from ..proto import noteflow_pb2 +from .converters import meeting_to_proto +if TYPE_CHECKING: + from .protocols import ServicerHost +class MeetingMixin: + async def CreateMeeting( + self: ServicerHost, + request: noteflow_pb2.CreateMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + metadata = dict(request.metadata) if request.metadata else {} + if self._use_database(): + async with self._create_uow() as uow: + meeting = Meeting.create(title=request.title, metadata=metadata) + saved = await uow.meetings.create(meeting) + await uow.commit() + return meeting_to_proto(saved) + else: + store = self._get_memory_store() + meeting = store.create(title=request.title, metadata=metadata) + return meeting_to_proto(meeting) + async def StopMeeting( + self: ServicerHost, + request: noteflow_pb2.StopMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + meeting_id = request.meeting_id + if meeting_id in self._audio_writers: + self._close_audio_writer(meeting_id) + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {meeting_id} not found", + ) + try: + meeting.begin_stopping() + meeting.stop_recording() + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + await uow.meetings.update(meeting) + await uow.commit() + return meeting_to_proto(meeting) + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {meeting_id} not found", + ) + try: + meeting.begin_stopping() + meeting.stop_recording() + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + store.update(meeting) + return meeting_to_proto(meeting) + async def ListMeetings( + self: ServicerHost, + request: noteflow_pb2.ListMeetingsRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.ListMeetingsResponse: + limit = request.limit or 100 + offset = request.offset or 0 + sort_desc = request.sort_order != noteflow_pb2.SORT_ORDER_CREATED_ASC + if self._use_database(): + states = [MeetingState(s) for s in request.states] if request.states else None + async with self._create_uow() as uow: + meetings, total = await uow.meetings.list_all( + states=states, + limit=limit, + offset=offset, + sort_desc=sort_desc, + ) + return noteflow_pb2.ListMeetingsResponse( + meetings=[meeting_to_proto(m, include_segments=False) for m in meetings], + total_count=total, + ) + else: + store = self._get_memory_store() + states = [MeetingState(s) for s in request.states] if request.states else None + meetings, total = store.list_all( + states=states, + limit=limit, + offset=offset, + sort_desc=sort_desc, + ) + return noteflow_pb2.ListMeetingsResponse( + meetings=[meeting_to_proto(m, include_segments=False) for m in meetings], + total_count=total, + ) + async def GetMeeting( + self: ServicerHost, + request: noteflow_pb2.GetMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Meeting: + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(request.meeting_id))) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + if 
request.include_segments: + segments = await uow.segments.get_by_meeting(meeting.id) + meeting.segments = list(segments) + if request.include_summary: + summary = await uow.summaries.get_by_meeting(meeting.id) + meeting.summary = summary + return meeting_to_proto( + meeting, + include_segments=request.include_segments, + include_summary=request.include_summary, + ) + store = self._get_memory_store() + meeting = store.get(request.meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + return meeting_to_proto( + meeting, + include_segments=request.include_segments, + include_summary=request.include_summary, + ) + async def DeleteMeeting( + self: ServicerHost, + request: noteflow_pb2.DeleteMeetingRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.DeleteMeetingResponse: + if self._use_database(): + async with self._create_uow() as uow: + success = await uow.meetings.delete(MeetingId(UUID(request.meeting_id))) + if success: + await uow.commit() + return noteflow_pb2.DeleteMeetingResponse(success=True) + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + store = self._get_memory_store() + success = store.delete(request.meeting_id) + if not success: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + return noteflow_pb2.DeleteMeetingResponse(success=True) +```` + +## File: src/noteflow/grpc/_mixins/protocols.py +````python +from __future__ import annotations +from pathlib import Path +from typing import TYPE_CHECKING, Protocol +import numpy as np +from numpy.typing import NDArray +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + from noteflow.domain.entities import Meeting + from noteflow.infrastructure.asr import FasterWhisperEngine, Segmenter, StreamingVad + from noteflow.infrastructure.audio.writer import MeetingAudioWriter + from noteflow.infrastructure.diarization import DiarizationEngine, SpeakerTurn + from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork + from noteflow.infrastructure.security.crypto import AesGcmCryptoBox + from ..meeting_store import MeetingStore +class ServicerHost(Protocol): + _session_factory: async_sessionmaker[AsyncSession] | None + _memory_store: MeetingStore | None + _meetings_dir: Path + _crypto: AesGcmCryptoBox + _asr_engine: FasterWhisperEngine | None + _diarization_engine: DiarizationEngine | None + _summarization_service: object | None + _diarization_refinement_enabled: bool + _audio_writers: dict[str, MeetingAudioWriter] + _audio_write_failed: set[str] + _vad_instances: dict[str, StreamingVad] + _segmenters: dict[str, Segmenter] + _was_speaking: dict[str, bool] + _segment_counters: dict[str, int] + _stream_formats: dict[str, tuple[int, int]] + _active_streams: set[str] + _partial_buffers: dict[str, list[NDArray[np.float32]]] + _last_partial_time: dict[str, float] + _last_partial_text: dict[str, str] + _diarization_turns: dict[str, list[SpeakerTurn]] + _diarization_stream_time: dict[str, float] + _diarization_streaming_failed: set[str] + DEFAULT_SAMPLE_RATE: int + SUPPORTED_SAMPLE_RATES: list[int] + PARTIAL_CADENCE_SECONDS: float + MIN_PARTIAL_AUDIO_SECONDS: float + def _use_database(self) -> bool: + ... + def _get_memory_store(self) -> MeetingStore: + ... + def _create_uow(self) -> SqlAlchemyUnitOfWork: + ... + def _next_segment_id(self, meeting_id: str, fallback: int = 0) -> int: + ... 
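+    # Editor's note: the two streaming-state hooks below are driven from
+    # StreamingMixin -- _init_streaming_state() is invoked once per stream by
+    # _init_stream_session_db()/_init_stream_session_memory() after the
+    # meeting DEK and audio writer are prepared, and _cleanup_streaming_state()
+    # runs in StreamTranscription's finally block alongside
+    # _close_audio_writer().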
+ def _init_streaming_state(self, meeting_id: str, next_segment_id: int) -> None: + ... + def _cleanup_streaming_state(self, meeting_id: str) -> None: + ... + def _ensure_meeting_dek(self, meeting: Meeting) -> tuple[bytes, bytes, bool]: + ... + def _start_meeting_if_needed(self, meeting: Meeting) -> tuple[bool, str | None]: + ... + def _open_meeting_audio_writer( + self, + meeting_id: str, + dek: bytes, + wrapped_dek: bytes, + ) -> None: + ... + def _close_audio_writer(self, meeting_id: str) -> None: + ... +```` + +## File: src/noteflow/grpc/_mixins/streaming.py +````python +from __future__ import annotations +import logging +import struct +import time +from collections.abc import AsyncIterator +from dataclasses import dataclass +from typing import TYPE_CHECKING +from uuid import UUID +import grpc.aio +import numpy as np +from numpy.typing import NDArray +from noteflow.domain.value_objects import MeetingId +from ..proto import noteflow_pb2 +from .converters import create_segment_from_asr, create_vad_update, segment_to_proto_update +if TYPE_CHECKING: + from .protocols import ServicerHost +logger = logging.getLogger(__name__) +@dataclass +class _StreamSessionInit: + next_segment_id: int + error_code: int | None = None + error_message: str | None = None + @property + def success(self) -> bool: + return self.error_code is None +class StreamingMixin: + async def StreamTranscription( + self: ServicerHost, + request_iterator: AsyncIterator[noteflow_pb2.AudioChunk], + context: grpc.aio.ServicerContext, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + if self._asr_engine is None or not self._asr_engine.is_loaded: + await context.abort( + grpc.StatusCode.FAILED_PRECONDITION, + "ASR engine not loaded", + ) + current_meeting_id: str | None = None + try: + async for chunk in request_iterator: + meeting_id = chunk.meeting_id + if not meeting_id: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "meeting_id required", + ) + if current_meeting_id is None: + init_result = await self._init_stream_for_meeting(meeting_id, context) + if init_result is None: + return + current_meeting_id = meeting_id + elif meeting_id != current_meeting_id: + await context.abort( + grpc.StatusCode.INVALID_ARGUMENT, + "Stream may only contain a single meeting_id", + ) + async for update in self._process_stream_chunk(current_meeting_id, chunk, context): + yield update + if current_meeting_id and current_meeting_id in self._segmenters: + async for update in self._flush_segmenter(current_meeting_id): + yield update + finally: + if current_meeting_id: + self._cleanup_streaming_state(current_meeting_id) + self._close_audio_writer(current_meeting_id) + self._active_streams.discard(current_meeting_id) + async def _init_stream_for_meeting( + self: ServicerHost, + meeting_id: str, + context: grpc.aio.ServicerContext, + ) -> _StreamSessionInit | None: + if meeting_id in self._active_streams: + await context.abort( + grpc.StatusCode.FAILED_PRECONDITION, + f"Meeting {meeting_id} already streaming", + ) + self._active_streams.add(meeting_id) + if self._use_database(): + init_result = await self._init_stream_session_db(meeting_id) + else: + init_result = self._init_stream_session_memory(meeting_id) + if not init_result.success: + self._active_streams.discard(meeting_id) + await context.abort(init_result.error_code, init_result.error_message or "") + return init_result + async def _init_stream_session_db( + self: ServicerHost, + meeting_id: str, + ) -> _StreamSessionInit: + async with self._create_uow() as uow: + meeting = 
await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.NOT_FOUND, + error_message=f"Meeting {meeting_id} not found", + ) + dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) + recording_updated, error_msg = self._start_meeting_if_needed(meeting) + if error_msg: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.INVALID_ARGUMENT, + error_message=error_msg, + ) + if dek_updated or recording_updated: + await uow.meetings.update(meeting) + await uow.commit() + next_segment_id = await uow.segments.get_next_segment_id(meeting.id) + self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) + self._init_streaming_state(meeting_id, next_segment_id) + return _StreamSessionInit(next_segment_id=next_segment_id) + def _init_stream_session_memory( + self: ServicerHost, + meeting_id: str, + ) -> _StreamSessionInit: + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.NOT_FOUND, + error_message=f"Meeting {meeting_id} not found", + ) + dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) + recording_updated, error_msg = self._start_meeting_if_needed(meeting) + if error_msg: + return _StreamSessionInit( + next_segment_id=0, + error_code=grpc.StatusCode.INVALID_ARGUMENT, + error_message=error_msg, + ) + if dek_updated or recording_updated: + store.update(meeting) + next_segment_id = meeting.next_segment_id + self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) + self._init_streaming_state(meeting_id, next_segment_id) + return _StreamSessionInit(next_segment_id=next_segment_id) + async def _process_stream_chunk( + self: ServicerHost, + meeting_id: str, + chunk: noteflow_pb2.AudioChunk, + context: grpc.aio.ServicerContext, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + try: + sample_rate, channels = self._normalize_stream_format( + meeting_id, + chunk.sample_rate, + chunk.channels, + ) + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + audio = self._decode_audio_chunk(chunk) + if audio is None: + return + try: + audio = self._convert_audio_format(audio, sample_rate, channels) + except ValueError as e: + await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) + self._write_audio_chunk_safe(meeting_id, audio) + async for update in self._process_audio_with_vad(meeting_id, audio): + yield update + def _normalize_stream_format( + self: ServicerHost, + meeting_id: str, + sample_rate: int, + channels: int, + ) -> tuple[int, int]: + normalized_rate = sample_rate or self.DEFAULT_SAMPLE_RATE + normalized_channels = channels or 1 + if normalized_rate not in self.SUPPORTED_SAMPLE_RATES: + raise ValueError( + "Unsupported sample_rate " + f"{normalized_rate}; supported: {self.SUPPORTED_SAMPLE_RATES}" + ) + if normalized_channels < 1: + raise ValueError("channels must be >= 1") + existing = self._stream_formats.get(meeting_id) + if existing and existing != (normalized_rate, normalized_channels): + raise ValueError("Stream audio format cannot change mid-stream") + self._stream_formats.setdefault(meeting_id, (normalized_rate, normalized_channels)) + return normalized_rate, normalized_channels + def _convert_audio_format( + self: ServicerHost, + audio: NDArray[np.float32], + sample_rate: int, + channels: int, + ) -> NDArray[np.float32]: + if channels > 1: + if audio.size % channels != 0: + raise 
ValueError("Audio buffer size is not divisible by channel count") + audio = audio.reshape(-1, channels).mean(axis=1) + if sample_rate != self.DEFAULT_SAMPLE_RATE: + audio = self._resample_audio(audio, sample_rate, self.DEFAULT_SAMPLE_RATE) + return audio + @staticmethod + def _resample_audio( + audio: NDArray[np.float32], + src_rate: int, + dst_rate: int, + ) -> NDArray[np.float32]: + if src_rate == dst_rate or audio.size == 0: + return audio + ratio = dst_rate / src_rate + new_length = round(audio.shape[0] * ratio) + if new_length <= 0: + return np.array([], dtype=np.float32) + old_indices = np.arange(audio.shape[0]) + new_indices = np.arange(new_length) / ratio + return np.interp(new_indices, old_indices, audio).astype(np.float32) + def _decode_audio_chunk( + self: ServicerHost, + chunk: noteflow_pb2.AudioChunk, + ) -> NDArray[np.float32] | None: + if not chunk.audio_data: + return None + try: + return np.frombuffer(chunk.audio_data, dtype=np.float32) + except (ValueError, struct.error) as e: + logger.warning("Failed to decode audio chunk: %s", e) + return None + def _write_audio_chunk_safe( + self: ServicerHost, + meeting_id: str, + audio: NDArray[np.float32], + ) -> None: + if meeting_id not in self._audio_writers: + return + if meeting_id in self._audio_write_failed: + return + try: + self._audio_writers[meeting_id].write_chunk(audio) + except Exception as e: + logger.error( + "Audio write failed for meeting %s: %s. Recording may be incomplete.", + meeting_id, + e, + ) + self._audio_write_failed.add(meeting_id) + async def _process_audio_with_vad( + self: ServicerHost, + meeting_id: str, + audio: NDArray[np.float32], + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + vad = self._vad_instances.get(meeting_id) + segmenter = self._segmenters.get(meeting_id) + if vad is None or segmenter is None: + return + is_speech = vad.process_chunk(audio) + if hasattr(self, "_process_streaming_diarization"): + self._process_streaming_diarization(meeting_id, audio) + was_speaking = self._was_speaking.get(meeting_id, False) + if is_speech and not was_speaking: + yield create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_START) + self._was_speaking[meeting_id] = True + elif not is_speech and was_speaking: + yield create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_END) + self._was_speaking[meeting_id] = False + if is_speech: + if meeting_id in self._partial_buffers: + self._partial_buffers[meeting_id].append(audio.copy()) + partial_update = await self._maybe_emit_partial(meeting_id) + if partial_update is not None: + yield partial_update + for audio_segment in segmenter.process_audio(audio, is_speech): + self._clear_partial_buffer(meeting_id) + async for update in self._process_audio_segment( + meeting_id, + audio_segment.audio, + audio_segment.start_time, + ): + yield update + async def _maybe_emit_partial( + self: ServicerHost, + meeting_id: str, + ) -> noteflow_pb2.TranscriptUpdate | None: + if self._asr_engine is None or not self._asr_engine.is_loaded: + return None + last_time = self._last_partial_time.get(meeting_id, 0) + now = time.time() + if now - last_time < self.PARTIAL_CADENCE_SECONDS: + return None + buffer = self._partial_buffers.get(meeting_id, []) + if not buffer: + return None + combined = np.concatenate(buffer) + audio_seconds = len(combined) / self.DEFAULT_SAMPLE_RATE + if audio_seconds < self.MIN_PARTIAL_AUDIO_SECONDS: + return None + results = await self._asr_engine.transcribe_async(combined) + partial_text = " ".join(result.text for result in results) + 
self._partial_buffers[meeting_id] = [] + last_text = self._last_partial_text.get(meeting_id, "") + if partial_text and partial_text != last_text: + self._last_partial_time[meeting_id] = now + self._last_partial_text[meeting_id] = partial_text + return noteflow_pb2.TranscriptUpdate( + meeting_id=meeting_id, + update_type=noteflow_pb2.UPDATE_TYPE_PARTIAL, + partial_text=partial_text, + server_timestamp=now, + ) + self._last_partial_time[meeting_id] = now + return None + def _clear_partial_buffer(self: ServicerHost, meeting_id: str) -> None: + if meeting_id in self._partial_buffers: + self._partial_buffers[meeting_id] = [] + if meeting_id in self._last_partial_text: + self._last_partial_text[meeting_id] = "" + if meeting_id in self._last_partial_time: + self._last_partial_time[meeting_id] = time.time() + async def _flush_segmenter( + self: ServicerHost, + meeting_id: str, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + segmenter = self._segmenters.get(meeting_id) + if segmenter is None: + return + self._clear_partial_buffer(meeting_id) + final_segment = segmenter.flush() + if final_segment is not None: + async for update in self._process_audio_segment( + meeting_id, + final_segment.audio, + final_segment.start_time, + ): + yield update + async def _process_audio_segment( + self: ServicerHost, + meeting_id: str, + audio: NDArray[np.float32], + segment_start_time: float, + ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: + if len(audio) == 0 or self._asr_engine is None: + return + if self._use_database(): + async with self._create_uow() as uow: + meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) + if meeting is None: + return + results = await self._asr_engine.transcribe_async(audio) + for result in results: + segment_id = self._next_segment_id( + meeting_id, + fallback=meeting.next_segment_id, + ) + segment = create_segment_from_asr( + meeting.id, + segment_id, + result, + segment_start_time, + ) + if hasattr(self, "_maybe_assign_speaker"): + self._maybe_assign_speaker(meeting_id, segment) + meeting.add_segment(segment) + await uow.segments.add(meeting.id, segment) + await uow.commit() + yield segment_to_proto_update(meeting_id, segment) + else: + store = self._get_memory_store() + meeting = store.get(meeting_id) + if meeting is None: + return + results = await self._asr_engine.transcribe_async(audio) + for result in results: + segment_id = self._next_segment_id( + meeting_id, + fallback=meeting.next_segment_id, + ) + segment = create_segment_from_asr( + meeting.id, + segment_id, + result, + segment_start_time, + ) + if hasattr(self, "_maybe_assign_speaker"): + self._maybe_assign_speaker(meeting_id, segment) + store.add_segment(meeting_id, segment) + yield segment_to_proto_update(meeting_id, segment) +```` + +## File: src/noteflow/grpc/_mixins/summarization.py +````python +from __future__ import annotations +import logging +from typing import TYPE_CHECKING +from uuid import UUID +import grpc.aio +from noteflow.domain.entities import Segment, Summary +from noteflow.domain.summarization import ProviderUnavailableError +from noteflow.domain.value_objects import MeetingId +from ..proto import noteflow_pb2 +from .converters import summary_to_proto +if TYPE_CHECKING: + from noteflow.application.services.summarization_service import SummarizationService + from .protocols import ServicerHost +logger = logging.getLogger(__name__) +class SummarizationMixin: + _summarization_service: SummarizationService | None + async def GenerateSummary( + self: ServicerHost, + request: 
noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + if self._use_database(): + return await self._generate_summary_db(request, context) + return await self._generate_summary_memory(request, context) + async def _generate_summary_db( + self: ServicerHost, + request: noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + meeting_id = MeetingId(UUID(request.meeting_id)) + async with self._create_uow() as uow: + meeting = await uow.meetings.get(meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + existing = await uow.summaries.get_by_meeting(meeting.id) + if existing and not request.force_regenerate: + return summary_to_proto(existing) + segments = list(await uow.segments.get_by_meeting(meeting.id)) + summary = await self._summarize_or_placeholder(meeting_id, segments) + async with self._create_uow() as uow: + saved = await uow.summaries.save(summary) + await uow.commit() + return summary_to_proto(saved) + async def _generate_summary_memory( + self: ServicerHost, + request: noteflow_pb2.GenerateSummaryRequest, + context: grpc.aio.ServicerContext, + ) -> noteflow_pb2.Summary: + store = self._get_memory_store() + meeting = store.get(request.meeting_id) + if meeting is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + f"Meeting {request.meeting_id} not found", + ) + if meeting.summary and not request.force_regenerate: + return summary_to_proto(meeting.summary) + summary = await self._summarize_or_placeholder(meeting.id, meeting.segments) + store.set_summary(request.meeting_id, summary) + return summary_to_proto(summary) + async def _summarize_or_placeholder( + self: ServicerHost, + meeting_id: MeetingId, + segments: list[Segment], + ) -> Summary: + if self._summarization_service is None: + logger.warning("SummarizationService not configured; using placeholder summary") + return self._generate_placeholder_summary(meeting_id, segments) + try: + result = await self._summarization_service.summarize( + meeting_id=meeting_id, + segments=segments, + ) + logger.info( + "Generated summary using %s (fallback=%s)", + result.provider_used, + result.fallback_used, + ) + return result.summary + except ProviderUnavailableError as exc: + logger.warning("Summarization provider unavailable; using placeholder: %s", exc) + except (TimeoutError, RuntimeError, ValueError) as exc: + logger.exception( + "Summarization failed (%s); using placeholder summary", type(exc).__name__ + ) + return self._generate_placeholder_summary(meeting_id, segments) + def _generate_placeholder_summary( + self: ServicerHost, + meeting_id: MeetingId, + segments: list[Segment], + ) -> Summary: + full_text = " ".join(s.text for s in segments) + executive = f"{full_text[:200]}..." if len(full_text) > 200 else full_text + executive = executive or "No transcript available." 
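# Editor's note: the placeholder keeps only the first 200 characters of
# the joined transcript. A hedged sketch of the expected shape (values
# are illustrative, not from a real run):
#
#     summary = servicer._generate_placeholder_summary(meeting_id, [])
#     assert summary.executive_summary == "No transcript available."
#     assert summary.model_version == "placeholder-v0"
#
# The pinned model_version lets downstream consumers detect fallback
# output rather than mistaking it for a provider-generated summary.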
+ return Summary( + meeting_id=meeting_id, + executive_summary=executive, + key_points=[], + action_items=[], + model_version="placeholder-v0", + ) +```` + ## File: src/noteflow/grpc/proto/__init__.py ````python -"""Generated protobuf and gRPC code.""" -```` -## File: src/noteflow/grpc/__init__.py -````python -"""NoteFlow gRPC server and client components.""" - -from noteflow.domain.value_objects import MeetingState - -from .client import ( - AnnotationInfo, - DiarizationResult, - ExportResult, - MeetingInfo, - NoteFlowClient, - RenameSpeakerResult, - ServerInfo, - TranscriptSegment, -) -from .meeting_store import MeetingStore -from .service import NoteFlowServicer - -__all__ = [ - "AnnotationInfo", - "DiarizationResult", - "ExportResult", - "MeetingInfo", - "MeetingState", - "MeetingStore", - "NoteFlowClient", - "NoteFlowServicer", - "RenameSpeakerResult", - "ServerInfo", - "TranscriptSegment", -] ```` ## File: src/noteflow/infrastructure/asr/__init__.py ````python -"""ASR infrastructure module. - -Provides speech-to-text transcription using faster-whisper. -""" - from noteflow.infrastructure.asr.dto import ( AsrResult, PartialUpdate, @@ -4040,7 +2664,6 @@ from noteflow.infrastructure.asr.streaming_vad import ( StreamingVad, VadEngine, ) - __all__ = [ "AsrEngine", "AsrResult", @@ -4062,1369 +2685,168 @@ __all__ = [ ## File: src/noteflow/infrastructure/asr/dto.py ````python -"""Data Transfer Objects for ASR. - -These DTOs define the data structures used by ASR components. -""" - from __future__ import annotations - from dataclasses import dataclass, field from enum import Enum - - @dataclass(frozen=True) class WordTiming: - """Word-level timing information.""" - word: str - start: float # Start time in seconds - end: float # End time in seconds - probability: float # Confidence (0.0-1.0) - + start: float + end: float + probability: float def __post_init__(self) -> None: - """Validate timing data.""" if self.end < self.start: raise ValueError(f"Word end ({self.end}) < start ({self.start})") if not 0.0 <= self.probability <= 1.0: raise ValueError(f"Probability must be 0.0-1.0, got {self.probability}") - - @dataclass(frozen=True) class AsrResult: - """ASR transcription result for a segment.""" - text: str - start: float # Start time in seconds - end: float # End time in seconds + start: float + end: float words: tuple[WordTiming, ...] = field(default_factory=tuple) language: str = "en" language_probability: float = 1.0 avg_logprob: float = 0.0 no_speech_prob: float = 0.0 - def __post_init__(self) -> None: - """Validate result data.""" if self.end < self.start: raise ValueError(f"Segment end ({self.end}) < start ({self.start})") - @property def duration(self) -> float: - """Duration of the segment in seconds.""" return self.end - self.start - - @dataclass class PartialUpdate: - """Unstable partial transcript (may be replaced).""" - text: str start: float end: float - def __post_init__(self) -> None: - """Validate partial data.""" if self.end < self.start: raise ValueError(f"Partial end ({self.end}) < start ({self.start})") - - class VadEventType(Enum): - """Voice Activity Detection event types.""" - SPEECH_START = "speech_start" SPEECH_END = "speech_end" - - @dataclass(frozen=True) class VadEvent: - """Voice Activity Detection event. - - Represents a speech/silence transition detected by VAD. 
- """ - event_type: VadEventType - timestamp: float # Seconds from stream start - confidence: float = 1.0 # Detection confidence (0.0-1.0) - + timestamp: float + confidence: float = 1.0 def __post_init__(self) -> None: - """Validate event data.""" if self.timestamp < 0: raise ValueError(f"Timestamp must be non-negative, got {self.timestamp}") if not 0.0 <= self.confidence <= 1.0: raise ValueError(f"Confidence must be 0.0-1.0, got {self.confidence}") ```` -## File: src/noteflow/infrastructure/asr/engine.py -````python -"""ASR engine implementation using faster-whisper. - -Provides Whisper-based transcription with word-level timestamps. -""" - -from __future__ import annotations - -import asyncio -import logging -from collections.abc import Iterator -from functools import partial -from typing import TYPE_CHECKING, Final - -if TYPE_CHECKING: - import numpy as np - from numpy.typing import NDArray - -from noteflow.infrastructure.asr.dto import AsrResult, WordTiming - -logger = logging.getLogger(__name__) - -# Available model sizes -VALID_MODEL_SIZES: Final[tuple[str, ...]] = ( - "tiny", - "tiny.en", - "base", - "base.en", - "small", - "small.en", - "medium", - "medium.en", - "large-v1", - "large-v2", - "large-v3", -) - - -class FasterWhisperEngine: - """faster-whisper based ASR engine. - - Uses CTranslate2 for efficient Whisper inference on CPU or GPU. - """ - - def __init__( - self, - compute_type: str = "int8", - device: str = "cpu", - num_workers: int = 1, - ) -> None: - """Initialize the engine. - - Args: - compute_type: Computation type ("int8", "float16", "float32"). - device: Device to use ("cpu" or "cuda"). - num_workers: Number of worker threads. - """ - self._compute_type = compute_type - self._device = device - self._num_workers = num_workers - self._model = None - self._model_size: str | None = None - - def load_model(self, model_size: str = "base") -> None: - """Load the ASR model. - - Args: - model_size: Model size (e.g., "tiny", "base", "small"). - - Raises: - ValueError: If model_size is invalid. - RuntimeError: If model loading fails. - """ - from faster_whisper import WhisperModel - - if model_size not in VALID_MODEL_SIZES: - raise ValueError( - f"Invalid model size: {model_size}. Valid sizes: {', '.join(VALID_MODEL_SIZES)}" - ) - - logger.info( - "Loading Whisper model '%s' on %s with %s compute...", - model_size, - self._device, - self._compute_type, - ) - - try: - self._model = WhisperModel( - model_size, - device=self._device, - compute_type=self._compute_type, - num_workers=self._num_workers, - ) - self._model_size = model_size - logger.info("Model loaded successfully") - except Exception as e: - raise RuntimeError(f"Failed to load model: {e}") from e - - def transcribe( - self, - audio: NDArray[np.float32], - language: str | None = None, - ) -> Iterator[AsrResult]: - """Transcribe audio and yield results. - - Args: - audio: Audio samples as float32 array (16kHz mono, normalized). - language: Optional language code (e.g., "en"). - - Yields: - AsrResult segments with word-level timestamps. - """ - if self._model is None: - raise RuntimeError("Model not loaded. 
Call load_model() first.") - - # Transcribe with word timestamps - segments, info = self._model.transcribe( - audio, - language=language, - word_timestamps=True, - beam_size=5, - vad_filter=True, # Filter out non-speech - ) - - logger.debug( - "Detected language: %s (prob: %.2f)", - info.language, - info.language_probability, - ) - - for segment in segments: - # Convert word info to WordTiming objects - words: list[WordTiming] = [] - if segment.words: - words = [ - WordTiming( - word=word.word, - start=word.start, - end=word.end, - probability=word.probability, - ) - for word in segment.words - ] - - yield AsrResult( - text=segment.text.strip(), - start=segment.start, - end=segment.end, - words=tuple(words), - language=info.language, - language_probability=info.language_probability, - avg_logprob=segment.avg_logprob, - no_speech_prob=segment.no_speech_prob, - ) - - async def transcribe_async( - self, - audio: NDArray[np.float32], - language: str | None = None, - ) -> list[AsrResult]: - """Transcribe audio asynchronously using executor. - - Offloads blocking transcription to a thread pool executor to avoid - blocking the asyncio event loop. - - Args: - audio: Audio samples as float32 array (16kHz mono, normalized). - language: Optional language code (e.g., "en"). - - Returns: - List of AsrResult segments with word-level timestamps. - """ - loop = asyncio.get_running_loop() - return await loop.run_in_executor( - None, - partial(lambda a, lang: list(self.transcribe(a, lang)), audio, language), - ) - - @property - def is_loaded(self) -> bool: - """Return True if model is loaded.""" - return self._model is not None - - @property - def model_size(self) -> str | None: - """Return the loaded model size, or None if not loaded.""" - return self._model_size - - def unload(self) -> None: - """Unload the model to free memory.""" - self._model = None - self._model_size = None - logger.info("Model unloaded") - - @property - def compute_type(self) -> str: - """Return the compute type.""" - return self._compute_type - - @property - def device(self) -> str: - """Return the device.""" - return self._device -```` - ## File: src/noteflow/infrastructure/asr/protocols.py ````python -"""ASR protocols defining contracts for ASR components.""" - from __future__ import annotations - from collections.abc import Iterator from typing import TYPE_CHECKING, Protocol - if TYPE_CHECKING: import numpy as np from numpy.typing import NDArray - from noteflow.infrastructure.asr.dto import AsrResult - - class AsrEngine(Protocol): - """Protocol for ASR transcription engine. - - Implementations should handle model loading, caching, and inference. - """ - def load_model(self, model_size: str = "base") -> None: - """Load the ASR model. - - Downloads the model if not cached. - - Args: - model_size: Model size ("tiny", "base", "small", "medium", "large"). - - Raises: - ValueError: If model_size is invalid. - RuntimeError: If model loading fails. - """ ... - def transcribe( self, audio: NDArray[np.float32], language: str | None = None, ) -> Iterator[AsrResult]: - """Transcribe audio and yield results. - - Args: - audio: Audio samples as float32 array (16kHz mono, normalized). - language: Optional language code (e.g., "en"). Auto-detected if None. - - Yields: - AsrResult segments. - - Raises: - RuntimeError: If model not loaded. - """ ... - @property def is_loaded(self) -> bool: - """Return True if model is loaded.""" ... - @property def model_size(self) -> str | None: - """Return the loaded model size, or None if not loaded.""" ... 
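# Editor's note: a typical lifecycle against this Protocol, shown as a
# minimal sketch (FasterWhisperEngine is the project's concrete engine;
# `audio` is assumed to be 16 kHz mono float32):
#
#     engine = FasterWhisperEngine(compute_type="int8", device="cpu")
#     engine.load_model("base")
#     for result in engine.transcribe(audio):
#         print(result.start, result.end, result.text)
#     engine.unload()
#
# transcribe() raises RuntimeError when no model is loaded, matching the
# concrete engine's behaviour.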
- def unload(self) -> None: - """Unload the model to free memory.""" ... ```` -## File: src/noteflow/infrastructure/audio/capture.py -````python -"""Audio capture implementation using sounddevice. - -Provide cross-platform audio input capture with device handling. -""" - -from __future__ import annotations - -import logging -import time -from typing import TYPE_CHECKING - -import numpy as np -import sounddevice as sd - -from noteflow.infrastructure.audio.dto import AudioDeviceInfo, AudioFrameCallback - -if TYPE_CHECKING: - from numpy.typing import NDArray - -logger = logging.getLogger(__name__) - - -class SoundDeviceCapture: - """sounddevice-based implementation of AudioCapture. - - Handle device enumeration, stream management, and device change detection. - Use PortAudio under the hood for cross-platform audio capture. - """ - - def __init__(self) -> None: - """Initialize the capture instance.""" - self._stream: sd.InputStream | None = None - self._callback: AudioFrameCallback | None = None - self._device_id: int | None = None - self._sample_rate: int = 16000 - self._channels: int = 1 - - def list_devices(self) -> list[AudioDeviceInfo]: - """List available audio input devices. - - Returns: - List of AudioDeviceInfo for all available input devices. - """ - devices: list[AudioDeviceInfo] = [] - device_list = sd.query_devices() - - # Get default input device index - try: - default_input = sd.default.device[0] # Input device index - except (TypeError, IndexError): - default_input = -1 - - devices.extend( - AudioDeviceInfo( - device_id=idx, - name=dev["name"], - channels=int(dev["max_input_channels"]), - sample_rate=int(dev["default_samplerate"]), - is_default=(idx == default_input), - ) - for idx, dev in enumerate(device_list) - if int(dev["max_input_channels"]) > 0 - ) - return devices - - def get_default_device(self) -> AudioDeviceInfo | None: - """Get the default input device. - - Returns: - Default input device info, or None if no input devices available. - """ - devices = self.list_devices() - for dev in devices: - if dev.is_default: - return dev - return devices[0] if devices else None - - def start( - self, - device_id: int | None, - on_frames: AudioFrameCallback, - sample_rate: int = 16000, - channels: int = 1, - chunk_duration_ms: int = 100, - ) -> None: - """Start capturing audio from the specified device. - - Args: - device_id: Device ID to capture from, or None for default device. - on_frames: Callback receiving (frames, timestamp) for each chunk. - sample_rate: Sample rate in Hz (default 16kHz for ASR). - channels: Number of channels (default 1 for mono). - chunk_duration_ms: Duration of each audio chunk in milliseconds. - - Raises: - RuntimeError: If already capturing. - ValueError: If device_id is invalid. 
- """ - if self._stream is not None: - raise RuntimeError("Already capturing audio") - - self._callback = on_frames - self._device_id = device_id - self._sample_rate = sample_rate - self._channels = channels - - # Calculate block size from chunk duration - blocksize = int(sample_rate * chunk_duration_ms / 1000) - - def _stream_callback( - indata: NDArray[np.float32], - frames: int, - time_info: object, # cffi CData from sounddevice, unused - status: sd.CallbackFlags, - ) -> None: - """Internal sounddevice callback.""" - # Suppress unused parameter warnings - _ = frames, time_info - - if status: - logger.warning("Audio stream status: %s", status) - - if self._callback is not None: - # Copy the data and flatten to 1D array - audio_data = indata.copy().flatten().astype(np.float32) - timestamp = time.monotonic() - self._callback(audio_data, timestamp) - - try: - self._stream = sd.InputStream( - device=device_id, - channels=channels, - samplerate=sample_rate, - blocksize=blocksize, - dtype=np.float32, - callback=_stream_callback, - ) - self._stream.start() - logger.info( - "Started audio capture: device=%s, rate=%d, channels=%d, blocksize=%d", - device_id, - sample_rate, - channels, - blocksize, - ) - except sd.PortAudioError as e: - self._stream = None - self._callback = None - raise RuntimeError(f"Failed to start audio capture: {e}") from e - - def stop(self) -> None: - """Stop audio capture. - - Safe to call even if not capturing. - """ - if self._stream is not None: - try: - self._stream.stop() - self._stream.close() - except sd.PortAudioError as e: - logger.warning("Error stopping audio stream: %s", e) - finally: - self._stream = None - self._callback = None - logger.info("Stopped audio capture") - - def is_capturing(self) -> bool: - """Check if currently capturing audio. - - Returns: - True if capture is active. - """ - return self._stream is not None and self._stream.active - - @property - def current_device_id(self) -> int | None: - """Get the current device ID being used for capture.""" - return self._device_id - - @property - def sample_rate(self) -> int: - """Get the current sample rate.""" - return self._sample_rate - - @property - def channels(self) -> int: - """Get the current number of channels.""" - return self._channels -```` - ## File: src/noteflow/infrastructure/audio/dto.py ````python -"""Data Transfer Objects for audio capture. - -Define data structures used by audio capture components. -""" - from __future__ import annotations - from collections.abc import Callable from dataclasses import dataclass - import numpy as np from numpy.typing import NDArray - - @dataclass(frozen=True) class AudioDeviceInfo: - """Information about an audio input device.""" - device_id: int name: str channels: int sample_rate: int is_default: bool - - @dataclass class TimestampedAudio: - """Audio frames with capture timestamp.""" - frames: NDArray[np.float32] - timestamp: float # Monotonic time when captured - duration: float # Duration in seconds - + timestamp: float + duration: float def __post_init__(self) -> None: - """Validate audio data.""" if self.duration < 0: raise ValueError("Duration must be non-negative") if self.timestamp < 0: raise ValueError("Timestamp must be non-negative") - - -# Type alias for audio frame callback AudioFrameCallback = Callable[[NDArray[np.float32], float], None] ```` -## File: src/noteflow/infrastructure/audio/playback.py -````python -"""Audio playback implementation using sounddevice. - -Provide cross-platform audio output playback from ring buffer audio. 
-""" - -from __future__ import annotations - -import logging -import threading -from enum import Enum, auto -from typing import TYPE_CHECKING - -import numpy as np -import sounddevice as sd -from numpy.typing import NDArray - -if TYPE_CHECKING: - from noteflow.infrastructure.audio.dto import TimestampedAudio - -logger = logging.getLogger(__name__) - - -class PlaybackState(Enum): - """Playback state machine states.""" - - STOPPED = auto() - PLAYING = auto() - PAUSED = auto() - - -class SoundDevicePlayback: - """sounddevice-based implementation of AudioPlayback. - - Handle audio output playback with position tracking and state management. - Thread-safe for UI callbacks. - """ - - def __init__(self, sample_rate: int = 16000, channels: int = 1) -> None: - """Initialize the playback instance. - - Args: - sample_rate: Sample rate in Hz (default 16kHz for ASR audio). - channels: Number of channels (default 1 for mono). - """ - self._sample_rate = sample_rate - self._channels = channels - - # Playback state - self._state = PlaybackState.STOPPED - self._lock = threading.Lock() - - # Audio data - self._audio_data: NDArray[np.float32] | None = None - self._total_samples: int = 0 - self._current_sample: int = 0 - - # Stream - self._stream: sd.OutputStream | None = None - - def play(self, audio: list[TimestampedAudio]) -> None: - """Start playback of audio chunks. - - Args: - audio: List of TimestampedAudio chunks to play, ordered oldest to newest. - """ - if not audio: - logger.warning("No audio chunks to play") - return - - with self._lock: - # Stop any existing playback - self._stop_internal() - - # Concatenate all audio frames - frames = [chunk.frames for chunk in audio] - self._audio_data = np.concatenate(frames).astype(np.float32) - self._total_samples = len(self._audio_data) - self._current_sample = 0 - - # Create and start stream - self._start_stream() - self._state = PlaybackState.PLAYING - - logger.info( - "Started playback: %d samples (%.2f seconds)", - self._total_samples, - self.total_duration, - ) - - def pause(self) -> None: - """Pause playback. - - Safe to call even if not playing. - """ - with self._lock: - if self._state == PlaybackState.PLAYING and self._stream is not None: - self._stream.stop() - self._state = PlaybackState.PAUSED - logger.debug("Paused playback at %.2f seconds", self.current_position) - - def resume(self) -> None: - """Resume paused playback. - - No-op if not paused. - """ - with self._lock: - if self._state == PlaybackState.PAUSED and self._stream is not None: - self._stream.start() - self._state = PlaybackState.PLAYING - logger.debug("Resumed playback from %.2f seconds", self.current_position) - - def stop(self) -> None: - """Stop playback and reset position. - - Safe to call even if not playing. 
- """ - with self._lock: - self._stop_internal() - - def _stop_internal(self) -> None: - """Internal stop without lock (caller must hold lock).""" - if self._stream is not None: - try: - self._stream.stop() - self._stream.close() - except sd.PortAudioError as e: - logger.warning("Error stopping playback stream: %s", e) - finally: - self._stream = None - - self._state = PlaybackState.STOPPED - self._current_sample = 0 - self._audio_data = None - self._total_samples = 0 - logger.debug("Stopped playback") - - def _start_stream(self) -> None: - """Start the output stream (caller must hold lock).""" - - def _stream_callback( - outdata: NDArray[np.float32], - frames: int, - time_info: object, - status: sd.CallbackFlags, - ) -> None: - """Internal sounddevice output callback.""" - _ = time_info # Unused - - if status: - logger.warning("Playback stream status: %s", status) - - with self._lock: - if self._audio_data is None or self._state != PlaybackState.PLAYING: - # Output silence - outdata.fill(0) - return - - # Calculate how many samples we can provide - available = self._total_samples - self._current_sample - to_copy = min(frames, available) - - if to_copy > 0: - # Copy audio data to output buffer - outdata[:to_copy, 0] = self._audio_data[ - self._current_sample : self._current_sample + to_copy - ] - self._current_sample += to_copy - - # Fill remaining with silence - if to_copy < frames: - outdata[to_copy:] = 0 - - # Check if playback is complete - if self._current_sample >= self._total_samples: - # Schedule stop on another thread to avoid deadlock - threading.Thread(target=self._on_playback_complete, daemon=True).start() - - try: - self._stream = sd.OutputStream( - channels=self._channels, - samplerate=self._sample_rate, - dtype=np.float32, - callback=_stream_callback, - ) - self._stream.start() - except sd.PortAudioError as e: - self._stream = None - raise RuntimeError(f"Failed to start playback stream: {e}") from e - - def _on_playback_complete(self) -> None: - """Handle playback completion.""" - logger.info("Playback completed") - self.stop() - - def seek(self, position: float) -> bool: - """Seek to a specific position in the audio. - - Thread-safe. Can be called from any thread. - - Args: - position: Position in seconds from start of audio. - - Returns: - True if seek was successful, False if no audio loaded or position out of bounds. - """ - with self._lock: - if self._audio_data is None: - logger.warning("Cannot seek: no audio loaded") - return False - - # Clamp position to valid range - max_position = self._total_samples / self._sample_rate - clamped_position = max(0.0, min(position, max_position)) - - # Convert to sample position - self._current_sample = int(clamped_position * self._sample_rate) - - logger.debug( - "Seeked to %.2f seconds (sample %d)", - clamped_position, - self._current_sample, - ) - return True - - def is_playing(self) -> bool: - """Check if currently playing audio. - - Returns: - True if playback is active (not paused or stopped). 
- """ - with self._lock: - return self._state == PlaybackState.PLAYING - - @property - def current_position(self) -> float: - """Current playback position in seconds from start of loaded audio.""" - with self._lock: - return self._current_sample / self._sample_rate - - @property - def total_duration(self) -> float: - """Total duration of loaded audio in seconds.""" - with self._lock: - return self._total_samples / self._sample_rate - - @property - def state(self) -> PlaybackState: - """Current playback state.""" - with self._lock: - return self._state - - @property - def sample_rate(self) -> int: - """Sample rate in Hz.""" - return self._sample_rate - - @property - def channels(self) -> int: - """Number of channels.""" - return self._channels -```` - -## File: src/noteflow/infrastructure/audio/protocols.py -````python -"""Audio protocols defining contracts for audio components. - -Define Protocol interfaces for audio capture, level metering, and buffering. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Protocol - -if TYPE_CHECKING: - import numpy as np - from numpy.typing import NDArray - - from noteflow.infrastructure.audio.dto import ( - AudioDeviceInfo, - AudioFrameCallback, - TimestampedAudio, - ) - - -class AudioCapture(Protocol): - """Protocol for audio input capture. - - Implementations should handle device enumeration, stream management, - and device change detection. - """ - - def list_devices(self) -> list[AudioDeviceInfo]: - """List available audio input devices. - - Returns: - List of AudioDeviceInfo for all available input devices. - """ - ... - - def start( - self, - device_id: int | None, - on_frames: AudioFrameCallback, - sample_rate: int = 16000, - channels: int = 1, - chunk_duration_ms: int = 100, - ) -> None: - """Start capturing audio from the specified device. - - Args: - device_id: Device ID to capture from, or None for default device. - on_frames: Callback receiving (frames, timestamp) for each chunk. - sample_rate: Sample rate in Hz (default 16kHz for ASR). - channels: Number of channels (default 1 for mono). - chunk_duration_ms: Duration of each audio chunk in milliseconds. - - Raises: - RuntimeError: If already capturing. - ValueError: If device_id is invalid. - """ - ... - - def stop(self) -> None: - """Stop audio capture. - - Safe to call even if not capturing. - """ - ... - - def is_capturing(self) -> bool: - """Check if currently capturing audio. - - Returns: - True if capture is active. - """ - ... - - -class AudioLevelProvider(Protocol): - """Protocol for computing audio levels (VU meter data).""" - - def get_rms(self, frames: NDArray[np.float32]) -> float: - """Calculate RMS level from audio frames. - - Args: - frames: Audio samples as float32 array (normalized -1.0 to 1.0). - - Returns: - RMS level normalized to 0.0-1.0 range. - """ - ... - - def get_db(self, frames: NDArray[np.float32]) -> float: - """Calculate dB level from audio frames. - - Args: - frames: Audio samples as float32 array (normalized -1.0 to 1.0). - - Returns: - Level in dB (typically -60 to 0 range). - """ - ... - - -class RingBuffer(Protocol): - """Protocol for timestamped audio ring buffer. - - Ring buffers store recent audio with timestamps for ASR processing - and playback sync. - """ - - def push(self, audio: TimestampedAudio) -> None: - """Add audio to the buffer. - - Old audio is discarded if buffer exceeds max_duration. - - Args: - audio: Timestamped audio chunk to add. - """ - ... 
- - def get_window(self, duration_seconds: float) -> list[TimestampedAudio]: - """Get the last N seconds of audio. - - Args: - duration_seconds: How many seconds of audio to retrieve. - - Returns: - List of TimestampedAudio chunks, ordered oldest to newest. - """ - ... - - def clear(self) -> None: - """Clear all audio from the buffer.""" - ... - - @property - def duration(self) -> float: - """Total duration of buffered audio in seconds.""" - ... - - @property - def max_duration(self) -> float: - """Maximum buffer duration in seconds.""" - ... - - -class AudioPlayback(Protocol): - """Protocol for audio output playback. - - Implementations should handle output device management, playback state, - and position tracking for sync with UI. - """ - - def play(self, audio: list[TimestampedAudio]) -> None: - """Start playback of audio chunks. - - Args: - audio: List of TimestampedAudio chunks to play, ordered oldest to newest. - """ - ... - - def pause(self) -> None: - """Pause playback. - - Safe to call even if not playing. - """ - ... - - def resume(self) -> None: - """Resume paused playback. - - No-op if not paused. - """ - ... - - def stop(self) -> None: - """Stop playback and reset position. - - Safe to call even if not playing. - """ - ... - - def is_playing(self) -> bool: - """Check if currently playing audio. - - Returns: - True if playback is active (not paused or stopped). - """ - ... - - @property - def current_position(self) -> float: - """Current playback position in seconds from start of loaded audio.""" - ... - - @property - def total_duration(self) -> float: - """Total duration of loaded audio in seconds.""" - ... -```` - ## File: src/noteflow/infrastructure/audio/ring_buffer.py ````python -"""Timestamped audio ring buffer implementation. - -Store recent audio with timestamps for ASR processing and playback sync. -""" - from __future__ import annotations - from collections import deque from typing import TYPE_CHECKING - if TYPE_CHECKING: from noteflow.infrastructure.audio.dto import TimestampedAudio - - class TimestampedRingBuffer: - """Ring buffer for timestamped audio chunks. - - Automatically discard old audio when the buffer exceeds max_duration. - Thread-safe for single-producer, single-consumer use. - """ - def __init__(self, max_duration: float = 30.0) -> None: - """Initialize ring buffer. - - Args: - max_duration: Maximum audio duration to keep in seconds. - - Raises: - ValueError: If max_duration is not positive. - """ if max_duration <= 0: raise ValueError("max_duration must be positive") - self._max_duration = max_duration self._buffer: deque[TimestampedAudio] = deque() self._total_duration: float = 0.0 - def push(self, audio: TimestampedAudio) -> None: - """Add audio to the buffer. - - Old audio is discarded if buffer exceeds max_duration. - - Args: - audio: Timestamped audio chunk to add. - """ self._buffer.append(audio) self._total_duration += audio.duration - - # Evict old chunks if over capacity while self._total_duration > self._max_duration and self._buffer: old = self._buffer.popleft() self._total_duration -= old.duration - def get_window(self, duration_seconds: float) -> list[TimestampedAudio]: - """Get the last N seconds of audio. - - Args: - duration_seconds: How many seconds of audio to retrieve. - - Returns: - List of TimestampedAudio chunks, ordered oldest to newest. 
- """ if duration_seconds <= 0: return [] - result: list[TimestampedAudio] = [] accumulated_duration = 0.0 - - # Iterate from newest to oldest for audio in reversed(self._buffer): result.append(audio) accumulated_duration += audio.duration if accumulated_duration >= duration_seconds: break - - # Return in chronological order (oldest first) result.reverse() return result - def get_all(self) -> list[TimestampedAudio]: - """Get all buffered audio. - - Returns: - List of all TimestampedAudio chunks, ordered oldest to newest. - """ return list(self._buffer) - def clear(self) -> None: - """Clear all audio from the buffer.""" self._buffer.clear() self._total_duration = 0.0 - @property def duration(self) -> float: - """Total duration of buffered audio in seconds.""" return self._total_duration - @property def max_duration(self) -> float: - """Maximum buffer duration in seconds.""" return self._max_duration - @property def chunk_count(self) -> int: - """Number of audio chunks in the buffer.""" return len(self._buffer) - def __len__(self) -> int: - """Return number of chunks in buffer.""" return len(self._buffer) ```` -## File: src/noteflow/infrastructure/audio/writer.py -````python -"""Streaming encrypted audio file writer for meetings.""" - -from __future__ import annotations - -import json -import logging -from datetime import UTC, datetime -from pathlib import Path -from typing import TYPE_CHECKING - -import numpy as np - -from noteflow.infrastructure.security.crypto import ChunkedAssetWriter - -if TYPE_CHECKING: - from numpy.typing import NDArray - - from noteflow.infrastructure.security.crypto import AesGcmCryptoBox - -logger = logging.getLogger(__name__) - - -class MeetingAudioWriter: - """Write audio chunks to encrypted meeting file. - - Manage meeting directory creation, manifest file, and encrypted audio storage. - Uses ChunkedAssetWriter for the actual encryption. - - Directory structure: - ~/.noteflow/meetings// - ├── manifest.json # Meeting metadata + wrapped DEK - └── audio.enc # Encrypted PCM16 chunks (NFAE format) - """ - - def __init__( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Initialize audio writer. - - Args: - crypto: CryptoBox instance for encryption operations. - meetings_dir: Root directory for all meetings (e.g., ~/.noteflow/meetings). - """ - self._crypto = crypto - self._meetings_dir = meetings_dir - self._asset_writer: ChunkedAssetWriter | None = None - self._meeting_dir: Path | None = None - self._sample_rate: int = 16000 - self._chunk_count: int = 0 - - def open( - self, - meeting_id: str, - dek: bytes, - wrapped_dek: bytes, - sample_rate: int = 16000, - ) -> None: - """Open meeting for audio writing. - - Create meeting directory, write manifest, open encrypted audio file. - - Args: - meeting_id: Meeting UUID string. - dek: Unwrapped data encryption key (32 bytes). - wrapped_dek: Encrypted DEK to store in manifest. - sample_rate: Audio sample rate (default 16000 Hz). - - Raises: - RuntimeError: If already open. - OSError: If directory creation fails. 
- """ - if self._asset_writer is not None: - raise RuntimeError("Writer already open") - - # Create meeting directory - self._meeting_dir = self._meetings_dir / meeting_id - self._meeting_dir.mkdir(parents=True, exist_ok=True) - - # Write manifest.json - manifest = { - "meeting_id": meeting_id, - "created_at": datetime.now(UTC).isoformat(), - "sample_rate": sample_rate, - "channels": 1, - "format": "pcm16", - "wrapped_dek": wrapped_dek.hex(), # Store as hex string - } - manifest_path = self._meeting_dir / "manifest.json" - manifest_path.write_text(json.dumps(manifest, indent=2)) - - # Open encrypted audio file - audio_path = self._meeting_dir / "audio.enc" - self._asset_writer = ChunkedAssetWriter(self._crypto) - self._asset_writer.open(audio_path, dek) - - self._sample_rate = sample_rate - self._chunk_count = 0 - - logger.info( - "Opened audio writer: meeting=%s, dir=%s", - meeting_id, - self._meeting_dir, - ) - - def write_chunk(self, audio: NDArray[np.float32]) -> None: - """Write audio chunk (convert float32 → PCM16). - - Args: - audio: Audio samples as float32 array (-1.0 to 1.0). - - Raises: - RuntimeError: If not open. - """ - if self._asset_writer is None or not self._asset_writer.is_open: - raise RuntimeError("Writer not open") - - # Convert float32 [-1.0, 1.0] to int16 [-32768, 32767] - # Clamp to prevent overflow on conversion - audio_clamped = np.clip(audio, -1.0, 1.0) - pcm16 = (audio_clamped * 32767.0).astype(np.int16) - - # Write as raw bytes (platform-native endianness, typically little-endian) - self._asset_writer.write_chunk(pcm16.tobytes()) - self._chunk_count += 1 - - def close(self) -> None: - """Close audio writer and finalize files. - - Safe to call if already closed or never opened. - """ - if self._asset_writer is not None: - bytes_written = self._asset_writer.bytes_written - self._asset_writer.close() - self._asset_writer = None - - logger.info( - "Closed audio writer: dir=%s, chunks=%d, bytes=%d", - self._meeting_dir, - self._chunk_count, - bytes_written, - ) - - self._meeting_dir = None - self._chunk_count = 0 - - @property - def is_open(self) -> bool: - """Check if writer is currently open for writing.""" - return self._asset_writer is not None and self._asset_writer.is_open - - @property - def bytes_written(self) -> int: - """Total encrypted bytes written to audio.enc file.""" - return 0 if self._asset_writer is None else self._asset_writer.bytes_written - - @property - def chunk_count(self) -> int: - """Number of audio chunks written.""" - return self._chunk_count - - @property - def meeting_dir(self) -> Path | None: - """Current meeting directory, or None if not open.""" - return self._meeting_dir -```` - ## File: src/noteflow/infrastructure/converters/__init__.py ````python -"""Infrastructure converters for data transformation between layers.""" - from noteflow.infrastructure.converters.asr_converters import AsrConverter from noteflow.infrastructure.converters.orm_converters import OrmConverter - __all__ = [ "AsrConverter", "OrmConverter", @@ -5433,72 +2855,34 @@ __all__ = [ ## File: src/noteflow/infrastructure/converters/asr_converters.py ````python -"""Convert ASR DTOs to domain entities.""" - from __future__ import annotations - from typing import TYPE_CHECKING - from noteflow.domain.entities import WordTiming - if TYPE_CHECKING: from noteflow.infrastructure.asr import dto from noteflow.infrastructure.asr.dto import AsrResult - - class AsrConverter: - """Convert ASR DTOs to domain entities.""" - @staticmethod def word_timing_to_domain(asr_word: 
dto.WordTiming) -> WordTiming: - """Convert ASR WordTiming DTO to domain WordTiming entity. - - Map field names from ASR convention (start/end) to domain - convention (start_time/end_time). - - Args: - asr_word: ASR WordTiming DTO from faster-whisper engine. - - Returns: - Domain WordTiming entity with validated timing. - - Raises: - ValueError: If timing validation fails. - """ return WordTiming( word=asr_word.word, start_time=asr_word.start, end_time=asr_word.end, probability=asr_word.probability, ) - @staticmethod def result_to_domain_words(result: AsrResult) -> list[WordTiming]: - """Convert all words from ASR result to domain entities. - - Args: - result: ASR transcription result with word timings. - - Returns: - List of domain WordTiming entities. - """ return [AsrConverter.word_timing_to_domain(word) for word in result.words] ```` ## File: src/noteflow/infrastructure/diarization/__init__.py ````python -"""Speaker diarization infrastructure module. - -Provides speaker diarization using pyannote.audio (offline) and diart (streaming). -""" - from noteflow.infrastructure.diarization.assigner import ( assign_speaker, assign_speakers_batch, ) from noteflow.infrastructure.diarization.dto import SpeakerTurn from noteflow.infrastructure.diarization.engine import DiarizationEngine - __all__ = [ "DiarizationEngine", "SpeakerTurn", @@ -5509,470 +2893,68 @@ __all__ = [ ## File: src/noteflow/infrastructure/diarization/assigner.py ````python -"""Speaker assignment utilities for mapping diarization to segments. - -Provides functions to assign speaker labels to transcript segments based on -diarization output using timestamp overlap matching. -""" - from __future__ import annotations - from collections.abc import Sequence - from noteflow.infrastructure.diarization.dto import SpeakerTurn - - def assign_speaker( start_time: float, end_time: float, turns: Sequence[SpeakerTurn], ) -> tuple[str | None, float]: - """Assign a speaker to a time range based on diarization turns. - - Uses maximum overlap duration to determine the dominant speaker - for the given time range. - - Args: - start_time: Segment start time in seconds. - end_time: Segment end time in seconds. - turns: Sequence of speaker turns from diarization. - - Returns: - Tuple of (speaker_id, confidence) where speaker_id is None if - no overlapping turns found. Confidence is the ratio of overlap - duration to segment duration. - """ if not turns: return None, 0.0 - segment_duration = end_time - start_time if segment_duration <= 0: return None, 0.0 - best_speaker: str | None = None best_overlap: float = 0.0 - for turn in turns: overlap = turn.overlap_duration(start_time, end_time) if overlap > best_overlap: best_overlap = overlap best_speaker = turn.speaker - if best_speaker is None: return None, 0.0 - confidence = best_overlap / segment_duration return best_speaker, confidence - - def assign_speakers_batch( segments: Sequence[tuple[float, float]], turns: Sequence[SpeakerTurn], ) -> list[tuple[str | None, float]]: - """Assign speakers to multiple segments in batch. - - Args: - segments: Sequence of (start_time, end_time) tuples. - turns: Sequence of speaker turns from diarization. - - Returns: - List of (speaker_id, confidence) tuples, one per segment. - """ return [assign_speaker(start, end, turns) for start, end in segments] ```` ## File: src/noteflow/infrastructure/diarization/dto.py ````python -"""Data Transfer Objects for speaker diarization. - -These DTOs define the data structures used by diarization components. 
-""" - from __future__ import annotations - from dataclasses import dataclass - - @dataclass(frozen=True) class SpeakerTurn: - """Speaker turn from diarization output. - - Represents a time segment where a specific speaker is talking. - """ - - speaker: str # Speaker label (e.g., "SPEAKER_00") - start: float # Start time in seconds - end: float # End time in seconds - confidence: float = 1.0 # Confidence score (0.0-1.0) - + speaker: str + start: float + end: float + confidence: float = 1.0 def __post_init__(self) -> None: - """Validate turn data.""" if self.end < self.start: raise ValueError(f"Turn end ({self.end}) < start ({self.start})") if not 0.0 <= self.confidence <= 1.0: raise ValueError(f"Confidence must be 0.0-1.0, got {self.confidence}") - @property def duration(self) -> float: - """Duration of the turn in seconds.""" return self.end - self.start - def overlaps(self, start: float, end: float) -> bool: - """Check if this turn overlaps with a time range. - - Args: - start: Range start time in seconds. - end: Range end time in seconds. - - Returns: - True if there is any overlap. - """ return self.start < end and self.end > start - def overlap_duration(self, start: float, end: float) -> float: - """Calculate overlap duration with a time range. - - Args: - start: Range start time in seconds. - end: Range end time in seconds. - - Returns: - Overlap duration in seconds (0.0 if no overlap). - """ overlap_start = max(self.start, start) overlap_end = min(self.end, end) return max(0.0, overlap_end - overlap_start) ```` -## File: src/noteflow/infrastructure/diarization/engine.py -````python -"""Diarization engine implementation using pyannote.audio and diart. - -Provides speaker diarization for both streaming (real-time) and -offline (post-meeting) processing. - -Requires optional dependencies: pip install noteflow[diarization] -""" - -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING - -from noteflow.infrastructure.diarization.dto import SpeakerTurn - -if TYPE_CHECKING: - from collections.abc import Sequence - - import numpy as np - from numpy.typing import NDArray - from pyannote.core import Annotation - -logger = logging.getLogger(__name__) - - -class DiarizationEngine: - """Speaker diarization engine using pyannote.audio and diart. - - Supports both streaming (real-time via diart) and offline - (post-meeting via pyannote.audio) diarization modes. - """ - - def __init__( - self, - device: str = "auto", - hf_token: str | None = None, - streaming_latency: float = 0.5, - min_speakers: int = 1, - max_speakers: int = 10, - ) -> None: - """Initialize the diarization engine. - - Args: - device: Device to use ("auto", "cpu", "cuda", "mps"). - "auto" selects CUDA > MPS > CPU based on availability. - hf_token: HuggingFace token for pyannote model access. - streaming_latency: Latency for streaming diarization in seconds. - min_speakers: Minimum expected speakers for offline diarization. - max_speakers: Maximum expected speakers for offline diarization. - """ - self._device_preference = device - self._device: str | None = None - self._hf_token = hf_token - self._streaming_latency = streaming_latency - self._min_speakers = min_speakers - self._max_speakers = max_speakers - - # Lazy-loaded models - self._streaming_pipeline = None - self._offline_pipeline = None - - def _resolve_device(self) -> str: - """Resolve the actual device to use based on availability. - - Returns: - Device string ("cuda", "mps", or "cpu"). 
- """ - if self._device is not None: - return self._device - - import torch - - if self._device_preference == "auto": - if torch.cuda.is_available(): - self._device = "cuda" - elif torch.backends.mps.is_available(): - self._device = "mps" - else: - self._device = "cpu" - else: - self._device = self._device_preference - - logger.info("Diarization device resolved to: %s", self._device) - return self._device - - def load_streaming_model(self) -> None: - """Load the streaming diarization model (diart). - - Raises: - RuntimeError: If model loading fails. - ValueError: If HuggingFace token is not provided. - """ - if self._streaming_pipeline is not None: - logger.debug("Streaming model already loaded") - return - - if not self._hf_token: - raise ValueError("HuggingFace token required for pyannote models") - - device = self._resolve_device() - - logger.info( - "Loading streaming diarization model on %s with latency %.2fs...", - device, - self._streaming_latency, - ) - - try: - from diart import SpeakerDiarization, SpeakerDiarizationConfig - from diart.models import EmbeddingModel, SegmentationModel - - segmentation = SegmentationModel.from_pretrained( - "pyannote/segmentation-3.0", - use_hf_token=self._hf_token, - ) - embedding = EmbeddingModel.from_pretrained( - "pyannote/wespeaker-voxceleb-resnet34-LM", - use_hf_token=self._hf_token, - ) - - config = SpeakerDiarizationConfig( - segmentation=segmentation, - embedding=embedding, - step=self._streaming_latency, - latency=self._streaming_latency, - device=device, - ) - - self._streaming_pipeline = SpeakerDiarization(config) - logger.info("Streaming diarization model loaded successfully") - - except Exception as e: - raise RuntimeError(f"Failed to load streaming diarization model: {e}") from e - - def load_offline_model(self) -> None: - """Load the offline diarization model (pyannote.audio). - - Raises: - RuntimeError: If model loading fails. - ValueError: If HuggingFace token is not provided. - """ - if self._offline_pipeline is not None: - logger.debug("Offline model already loaded") - return - - if not self._hf_token: - raise ValueError("HuggingFace token required for pyannote models") - - device = self._resolve_device() - - logger.info("Loading offline diarization model on %s...", device) - - try: - import torch - from pyannote.audio import Pipeline - - self._offline_pipeline = Pipeline.from_pretrained( - "pyannote/speaker-diarization-3.1", - use_auth_token=self._hf_token, - ) - - torch_device = torch.device(device) - self._offline_pipeline.to(torch_device) - - logger.info("Offline diarization model loaded successfully") - - except Exception as e: - raise RuntimeError(f"Failed to load offline diarization model: {e}") from e - - def process_chunk( - self, - audio: NDArray[np.float32], - sample_rate: int = 16000, - ) -> Sequence[SpeakerTurn]: - """Process an audio chunk for streaming diarization. - - Args: - audio: Audio samples as float32 array (mono). - sample_rate: Audio sample rate in Hz. - - Returns: - Sequence of speaker turns detected in this chunk. - - Raises: - RuntimeError: If streaming model not loaded. - """ - if self._streaming_pipeline is None: - raise RuntimeError("Streaming model not loaded. 
Call load_streaming_model() first.") - - from pyannote.core import SlidingWindowFeature - - # Reshape audio for diart: (samples,) -> (1, samples) - if audio.ndim == 1: - audio = audio.reshape(1, -1) - - # Create SlidingWindowFeature for diart - from pyannote.core import SlidingWindow - - duration = audio.shape[1] / sample_rate - window = SlidingWindow(start=0.0, duration=duration, step=duration) - waveform = SlidingWindowFeature(audio, window) - - # Process through pipeline - results = self._streaming_pipeline([waveform]) - - turns: list[SpeakerTurn] = [] - for annotation, _ in results: - turns.extend(self._annotation_to_turns(annotation)) - - return turns - - def diarize_full( - self, - audio: NDArray[np.float32], - sample_rate: int = 16000, - num_speakers: int | None = None, - ) -> Sequence[SpeakerTurn]: - """Diarize a complete audio recording. - - Args: - audio: Audio samples as float32 array (mono). - sample_rate: Audio sample rate in Hz. - num_speakers: Known number of speakers (None for auto-detect). - - Returns: - Sequence of speaker turns for the full recording. - - Raises: - RuntimeError: If offline model not loaded. - """ - if self._offline_pipeline is None: - raise RuntimeError("Offline model not loaded. Call load_offline_model() first.") - - import torch - - # Prepare audio tensor: (samples,) -> (channels, samples) - if audio.ndim == 1: - audio_tensor = torch.from_numpy(audio).unsqueeze(0) - else: - audio_tensor = torch.from_numpy(audio) - - # Create waveform dict for pyannote - waveform = {"waveform": audio_tensor, "sample_rate": sample_rate} - - logger.debug( - "Running offline diarization on %.2fs audio", - audio_tensor.shape[1] / sample_rate, - ) - - # Run diarization with speaker hints - if num_speakers is not None: - annotation = self._offline_pipeline(waveform, num_speakers=num_speakers) - else: - annotation = self._offline_pipeline( - waveform, - min_speakers=self._min_speakers, - max_speakers=self._max_speakers, - ) - - return self._annotation_to_turns(annotation) - - def _annotation_to_turns(self, annotation: Annotation) -> list[SpeakerTurn]: - """Convert pyannote Annotation to SpeakerTurn list. - - Args: - annotation: Pyannote diarization annotation. - - Returns: - List of SpeakerTurn objects. 
- """ - turns: list[SpeakerTurn] = [] - - # itertracks(yield_label=True) returns 3-tuples: (segment, track, label) - for track in annotation.itertracks(yield_label=True): - # Unpack with len check for type safety with pyannote's union return - if len(track) == 3: - segment, _, speaker = track - turns.append( - SpeakerTurn( - speaker=str(speaker), - start=segment.start, - end=segment.end, - ) - ) - - return turns - - def reset_streaming(self) -> None: - """Reset streaming pipeline state for a new recording.""" - if self._streaming_pipeline is not None: - self._streaming_pipeline.reset() - logger.debug("Streaming pipeline state reset") - - def unload(self) -> None: - """Unload all models to free memory.""" - self._streaming_pipeline = None - self._offline_pipeline = None - self._device = None - logger.info("Diarization models unloaded") - - @property - def is_streaming_loaded(self) -> bool: - """Return True if streaming model is loaded.""" - return self._streaming_pipeline is not None - - @property - def is_offline_loaded(self) -> bool: - """Return True if offline model is loaded.""" - return self._offline_pipeline is not None - - @property - def device(self) -> str | None: - """Return the resolved device, or None if not yet resolved.""" - return self._device -```` - ## File: src/noteflow/infrastructure/export/__init__.py ````python -"""Export infrastructure module. - -Provide transcript export functionality to various file formats. -""" - from noteflow.infrastructure.export.html import HtmlExporter from noteflow.infrastructure.export.markdown import MarkdownExporter from noteflow.infrastructure.export.protocols import TranscriptExporter - __all__ = [ "HtmlExporter", "MarkdownExporter", @@ -5982,352 +2964,53 @@ __all__ = [ ## File: src/noteflow/infrastructure/export/_formatting.py ````python -"""Shared formatting utilities for export modules.""" - from __future__ import annotations - from datetime import datetime - - def format_timestamp(seconds: float) -> str: - """Format seconds as MM:SS or HH:MM:SS. - - Args: - seconds: Time in seconds. - - Returns: - Formatted time string. - """ total_seconds = int(seconds) hours, remainder = divmod(total_seconds, 3600) minutes, secs = divmod(remainder, 60) - if hours > 0: return f"{hours:d}:{minutes:02d}:{secs:02d}" return f"{minutes:d}:{secs:02d}" - - def format_datetime(dt: datetime | None) -> str: - """Format datetime for display. - - Args: - dt: Datetime to format. - - Returns: - Formatted datetime string or empty string. - """ return "" if dt is None else dt.strftime("%Y-%m-%d %H:%M:%S") ```` ## File: src/noteflow/infrastructure/export/protocols.py ````python -"""Export protocols defining contracts for transcript exporters. - -Define Protocol interfaces for exporting meeting transcripts to various formats. -""" - from __future__ import annotations - from typing import TYPE_CHECKING, Protocol - if TYPE_CHECKING: from collections.abc import Sequence - from noteflow.domain.entities.meeting import Meeting from noteflow.domain.entities.segment import Segment - - class TranscriptExporter(Protocol): - """Protocol for exporting meeting transcripts to file formats. - - Implementations should produce formatted output for the target format - (e.g., Markdown, HTML) from meeting data. - """ - def export( self, meeting: Meeting, segments: Sequence[Segment], ) -> str: - """Export meeting transcript to formatted string. - - Args: - meeting: Meeting entity with metadata. - segments: Ordered list of transcript segments. 
- - Returns: - Formatted transcript string in target format. - """ ... - @property def format_name(self) -> str: - """Human-readable format name (e.g., 'Markdown', 'HTML').""" ... - @property def file_extension(self) -> str: - """File extension for this format (e.g., '.md', '.html').""" ... ```` -## File: src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py -````python -"""initial_schema - -Revision ID: 6a9d9f408f40 -Revises: -Create Date: 2025-12-16 19:10:55.135444 - -""" - -from collections.abc import Sequence - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision: str = "6a9d9f408f40" -down_revision: str | Sequence[str] | None = None -branch_labels: str | Sequence[str] | None = None -depends_on: str | Sequence[str] | None = None - -# Vector dimension for embeddings (OpenAI compatible) -EMBEDDING_DIM = 1536 - - -def upgrade() -> None: - """Create NoteFlow schema and tables.""" - # Create schema - op.execute("CREATE SCHEMA IF NOT EXISTS noteflow") - - # Enable pgvector extension - try: - op.execute("CREATE EXTENSION IF NOT EXISTS vector") - except sa.exc.ProgrammingError as e: - raise RuntimeError( - f"Failed to create pgvector extension: {e}. " - "Ensure the database user has CREATE EXTENSION privileges, or " - "install pgvector manually: CREATE EXTENSION vector;" - ) from e - - # Create meetings table - op.create_table( - "meetings", - sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), - sa.Column("title", sa.String(255), nullable=False), - sa.Column("state", sa.Integer(), nullable=False, server_default="1"), - sa.Column( - "created_at", - sa.DateTime(timezone=True), - nullable=False, - server_default=sa.text("now()"), - ), - sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True), - sa.Column( - "metadata", - postgresql.JSONB(astext_type=sa.Text()), - nullable=False, - server_default="{}", - ), - sa.Column("wrapped_dek", sa.LargeBinary(), nullable=True), - schema="noteflow", - ) - - # Create segments table - op.create_table( - "segments", - sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), - sa.Column( - "meeting_id", - postgresql.UUID(as_uuid=True), - sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), - nullable=False, - ), - sa.Column("segment_id", sa.Integer(), nullable=False), - sa.Column("text", sa.Text(), nullable=False), - sa.Column("start_time", sa.Float(), nullable=False), - sa.Column("end_time", sa.Float(), nullable=False), - sa.Column("language", sa.String(10), nullable=False, server_default="en"), - sa.Column("language_confidence", sa.Float(), nullable=False, server_default="0.0"), - sa.Column("avg_logprob", sa.Float(), nullable=False, server_default="0.0"), - sa.Column("no_speech_prob", sa.Float(), nullable=False, server_default="0.0"), - sa.Column( - "created_at", - sa.DateTime(timezone=True), - nullable=False, - server_default=sa.text("now()"), - ), - schema="noteflow", - ) - - # Add vector column for embeddings (pgvector) - op.execute(f"ALTER TABLE noteflow.segments ADD COLUMN embedding vector({EMBEDDING_DIM})") - - # Create index for vector similarity search - op.execute( - "CREATE INDEX IF NOT EXISTS ix_segments_embedding " - "ON noteflow.segments USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)" - ) - - # Create index for meeting_id lookups - op.create_index( - "ix_segments_meeting_id", - "segments", - 
["meeting_id"], - schema="noteflow", - ) - - # Create word_timings table - op.create_table( - "word_timings", - sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), - sa.Column( - "segment_pk", - sa.Integer(), - sa.ForeignKey("noteflow.segments.id", ondelete="CASCADE"), - nullable=False, - ), - sa.Column("word", sa.String(255), nullable=False), - sa.Column("start_time", sa.Float(), nullable=False), - sa.Column("end_time", sa.Float(), nullable=False), - sa.Column("probability", sa.Float(), nullable=False), - schema="noteflow", - ) - - # Create index for segment_pk lookups - op.create_index( - "ix_word_timings_segment_pk", - "word_timings", - ["segment_pk"], - schema="noteflow", - ) - - # Create summaries table - op.create_table( - "summaries", - sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), - sa.Column( - "meeting_id", - postgresql.UUID(as_uuid=True), - sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), - nullable=False, - unique=True, - ), - sa.Column("executive_summary", sa.Text(), nullable=True), - sa.Column( - "generated_at", - sa.DateTime(timezone=True), - nullable=False, - server_default=sa.text("now()"), - ), - sa.Column("model_version", sa.String(50), nullable=True), - schema="noteflow", - ) - - # Create key_points table - op.create_table( - "key_points", - sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), - sa.Column( - "summary_id", - sa.Integer(), - sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), - nullable=False, - ), - sa.Column("text", sa.Text(), nullable=False), - sa.Column("start_time", sa.Float(), nullable=False, server_default="0.0"), - sa.Column("end_time", sa.Float(), nullable=False, server_default="0.0"), - sa.Column( - "segment_ids", - postgresql.JSONB(astext_type=sa.Text()), - nullable=False, - server_default="[]", - ), - schema="noteflow", - ) - - # Create index for summary_id lookups - op.create_index( - "ix_key_points_summary_id", - "key_points", - ["summary_id"], - schema="noteflow", - ) - - # Create action_items table - op.create_table( - "action_items", - sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), - sa.Column( - "summary_id", - sa.Integer(), - sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), - nullable=False, - ), - sa.Column("text", sa.Text(), nullable=False), - sa.Column("assignee", sa.String(255), nullable=False, server_default=""), - sa.Column("due_date", sa.DateTime(timezone=True), nullable=True), - sa.Column("priority", sa.Integer(), nullable=False, server_default="0"), - sa.Column( - "segment_ids", - postgresql.JSONB(astext_type=sa.Text()), - nullable=False, - server_default="[]", - ), - schema="noteflow", - ) - - # Create index for summary_id lookups - op.create_index( - "ix_action_items_summary_id", - "action_items", - ["summary_id"], - schema="noteflow", - ) - - -def downgrade() -> None: - """Drop all NoteFlow tables and schema.""" - # Drop tables in reverse order (respecting foreign keys) - op.drop_table("action_items", schema="noteflow") - op.drop_table("key_points", schema="noteflow") - op.drop_table("summaries", schema="noteflow") - op.drop_table("word_timings", schema="noteflow") - op.drop_table("segments", schema="noteflow") - op.drop_table("meetings", schema="noteflow") - - # Drop schema - op.execute("DROP SCHEMA IF EXISTS noteflow CASCADE") -```` - ## File: src/noteflow/infrastructure/persistence/migrations/versions/b5c3e8a2d1f0_add_annotations_table.py ````python -"""add_annotations_table - -Revision ID: b5c3e8a2d1f0 -Revises: 
6a9d9f408f40
-Create Date: 2025-12-17 10:00:00.000000
-
-"""
-
from collections.abc import Sequence
-
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
revision: str = "b5c3e8a2d1f0"
down_revision: str | Sequence[str] | None = "6a9d9f408f40"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
-
-
def upgrade() -> None:
-    """Create annotations table for user-created annotations during recording."""
    op.create_table(
        "annotations",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
@@ -6361,26 +3044,19 @@ def upgrade() -> None:
        ),
        schema="noteflow",
    )
-
-    # Create index for meeting_id lookups
    op.create_index(
        "ix_annotations_meeting_id",
        "annotations",
        ["meeting_id"],
        schema="noteflow",
    )
-
-    # Create index for time-based queries
    op.create_index(
        "ix_annotations_time_range",
        "annotations",
        ["meeting_id", "start_time", "end_time"],
        schema="noteflow",
    )
-
-
def downgrade() -> None:
-    """Drop annotations table."""
    op.drop_index("ix_annotations_time_range", table_name="annotations", schema="noteflow")
    op.drop_index("ix_annotations_meeting_id", table_name="annotations", schema="noteflow")
    op.drop_table("annotations", schema="noteflow")
@@ -6388,28 +3064,14 @@ def downgrade() -> None:
````

## File: src/noteflow/infrastructure/persistence/migrations/versions/c7d4e9f3a2b1_add_speaker_fields_to_segments.py
````python
-"""add_speaker_fields_to_segments
-
-Revision ID: c7d4e9f3a2b1
-Revises: b5c3e8a2d1f0
-Create Date: 2025-12-18 16:00:00.000000
-
-"""
-
from collections.abc import Sequence
-
import sqlalchemy as sa
from alembic import op
-
-# revision identifiers, used by Alembic.
revision: str = "c7d4e9f3a2b1"
down_revision: str | Sequence[str] | None = "b5c3e8a2d1f0"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
-
-
def upgrade() -> None:
-    """Add speaker_id and speaker_confidence columns to segments table."""
    op.add_column(
        "segments",
        sa.Column("speaker_id", sa.String(50), nullable=True),
@@ -6420,17 +3082,14 @@ def upgrade() -> None:
        sa.Column("speaker_confidence", sa.Float(), nullable=False, server_default="0.0"),
        schema="noteflow",
    )
-
-
def downgrade() -> None:
-    """Remove speaker_id and speaker_confidence columns from segments table."""
    op.drop_column("segments", "speaker_confidence", schema="noteflow")
    op.drop_column("segments", "speaker_id", schema="noteflow")
````
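The migrations above chain 6a9d9f408f40 → b5c3e8a2d1f0 → c7d4e9f3a2b1. A sketch of applying them programmatically, equivalent to running `alembic upgrade head`; it assumes the project's `alembic.ini` is reachable from the working directory:

````python
from alembic import command
from alembic.config import Config

# Load the project's alembic.ini and upgrade to the newest revision.
config = Config("alembic.ini")
command.upgrade(config, "head")
````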
## File: src/noteflow/infrastructure/persistence/migrations/__init__.py
````python
-"""Alembic database migrations for NoteFlow."""
+
````

## File: src/noteflow/infrastructure/persistence/migrations/README
@@ -6472,13 +3131,10 @@ def downgrade() -> None:

## File: src/noteflow/infrastructure/persistence/repositories/__init__.py
````python
-"""Repository implementations for NoteFlow."""
-
from .annotation_repo import SqlAlchemyAnnotationRepository
from .meeting_repo import SqlAlchemyMeetingRepository
from .segment_repo import SqlAlchemySegmentRepository
from .summary_repo import SqlAlchemySummaryRepository
-
__all__ = [
    "SqlAlchemyAnnotationRepository",
    "SqlAlchemyMeetingRepository",
@@ -6489,95 +3145,40 @@ __all__ = [

## File: src/noteflow/infrastructure/persistence/repositories/_base.py
````python
-"""Base repository providing common SQLAlchemy patterns."""
-
from __future__ import annotations
-
from typing import TYPE_CHECKING, TypeVar
-
from sqlalchemy.ext.asyncio import AsyncSession
-
if TYPE_CHECKING:
    from sqlalchemy.sql import Select
-
TModel = TypeVar("TModel")
-
-
class BaseRepository:
-    """Base class for SQLAlchemy repositories.
-
-    Provides common session management and helper methods for
-    executing queries and persisting models.
-    """
-
    def __init__(self, session: AsyncSession) -> None:
-        """Initialize repository with database session.
-
-        Args:
-            session: SQLAlchemy async session.
-        """
        self._session = session
-
    async def _execute_scalar(
        self,
        stmt: Select[tuple[TModel]],
    ) -> TModel | None:
-        """Execute statement and return single scalar result.
-
-        Args:
-            stmt: SQLAlchemy select statement.
-
-        Returns:
-            Single model instance or None if not found.
-        """
        result = await self._session.execute(stmt)
        return result.scalar_one_or_none()
-
    async def _execute_scalars(
        self,
        stmt: Select[tuple[TModel]],
    ) -> list[TModel]:
-        """Execute statement and return all scalar results.
-
-        Args:
-            stmt: SQLAlchemy select statement.
-
-        Returns:
-            List of model instances.
-        """
        result = await self._session.execute(stmt)
        return list(result.scalars().all())
-
    async def _add_and_flush(self, model: TModel) -> TModel:
-        """Add model to session and flush.
-
-        Args:
-            model: ORM model instance to persist.
-
-        Returns:
-            The persisted model with generated fields populated.
-        """
        self._session.add(model)
        await self._session.flush()
        return model
-
    async def _delete_and_flush(self, model: object) -> None:
-        """Delete model from session and flush.
-
-        Args:
-            model: ORM model instance to delete.
-        """
        await self._session.delete(model)
        await self._session.flush()
````
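A sketch of how a concrete repository builds on these helpers. `NoteModel` and `NoteRepository` are hypothetical, defined here only to keep the example self-contained; the real repositories use the ORM classes from `models.py`:

````python
from sqlalchemy import select
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

from noteflow.infrastructure.persistence.repositories._base import BaseRepository


class _Base(DeclarativeBase): ...


class NoteModel(_Base):
    __tablename__ = "notes"
    id: Mapped[int] = mapped_column(primary_key=True)
    text: Mapped[str]


class NoteRepository(BaseRepository):
    async def get(self, note_id: int) -> NoteModel | None:
        # _execute_scalar returns one row or None.
        return await self._execute_scalar(select(NoteModel).where(NoteModel.id == note_id))

    async def add(self, note: NoteModel) -> NoteModel:
        # _add_and_flush persists and populates generated fields.
        return await self._add_and_flush(note)
````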
- """ async with session_factory() as session: yield session - - def create_async_session_factory( database_url: str, pool_size: int = 5, echo: bool = False, ) -> async_sessionmaker[AsyncSession]: - """Create an async session factory from a database URL string. - - Convenience function for creating a session factory directly from a URL. - - Args: - database_url: PostgreSQL database URL. - pool_size: Connection pool size. - echo: Enable SQL echo logging. - - Returns: - Async session factory. - """ engine = sa_create_async_engine( database_url, pool_size=pool_size, @@ -6693,11 +3245,6 @@ def create_async_session_factory( ## File: src/noteflow/infrastructure/security/__init__.py ````python -"""Security infrastructure module. - -Provides encryption and key management using OS credential stores. -""" - from noteflow.infrastructure.security.crypto import ( AesGcmCryptoBox, ChunkedAssetReader, @@ -6711,7 +3258,6 @@ from noteflow.infrastructure.security.protocols import ( EncryptedChunk, KeyStore, ) - __all__ = [ "AesGcmCryptoBox", "ChunkedAssetReader", @@ -6728,790 +3274,252 @@ __all__ = [ ## File: src/noteflow/infrastructure/security/crypto.py ````python -"""Cryptographic operations implementation using cryptography library. - -Provides AES-GCM encryption for audio data with envelope encryption. -""" - from __future__ import annotations - import logging import secrets import struct from collections.abc import Iterator from pathlib import Path from typing import TYPE_CHECKING, BinaryIO, Final - from cryptography.hazmat.primitives.ciphers.aead import AESGCM - from noteflow.infrastructure.security.protocols import EncryptedChunk - if TYPE_CHECKING: from noteflow.infrastructure.security.keystore import InMemoryKeyStore, KeyringKeyStore - logger = logging.getLogger(__name__) - -# Constants -KEY_SIZE: Final[int] = 32 # 256-bit key -NONCE_SIZE: Final[int] = 12 # 96-bit nonce for AES-GCM -TAG_SIZE: Final[int] = 16 # 128-bit authentication tag - -# File format magic number and version -FILE_MAGIC: Final[bytes] = b"NFAE" # NoteFlow Audio Encrypted +KEY_SIZE: Final[int] = 32 +NONCE_SIZE: Final[int] = 12 +TAG_SIZE: Final[int] = 16 +FILE_MAGIC: Final[bytes] = b"NFAE" FILE_VERSION: Final[int] = 1 - - class AesGcmCryptoBox: - """AES-GCM based encryption with envelope encryption. - - Uses a master key to wrap/unwrap per-meeting Data Encryption Keys (DEKs). - Each audio chunk is encrypted with AES-256-GCM using the DEK. - """ - def __init__(self, keystore: KeyringKeyStore | InMemoryKeyStore) -> None: - """Initialize the crypto box. - - Args: - keystore: KeyStore instance for master key access. - """ self._keystore = keystore self._master_cipher: AESGCM | None = None - def _get_master_cipher(self) -> AESGCM: - """Get or create the master key cipher.""" if self._master_cipher is None: master_key = self._keystore.get_or_create_master_key() self._master_cipher = AESGCM(master_key) return self._master_cipher - def generate_dek(self) -> bytes: - """Generate a new Data Encryption Key. - - Returns: - 32-byte random DEK. - """ return secrets.token_bytes(KEY_SIZE) - def wrap_dek(self, dek: bytes) -> bytes: - """Encrypt DEK with master key. - - Args: - dek: Data Encryption Key to wrap. - - Returns: - Encrypted DEK (nonce || ciphertext || tag). 
- """ cipher = self._get_master_cipher() nonce = secrets.token_bytes(NONCE_SIZE) ciphertext = cipher.encrypt(nonce, dek, associated_data=None) - # Return nonce || ciphertext (tag is appended by AESGCM) return nonce + ciphertext - def unwrap_dek(self, wrapped_dek: bytes) -> bytes: - """Decrypt DEK with master key. - - Args: - wrapped_dek: Encrypted DEK from wrap_dek(). - - Returns: - Original DEK. - - Raises: - ValueError: If decryption fails. - """ if len(wrapped_dek) < NONCE_SIZE + KEY_SIZE + TAG_SIZE: raise ValueError("Invalid wrapped DEK: too short") - cipher = self._get_master_cipher() nonce = wrapped_dek[:NONCE_SIZE] ciphertext = wrapped_dek[NONCE_SIZE:] - try: return cipher.decrypt(nonce, ciphertext, associated_data=None) except Exception as e: raise ValueError(f"DEK unwrap failed: {e}") from e - def encrypt_chunk(self, plaintext: bytes, dek: bytes) -> EncryptedChunk: - """Encrypt a chunk of data with AES-GCM. - - Args: - plaintext: Data to encrypt. - dek: Data Encryption Key. - - Returns: - EncryptedChunk with nonce, ciphertext, and tag. - """ cipher = AESGCM(dek) nonce = secrets.token_bytes(NONCE_SIZE) - - # AESGCM appends the tag to ciphertext ciphertext_with_tag = cipher.encrypt(nonce, plaintext, associated_data=None) - - # Split ciphertext and tag ciphertext = ciphertext_with_tag[:-TAG_SIZE] tag = ciphertext_with_tag[-TAG_SIZE:] - return EncryptedChunk(nonce=nonce, ciphertext=ciphertext, tag=tag) - def decrypt_chunk(self, chunk: EncryptedChunk, dek: bytes) -> bytes: - """Decrypt a chunk of data. - - Args: - chunk: EncryptedChunk to decrypt. - dek: Data Encryption Key. - - Returns: - Original plaintext. - - Raises: - ValueError: If decryption fails. - """ cipher = AESGCM(dek) - - # Reconstruct ciphertext with tag for AESGCM ciphertext_with_tag = chunk.ciphertext + chunk.tag - try: return cipher.decrypt(chunk.nonce, ciphertext_with_tag, associated_data=None) except Exception as e: raise ValueError(f"Chunk decryption failed: {e}") from e - - class ChunkedAssetWriter: - """Streaming encrypted asset writer. - - File format: - - 4 bytes: magic ("NFAE") - - 1 byte: version - - For each chunk: - - 4 bytes: chunk length (big-endian) - - 12 bytes: nonce - - N bytes: ciphertext - - 16 bytes: tag - """ - def __init__(self, crypto: AesGcmCryptoBox) -> None: - """Initialize the writer. - - Args: - crypto: CryptoBox instance for encryption. - """ self._crypto = crypto self._file: Path | None = None self._dek: bytes | None = None self._handle: BinaryIO | None = None self._bytes_written: int = 0 - def open(self, path: Path, dek: bytes) -> None: - """Open file for writing. - - Args: - path: Path to the encrypted file. - dek: Data Encryption Key for this file. 
- """ if self._handle is not None: raise RuntimeError("Already open") - self._file = path self._dek = dek self._handle = path.open("wb") self._bytes_written = 0 - - # Write header self._handle.write(FILE_MAGIC) self._handle.write(struct.pack("B", FILE_VERSION)) - logger.debug("Opened encrypted file for writing: %s", path) - def write_chunk(self, audio_bytes: bytes) -> None: - """Write and encrypt an audio chunk.""" if self._handle is None or self._dek is None: raise RuntimeError("File not open") - - # Encrypt the chunk chunk = self._crypto.encrypt_chunk(audio_bytes, self._dek) - - # Calculate total chunk size (nonce + ciphertext + tag) chunk_data = chunk.nonce + chunk.ciphertext + chunk.tag chunk_length = len(chunk_data) - - # Write length prefix and chunk data self._handle.write(struct.pack(">I", chunk_length)) self._handle.write(chunk_data) self._handle.flush() - self._bytes_written += 4 + chunk_length - def close(self) -> None: - """Finalize and close the file.""" if self._handle is not None: self._handle.close() self._handle = None logger.debug("Closed encrypted file, wrote %d bytes", self._bytes_written) - self._dek = None - @property def is_open(self) -> bool: - """Check if file is open for writing.""" return self._handle is not None - @property def bytes_written(self) -> int: - """Total encrypted bytes written.""" return self._bytes_written - - class ChunkedAssetReader: - """Streaming encrypted asset reader.""" - def __init__(self, crypto: AesGcmCryptoBox) -> None: - """Initialize the reader. - - Args: - crypto: CryptoBox instance for decryption. - """ self._crypto = crypto self._file: Path | None = None self._dek: bytes | None = None self._handle: BinaryIO | None = None - def open(self, path: Path, dek: bytes) -> None: - """Open file for reading.""" if self._handle is not None: raise RuntimeError("Already open") - self._file = path self._dek = dek self._handle = path.open("rb") - - # Read and validate header magic = self._handle.read(4) if magic != FILE_MAGIC: self._handle.close() self._handle = None raise ValueError(f"Invalid file format: expected {FILE_MAGIC!r}, got {magic!r}") - version = struct.unpack("B", self._handle.read(1))[0] if version != FILE_VERSION: self._handle.close() self._handle = None raise ValueError(f"Unsupported file version: {version}") - logger.debug("Opened encrypted file for reading: %s", path) - def read_chunks(self) -> Iterator[bytes]: - """Yield decrypted audio chunks.""" if self._handle is None or self._dek is None: raise RuntimeError("File not open") - while True: - # Read chunk length length_bytes = self._handle.read(4) if len(length_bytes) < 4: - break # End of file - + break chunk_length = struct.unpack(">I", length_bytes)[0] - - # Read chunk data chunk_data = self._handle.read(chunk_length) if len(chunk_data) < chunk_length: raise ValueError("Truncated chunk") - - # Parse chunk (nonce + ciphertext + tag) nonce = chunk_data[:NONCE_SIZE] ciphertext = chunk_data[NONCE_SIZE:-TAG_SIZE] tag = chunk_data[-TAG_SIZE:] - chunk = EncryptedChunk(nonce=nonce, ciphertext=ciphertext, tag=tag) - - # Decrypt and yield yield self._crypto.decrypt_chunk(chunk, self._dek) - def close(self) -> None: - """Close the file.""" if self._handle is not None: self._handle.close() self._handle = None logger.debug("Closed encrypted file") - self._dek = None - @property def is_open(self) -> bool: - """Check if file is open for reading.""" return self._handle is not None ```` -## File: src/noteflow/infrastructure/security/keystore.py -````python -"""Keystore implementation using 
the keyring library. - -Provides secure master key storage using OS credential stores. -""" - -from __future__ import annotations - -import base64 -import logging -import os -import secrets -from typing import Final - -import keyring - -logger = logging.getLogger(__name__) - -# Constants -KEY_SIZE: Final[int] = 32 # 256-bit key -SERVICE_NAME: Final[str] = "noteflow" -KEY_NAME: Final[str] = "master_key" -ENV_VAR_NAME: Final[str] = "NOTEFLOW_MASTER_KEY" - - -class KeyringKeyStore: - """keyring-based key storage using OS credential store. - - Uses: - - macOS: Keychain - - Windows: Credential Manager - - Linux: SecretService (GNOME Keyring, KWallet) - """ - - def __init__( - self, - service_name: str = SERVICE_NAME, - key_name: str = KEY_NAME, - ) -> None: - """Initialize the keystore. - - Args: - service_name: Service identifier for keyring. - key_name: Key identifier within the service. - """ - self._service_name = service_name - self._key_name = key_name - - def get_or_create_master_key(self) -> bytes: - """Retrieve or generate the master encryption key. - - Checks for an environment variable first (for headless/container deployments), - then falls back to the OS keyring. - - Returns: - 32-byte master key. - - Raises: - RuntimeError: If keychain is unavailable and no env var is set. - """ - # Check environment variable first (for headless/container deployments) - if env_key := os.environ.get(ENV_VAR_NAME): - logger.debug("Using master key from environment variable") - return base64.b64decode(env_key) - - try: - # Try to retrieve existing key from keyring - stored = keyring.get_password(self._service_name, self._key_name) - if stored is not None: - logger.debug("Retrieved existing master key from keyring") - return base64.b64decode(stored) - - # Generate new key - new_key = secrets.token_bytes(KEY_SIZE) - encoded = base64.b64encode(new_key).decode("ascii") - - # Store in keyring - keyring.set_password(self._service_name, self._key_name, encoded) - logger.info("Generated and stored new master key in keyring") - return new_key - - except keyring.errors.KeyringError as e: - raise RuntimeError( - f"Keyring unavailable: {e}. " - f"Set {ENV_VAR_NAME} environment variable for headless mode." - ) from e - - def delete_master_key(self) -> None: - """Delete the master key from the keychain. - - Safe to call if key doesn't exist. - """ - try: - keyring.delete_password(self._service_name, self._key_name) - logger.info("Deleted master key") - except keyring.errors.PasswordDeleteError: - # Key doesn't exist, that's fine - logger.debug("Master key not found, nothing to delete") - except keyring.errors.KeyringError as e: - logger.warning("Failed to delete master key: %s", e) - - def has_master_key(self) -> bool: - """Check if master key exists in the keychain. - - Returns: - True if master key exists. - """ - try: - stored = keyring.get_password(self._service_name, self._key_name) - return stored is not None - except keyring.errors.KeyringError: - return False - - @property - def service_name(self) -> str: - """Get the service name used for keyring.""" - return self._service_name - - @property - def key_name(self) -> str: - """Get the key name used for keyring.""" - return self._key_name - - -class InMemoryKeyStore: - """In-memory key storage for testing. - - Keys are lost when the process exits. 
- """ - - def __init__(self) -> None: - """Initialize the in-memory keystore.""" - self._key: bytes | None = None - - def get_or_create_master_key(self) -> bytes: - """Retrieve or generate the master encryption key.""" - if self._key is None: - self._key = secrets.token_bytes(KEY_SIZE) - logger.debug("Generated in-memory master key") - return self._key - - def delete_master_key(self) -> None: - """Delete the master key.""" - self._key = None - logger.debug("Deleted in-memory master key") - - def has_master_key(self) -> bool: - """Check if master key exists.""" - return self._key is not None -```` - ## File: src/noteflow/infrastructure/security/protocols.py ````python -"""Security protocols and data types. - -These protocols define the contracts for key storage and encryption components. -""" - from __future__ import annotations - from collections.abc import Iterator from dataclasses import dataclass from pathlib import Path from typing import Protocol - - @dataclass(frozen=True) class EncryptedChunk: - """An encrypted chunk of data with authentication tag.""" - - nonce: bytes # Unique nonce for this chunk - ciphertext: bytes # Encrypted data - tag: bytes # Authentication tag - - + nonce: bytes + ciphertext: bytes + tag: bytes class KeyStore(Protocol): - """Protocol for OS keychain access. - - Implementations should use the OS credential store (Keychain, Credential Manager) - to securely store the master encryption key. - """ - def get_or_create_master_key(self) -> bytes: - """Retrieve or generate the master encryption key. - - If the master key doesn't exist, generates a new 32-byte key - and stores it in the OS keychain. - - Returns: - 32-byte master key. - - Raises: - RuntimeError: If keychain is unavailable or locked. - """ ... - def delete_master_key(self) -> None: - """Delete the master key from the keychain. - - This renders all encrypted data permanently unrecoverable. - - Safe to call if key doesn't exist. - """ ... - def has_master_key(self) -> bool: - """Check if master key exists in the keychain. - - Returns: - True if master key exists. - """ ... - - class CryptoBox(Protocol): - """Protocol for envelope encryption with per-meeting keys. - - Uses a master key to wrap/unwrap Data Encryption Keys (DEKs), - which are used to encrypt actual meeting data. - """ - def generate_dek(self) -> bytes: - """Generate a new Data Encryption Key. - - Returns: - 32-byte random DEK. - """ ... - def wrap_dek(self, dek: bytes) -> bytes: - """Encrypt DEK with master key. - - Args: - dek: Data Encryption Key to wrap. - - Returns: - Encrypted DEK (can be stored in DB). - """ ... - def unwrap_dek(self, wrapped_dek: bytes) -> bytes: - """Decrypt DEK with master key. - - Args: - wrapped_dek: Encrypted DEK from wrap_dek(). - - Returns: - Original DEK. - - Raises: - ValueError: If decryption fails (invalid or tampered). - """ ... - def encrypt_chunk(self, plaintext: bytes, dek: bytes) -> EncryptedChunk: - """Encrypt a chunk of data with AES-GCM. - - Args: - plaintext: Data to encrypt. - dek: Data Encryption Key. - - Returns: - EncryptedChunk with nonce, ciphertext, and tag. - """ ... - def decrypt_chunk(self, chunk: EncryptedChunk, dek: bytes) -> bytes: - """Decrypt a chunk of data. - - Args: - chunk: EncryptedChunk to decrypt. - dek: Data Encryption Key. - - Returns: - Original plaintext. - - Raises: - ValueError: If decryption fails (invalid or tampered). - """ ... - - class EncryptedAssetWriter(Protocol): - """Protocol for streaming encrypted audio writer. 
## File: src/noteflow/infrastructure/security/protocols.py
````python
-"""Security protocols and data types.
-
-These protocols define the contracts for key storage and encryption components.
-"""
-
from __future__ import annotations
-
from collections.abc import Iterator
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol
-
-
@dataclass(frozen=True)
class EncryptedChunk:
-    """An encrypted chunk of data with authentication tag."""
-
-    nonce: bytes  # Unique nonce for this chunk
-    ciphertext: bytes  # Encrypted data
-    tag: bytes  # Authentication tag
-
-
+    nonce: bytes
+    ciphertext: bytes
+    tag: bytes
class KeyStore(Protocol):
-    """Protocol for OS keychain access.
-
-    Implementations should use the OS credential store (Keychain, Credential Manager)
-    to securely store the master encryption key.
-    """
-
    def get_or_create_master_key(self) -> bytes:
-        """Retrieve or generate the master encryption key.
-
-        If the master key doesn't exist, generates a new 32-byte key
-        and stores it in the OS keychain.
-
-        Returns:
-            32-byte master key.
-
-        Raises:
-            RuntimeError: If keychain is unavailable or locked.
-        """
        ...
-
    def delete_master_key(self) -> None:
-        """Delete the master key from the keychain.
-
-        This renders all encrypted data permanently unrecoverable.
-
-        Safe to call if key doesn't exist.
-        """
        ...
-
    def has_master_key(self) -> bool:
-        """Check if master key exists in the keychain.
-
-        Returns:
-            True if master key exists.
-        """
        ...
-
-
class CryptoBox(Protocol):
-    """Protocol for envelope encryption with per-meeting keys.
-
-    Uses a master key to wrap/unwrap Data Encryption Keys (DEKs),
-    which are used to encrypt actual meeting data.
-    """
-
    def generate_dek(self) -> bytes:
-        """Generate a new Data Encryption Key.
-
-        Returns:
-            32-byte random DEK.
-        """
        ...
-
    def wrap_dek(self, dek: bytes) -> bytes:
-        """Encrypt DEK with master key.
-
-        Args:
-            dek: Data Encryption Key to wrap.
-
-        Returns:
-            Encrypted DEK (can be stored in DB).
-        """
        ...
-
    def unwrap_dek(self, wrapped_dek: bytes) -> bytes:
-        """Decrypt DEK with master key.
-
-        Args:
-            wrapped_dek: Encrypted DEK from wrap_dek().
-
-        Returns:
-            Original DEK.
-
-        Raises:
-            ValueError: If decryption fails (invalid or tampered).
-        """
        ...
-
    def encrypt_chunk(self, plaintext: bytes, dek: bytes) -> EncryptedChunk:
-        """Encrypt a chunk of data with AES-GCM.
-
-        Args:
-            plaintext: Data to encrypt.
-            dek: Data Encryption Key.
-
-        Returns:
-            EncryptedChunk with nonce, ciphertext, and tag.
-        """
        ...
-
    def decrypt_chunk(self, chunk: EncryptedChunk, dek: bytes) -> bytes:
-        """Decrypt a chunk of data.
-
-        Args:
-            chunk: EncryptedChunk to decrypt.
-            dek: Data Encryption Key.
-
-        Returns:
-            Original plaintext.
-
-        Raises:
-            ValueError: If decryption fails (invalid or tampered).
-        """
        ...
-
-
class EncryptedAssetWriter(Protocol):
-    """Protocol for streaming encrypted audio writer.
-
-    Writes audio chunks encrypted with a DEK to a file.
-    """
-
    def open(self, path: Path, dek: bytes) -> None:
-        """Open file for writing.
-
-        Args:
-            path: Path to the encrypted file.
-            dek: Data Encryption Key for this file.
-
-        Raises:
-            RuntimeError: If already open.
-            OSError: If file cannot be created.
-        """
        ...
-
    def write_chunk(self, audio_bytes: bytes) -> None:
-        """Write and encrypt an audio chunk.
-
-        Args:
-            audio_bytes: Raw audio data to encrypt and write.
-
-        Raises:
-            RuntimeError: If not open.
-        """
        ...
-
    def close(self) -> None:
-        """Finalize and close the file.
-
-        Safe to call if already closed.
-        """
        ...
-
    @property
    def is_open(self) -> bool:
-        """Check if file is open for writing."""
        ...
-
    @property
    def bytes_written(self) -> int:
-        """Total encrypted bytes written."""
        ...
-
-
class EncryptedAssetReader(Protocol):
-    """Protocol for streaming encrypted audio reader.
-
-    Reads and decrypts audio chunks from a file.
-    """
-
    def open(self, path: Path, dek: bytes) -> None:
-        """Open file for reading.
-
-        Args:
-            path: Path to the encrypted file.
-            dek: Data Encryption Key for this file.
-
-        Raises:
-            RuntimeError: If already open.
-            OSError: If file cannot be read.
-            ValueError: If file format is invalid.
-        """
        ...
-
    def read_chunks(self) -> Iterator[bytes]:
-        """Yield decrypted audio chunks.
-
-        Yields:
-            Decrypted audio data chunks.
-
-        Raises:
-            RuntimeError: If not open.
-            ValueError: If decryption fails.
-        """
        ...
-
    def close(self) -> None:
-        """Close the file.
-
-        Safe to call if already closed.
-        """
        ...
-
    @property
    def is_open(self) -> bool:
-        """Check if file is open for reading."""
        ...
````
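A round-trip sketch of the envelope scheme implemented by `AesGcmCryptoBox`, using the in-memory keystore so no OS keychain is touched; the plaintext bytes are placeholders:

````python
from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
from noteflow.infrastructure.security.keystore import InMemoryKeyStore

crypto = AesGcmCryptoBox(InMemoryKeyStore())

# Per-meeting DEK, wrapped by the master key for storage alongside the meeting.
dek = crypto.generate_dek()
wrapped = crypto.wrap_dek(dek)
assert crypto.unwrap_dek(wrapped) == dek

# Chunk-level AES-GCM round trip.
chunk = crypto.encrypt_chunk(b"pcm audio bytes", dek)
assert crypto.decrypt_chunk(chunk, dek) == b"pcm audio bytes"
````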
- """ - # Build set of valid segment IDs valid_segment_ids = {seg.segment_id for seg in segments} - - # Track invalid citations invalid_key_point_indices: list[int] = [] invalid_action_item_indices: list[int] = [] missing_segment_ids: set[int] = set() - - # Verify key points for idx, key_point in enumerate(summary.key_points): for seg_id in key_point.segment_ids: if seg_id not in valid_segment_ids: if idx not in invalid_key_point_indices: invalid_key_point_indices.append(idx) missing_segment_ids.add(seg_id) - - # Verify action items for idx, action_item in enumerate(summary.action_items): for seg_id in action_item.segment_ids: if seg_id not in valid_segment_ids: if idx not in invalid_action_item_indices: invalid_action_item_indices.append(idx) missing_segment_ids.add(seg_id) - is_valid = not invalid_key_point_indices and not invalid_action_item_indices - return CitationVerificationResult( is_valid=is_valid, invalid_key_point_indices=tuple(invalid_key_point_indices), invalid_action_item_indices=tuple(invalid_action_item_indices), missing_segment_ids=tuple(sorted(missing_segment_ids)), ) - def filter_invalid_citations( self, summary: Summary, segments: Sequence[Segment], ) -> Summary: - """Return a copy of the summary with invalid citations removed. - - Invalid segment_ids are removed from key points and action items. - Items with no remaining citations keep empty segment_ids lists. - - Args: - summary: Summary to filter. - segments: Available transcript segments. - - Returns: - New Summary with invalid citations removed. - """ valid_segment_ids = {seg.segment_id for seg in segments} - - # Filter key point citations from noteflow.domain.entities import ActionItem, KeyPoint from noteflow.domain.entities import Summary as SummaryEntity - filtered_key_points = [ KeyPoint( text=kp.text, @@ -7522,8 +3530,6 @@ class SegmentCitationVerifier: ) for kp in summary.key_points ] - - # Filter action item citations filtered_action_items = [ ActionItem( text=ai.text, @@ -7535,7 +3541,6 @@ class SegmentCitationVerifier: ) for ai in summary.action_items ] - return SummaryEntity( meeting_id=summary.meeting_id, executive_summary=summary.executive_summary, @@ -7549,12 +3554,8 @@ class SegmentCitationVerifier: ## File: src/noteflow/infrastructure/summarization/factory.py ````python -"""Factory for creating configured SummarizationService instances.""" - from __future__ import annotations - import logging - from noteflow.application.services.summarization_service import ( SummarizationMode, SummarizationService, @@ -7563,10 +3564,7 @@ from noteflow.application.services.summarization_service import ( from noteflow.infrastructure.summarization.citation_verifier import SegmentCitationVerifier from noteflow.infrastructure.summarization.mock_provider import MockSummarizer from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer - logger = logging.getLogger(__name__) - - def create_summarization_service( default_mode: SummarizationMode = SummarizationMode.LOCAL, include_local: bool = True, @@ -7574,35 +3572,17 @@ def create_summarization_service( verify_citations: bool = True, filter_invalid_citations: bool = True, ) -> SummarizationService: - """Create a fully-configured SummarizationService. - - Auto-detects provider availability. Falls back to MOCK if LOCAL unavailable. - - Args: - default_mode: Preferred summarization mode. - include_local: Register OllamaSummarizer (checked at runtime). - include_mock: Register MockSummarizer (always available). 
-        verify_citations: Enable citation verification.
-        filter_invalid_citations: Remove invalid citations from output.
-
-    Returns:
-        Configured SummarizationService ready for use.
-    """
    service = SummarizationService(
        settings=SummarizationServiceSettings(
            default_mode=default_mode,
-            fallback_to_local=True,  # Enables LOCAL → MOCK fallback
+            fallback_to_local=True,
            verify_citations=verify_citations,
            filter_invalid_citations=filter_invalid_citations,
        ),
    )
-
-    # Always register MOCK as fallback
    if include_mock:
        service.register_provider(SummarizationMode.MOCK, MockSummarizer())
        logger.debug("Registered MOCK summarization provider")
-
-    # Register LOCAL (Ollama) - availability checked at runtime
    if include_local:
        ollama = OllamaSummarizer()
        service.register_provider(SummarizationMode.LOCAL, ollama)
@@ -7612,122 +3592,364 @@ def create_summarization_service(
            logger.info(
                "Registered LOCAL (Ollama) summarization provider - unavailable, will fallback"
            )
-
-    # Set citation verifier
    if verify_citations:
        service.set_verifier(SegmentCitationVerifier())
        logger.debug("Citation verification enabled")
-
    return service
````
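A minimal usage sketch for the factory above; because MOCK is always registered and LOCAL falls back to it when Ollama is unreachable, this works without a running Ollama instance:

````python
from noteflow.application.services.summarization_service import SummarizationMode
from noteflow.infrastructure.summarization.factory import create_summarization_service

# Prefer the local Ollama provider; the service degrades to MOCK if absent.
service = create_summarization_service(default_mode=SummarizationMode.LOCAL)
````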
## File: src/noteflow/infrastructure/triggers/app_audio.py
````python
from __future__ import annotations
import logging
import time
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
from noteflow.config.constants import DEFAULT_SAMPLE_RATE
from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource
from noteflow.infrastructure.audio.levels import RmsLevelProvider
from noteflow.infrastructure.triggers.audio_activity import (
    AudioActivityProvider,
    AudioActivitySettings,
)
if TYPE_CHECKING:
    import numpy as np
    from numpy.typing import NDArray
logger = logging.getLogger(__name__)
@dataclass
class AppAudioSettings:
    enabled: bool
    threshold_db: float
    window_seconds: float
    min_active_ratio: float
    min_samples: int
    max_history: int
    weight: float
    meeting_apps: set[str] = field(default_factory=set)
    suppressed_apps: set[str] = field(default_factory=set)
    sample_rate: int = DEFAULT_SAMPLE_RATE
    sample_duration_seconds: float = 0.5
    chunk_duration_seconds: float = 0.1
    def __post_init__(self) -> None:
        self.meeting_apps = {app.lower() for app in self.meeting_apps}
        self.suppressed_apps = {app.lower() for app in self.suppressed_apps}
class _SystemOutputSampler:
    def __init__(self, sample_rate: int, channels: int = 1) -> None:
        self._sample_rate = sample_rate
        self._channels = channels
        self._stream = None
        self._extra_settings = None
        self._device = None
        self._available: bool | None = None
    def _select_device(self) -> None:
        try:
            import sounddevice as sd
        except ImportError:
            self._available = False
            logger.warning("sounddevice not available - app audio detection disabled")
            return
        try:
            default_output = sd.default.device[1]
        except (TypeError, IndexError):
            default_output = None
        try:
            hostapi_index = sd.default.hostapi
            hostapi = sd.query_hostapis(hostapi_index) if hostapi_index is not None else None
        except Exception:
            hostapi = None
        # WASAPI loopback configuration would go here; currently a no-op.
        if hostapi and hostapi.get("type") == "Windows WASAPI" and default_output is not None:
            pass
        try:
            devices = sd.query_devices()
        except Exception:
            self._available = False
            logger.warning("Failed to query audio devices for app audio detection")
            return
        for idx, dev in enumerate(devices):
            name = str(dev.get("name", "")).lower()
            if int(dev.get("max_input_channels", 0)) <= 0:
                continue
            if "monitor" in name or "loopback" in name:
                self._device = idx
                self._available = True
                return
        self._available = False
        logger.warning("No loopback audio device found - app audio detection disabled")
    def _ensure_stream(self) -> bool:
        if self._available is False:
            return False
        if self._available is None:
            self._select_device()
        if self._available is False:
            return False
        if self._stream is not None:
            return True
        try:
            import sounddevice as sd
            self._stream = sd.InputStream(
                device=self._device,
                channels=self._channels,
                samplerate=self._sample_rate,
                dtype="float32",
                extra_settings=self._extra_settings,
            )
            self._stream.start()
            return True
        except Exception as exc:
            logger.warning("Failed to start system output capture: %s", exc)
            self._stream = None
            self._available = False
            return False
    def read_frames(self, duration_seconds: float) -> NDArray[np.float32] | None:
        if not self._ensure_stream():
            return None
        if self._stream is None:
            return None
        frames = max(1, int(self._sample_rate * duration_seconds))
        try:
            data, _ = self._stream.read(frames)
        except Exception as exc:
            logger.debug("System output read failed: %s", exc)
            return None
        return data.reshape(-1).astype("float32")
    def close(self) -> None:
        if self._stream is None:
            return
        try:
            self._stream.stop()
            self._stream.close()
        except Exception:
            logger.debug("Failed to close system output stream", exc_info=True)
        finally:
            self._stream = None
class AppAudioProvider:
    def __init__(self, settings: AppAudioSettings) -> None:
        self._settings = settings
        self._sampler = _SystemOutputSampler(sample_rate=settings.sample_rate)
        self._level_provider = RmsLevelProvider()
        self._audio_activity = AudioActivityProvider(
            self._level_provider,
            AudioActivitySettings(
                enabled=settings.enabled,
                threshold_db=settings.threshold_db,
                window_seconds=settings.window_seconds,
                min_active_ratio=settings.min_active_ratio,
                min_samples=settings.min_samples,
                max_history=settings.max_history,
                weight=settings.weight,
            ),
        )
    @property
    def source(self) -> TriggerSource:
        return TriggerSource.AUDIO_ACTIVITY
    @property
    def max_weight(self) -> float:
        return self._settings.weight
    def is_enabled(self) -> bool:
        return self._settings.enabled
    def get_signal(self) -> TriggerSignal | None:
        if not self.is_enabled():
            return None
        if not self._settings.meeting_apps:
            return None
        app_title = self._detect_meeting_app()
        if not app_title:
            return None
        frames = self._sampler.read_frames(self._settings.sample_duration_seconds)
        if frames is None or frames.size == 0:
            return None
        self._update_activity_history(frames)
        if self._audio_activity.get_signal() is None:
            return None
        return TriggerSignal(
            source=self.source,
            weight=self.max_weight,
            app_name=app_title,
        )
    def _update_activity_history(self, frames: NDArray[np.float32]) -> None:
        chunk_size = max(1, int(self._settings.sample_rate * self._settings.chunk_duration_seconds))
        now = time.monotonic()
        for offset in range(0, len(frames), chunk_size):
            chunk = frames[offset : offset + chunk_size]
            if chunk.size == 0:
                continue
            self._audio_activity.update(chunk, now)
    def _detect_meeting_app(self) -> str | None:
        try:
            import pywinctl
        except ImportError:
            return None
        titles: list[str] = []
        try:
            if hasattr(pywinctl, "getAllWindows"):
                windows = pywinctl.getAllWindows()
                titles = [w.title for w in windows if getattr(w, "title", None)]
            elif hasattr(pywinctl, "getAllTitles"):
                titles = [t for t in pywinctl.getAllTitles() if t]
        except Exception as exc:
            logger.debug("Failed to list windows for app detection: %s", exc)
            return None
        for title in titles:
            title_lower = title.lower()
            if any(suppressed in title_lower for suppressed in self._settings.suppressed_apps):
                continue
            if any(app in title_lower for app in self._settings.meeting_apps):
                return title
        return None
    def close(self) -> None:
        self._sampler.close()
````
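A wiring sketch for the provider above; the tuning values are placeholders, and without a loopback device or `pywinctl` available the call simply returns `None`:

````python
from noteflow.infrastructure.triggers.app_audio import AppAudioProvider, AppAudioSettings

settings = AppAudioSettings(
    enabled=True,
    threshold_db=-45.0,  # placeholder tuning values
    window_seconds=5.0,
    min_active_ratio=0.3,
    min_samples=10,
    max_history=100,
    weight=0.4,
    meeting_apps={"zoom", "teams"},
)
provider = AppAudioProvider(settings)
signal = provider.get_signal()  # None unless a meeting app is audibly playing
provider.close()
````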
            events.append(item)
            continue
        if isinstance(item, dict):
            start = _parse_datetime(item.get("start"))
            end = _parse_datetime(item.get("end"))
            if start and end:
                events.append(CalendarEvent(start=start, end=end, title=item.get("title")))
    return events
def _load_events_from_json(raw: str) -> list[dict[str, object]]:
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        logger.debug("Failed to parse calendar events JSON")
        return []
    if isinstance(parsed, list):
        return [item for item in parsed if isinstance(item, dict)]
    return [parsed] if isinstance(parsed, dict) else []
def _parse_datetime(value: object) -> datetime | None:
    if isinstance(value, datetime):
        return value
    if not isinstance(value, str) or not value:
        return None
    cleaned = value.strip()
    if cleaned.endswith("Z"):
        cleaned = f"{cleaned[:-1]}+00:00"
    try:
        return datetime.fromisoformat(cleaned)
    except ValueError:
        return None
def _ensure_tz(value: datetime) -> datetime:
    if value.tzinfo is None:
        return value.replace(tzinfo=timezone.utc)
    return value.astimezone(timezone.utc)
````
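A quick sketch of feeding the parser above a JSON payload; the event times and title are invented:

````python
from noteflow.infrastructure.triggers.calendar import parse_calendar_events

raw = '[{"start": "2025-12-19T10:00:00Z", "end": "2025-12-19T11:00:00Z", "title": "Standup"}]'
events = parse_calendar_events(raw)
# The trailing "Z" is normalized to "+00:00" before datetime.fromisoformat().
assert len(events) == 1 and events[0].title == "Standup"
````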
- """ self._settings = settings self._available: bool | None = None - @property def source(self) -> TriggerSource: - """Get the source type for this provider.""" return TriggerSource.FOREGROUND_APP - @property def max_weight(self) -> float: - """Get the maximum weight this provider can contribute.""" return self._settings.weight - def is_enabled(self) -> bool: - """Check if this provider is enabled and available.""" return self._settings.enabled and self._is_available() - def _is_available(self) -> bool: - """Check if PyWinCtl is available and working.""" if self._available is not None: return self._available - try: import pywinctl - - # Try to get active window to verify it works _ = pywinctl.getActiveWindow() self._available = True logger.debug("PyWinCtl available for foreground detection") @@ -7737,37 +3959,22 @@ class ForegroundAppProvider: except Exception as e: self._available = False logger.warning("PyWinCtl unavailable: %s - foreground detection disabled", e) - return self._available - def get_signal(self) -> TriggerSignal | None: - """Get current signal if meeting app is in foreground. - - Returns: - TriggerSignal if a meeting app is detected, None otherwise. - """ if not self.is_enabled(): return None - try: import pywinctl - window = pywinctl.getActiveWindow() if not window: return None - title = window.title if not title: return None - title_lower = title.lower() - - # Check if app is suppressed for suppressed in self._settings.suppressed_apps: if suppressed in title_lower: return None - - # Check if it's a meeting app for app in self._settings.meeting_apps: if app in title_lower: return TriggerSignal( @@ -7775,3753 +3982,40 @@ class ForegroundAppProvider: weight=self.max_weight, app_name=title, ) - except Exception as e: logger.debug("Foreground detection error: %s", e) - return None - def suppress_app(self, app_name: str) -> None: - """Add an app to the suppression list. - - Args: - app_name: App name substring to suppress (will be lowercased). - """ self._settings.suppressed_apps.add(app_name.lower()) logger.info("Suppressed app: %s", app_name) - def unsuppress_app(self, app_name: str) -> None: - """Remove an app from the suppression list. - - Args: - app_name: App name substring to unsuppress. - """ self._settings.suppressed_apps.discard(app_name.lower()) - def add_meeting_app(self, app_name: str) -> None: - """Add an app to the meeting apps list. - - Args: - app_name: App name substring to add (will be lowercased). - """ self._settings.meeting_apps.add(app_name.lower()) - @property def suppressed_apps(self) -> frozenset[str]: - """Get current suppressed apps.""" return frozenset(self._settings.suppressed_apps) ```` ## File: src/noteflow/__init__.py ````python -"""NoteFlow - Intelligent Meeting Notetaker.""" - __version__ = "0.1.0" ```` ## File: src/noteflow_pb2.py ````python -# Compatibility shim for generated gRPC stubs. -# The generated `noteflow_pb2_grpc.py` imports a top-level `noteflow_pb2` module. -# Re-export the packaged definitions to satisfy that import while keeping the -# compiled protobufs under `noteflow.grpc.proto`. 
-from noteflow.grpc.proto.noteflow_pb2 import * # noqa: F401,F403 -```` - -## File: tests/application/__init__.py -````python -"""Application layer unit tests.""" -```` - -## File: tests/application/test_recovery_service.py -````python -"""Tests for RecoveryService application service.""" - -from __future__ import annotations - -from unittest.mock import AsyncMock, MagicMock - -import pytest - -from noteflow.application.services.recovery_service import RecoveryService -from noteflow.domain.entities import Meeting -from noteflow.domain.value_objects import MeetingState - - -@pytest.fixture -def mock_uow() -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - return uow - - -class TestRecoveryServiceRecovery: - """Tests for crash recovery operations.""" - - async def test_recover_no_crashed_meetings(self, mock_uow: MagicMock) -> None: - """Test recovery with no crashed meetings.""" - mock_uow.meetings.list_all = AsyncMock(return_value=([], 0)) - - service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() - - assert result == [] - mock_uow.commit.assert_not_called() - - async def test_recover_single_recording_meeting(self, mock_uow: MagicMock) -> None: - """Test recovery of a meeting left in RECORDING state.""" - meeting = Meeting.create(title="Crashed Recording") - meeting.start_recording() # Put in RECORDING state - assert meeting.state == MeetingState.RECORDING - - mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1)) - mock_uow.meetings.update = AsyncMock(return_value=meeting) - - service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() - - assert len(result) == 1 - assert result[0].state == MeetingState.ERROR - assert result[0].metadata["crash_recovered"] == "true" - assert result[0].metadata["crash_previous_state"] == "RECORDING" - assert "crash_recovery_time" in result[0].metadata - mock_uow.meetings.update.assert_called_once() - mock_uow.commit.assert_called_once() - - async def test_recover_single_stopping_meeting(self, mock_uow: MagicMock) -> None: - """Test recovery of a meeting left in STOPPING state.""" - meeting = Meeting.create(title="Crashed Stopping") - meeting.start_recording() - meeting.begin_stopping() # Put in STOPPING state - assert meeting.state == MeetingState.STOPPING - - mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1)) - mock_uow.meetings.update = AsyncMock(return_value=meeting) - - service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() - - assert len(result) == 1 - assert result[0].state == MeetingState.ERROR - assert result[0].metadata["crash_previous_state"] == "STOPPING" - mock_uow.commit.assert_called_once() - - async def test_recover_multiple_crashed_meetings(self, mock_uow: MagicMock) -> None: - """Test recovery of multiple crashed meetings.""" - meeting1 = Meeting.create(title="Crashed 1") - meeting1.start_recording() - - meeting2 = Meeting.create(title="Crashed 2") - meeting2.start_recording() - meeting2.begin_stopping() - - meeting3 = Meeting.create(title="Crashed 3") - meeting3.start_recording() - - meetings = [meeting1, meeting2, meeting3] - mock_uow.meetings.list_all = AsyncMock(return_value=(meetings, 3)) - mock_uow.meetings.update = AsyncMock(side_effect=meetings) - - service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() - - assert 
len(result) == 3 - assert all(m.state == MeetingState.ERROR for m in result) - assert result[0].metadata["crash_previous_state"] == "RECORDING" - assert result[1].metadata["crash_previous_state"] == "STOPPING" - assert result[2].metadata["crash_previous_state"] == "RECORDING" - assert mock_uow.meetings.update.call_count == 3 - mock_uow.commit.assert_called_once() - - -class TestRecoveryServiceCounting: - """Tests for counting crashed meetings.""" - - async def test_count_no_crashed_meetings(self, mock_uow: MagicMock) -> None: - """Test counting with no crashed meetings.""" - mock_uow.meetings.count_by_state = AsyncMock(return_value=0) - - service = RecoveryService(mock_uow) - result = await service.count_crashed_meetings() - - assert result == 0 - assert mock_uow.meetings.count_by_state.call_count == 2 - - async def test_count_crashed_meetings_both_states(self, mock_uow: MagicMock) -> None: - """Test counting meetings in both active states.""" - - async def count_by_state(state: MeetingState) -> int: - state_counts = { - MeetingState.RECORDING: 3, - MeetingState.STOPPING: 2, - } - return state_counts.get(state, 0) - - mock_uow.meetings.count_by_state = AsyncMock(side_effect=count_by_state) - - service = RecoveryService(mock_uow) - result = await service.count_crashed_meetings() - - assert result == 5 # 3 RECORDING + 2 STOPPING - - -class TestRecoveryServiceMetadata: - """Tests for recovery metadata handling.""" - - async def test_recovery_preserves_existing_metadata(self, mock_uow: MagicMock) -> None: - """Test recovery preserves existing meeting metadata.""" - meeting = Meeting.create( - title="Has Metadata", - metadata={"project": "NoteFlow", "important": "yes"}, - ) - meeting.start_recording() - - mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1)) - mock_uow.meetings.update = AsyncMock(return_value=meeting) - - service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() - - assert len(result) == 1 - # Verify original metadata preserved - assert result[0].metadata["project"] == "NoteFlow" - assert result[0].metadata["important"] == "yes" - # Verify recovery metadata added - assert result[0].metadata["crash_recovered"] == "true" - assert result[0].metadata["crash_previous_state"] == "RECORDING" -```` - -## File: tests/domain/__init__.py -````python -"""Domain unit tests.""" -```` - -## File: tests/domain/test_annotation.py -````python -"""Tests for Annotation entity.""" - -from __future__ import annotations - -from uuid import uuid4 - -import pytest - -from noteflow.domain.entities.annotation import Annotation -from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId - - -class TestAnnotation: - """Tests for Annotation entity.""" - - def test_annotation_valid(self) -> None: - """Annotation can be created with valid fields.""" - annotation = Annotation( - id=AnnotationId(uuid4()), - meeting_id=MeetingId(uuid4()), - annotation_type=AnnotationType.NOTE, - text="Important point", - start_time=1.0, - end_time=2.0, - ) - - assert annotation.text == "Important point" - assert annotation.duration == 1.0 - assert annotation.has_segments() is False - - def test_annotation_invalid_times_raises(self) -> None: - """Annotation raises when end_time < start_time.""" - with pytest.raises(ValueError, match=r"end_time .* must be >= start_time"): - Annotation( - id=AnnotationId(uuid4()), - meeting_id=MeetingId(uuid4()), - annotation_type=AnnotationType.DECISION, - text="Bad timing", - start_time=5.0, - end_time=2.0, - ) - - def 
test_annotation_has_segments(self) -> None: - """has_segments reflects segment_ids list.""" - annotation = Annotation( - id=AnnotationId(uuid4()), - meeting_id=MeetingId(uuid4()), - annotation_type=AnnotationType.ACTION_ITEM, - text="Follow up", - start_time=0.0, - end_time=1.0, - segment_ids=[1, 2], - ) - - assert annotation.has_segments() is True - assert annotation.duration == 1.0 -```` - -## File: tests/domain/test_segment.py -````python -"""Tests for Segment and WordTiming entities.""" - -from __future__ import annotations - -import pytest - -from noteflow.domain.entities.segment import Segment, WordTiming - - -class TestWordTiming: - """Tests for WordTiming entity.""" - - def test_word_timing_valid(self) -> None: - """Test creating valid WordTiming.""" - word = WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=0.95) - assert word.word == "hello" - assert word.start_time == 0.0 - assert word.end_time == 0.5 - assert word.probability == 0.95 - - def test_word_timing_invalid_times_raises(self) -> None: - """Test WordTiming raises on end_time < start_time.""" - with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"): - WordTiming(word="hello", start_time=1.0, end_time=0.5, probability=0.9) - - @pytest.mark.parametrize("prob", [-0.1, 1.1, 2.0]) - def test_word_timing_invalid_probability_raises(self, prob: float) -> None: - """Test WordTiming raises on invalid probability.""" - with pytest.raises(ValueError, match="probability must be between 0 and 1"): - WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=prob) - - @pytest.mark.parametrize("prob", [0.0, 0.5, 1.0]) - def test_word_timing_valid_probability_bounds(self, prob: float) -> None: - """Test WordTiming accepts probability at boundaries.""" - word = WordTiming(word="test", start_time=0.0, end_time=0.5, probability=prob) - assert word.probability == prob - - -class TestSegment: - """Tests for Segment entity.""" - - def test_segment_valid(self) -> None: - """Test creating valid Segment.""" - segment = Segment( - segment_id=0, - text="Hello world", - start_time=0.0, - end_time=2.5, - language="en", - ) - assert segment.segment_id == 0 - assert segment.text == "Hello world" - assert segment.start_time == 0.0 - assert segment.end_time == 2.5 - assert segment.language == "en" - - def test_segment_invalid_times_raises(self) -> None: - """Test Segment raises on end_time < start_time.""" - with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"): - Segment(segment_id=0, text="test", start_time=5.0, end_time=1.0) - - def test_segment_invalid_id_raises(self) -> None: - """Test Segment raises on negative segment_id.""" - with pytest.raises(ValueError, match="segment_id must be non-negative"): - Segment(segment_id=-1, text="test", start_time=0.0, end_time=1.0) - - def test_segment_duration(self) -> None: - """Test duration property calculation.""" - segment = Segment(segment_id=0, text="test", start_time=1.5, end_time=4.0) - assert segment.duration == 2.5 - - def test_segment_word_count_from_text(self) -> None: - """Test word_count from text when no words list.""" - segment = Segment(segment_id=0, text="Hello beautiful world", start_time=0.0, end_time=1.0) - assert segment.word_count == 3 - - def test_segment_word_count_from_words(self) -> None: - """Test word_count from words list when provided.""" - words = [ - WordTiming(word="Hello", start_time=0.0, end_time=0.3, probability=0.9), - WordTiming(word="world", start_time=0.3, end_time=0.5, probability=0.95), - ] - segment = 
Segment( - segment_id=0, - text="Hello world", - start_time=0.0, - end_time=0.5, - words=words, - ) - assert segment.word_count == 2 - - def test_segment_has_embedding_false(self) -> None: - """Test has_embedding returns False when no embedding.""" - segment = Segment(segment_id=0, text="test", start_time=0.0, end_time=1.0) - assert segment.has_embedding() is False - - def test_segment_has_embedding_empty_list(self) -> None: - """Test has_embedding returns False for empty embedding list.""" - segment = Segment(segment_id=0, text="test", start_time=0.0, end_time=1.0, embedding=[]) - assert segment.has_embedding() is False - - def test_segment_has_embedding_true(self) -> None: - """Test has_embedding returns True when embedding exists.""" - segment = Segment( - segment_id=0, - text="test", - start_time=0.0, - end_time=1.0, - embedding=[0.1, 0.2, 0.3], - ) - assert segment.has_embedding() is True -```` - -## File: tests/domain/test_summary.py -````python -"""Tests for Summary, KeyPoint, and ActionItem entities.""" - -from __future__ import annotations - -from datetime import datetime -from uuid import uuid4 - -import pytest - -from noteflow.domain.entities.summary import ActionItem, KeyPoint, Summary -from noteflow.domain.value_objects import MeetingId - - -class TestKeyPoint: - """Tests for KeyPoint entity.""" - - def test_key_point_basic(self) -> None: - """Test creating basic KeyPoint.""" - kp = KeyPoint(text="Important discussion about architecture") - assert kp.text == "Important discussion about architecture" - assert kp.segment_ids == [] - assert kp.start_time == 0.0 - assert kp.end_time == 0.0 - - def test_key_point_has_evidence_false(self) -> None: - """Test has_evidence returns False when no segment_ids.""" - kp = KeyPoint(text="No evidence") - assert kp.has_evidence() is False - - def test_key_point_has_evidence_true(self) -> None: - """Test has_evidence returns True with segment_ids.""" - kp = KeyPoint(text="With evidence", segment_ids=[1, 2, 3]) - assert kp.has_evidence() is True - - def test_key_point_with_timing(self) -> None: - """Test KeyPoint with timing information.""" - kp = KeyPoint( - text="Timed point", - segment_ids=[0, 1], - start_time=10.5, - end_time=25.0, - ) - assert kp.start_time == 10.5 - assert kp.end_time == 25.0 - - -class TestActionItem: - """Tests for ActionItem entity.""" - - def test_action_item_basic(self) -> None: - """Test creating basic ActionItem.""" - ai = ActionItem(text="Review PR #123") - assert ai.text == "Review PR #123" - assert ai.assignee == "" - assert ai.due_date is None - assert ai.priority == 0 - assert ai.segment_ids == [] - - def test_action_item_has_evidence_false(self) -> None: - """Test has_evidence returns False when no segment_ids.""" - ai = ActionItem(text="Task without evidence") - assert ai.has_evidence() is False - - def test_action_item_has_evidence_true(self) -> None: - """Test has_evidence returns True with segment_ids.""" - ai = ActionItem(text="Task with evidence", segment_ids=[5]) - assert ai.has_evidence() is True - - def test_action_item_is_assigned_false(self) -> None: - """Test is_assigned returns False when no assignee.""" - ai = ActionItem(text="Unassigned task") - assert ai.is_assigned() is False - - def test_action_item_is_assigned_true(self) -> None: - """Test is_assigned returns True with assignee.""" - ai = ActionItem(text="Assigned task", assignee="Alice") - assert ai.is_assigned() is True - - def test_action_item_has_due_date_false(self) -> None: - """Test has_due_date returns False when no due_date.""" - ai 
= ActionItem(text="No deadline") - assert ai.has_due_date() is False - - def test_action_item_has_due_date_true(self) -> None: - """Test has_due_date returns True with due_date.""" - ai = ActionItem(text="With deadline", due_date=datetime(2024, 12, 31)) - assert ai.has_due_date() is True - - -class TestSummary: - """Tests for Summary entity.""" - - @pytest.fixture - def meeting_id(self) -> MeetingId: - """Provide a meeting ID for tests.""" - return MeetingId(uuid4()) - - def test_summary_basic(self, meeting_id: MeetingId) -> None: - """Test creating basic Summary.""" - summary = Summary(meeting_id=meeting_id) - assert summary.meeting_id == meeting_id - assert summary.executive_summary == "" - assert summary.key_points == [] - assert summary.action_items == [] - assert summary.generated_at is None - assert summary.model_version == "" - - def test_summary_key_point_count(self, meeting_id: MeetingId) -> None: - """Test key_point_count property.""" - summary = Summary( - meeting_id=meeting_id, - key_points=[ - KeyPoint(text="Point 1"), - KeyPoint(text="Point 2"), - KeyPoint(text="Point 3"), - ], - ) - assert summary.key_point_count == 3 - - def test_summary_action_item_count(self, meeting_id: MeetingId) -> None: - """Test action_item_count property.""" - summary = Summary( - meeting_id=meeting_id, - action_items=[ - ActionItem(text="Task 1"), - ActionItem(text="Task 2"), - ], - ) - assert summary.action_item_count == 2 - - def test_all_points_have_evidence_true(self, meeting_id: MeetingId) -> None: - """Test all_points_have_evidence returns True when all evidenced.""" - summary = Summary( - meeting_id=meeting_id, - key_points=[ - KeyPoint(text="Point 1", segment_ids=[0]), - KeyPoint(text="Point 2", segment_ids=[1, 2]), - ], - ) - assert summary.all_points_have_evidence() is True - - def test_all_points_have_evidence_false(self, meeting_id: MeetingId) -> None: - """Test all_points_have_evidence returns False when some unevidenced.""" - summary = Summary( - meeting_id=meeting_id, - key_points=[ - KeyPoint(text="Point 1", segment_ids=[0]), - KeyPoint(text="Point 2"), # No evidence - ], - ) - assert summary.all_points_have_evidence() is False - - def test_all_actions_have_evidence_true(self, meeting_id: MeetingId) -> None: - """Test all_actions_have_evidence returns True when all evidenced.""" - summary = Summary( - meeting_id=meeting_id, - action_items=[ - ActionItem(text="Task 1", segment_ids=[0]), - ], - ) - assert summary.all_actions_have_evidence() is True - - def test_all_actions_have_evidence_false(self, meeting_id: MeetingId) -> None: - """Test all_actions_have_evidence returns False when some unevidenced.""" - summary = Summary( - meeting_id=meeting_id, - action_items=[ - ActionItem(text="Task 1"), # No evidence - ], - ) - assert summary.all_actions_have_evidence() is False - - def test_is_fully_evidenced_true(self, meeting_id: MeetingId) -> None: - """Test is_fully_evidenced returns True when all items evidenced.""" - summary = Summary( - meeting_id=meeting_id, - key_points=[KeyPoint(text="KP", segment_ids=[0])], - action_items=[ActionItem(text="AI", segment_ids=[1])], - ) - assert summary.is_fully_evidenced() is True - - def test_is_fully_evidenced_false_points(self, meeting_id: MeetingId) -> None: - """Test is_fully_evidenced returns False with unevidenced points.""" - summary = Summary( - meeting_id=meeting_id, - key_points=[KeyPoint(text="KP")], # No evidence - action_items=[ActionItem(text="AI", segment_ids=[1])], - ) - assert summary.is_fully_evidenced() is False - - def 
test_unevidenced_points(self, meeting_id: MeetingId) -> None: - """Test unevidenced_points property filters correctly.""" - kp_no_evidence = KeyPoint(text="No evidence") - kp_with_evidence = KeyPoint(text="With evidence", segment_ids=[0]) - summary = Summary( - meeting_id=meeting_id, - key_points=[kp_no_evidence, kp_with_evidence], - ) - unevidenced = summary.unevidenced_points - assert len(unevidenced) == 1 - assert unevidenced[0] == kp_no_evidence - - def test_unevidenced_actions(self, meeting_id: MeetingId) -> None: - """Test unevidenced_actions property filters correctly.""" - ai_no_evidence = ActionItem(text="No evidence") - ai_with_evidence = ActionItem(text="With evidence", segment_ids=[0]) - summary = Summary( - meeting_id=meeting_id, - action_items=[ai_no_evidence, ai_with_evidence], - ) - unevidenced = summary.unevidenced_actions - assert len(unevidenced) == 1 - assert unevidenced[0] == ai_no_evidence -```` - -## File: tests/domain/test_value_objects.py -````python -"""Tests for domain value objects.""" - -from __future__ import annotations - -from uuid import UUID - -import pytest - -from noteflow.domain.value_objects import MeetingId, MeetingState - - -class TestMeetingState: - """Tests for MeetingState enum.""" - - @pytest.mark.parametrize( - ("current", "target", "expected"), - [ - # UNSPECIFIED transitions - (MeetingState.UNSPECIFIED, MeetingState.CREATED, True), - (MeetingState.UNSPECIFIED, MeetingState.RECORDING, False), - # CREATED transitions - (MeetingState.CREATED, MeetingState.RECORDING, True), - (MeetingState.CREATED, MeetingState.ERROR, True), - (MeetingState.CREATED, MeetingState.STOPPED, False), - # RECORDING transitions (must go through STOPPING) - (MeetingState.RECORDING, MeetingState.STOPPING, True), - (MeetingState.RECORDING, MeetingState.STOPPED, False), - (MeetingState.RECORDING, MeetingState.ERROR, True), - (MeetingState.RECORDING, MeetingState.CREATED, False), - # STOPPING transitions - (MeetingState.STOPPING, MeetingState.STOPPED, True), - (MeetingState.STOPPING, MeetingState.ERROR, True), - (MeetingState.STOPPING, MeetingState.RECORDING, False), - (MeetingState.STOPPING, MeetingState.CREATED, False), - # STOPPED transitions - (MeetingState.STOPPED, MeetingState.COMPLETED, True), - (MeetingState.STOPPED, MeetingState.ERROR, True), - (MeetingState.STOPPED, MeetingState.RECORDING, False), - # COMPLETED transitions - (MeetingState.COMPLETED, MeetingState.ERROR, True), - (MeetingState.COMPLETED, MeetingState.RECORDING, False), - # ERROR is terminal - (MeetingState.ERROR, MeetingState.CREATED, False), - (MeetingState.ERROR, MeetingState.RECORDING, False), - ], - ) - def test_can_transition_to( - self, - current: MeetingState, - target: MeetingState, - expected: bool, - ) -> None: - """Test state transition validation.""" - assert current.can_transition_to(target) == expected - - @pytest.mark.parametrize( - ("value", "expected"), - [ - (0, MeetingState.UNSPECIFIED), - (1, MeetingState.CREATED), - (2, MeetingState.RECORDING), - (3, MeetingState.STOPPED), - (4, MeetingState.COMPLETED), - (5, MeetingState.ERROR), - (6, MeetingState.STOPPING), - ], - ) - def test_from_int_valid(self, value: int, expected: MeetingState) -> None: - """Test conversion from valid integers.""" - assert MeetingState.from_int(value) == expected - - def test_from_int_invalid_raises(self) -> None: - """Test conversion from invalid integer raises ValueError.""" - with pytest.raises(ValueError, match="Invalid meeting state"): - MeetingState.from_int(99) - - -class TestMeetingId: - """Tests 
for MeetingId NewType.""" - - def test_meeting_id_is_uuid(self) -> None: - """Test MeetingId wraps UUID.""" - uuid = UUID("12345678-1234-5678-1234-567812345678") - meeting_id = MeetingId(uuid) - assert meeting_id == uuid - - def test_meeting_id_string_conversion(self) -> None: - """Test MeetingId can be converted to string.""" - uuid = UUID("12345678-1234-5678-1234-567812345678") - meeting_id = MeetingId(uuid) - assert str(meeting_id) == "12345678-1234-5678-1234-567812345678" -```` - -## File: tests/grpc/__init__.py -````python -"""gRPC service tests.""" -```` - -## File: tests/grpc/test_partial_transcription.py -````python -"""Tests for partial transcription in the gRPC service.""" - -from __future__ import annotations - -import time -from dataclasses import dataclass -from unittest.mock import MagicMock - -import numpy as np -import pytest -from numpy.typing import NDArray - -from noteflow.grpc.service import NoteFlowServicer - - -@dataclass -class MockAsrResult: - """Mock ASR transcription result.""" - - text: str - start: float = 0.0 - end: float = 1.0 - language: str = "en" - language_probability: float = 0.99 - avg_logprob: float = -0.5 - no_speech_prob: float = 0.01 - - -def _create_mock_asr_engine(transcribe_results: list[str] | None = None) -> MagicMock: - """Create mock ASR engine with configurable transcription results.""" - engine = MagicMock() - engine.is_loaded = True - engine.model_size = "base" - - results = transcribe_results or ["Test transcription"] - - def _transcribe(_audio: NDArray[np.float32]) -> list[MockAsrResult]: - return [MockAsrResult(text=text) for text in results] - - async def _transcribe_async( - _audio: NDArray[np.float32], - _language: str | None = None, - ) -> list[MockAsrResult]: - return [MockAsrResult(text=text) for text in results] - - engine.transcribe = _transcribe - engine.transcribe_async = _transcribe_async - return engine - - -class TestPartialTranscriptionState: - """Tests for partial transcription state initialization.""" - - def test_init_streaming_state_creates_partial_buffer(self) -> None: - """Initialize streaming state should create empty partial buffer.""" - servicer = NoteFlowServicer() - - servicer._init_streaming_state("meeting-123", next_segment_id=0) - - assert "meeting-123" in servicer._partial_buffers - assert servicer._partial_buffers["meeting-123"] == [] - - def test_init_streaming_state_creates_last_partial_time(self) -> None: - """Initialize streaming state should set last partial time to now.""" - servicer = NoteFlowServicer() - before = time.time() - - servicer._init_streaming_state("meeting-123", next_segment_id=0) - - assert "meeting-123" in servicer._last_partial_time - assert servicer._last_partial_time["meeting-123"] >= before - - def test_init_streaming_state_creates_empty_last_text(self) -> None: - """Initialize streaming state should set last partial text to empty.""" - servicer = NoteFlowServicer() - - servicer._init_streaming_state("meeting-123", next_segment_id=0) - - assert "meeting-123" in servicer._last_partial_text - assert servicer._last_partial_text["meeting-123"] == "" - - def test_cleanup_streaming_state_removes_partial_state(self) -> None: - """Cleanup streaming state should remove all partial-related state.""" - servicer = NoteFlowServicer() - servicer._init_streaming_state("meeting-123", next_segment_id=0) - - servicer._cleanup_streaming_state("meeting-123") - - assert "meeting-123" not in servicer._partial_buffers - assert "meeting-123" not in servicer._last_partial_time - assert "meeting-123" not in 
servicer._last_partial_text - - -class TestClearPartialBuffer: - """Tests for _clear_partial_buffer method.""" - - def test_clear_partial_buffer_empties_buffer(self) -> None: - """Clear partial buffer should empty the audio buffer.""" - servicer = NoteFlowServicer() - servicer._partial_buffers["meeting-123"] = [np.zeros(1600, dtype=np.float32)] - - servicer._clear_partial_buffer("meeting-123") - - assert servicer._partial_buffers["meeting-123"] == [] - - def test_clear_partial_buffer_resets_last_text(self) -> None: - """Clear partial buffer should reset last partial text.""" - servicer = NoteFlowServicer() - servicer._last_partial_text["meeting-123"] = "Previous partial" - - servicer._clear_partial_buffer("meeting-123") - - assert servicer._last_partial_text["meeting-123"] == "" - - def test_clear_partial_buffer_updates_time(self) -> None: - """Clear partial buffer should update last partial time.""" - servicer = NoteFlowServicer() - servicer._last_partial_time["meeting-123"] = 0.0 - before = time.time() - - servicer._clear_partial_buffer("meeting-123") - - assert servicer._last_partial_time["meeting-123"] >= before - - def test_clear_partial_buffer_handles_missing_meeting(self) -> None: - """Clear partial buffer should handle missing meeting gracefully.""" - servicer = NoteFlowServicer() - - servicer._clear_partial_buffer("nonexistent") # Should not raise - - -class TestMaybeEmitPartial: - """Tests for _maybe_emit_partial method.""" - - @pytest.mark.asyncio - async def test_returns_none_when_asr_not_loaded(self) -> None: - """Return None when ASR engine is not loaded.""" - servicer = NoteFlowServicer() - servicer._init_streaming_state("meeting-123", next_segment_id=0) - - result = await servicer._maybe_emit_partial("meeting-123") - - assert result is None - - @pytest.mark.asyncio - async def test_returns_none_when_cadence_not_reached(self) -> None: - """Return None when not enough time has passed since last partial.""" - engine = _create_mock_asr_engine(["Test"]) - servicer = NoteFlowServicer(asr_engine=engine) - servicer._init_streaming_state("meeting-123", next_segment_id=0) - # Set last time to now (cadence not reached) - servicer._last_partial_time["meeting-123"] = time.time() - # Add some audio - audio = np.ones(16000, dtype=np.float32) * 0.1 # 1 second of audio - servicer._partial_buffers["meeting-123"].append(audio) - - result = await servicer._maybe_emit_partial("meeting-123") - - assert result is None - - @pytest.mark.asyncio - async def test_returns_none_when_buffer_empty(self) -> None: - """Return None when partial buffer is empty.""" - engine = _create_mock_asr_engine(["Test"]) - servicer = NoteFlowServicer(asr_engine=engine) - servicer._init_streaming_state("meeting-123", next_segment_id=0) - # Set last time to past (cadence reached) - servicer._last_partial_time["meeting-123"] = time.time() - 10.0 - - result = await servicer._maybe_emit_partial("meeting-123") - - assert result is None - - @pytest.mark.asyncio - async def test_returns_none_when_audio_too_short(self) -> None: - """Return None when buffered audio is less than minimum.""" - engine = _create_mock_asr_engine(["Test"]) - servicer = NoteFlowServicer(asr_engine=engine) - servicer._init_streaming_state("meeting-123", next_segment_id=0) - servicer._last_partial_time["meeting-123"] = time.time() - 10.0 - # Add only 0.1 seconds of audio (minimum is 0.5s) - audio = np.ones(1600, dtype=np.float32) * 0.1 # 0.1 second - servicer._partial_buffers["meeting-123"].append(audio) - - result = await 
servicer._maybe_emit_partial("meeting-123") - - assert result is None - - @pytest.mark.asyncio - async def test_emits_partial_when_conditions_met(self) -> None: - """Emit partial when cadence reached and sufficient audio buffered.""" - engine = _create_mock_asr_engine(["Hello world"]) - servicer = NoteFlowServicer(asr_engine=engine) - servicer._init_streaming_state("meeting-123", next_segment_id=0) - servicer._last_partial_time["meeting-123"] = time.time() - 10.0 - # Add 1 second of audio (above minimum of 0.5s) - audio = np.ones(16000, dtype=np.float32) * 0.1 - servicer._partial_buffers["meeting-123"].append(audio) - - result = await servicer._maybe_emit_partial("meeting-123") - - assert result is not None - assert result.update_type == 1 # UPDATE_TYPE_PARTIAL - assert result.partial_text == "Hello world" - assert result.meeting_id == "meeting-123" - - @pytest.mark.asyncio - async def test_debounces_duplicate_text(self) -> None: - """Return None when text is same as last partial (debounce).""" - engine = _create_mock_asr_engine(["Same text"]) - servicer = NoteFlowServicer(asr_engine=engine) - servicer._init_streaming_state("meeting-123", next_segment_id=0) - servicer._last_partial_time["meeting-123"] = time.time() - 10.0 - servicer._last_partial_text["meeting-123"] = "Same text" # Same as transcription - audio = np.ones(16000, dtype=np.float32) * 0.1 - servicer._partial_buffers["meeting-123"].append(audio) - - result = await servicer._maybe_emit_partial("meeting-123") - - assert result is None - - @pytest.mark.asyncio - async def test_updates_last_partial_state(self) -> None: - """Emitting partial should update last text and time.""" - engine = _create_mock_asr_engine(["New text"]) - servicer = NoteFlowServicer(asr_engine=engine) - servicer._init_streaming_state("meeting-123", next_segment_id=0) - servicer._last_partial_time["meeting-123"] = time.time() - 10.0 - audio = np.ones(16000, dtype=np.float32) * 0.1 - servicer._partial_buffers["meeting-123"].append(audio) - before = time.time() - - await servicer._maybe_emit_partial("meeting-123") - - assert servicer._last_partial_text["meeting-123"] == "New text" - assert servicer._last_partial_time["meeting-123"] >= before - - -class TestPartialCadence: - """Tests for partial transcription cadence constants.""" - - def test_partial_cadence_is_2_seconds(self) -> None: - """Partial cadence should be 2 seconds per spec.""" - assert NoteFlowServicer.PARTIAL_CADENCE_SECONDS == 2.0 - - def test_min_partial_audio_is_half_second(self) -> None: - """Minimum partial audio should be 0.5 seconds.""" - assert NoteFlowServicer.MIN_PARTIAL_AUDIO_SECONDS == 0.5 - - -class TestPartialBufferAccumulation: - """Tests for audio buffer accumulation during speech.""" - - @pytest.mark.asyncio - async def test_speech_audio_added_to_buffer(self) -> None: - """Speech audio should be accumulated in partial buffer.""" - engine = _create_mock_asr_engine() - servicer = NoteFlowServicer(asr_engine=engine) - servicer._init_streaming_state("meeting-123", next_segment_id=0) - - # Simulate speech detection by processing audio - audio = np.ones(1600, dtype=np.float32) * 0.1 - - # Mock VAD to return True (is_speech) - servicer._vad_instances["meeting-123"].process_chunk = MagicMock(return_value=True) - - updates = [] - async for update in servicer._process_audio_with_vad("meeting-123", audio): - updates.append(update) - - # Buffer should have audio added - assert len(servicer._partial_buffers["meeting-123"]) >= 1 - - @pytest.mark.asyncio - async def 
test_silence_does_not_add_to_buffer(self) -> None: - """Silent audio should not be added to partial buffer.""" - engine = _create_mock_asr_engine() - servicer = NoteFlowServicer(asr_engine=engine) - servicer._init_streaming_state("meeting-123", next_segment_id=0) - - audio = np.zeros(1600, dtype=np.float32) # Silence - - # Mock VAD to return False (is_silence) - servicer._vad_instances["meeting-123"].process_chunk = MagicMock(return_value=False) - - updates = [] - async for update in servicer._process_audio_with_vad("meeting-123", audio): - updates.append(update) - - # Buffer should still be empty - assert servicer._partial_buffers["meeting-123"] == [] - - -class TestPartialIntegrationWithFinal: - """Tests for partial buffer clearing when final segment emitted.""" - - @pytest.mark.asyncio - async def test_buffer_cleared_on_final_segment(self) -> None: - """Partial buffer should be cleared when a final segment is produced.""" - servicer = NoteFlowServicer() - servicer._init_streaming_state("meeting-123", next_segment_id=0) - - # Add some audio to buffer - audio = np.ones(16000, dtype=np.float32) * 0.1 - servicer._partial_buffers["meeting-123"].append(audio) - servicer._last_partial_text["meeting-123"] = "Some partial" - - # Clear buffer (simulates final segment emission) - servicer._clear_partial_buffer("meeting-123") - - assert servicer._partial_buffers["meeting-123"] == [] - assert servicer._last_partial_text["meeting-123"] == "" -```` - -## File: tests/infrastructure/asr/__init__.py -````python -"""ASR infrastructure tests.""" -```` - -## File: tests/infrastructure/audio/__init__.py -````python -"""Audio infrastructure tests package.""" -```` - -## File: tests/infrastructure/audio/conftest.py -````python -"""Test fixtures for audio infrastructure tests.""" - -from __future__ import annotations - -import numpy as np -import pytest -from numpy.typing import NDArray - -from noteflow.infrastructure.audio import TimestampedAudio - - -@pytest.fixture -def silence_audio() -> NDArray[np.float32]: - """Return silent audio (all zeros).""" - return np.zeros(1600, dtype=np.float32) # 100ms at 16kHz - - -@pytest.fixture -def full_scale_audio() -> NDArray[np.float32]: - """Return full-scale audio (all ones).""" - return np.ones(1600, dtype=np.float32) - - -@pytest.fixture -def half_scale_audio() -> NDArray[np.float32]: - """Return half-scale audio (all 0.5).""" - return np.full(1600, 0.5, dtype=np.float32) - - -@pytest.fixture -def sample_timestamped_audio() -> TimestampedAudio: - """Return sample timestamped audio chunk.""" - return TimestampedAudio( - frames=np.zeros(1600, dtype=np.float32), - timestamp=0.0, - duration=0.1, - ) - - -@pytest.fixture -def timestamped_audio_sequence() -> list[TimestampedAudio]: - """Return sequence of timestamped audio chunks for buffer tests.""" - return [ - TimestampedAudio( - frames=np.zeros(1600, dtype=np.float32), - timestamp=float(i) * 0.1, - duration=0.1, - ) - for i in range(10) - ] -```` - -## File: tests/infrastructure/audio/test_dto.py -````python -"""Tests for audio DTOs.""" - -from __future__ import annotations - -from dataclasses import FrozenInstanceError - -import numpy as np -import pytest - -from noteflow.infrastructure.audio import AudioDeviceInfo, TimestampedAudio - - -class TestAudioDeviceInfo: - """Tests for AudioDeviceInfo dataclass.""" - - def test_audio_device_info_creation(self) -> None: - """Test AudioDeviceInfo can be created with all fields.""" - device = AudioDeviceInfo( - device_id=0, - name="Test Microphone", - channels=2, - 
sample_rate=48000, - is_default=True, - ) - assert device.device_id == 0 - assert device.name == "Test Microphone" - assert device.channels == 2 - assert device.sample_rate == 48000 - assert device.is_default is True - - def test_audio_device_info_frozen(self) -> None: - """Test AudioDeviceInfo is immutable (frozen).""" - device = AudioDeviceInfo( - device_id=0, - name="Test", - channels=1, - sample_rate=16000, - is_default=False, - ) - with pytest.raises(FrozenInstanceError): - # Intentionally assign to frozen field to verify immutability - device.name = "Modified" # type: ignore[misc] - - -class TestTimestampedAudio: - """Tests for TimestampedAudio dataclass.""" - - def test_timestamped_audio_creation(self) -> None: - """Test TimestampedAudio can be created with valid values.""" - frames = np.zeros(1600, dtype=np.float32) - audio = TimestampedAudio( - frames=frames, - timestamp=1.0, - duration=0.1, - ) - assert len(audio.frames) == 1600 - assert audio.timestamp == 1.0 - assert audio.duration == 0.1 - - def test_timestamped_audio_negative_duration_raises(self) -> None: - """Test TimestampedAudio raises on negative duration.""" - frames = np.zeros(1600, dtype=np.float32) - with pytest.raises(ValueError, match="Duration must be non-negative"): - TimestampedAudio( - frames=frames, - timestamp=0.0, - duration=-0.1, - ) - - def test_timestamped_audio_negative_timestamp_raises(self) -> None: - """Test TimestampedAudio raises on negative timestamp.""" - frames = np.zeros(1600, dtype=np.float32) - with pytest.raises(ValueError, match="Timestamp must be non-negative"): - TimestampedAudio( - frames=frames, - timestamp=-1.0, - duration=0.1, - ) - - def test_timestamped_audio_zero_duration_valid(self) -> None: - """Test TimestampedAudio accepts zero duration.""" - frames = np.zeros(0, dtype=np.float32) - audio = TimestampedAudio( - frames=frames, - timestamp=0.0, - duration=0.0, - ) - assert audio.duration == 0.0 - - def test_timestamped_audio_zero_timestamp_valid(self) -> None: - """Test TimestampedAudio accepts zero timestamp.""" - frames = np.zeros(1600, dtype=np.float32) - audio = TimestampedAudio( - frames=frames, - timestamp=0.0, - duration=0.1, - ) - assert audio.timestamp == 0.0 -```` - -## File: tests/infrastructure/audio/test_reader.py -````python -"""Tests for MeetingAudioReader.""" - -from __future__ import annotations - -import json -from pathlib import Path -from uuid import uuid4 - -import numpy as np -import pytest - -from noteflow.infrastructure.audio.reader import MeetingAudioReader -from noteflow.infrastructure.audio.writer import MeetingAudioWriter -from noteflow.infrastructure.security.crypto import AesGcmCryptoBox -from noteflow.infrastructure.security.keystore import InMemoryKeyStore - - -@pytest.fixture -def crypto() -> AesGcmCryptoBox: - """Create crypto instance with in-memory keystore.""" - keystore = InMemoryKeyStore() - return AesGcmCryptoBox(keystore) - - -@pytest.fixture -def meetings_dir(tmp_path: Path) -> Path: - """Create temporary meetings directory.""" - return tmp_path / "meetings" - - -def test_audio_exists_requires_manifest( - crypto: AesGcmCryptoBox, - meetings_dir: Path, -) -> None: - """audio_exists should require both audio.enc and manifest.json.""" - meeting_id = str(uuid4()) - meeting_dir = meetings_dir / meeting_id - meeting_dir.mkdir(parents=True, exist_ok=True) - - # Only audio.enc present -> False - (meeting_dir / "audio.enc").write_bytes(b"") - reader = MeetingAudioReader(crypto, meetings_dir) - assert reader.audio_exists(meeting_id) is False - - # 
Add manifest.json -> True - (meeting_dir / "manifest.json").write_text(json.dumps({"sample_rate": 16000})) - assert reader.audio_exists(meeting_id) is True - - -def test_reader_uses_manifest_sample_rate( - crypto: AesGcmCryptoBox, - meetings_dir: Path, -) -> None: - """Reader should expose sample_rate from manifest and use it for durations.""" - meeting_id = str(uuid4()) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer = MeetingAudioWriter(crypto, meetings_dir) - writer.open(meeting_id, dek, wrapped_dek, sample_rate=48000) - writer.write_chunk(np.zeros(1600, dtype=np.float32)) # 1600 samples @ 48kHz - writer.close() - - reader = MeetingAudioReader(crypto, meetings_dir) - chunks = reader.load_meeting_audio(meeting_id) - - assert reader.sample_rate == 48000 - assert len(chunks) == 1 - assert chunks[0].duration == pytest.approx(1600 / 48000, rel=1e-6) -```` - -## File: tests/infrastructure/audio/test_ring_buffer.py -````python -"""Tests for TimestampedRingBuffer.""" - -from __future__ import annotations - -import numpy as np -import pytest - -from noteflow.infrastructure.audio import TimestampedAudio, TimestampedRingBuffer - - -class TestTimestampedRingBuffer: - """Tests for TimestampedRingBuffer class.""" - - def test_init_with_valid_duration(self) -> None: - """Test buffer initialization with valid max_duration.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - assert buffer.max_duration == 10.0 - assert buffer.duration == 0.0 - assert buffer.chunk_count == 0 - - def test_init_with_default_duration(self) -> None: - """Test buffer uses default max_duration of 30 seconds.""" - buffer = TimestampedRingBuffer() - assert buffer.max_duration == 30.0 - - def test_init_with_invalid_duration_raises(self) -> None: - """Test buffer raises on non-positive max_duration.""" - with pytest.raises(ValueError, match="max_duration must be positive"): - TimestampedRingBuffer(max_duration=0.0) - - with pytest.raises(ValueError, match="max_duration must be positive"): - TimestampedRingBuffer(max_duration=-1.0) - - def test_push_single_chunk(self, sample_timestamped_audio: TimestampedAudio) -> None: - """Test pushing single audio chunk.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - buffer.push(sample_timestamped_audio) - - assert buffer.chunk_count == 1 - assert buffer.duration == sample_timestamped_audio.duration - - def test_push_multiple_chunks(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None: - """Test pushing multiple audio chunks.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - for audio in timestamped_audio_sequence: - buffer.push(audio) - - assert buffer.chunk_count == 10 - assert buffer.duration == pytest.approx(1.0, rel=1e-9) # 10 chunks * 0.1s - - def test_push_evicts_old_at_capacity(self) -> None: - """Test old chunks are evicted when buffer exceeds max_duration.""" - buffer = TimestampedRingBuffer(max_duration=0.5) # 500ms max - - # Push 10 chunks of 0.1s each (1.0s total) - for i in range(10): - audio = TimestampedAudio( - frames=np.zeros(1600, dtype=np.float32), - timestamp=float(i) * 0.1, - duration=0.1, - ) - buffer.push(audio) - - # Should only keep ~5 chunks (0.5s worth) - assert buffer.duration <= 0.5 - assert buffer.chunk_count <= 6 # May keep one extra during eviction - - def test_get_window_returns_requested_duration( - self, timestamped_audio_sequence: list[TimestampedAudio] - ) -> None: - """Test get_window returns chunks for requested duration.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - for audio in 
timestamped_audio_sequence: - buffer.push(audio) - - # Request 0.3 seconds (should get ~3 chunks) - window = buffer.get_window(0.3) - total_duration = sum(a.duration for a in window) - - assert total_duration >= 0.3 - assert len(window) >= 3 - - def test_get_window_empty_returns_empty(self) -> None: - """Test get_window on empty buffer returns empty list.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - window = buffer.get_window(1.0) - - assert window == [] - - def test_get_window_negative_returns_empty( - self, sample_timestamped_audio: TimestampedAudio - ) -> None: - """Test get_window with negative duration returns empty list.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - buffer.push(sample_timestamped_audio) - - window = buffer.get_window(-1.0) - assert window == [] - - def test_get_window_zero_returns_empty( - self, sample_timestamped_audio: TimestampedAudio - ) -> None: - """Test get_window with zero duration returns empty list.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - buffer.push(sample_timestamped_audio) - - window = buffer.get_window(0.0) - assert window == [] - - def test_get_window_exceeds_buffer_returns_all( - self, timestamped_audio_sequence: list[TimestampedAudio] - ) -> None: - """Test get_window with duration > buffer returns all chunks.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - for audio in timestamped_audio_sequence: - buffer.push(audio) - - window = buffer.get_window(100.0) # Request more than available - assert len(window) == 10 - - def test_get_window_chronological_order( - self, timestamped_audio_sequence: list[TimestampedAudio] - ) -> None: - """Test get_window returns chunks in chronological order.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - for audio in timestamped_audio_sequence: - buffer.push(audio) - - window = buffer.get_window(1.0) - - # Verify timestamps are increasing - for i in range(1, len(window)): - assert window[i].timestamp >= window[i - 1].timestamp - - def test_get_all_returns_all_chunks( - self, timestamped_audio_sequence: list[TimestampedAudio] - ) -> None: - """Test get_all returns all buffered chunks.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - for audio in timestamped_audio_sequence: - buffer.push(audio) - - all_chunks = buffer.get_all() - assert len(all_chunks) == 10 - - def test_clear_removes_all(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None: - """Test clear removes all chunks and resets duration.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - for audio in timestamped_audio_sequence: - buffer.push(audio) - - buffer.clear() - - assert buffer.chunk_count == 0 - assert buffer.duration == 0.0 - assert len(buffer) == 0 - - def test_duration_property(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None: - """Test duration property tracks total buffered duration.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - - assert buffer.duration == 0.0 - - for i, audio in enumerate(timestamped_audio_sequence): - buffer.push(audio) - expected = (i + 1) * 0.1 - assert buffer.duration == pytest.approx(expected, rel=1e-9) - - def test_chunk_count_property(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None: - """Test chunk_count property tracks number of chunks.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - - for i, audio in enumerate(timestamped_audio_sequence): - buffer.push(audio) - assert buffer.chunk_count == i + 1 - - def test_max_duration_property(self) -> None: - """Test max_duration property returns 
configured value.""" - buffer = TimestampedRingBuffer(max_duration=15.0) - assert buffer.max_duration == 15.0 - - def test_len_returns_chunk_count( - self, timestamped_audio_sequence: list[TimestampedAudio] - ) -> None: - """Test __len__ returns chunk count.""" - buffer = TimestampedRingBuffer(max_duration=10.0) - for audio in timestamped_audio_sequence: - buffer.push(audio) - - assert len(buffer) == buffer.chunk_count -```` - -## File: tests/infrastructure/audio/test_writer.py -````python -"""Tests for MeetingAudioWriter.""" - -from __future__ import annotations - -import json -from pathlib import Path -from uuid import uuid4 - -import numpy as np -import pytest - -from noteflow.infrastructure.audio.writer import MeetingAudioWriter -from noteflow.infrastructure.security.crypto import AesGcmCryptoBox, ChunkedAssetReader -from noteflow.infrastructure.security.keystore import InMemoryKeyStore - - -@pytest.fixture -def crypto() -> AesGcmCryptoBox: - """Create crypto instance with in-memory keystore.""" - keystore = InMemoryKeyStore() - return AesGcmCryptoBox(keystore) - - -@pytest.fixture -def meetings_dir(tmp_path: Path) -> Path: - """Create temporary meetings directory.""" - return tmp_path / "meetings" - - -class TestMeetingAudioWriterBasics: - """Tests for MeetingAudioWriter basic operations.""" - - def test_writer_creates_meeting_directory( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test writer creates meeting directory structure.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - meeting_id = str(uuid4()) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(meeting_id, dek, wrapped_dek) - - meeting_dir = meetings_dir / meeting_id - assert meeting_dir.exists() - assert (meeting_dir / "manifest.json").exists() - assert (meeting_dir / "audio.enc").exists() - - writer.close() - - def test_manifest_contains_correct_metadata( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test manifest.json has required fields.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - meeting_id = str(uuid4()) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(meeting_id, dek, wrapped_dek, sample_rate=16000) - writer.close() - - manifest_path = meetings_dir / meeting_id / "manifest.json" - manifest = json.loads(manifest_path.read_text()) - - assert manifest["meeting_id"] == meeting_id - assert manifest["sample_rate"] == 16000 - assert manifest["channels"] == 1 - assert manifest["format"] == "pcm16" - assert "wrapped_dek" in manifest - assert "created_at" in manifest - - def test_write_chunk_converts_float32_to_pcm16( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test audio conversion from float32 to PCM16.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - meeting_id = str(uuid4()) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(meeting_id, dek, wrapped_dek) - - # Create test audio: 1600 samples = 0.1 seconds at 16kHz - test_audio = np.linspace(-1.0, 1.0, 1600, dtype=np.float32) - writer.write_chunk(test_audio) - - assert writer.bytes_written > 0 - # PCM16 = 2 bytes/sample = 3200 bytes raw, but encrypted with overhead - assert writer.bytes_written > 3200 - assert writer.chunk_count == 1 - - writer.close() - - def test_multiple_chunks_written( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test writing multiple audio chunks.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - 
meeting_id = str(uuid4()) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(meeting_id, dek, wrapped_dek) - - # Write 100 chunks - for _ in range(100): - audio = np.random.uniform(-0.5, 0.5, 1600).astype(np.float32) - writer.write_chunk(audio) - - # Should have written significant data - assert writer.bytes_written > 100 * 3200 # At least raw PCM16 size - assert writer.chunk_count == 100 - - writer.close() - - def test_write_chunk_clamps_audio_range( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test audio values outside [-1, 1] are clamped before encoding.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - meeting_id = str(uuid4()) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(meeting_id, dek, wrapped_dek) - writer.write_chunk(np.array([-2.0, 0.0, 2.0], dtype=np.float32)) - writer.close() - - audio_path = meetings_dir / meeting_id / "audio.enc" - reader = ChunkedAssetReader(crypto) - reader.open(audio_path, dek) - - chunk_bytes = next(reader.read_chunks()) - pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16) - audio_float = pcm16.astype(np.float32) / 32767.0 - - assert audio_float.min() >= -1.0 - assert audio_float.max() <= 1.0 - - reader.close() - - -class TestMeetingAudioWriterErrors: - """Tests for MeetingAudioWriter error handling.""" - - def test_writer_raises_if_already_open( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test writer raises RuntimeError if opened twice.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(str(uuid4()), dek, wrapped_dek) - - with pytest.raises(RuntimeError, match="already open"): - writer.open(str(uuid4()), dek, wrapped_dek) - - writer.close() - - def test_writer_raises_if_write_when_not_open( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test writer raises RuntimeError if write called before open.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - audio = np.zeros(1600, dtype=np.float32) - - with pytest.raises(RuntimeError, match="not open"): - writer.write_chunk(audio) - - def test_close_is_idempotent( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test close can be called multiple times safely.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(str(uuid4()), dek, wrapped_dek) - writer.close() - writer.close() # Should not raise - writer.close() # Should not raise - - -class TestMeetingAudioWriterProperties: - """Tests for MeetingAudioWriter properties.""" - - def test_is_open_property( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test is_open property reflects writer state.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - assert writer.is_open is False - - writer.open(str(uuid4()), dek, wrapped_dek) - assert writer.is_open is True - - writer.close() - assert writer.is_open is False - - def test_meeting_dir_property( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test meeting_dir property returns correct path.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - meeting_id = str(uuid4()) - - assert writer.meeting_dir is None - - writer.open(meeting_id, dek, wrapped_dek) - assert 
writer.meeting_dir == meetings_dir / meeting_id - - writer.close() - assert writer.meeting_dir is None - - def test_bytes_written_when_closed( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test bytes_written returns 0 when not open.""" - writer = MeetingAudioWriter(crypto, meetings_dir) - assert writer.bytes_written == 0 - - -class TestMeetingAudioWriterIntegration: - """Integration tests for audio roundtrip.""" - - def test_audio_roundtrip_encryption_decryption( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test writing audio, then reading it back encrypted.""" - # Write audio - writer = MeetingAudioWriter(crypto, meetings_dir) - meeting_id = str(uuid4()) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(meeting_id, dek, wrapped_dek) - - # Write 10 chunks of known audio - original_chunks: list[np.ndarray] = [] - for i in range(10): - audio = np.sin(2 * np.pi * 440 * np.linspace(i, i + 0.1, 1600)).astype(np.float32) - original_chunks.append(audio) - writer.write_chunk(audio) - - writer.close() - - # Read audio back - audio_path = meetings_dir / meeting_id / "audio.enc" - assert audio_path.exists() - - reader = ChunkedAssetReader(crypto) - reader.open(audio_path, dek) - - read_chunks: list[np.ndarray] = [] - for chunk_bytes in reader.read_chunks(): - # Convert bytes back to PCM16 then to float32 - pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16) - audio_float = pcm16.astype(np.float32) / 32767.0 - read_chunks.append(audio_float) - - reader.close() - - # Verify we read same number of chunks - assert len(read_chunks) == len(original_chunks) - - # Verify audio content matches (within quantization error) - for orig, read in zip(original_chunks, read_chunks, strict=True): - # PCM16 quantization adds ~0.00003 max error - assert np.allclose(orig, read, atol=0.0001) - - def test_manifest_wrapped_dek_can_decrypt_audio( - self, - crypto: AesGcmCryptoBox, - meetings_dir: Path, - ) -> None: - """Test that wrapped_dek from manifest can decrypt audio file.""" - # Write audio - writer = MeetingAudioWriter(crypto, meetings_dir) - meeting_id = str(uuid4()) - dek = crypto.generate_dek() - wrapped_dek = crypto.wrap_dek(dek) - - writer.open(meeting_id, dek, wrapped_dek) - writer.write_chunk(np.zeros(1600, dtype=np.float32)) - writer.close() - - # Read manifest - manifest_path = meetings_dir / meeting_id / "manifest.json" - manifest = json.loads(manifest_path.read_text()) - wrapped_dek_hex = manifest["wrapped_dek"] - - # Unwrap DEK from manifest - unwrapped_dek = crypto.unwrap_dek(bytes.fromhex(wrapped_dek_hex)) - - # Use unwrapped DEK to read audio - audio_path = meetings_dir / meeting_id / "audio.enc" - reader = ChunkedAssetReader(crypto) - reader.open(audio_path, unwrapped_dek) - - chunks = list(reader.read_chunks()) - assert len(chunks) == 1 # Should read the one chunk we wrote - - reader.close() -```` - -## File: tests/infrastructure/export/test_formatting.py -````python -"""Tests for export formatting helpers.""" - -from __future__ import annotations - -from datetime import datetime - -from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp - - -class TestFormatTimestamp: - """Tests for format_timestamp.""" - - def test_format_timestamp_under_hour(self) -> None: - assert format_timestamp(0) == "0:00" - assert format_timestamp(59) == "0:59" - assert format_timestamp(60) == "1:00" - assert format_timestamp(125) == "2:05" - - def test_format_timestamp_over_hour(self) -> None: - assert 
-
-## File: tests/infrastructure/export/test_formatting.py
-````python
-"""Tests for export formatting helpers."""
-
-from __future__ import annotations
-
-from datetime import datetime
-
-from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp
-
-
-class TestFormatTimestamp:
-    """Tests for format_timestamp."""
-
-    def test_format_timestamp_under_hour(self) -> None:
-        assert format_timestamp(0) == "0:00"
-        assert format_timestamp(59) == "0:59"
-        assert format_timestamp(60) == "1:00"
-        assert format_timestamp(125) == "2:05"
-
-    def test_format_timestamp_over_hour(self) -> None:
-        assert format_timestamp(3600) == "1:00:00"
-        assert format_timestamp(3661) == "1:01:01"
-
-
-class TestFormatDatetime:
-    """Tests for format_datetime."""
-
-    def test_format_datetime_none(self) -> None:
-        assert format_datetime(None) == ""
-
-    def test_format_datetime_value(self) -> None:
-        dt = datetime(2024, 1, 1, 12, 30, 15)
-        assert format_datetime(dt) == "2024-01-01 12:30:15"
-````
-
-## File: tests/infrastructure/export/test_html.py
-````python
-"""Tests for HTML exporter."""
-
-from __future__ import annotations
-
-from noteflow.domain.entities import ActionItem, KeyPoint, Meeting, Segment, Summary
-from noteflow.infrastructure.export.html import HtmlExporter
-
-
-class TestHtmlExporter:
-    """Tests for HtmlExporter output."""
-
-    def test_export_escapes_html(self) -> None:
-        meeting = Meeting.create(title="<Weekly & Sync>")
-        segments = [
-            Segment(segment_id=0, text="Hello <team>", start_time=0.0, end_time=1.0),
-        ]
-        summary = Summary(
-            meeting_id=meeting.id,
-            executive_summary="Summary with <b>bold</b>",
-            key_points=[KeyPoint(text="Key <point>")],
-            action_items=[ActionItem(text="Do <thing>", assignee="bob<")],
-        )
-        meeting.summary = summary
-
-        exporter = HtmlExporter()
-        output = exporter.export(meeting, segments)
-
-        assert "&lt;Weekly &amp; Sync&gt;" in output
-        assert "Hello &lt;team&gt;" in output
-        assert "Summary with &lt;b&gt;bold&lt;/b&gt;" in output
-        assert "Key &lt;point&gt;" in output
-        assert "@bob&lt;" in output
-````
-
-## File: tests/infrastructure/export/test_markdown.py
-````python
-"""Tests for Markdown exporter."""
-
-from __future__ import annotations
-
-from datetime import datetime
-
-from noteflow.domain.entities import ActionItem, KeyPoint, Meeting, Segment, Summary
-from noteflow.infrastructure.export.markdown import MarkdownExporter
-
-
-class TestMarkdownExporter:
-    """Tests for MarkdownExporter output."""
-
-    def test_export_includes_sections(self) -> None:
-        meeting = Meeting.create(title="Weekly Sync")
-        meeting.started_at = datetime(2024, 1, 1, 9, 0, 0)
-        meeting.ended_at = datetime(2024, 1, 1, 9, 30, 0)
-
-        segments = [
-            Segment(segment_id=0, text="Hello team", start_time=0.0, end_time=1.0),
-            Segment(segment_id=1, text="Next steps", start_time=1.0, end_time=2.0),
-        ]
-
-        summary = Summary(
-            meeting_id=meeting.id,
-            executive_summary="Great meeting.",
-            key_points=[KeyPoint(text="KP1")],
-            action_items=[ActionItem(text="Do thing", assignee="alice")],
-        )
-        meeting.summary = summary
-
-        exporter = MarkdownExporter()
-        output = exporter.export(meeting, segments)
-
-        assert "# Weekly Sync" in output
-        assert "## Meeting Info" in output
-        assert "## Transcript" in output
-        assert "**[0:00]** Hello team" in output
-        assert "## Summary" in output
-        assert "### Key Points" in output
-        assert "- KP1" in output
-        assert "### Action Items" in output
-        assert "- [ ] Do thing (@alice)" in output
-        assert "Exported from NoteFlow" in output
-````
-
-## File: tests/infrastructure/security/test_crypto.py
-````python
-"""Tests for crypto error paths and asset reader behavior."""
-
-from __future__ import annotations
-
-import struct
-from pathlib import Path
-
-import pytest
-
-from noteflow.infrastructure.security.crypto import (
-    FILE_MAGIC,
-    FILE_VERSION,
-    AesGcmCryptoBox,
-    ChunkedAssetReader,
-    ChunkedAssetWriter,
-)
-from noteflow.infrastructure.security.keystore import InMemoryKeyStore
-
-
-@pytest.fixture
-def crypto() -> AesGcmCryptoBox:
-    """Crypto box with in-memory key store."""
-    return AesGcmCryptoBox(InMemoryKeyStore())
-
-
-class TestAesGcmCryptoBox:
-    """Tests for AesGcmCryptoBox edge cases."""
-
-    def test_unwrap_dek_too_short_raises(self, crypto: AesGcmCryptoBox) -> None:
-        """unwrap_dek rejects payloads shorter than nonce+ciphertext+tag."""
-        with pytest.raises(ValueError, match="Invalid wrapped DEK"):
-            crypto.unwrap_dek(b"short")
-
-
-class TestChunkedAssetReader:
-    """Tests for ChunkedAssetReader validation."""
-
-    def test_open_invalid_magic_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None:
-        """Reader rejects files with invalid magic."""
-        path = tmp_path / "bad_magic.enc"
-        path.write_bytes(b"BAD!" + bytes([FILE_VERSION]))
-
-        reader = ChunkedAssetReader(crypto)
-        with pytest.raises(ValueError, match="Invalid file format"):
-            reader.open(path, crypto.generate_dek())
-
-    def test_open_invalid_version_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None:
-        """Reader rejects unsupported file versions."""
-        path = tmp_path / "bad_version.enc"
-        path.write_bytes(FILE_MAGIC + bytes([FILE_VERSION + 1]))
-
-        reader = ChunkedAssetReader(crypto)
-        with pytest.raises(ValueError, match="Unsupported file version"):
-            reader.open(path, crypto.generate_dek())
-
-    def test_read_truncated_chunk_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None:
-        """Reader errors on truncated chunk data."""
-        path = tmp_path / "truncated.enc"
-        with path.open("wb") as handle:
-            handle.write(FILE_MAGIC)
-            handle.write(struct.pack("B", FILE_VERSION))
-            handle.write(struct.pack(">I", 10))  # claim 10 bytes
-            handle.write(b"12345")  # only 5 bytes provided
-
-        reader = ChunkedAssetReader(crypto)
-        reader.open(path, crypto.generate_dek())
-        with pytest.raises(ValueError, match="Truncated chunk"):
-            list(reader.read_chunks())
-
-        reader.close()
-
-    def test_read_with_wrong_dek_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None:
-        """Decrypting with the wrong key fails."""
-        path = tmp_path / "wrong_key.enc"
-        dek = crypto.generate_dek()
-        other_dek = crypto.generate_dek()
-
-        writer = ChunkedAssetWriter(crypto)
-        writer.open(path, dek)
-        writer.write_chunk(b"hello")
-        writer.close()
-
-        reader = ChunkedAssetReader(crypto)
-        reader.open(path, other_dek)
-        with pytest.raises(ValueError, match="Chunk decryption failed"):
-            list(reader.read_chunks())
-        reader.close()
-````
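The reader tests above fix the container layout: `FILE_MAGIC`, one version byte, then repeated records of a 4-byte big-endian length followed by the encrypted chunk. A minimal standalone parser for just that framing; the constants and error messages are copied from the tests, everything else is an illustrative sketch rather than NoteFlow's actual implementation:

````python
import struct
from collections.abc import Iterator
from pathlib import Path


def read_frames(path: Path, magic: bytes, version: int) -> Iterator[bytes]:
    """Yield the raw (still-encrypted) chunk payloads from a container file."""
    data = path.read_bytes()
    if data[: len(magic)] != magic:
        raise ValueError("Invalid file format")
    if data[len(magic)] != version:
        raise ValueError("Unsupported file version")
    offset = len(magic) + 1
    while offset < len(data):
        if offset + 4 > len(data):
            raise ValueError("Truncated chunk")
        (length,) = struct.unpack_from(">I", data, offset)  # 4-byte big-endian length
        offset += 4
        payload = data[offset : offset + length]
        if len(payload) < length:
            raise ValueError("Truncated chunk")  # length claims more bytes than remain
        offset += length
        yield payload
````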
SegmentCitationVerifier.""" - - @pytest.fixture - def verifier(self) -> SegmentCitationVerifier: - """Create verifier instance.""" - return SegmentCitationVerifier() - - def test_verify_valid_citations(self, verifier: SegmentCitationVerifier) -> None: - """All citations valid should return is_valid=True.""" - segments = [_segment(0), _segment(1), _segment(2)] - summary = _summary( - key_points=[_key_point("Point 1", [0, 1])], - action_items=[_action_item("Action 1", [2])], - ) - - result = verifier.verify_citations(summary, segments) - - assert result.is_valid is True - assert result.invalid_key_point_indices == () - assert result.invalid_action_item_indices == () - assert result.missing_segment_ids == () - - def test_verify_invalid_key_point_citation(self, verifier: SegmentCitationVerifier) -> None: - """Invalid segment_id in key point should be detected.""" - segments = [_segment(0), _segment(1)] - summary = _summary( - key_points=[_key_point("Point 1", [0, 99])], # 99 doesn't exist - ) - - result = verifier.verify_citations(summary, segments) - - assert result.is_valid is False - assert result.invalid_key_point_indices == (0,) - assert result.invalid_action_item_indices == () - assert result.missing_segment_ids == (99,) - - def test_verify_invalid_action_item_citation(self, verifier: SegmentCitationVerifier) -> None: - """Invalid segment_id in action item should be detected.""" - segments = [_segment(0), _segment(1)] - summary = _summary( - action_items=[_action_item("Action 1", [50])], # 50 doesn't exist - ) - - result = verifier.verify_citations(summary, segments) - - assert result.is_valid is False - assert result.invalid_key_point_indices == () - assert result.invalid_action_item_indices == (0,) - assert result.missing_segment_ids == (50,) - - def test_verify_multiple_invalid_citations(self, verifier: SegmentCitationVerifier) -> None: - """Multiple invalid citations should all be detected.""" - segments = [_segment(0)] - summary = _summary( - key_points=[ - _key_point("Point 1", [0]), - _key_point("Point 2", [1]), # Invalid - _key_point("Point 3", [2]), # Invalid - ], - action_items=[ - _action_item("Action 1", [3]), # Invalid - ], - ) - - result = verifier.verify_citations(summary, segments) - - assert result.is_valid is False - assert result.invalid_key_point_indices == (1, 2) - assert result.invalid_action_item_indices == (0,) - assert result.missing_segment_ids == (1, 2, 3) - - def test_verify_empty_summary(self, verifier: SegmentCitationVerifier) -> None: - """Empty summary should be valid.""" - segments = [_segment(0)] - summary = _summary() - - result = verifier.verify_citations(summary, segments) - - assert result.is_valid is True - - def test_verify_empty_segments(self, verifier: SegmentCitationVerifier) -> None: - """Summary with citations but no segments should be invalid.""" - segments: list[Segment] = [] - summary = _summary(key_points=[_key_point("Point 1", [0])]) - - result = verifier.verify_citations(summary, segments) - - assert result.is_valid is False - assert result.missing_segment_ids == (0,) - - def test_verify_empty_citations(self, verifier: SegmentCitationVerifier) -> None: - """Key points/actions with empty segment_ids should be valid.""" - segments = [_segment(0)] - summary = _summary( - key_points=[_key_point("Point 1", [])], # No citations - action_items=[_action_item("Action 1", [])], # No citations - ) - - result = verifier.verify_citations(summary, segments) - - assert result.is_valid is True - - def test_invalid_count_property(self, verifier: 
SegmentCitationVerifier) -> None: - """invalid_count should sum key point and action item invalid counts.""" - segments = [_segment(0)] - summary = _summary( - key_points=[ - _key_point("Point 1", [1]), # Invalid - _key_point("Point 2", [2]), # Invalid - ], - action_items=[ - _action_item("Action 1", [3]), # Invalid - ], - ) - - result = verifier.verify_citations(summary, segments) - - assert result.invalid_count == 3 - - -class TestFilterInvalidCitations: - """Tests for filter_invalid_citations method.""" - - @pytest.fixture - def verifier(self) -> SegmentCitationVerifier: - """Create verifier instance.""" - return SegmentCitationVerifier() - - def test_filter_removes_invalid_segment_ids(self, verifier: SegmentCitationVerifier) -> None: - """Invalid segment_ids should be removed from citations.""" - segments = [_segment(0), _segment(1)] - summary = _summary( - key_points=[_key_point("Point 1", [0, 1, 99])], # 99 invalid - action_items=[_action_item("Action 1", [1, 50])], # 50 invalid - ) - - filtered = verifier.filter_invalid_citations(summary, segments) - - assert filtered.key_points[0].segment_ids == [0, 1] - assert filtered.action_items[0].segment_ids == [1] - - def test_filter_preserves_valid_citations(self, verifier: SegmentCitationVerifier) -> None: - """Valid citations should be preserved.""" - segments = [_segment(0), _segment(1), _segment(2)] - summary = _summary( - key_points=[_key_point("Point 1", [0, 1])], - action_items=[_action_item("Action 1", [2])], - ) - - filtered = verifier.filter_invalid_citations(summary, segments) - - assert filtered.key_points[0].segment_ids == [0, 1] - assert filtered.action_items[0].segment_ids == [2] - - def test_filter_preserves_other_fields(self, verifier: SegmentCitationVerifier) -> None: - """Non-citation fields should be preserved.""" - segments = [_segment(0)] - summary = Summary( - meeting_id=MeetingId(uuid4()), - executive_summary="Important meeting", - key_points=[KeyPoint(text="Key point", segment_ids=[0], start_time=1.0, end_time=2.0)], - action_items=[ActionItem(text="Action", segment_ids=[0], assignee="Alice", priority=2)], - model_version="test-1.0", - ) - - filtered = verifier.filter_invalid_citations(summary, segments) - - assert filtered.executive_summary == "Important meeting" - assert filtered.key_points[0].text == "Key point" - assert filtered.key_points[0].start_time == 1.0 - assert filtered.action_items[0].assignee == "Alice" - assert filtered.action_items[0].priority == 2 - assert filtered.model_version == "test-1.0" -```` - -## File: tests/infrastructure/summarization/test_mock_provider.py -````python -"""Tests for mock summarization provider.""" - -from __future__ import annotations - -from uuid import uuid4 - -import pytest - -from noteflow.domain.entities import Segment -from noteflow.domain.summarization import SummarizationRequest -from noteflow.domain.value_objects import MeetingId -from noteflow.infrastructure.summarization import MockSummarizer - - -def _segment( - segment_id: int, - text: str, - start: float = 0.0, - end: float = 5.0, -) -> Segment: - """Create a test segment.""" - return Segment( - segment_id=segment_id, - text=text, - start_time=start, - end_time=end, - ) - - -class TestMockSummarizer: - """Tests for MockSummarizer.""" - - @pytest.fixture - def summarizer(self) -> MockSummarizer: - """Create MockSummarizer instance.""" - return MockSummarizer(latency_ms=0.0) - - @pytest.fixture - def meeting_id(self) -> MeetingId: - """Create a test meeting ID.""" - return MeetingId(uuid4()) - - def 
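In application code the two entry points exercised above compose naturally: verify first, and only rewrite the summary when the report flags dangling citations. A small sketch using the objects from these tests; the control flow is an assumption, not a documented NoteFlow pattern:

````python
from noteflow.infrastructure.summarization import SegmentCitationVerifier

verifier = SegmentCitationVerifier()
report = verifier.verify_citations(summary, segments)
if not report.is_valid:
    # e.g. report.missing_segment_ids == (99,) for a dangling citation
    summary = verifier.filter_invalid_citations(summary, segments)
````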
-
-## File: tests/infrastructure/summarization/test_mock_provider.py
-````python
-"""Tests for mock summarization provider."""
-
-from __future__ import annotations
-
-from uuid import uuid4
-
-import pytest
-
-from noteflow.domain.entities import Segment
-from noteflow.domain.summarization import SummarizationRequest
-from noteflow.domain.value_objects import MeetingId
-from noteflow.infrastructure.summarization import MockSummarizer
-
-
-def _segment(
-    segment_id: int,
-    text: str,
-    start: float = 0.0,
-    end: float = 5.0,
-) -> Segment:
-    """Create a test segment."""
-    return Segment(
-        segment_id=segment_id,
-        text=text,
-        start_time=start,
-        end_time=end,
-    )
-
-
-class TestMockSummarizer:
-    """Tests for MockSummarizer."""
-
-    @pytest.fixture
-    def summarizer(self) -> MockSummarizer:
-        """Create MockSummarizer instance."""
-        return MockSummarizer(latency_ms=0.0)
-
-    @pytest.fixture
-    def meeting_id(self) -> MeetingId:
-        """Create a test meeting ID."""
-        return MeetingId(uuid4())
-
-    def test_provider_name(self, summarizer: MockSummarizer) -> None:
-        """Provider name should be 'mock'."""
-        assert summarizer.provider_name == "mock"
-
-    def test_is_available(self, summarizer: MockSummarizer) -> None:
-        """Mock provider should always be available."""
-        assert summarizer.is_available is True
-
-    def test_requires_cloud_consent(self, summarizer: MockSummarizer) -> None:
-        """Mock provider should not require cloud consent."""
-        assert summarizer.requires_cloud_consent is False
-
-    @pytest.mark.asyncio
-    async def test_summarize_returns_result(
-        self,
-        summarizer: MockSummarizer,
-        meeting_id: MeetingId,
-    ) -> None:
-        """Summarize should return a SummarizationResult."""
-        segments = [
-            _segment(0, "First segment text.", 0.0, 5.0),
-            _segment(1, "Second segment text.", 5.0, 10.0),
-        ]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        result = await summarizer.summarize(request)
-
-        assert result.provider_name == "mock"
-        assert result.model_name == "mock-1.0"
-        assert result.summary.meeting_id == meeting_id
-
-    @pytest.mark.asyncio
-    async def test_summarize_generates_executive_summary(
-        self,
-        summarizer: MockSummarizer,
-        meeting_id: MeetingId,
-    ) -> None:
-        """Summarize should generate executive summary with segment count."""
-        segments = [
-            _segment(0, "Hello", 0.0, 5.0),
-            _segment(1, "World", 5.0, 10.0),
-            _segment(2, "Test", 10.0, 15.0),
-        ]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        result = await summarizer.summarize(request)
-
-        assert "3 segments" in result.summary.executive_summary
-        assert "15.0 seconds" in result.summary.executive_summary
-
-    @pytest.mark.asyncio
-    async def test_summarize_generates_key_points_with_citations(
-        self,
-        summarizer: MockSummarizer,
-        meeting_id: MeetingId,
-    ) -> None:
-        """Key points should have valid segment_id citations."""
-        segments = [
-            _segment(0, "First point", 0.0, 5.0),
-            _segment(1, "Second point", 5.0, 10.0),
-        ]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        result = await summarizer.summarize(request)
-
-        assert len(result.summary.key_points) == 2
-        assert result.summary.key_points[0].segment_ids == [0]
-        assert result.summary.key_points[1].segment_ids == [1]
-
-    @pytest.mark.asyncio
-    async def test_summarize_respects_max_key_points(
-        self,
-        summarizer: MockSummarizer,
-        meeting_id: MeetingId,
-    ) -> None:
-        """Key points should be limited to max_key_points."""
-        segments = [_segment(i, f"Segment {i}", i * 5.0, (i + 1) * 5.0) for i in range(10)]
-        request = SummarizationRequest(
-            meeting_id=meeting_id,
-            segments=segments,
-            max_key_points=3,
-        )
-
-        result = await summarizer.summarize(request)
-
-        assert len(result.summary.key_points) == 3
-
-    @pytest.mark.asyncio
-    async def test_summarize_extracts_action_items(
-        self,
-        summarizer: MockSummarizer,
-        meeting_id: MeetingId,
-    ) -> None:
-        """Action items should be extracted from segments with action keywords."""
-        segments = [
-            _segment(0, "General discussion", 0.0, 5.0),
-            _segment(1, "We need to fix the bug", 5.0, 10.0),
-            _segment(2, "TODO: Review the code", 10.0, 15.0),
-            _segment(3, "The meeting went well", 15.0, 20.0),
-        ]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        result = await summarizer.summarize(request)
-
-        assert len(result.summary.action_items) == 2
-        assert result.summary.action_items[0].segment_ids == [1]
-        assert result.summary.action_items[1].segment_ids == [2]
-
-    @pytest.mark.asyncio
-    async def test_summarize_respects_max_action_items(
-        self,
-        summarizer: MockSummarizer,
-        meeting_id: MeetingId,
-    ) -> None:
-        """Action items should be limited to max_action_items."""
-        segments = [_segment(i, f"TODO: task {i}", i * 5.0, (i + 1) * 5.0) for i in range(10)]
-        request = SummarizationRequest(
-            meeting_id=meeting_id,
-            segments=segments,
-            max_action_items=2,
-        )
-
-        result = await summarizer.summarize(request)
-
-        assert len(result.summary.action_items) == 2
-
-    @pytest.mark.asyncio
-    async def test_summarize_sets_generated_at(
-        self,
-        summarizer: MockSummarizer,
-        meeting_id: MeetingId,
-    ) -> None:
-        """Summary should have generated_at timestamp."""
-        segments = [_segment(0, "Test", 0.0, 5.0)]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        result = await summarizer.summarize(request)
-
-        assert result.summary.generated_at is not None
-
-    @pytest.mark.asyncio
-    async def test_summarize_empty_segments(
-        self,
-        summarizer: MockSummarizer,
-        meeting_id: MeetingId,
-    ) -> None:
-        """Summarize should handle empty segments list."""
-        request = SummarizationRequest(meeting_id=meeting_id, segments=[])
-
-        result = await summarizer.summarize(request)
-
-        assert result.summary.key_points == []
-        assert result.summary.action_items == []
-        assert "0 segments" in result.summary.executive_summary
-````
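For reference, this is all it takes to drive the mock provider end to end outside pytest. The imports match the ones at the top of the test file; the `asyncio.run` wrapper is only for illustration:

````python
import asyncio
from uuid import uuid4

from noteflow.domain.entities import Segment
from noteflow.domain.summarization import SummarizationRequest
from noteflow.domain.value_objects import MeetingId
from noteflow.infrastructure.summarization import MockSummarizer


async def demo() -> None:
    summarizer = MockSummarizer(latency_ms=0.0)  # no artificial delay
    request = SummarizationRequest(
        meeting_id=MeetingId(uuid4()),
        segments=[Segment(segment_id=0, text="TODO: ship it", start_time=0.0, end_time=5.0)],
    )
    result = await summarizer.summarize(request)
    assert result.provider_name == "mock"


asyncio.run(demo())
````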
-
-## File: tests/infrastructure/summarization/test_ollama_provider.py
-````python
-"""Tests for Ollama summarization provider."""
-
-from __future__ import annotations
-
-import json
-import sys
-import types
-from typing import Any
-from uuid import uuid4
-
-import pytest
-
-from noteflow.domain.entities import Segment
-from noteflow.domain.summarization import (
-    InvalidResponseError,
-    ProviderUnavailableError,
-    SummarizationRequest,
-)
-from noteflow.domain.value_objects import MeetingId
-
-
-def _segment(
-    segment_id: int,
-    text: str,
-    start: float = 0.0,
-    end: float = 5.0,
-) -> Segment:
-    """Create a test segment."""
-    return Segment(
-        segment_id=segment_id,
-        text=text,
-        start_time=start,
-        end_time=end,
-    )
-
-
-def _valid_json_response(
-    summary: str = "Test summary.",
-    key_points: list[dict[str, Any]] | None = None,
-    action_items: list[dict[str, Any]] | None = None,
-) -> str:
-    """Build a valid JSON response string."""
-    return json.dumps(
-        {
-            "executive_summary": summary,
-            "key_points": key_points or [],
-            "action_items": action_items or [],
-        }
-    )
-
-
-class TestOllamaSummarizerProperties:
-    """Tests for OllamaSummarizer properties."""
-
-    @pytest.fixture
-    def mock_ollama_module(self, monkeypatch: pytest.MonkeyPatch) -> types.ModuleType:
-        """Mock ollama module."""
-        mock_client = types.SimpleNamespace(
-            list=lambda: {"models": []},
-            chat=lambda **_: {"message": {"content": _valid_json_response()}},
-        )
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-        return mock_module
-
-    def test_provider_name(self, mock_ollama_module: types.ModuleType) -> None:
-        """Provider name should be 'ollama'."""
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        assert summarizer.provider_name == "ollama"
-
-    def test_requires_cloud_consent_false(self, mock_ollama_module: types.ModuleType) -> None:
-        """Ollama should not require cloud consent (local processing)."""
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        assert summarizer.requires_cloud_consent is False
-
-    def test_is_available_when_server_responds(self, mock_ollama_module: types.ModuleType) -> None:
-        """is_available should be True when server responds."""
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        assert summarizer.is_available is True
-
-    def test_is_available_false_when_connection_fails(
-        self, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """is_available should be False when server unreachable."""
-
-        def raise_error() -> None:
-            raise ConnectionError("Connection refused")
-
-        mock_client = types.SimpleNamespace(list=raise_error)
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        assert summarizer.is_available is False
-
-
-class TestOllamaSummarizerSummarize:
-    """Tests for OllamaSummarizer.summarize method."""
-
-    @pytest.fixture
-    def meeting_id(self) -> MeetingId:
-        """Create test meeting ID."""
-        return MeetingId(uuid4())
-
-    @pytest.mark.asyncio
-    async def test_summarize_empty_segments(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Empty segments should return empty summary without calling LLM."""
-        call_count = 0
-
-        def mock_chat(**_: Any) -> dict[str, Any]:
-            nonlocal call_count
-            call_count += 1
-            return {"message": {"content": _valid_json_response()}}
-
-        mock_client = types.SimpleNamespace(list=lambda: {}, chat=mock_chat)
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        request = SummarizationRequest(meeting_id=meeting_id, segments=[])
-
-        result = await summarizer.summarize(request)
-
-        assert result.summary.key_points == []
-        assert result.summary.action_items == []
-        assert call_count == 0
-
-    @pytest.mark.asyncio
-    async def test_summarize_returns_result(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Summarize should return SummarizationResult."""
-        response = _valid_json_response(
-            summary="Meeting discussed project updates.",
-            key_points=[{"text": "Project on track", "segment_ids": [0]}],
-            action_items=[
-                {"text": "Review code", "assignee": "Alice", "priority": 2, "segment_ids": [1]}
-            ],
-        )
-
-        mock_client = types.SimpleNamespace(
-            list=lambda: {},
-            chat=lambda **_: {"message": {"content": response}},
-        )
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        segments = [
-            _segment(0, "Project is on track.", 0.0, 5.0),
-            _segment(1, "Alice needs to review the code.", 5.0, 10.0),
-        ]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        result = await summarizer.summarize(request)
-
-        assert result.provider_name == "ollama"
-        assert result.summary.meeting_id == meeting_id
-        assert result.summary.executive_summary == "Meeting discussed project updates."
-        assert len(result.summary.key_points) == 1
-        assert result.summary.key_points[0].segment_ids == [0]
-        assert len(result.summary.action_items) == 1
-        assert result.summary.action_items[0].assignee == "Alice"
-        assert result.summary.action_items[0].priority == 2
-
-    @pytest.mark.asyncio
-    async def test_summarize_filters_invalid_segment_ids(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Invalid segment_ids in response should be filtered out."""
-        response = _valid_json_response(
-            summary="Test",
-            key_points=[{"text": "Point", "segment_ids": [0, 99, 100]}],  # 99, 100 invalid
-        )
-
-        mock_client = types.SimpleNamespace(
-            list=lambda: {},
-            chat=lambda **_: {"message": {"content": response}},
-        )
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        segments = [_segment(0, "Only segment")]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        result = await summarizer.summarize(request)
-
-        assert result.summary.key_points[0].segment_ids == [0]
-
-    @pytest.mark.asyncio
-    async def test_summarize_respects_max_limits(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Response items exceeding max limits should be truncated."""
-        response = _valid_json_response(
-            summary="Test",
-            key_points=[{"text": f"Point {i}", "segment_ids": [0]} for i in range(10)],
-            action_items=[{"text": f"Action {i}", "segment_ids": [0]} for i in range(10)],
-        )
-
-        mock_client = types.SimpleNamespace(
-            list=lambda: {},
-            chat=lambda **_: {"message": {"content": response}},
-        )
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        segments = [_segment(0, "Test segment")]
-        request = SummarizationRequest(
-            meeting_id=meeting_id,
-            segments=segments,
-            max_key_points=3,
-            max_action_items=2,
-        )
-
-        result = await summarizer.summarize(request)
-
-        assert len(result.summary.key_points) == 3
-        assert len(result.summary.action_items) == 2
-
-    @pytest.mark.asyncio
-    async def test_summarize_handles_markdown_fenced_json(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Markdown code fences around JSON should be stripped."""
-        json_content = _valid_json_response(summary="Fenced response")
-        response = f"```json\n{json_content}\n```"
-
-        mock_client = types.SimpleNamespace(
-            list=lambda: {},
-            chat=lambda **_: {"message": {"content": response}},
-        )
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        segments = [_segment(0, "Test")]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        result = await summarizer.summarize(request)
-
-        assert result.summary.executive_summary == "Fenced response"
-
-
-class TestOllamaSummarizerErrors:
-    """Tests for OllamaSummarizer error handling."""
-
-    @pytest.fixture
-    def meeting_id(self) -> MeetingId:
-        """Create test meeting ID."""
-        return MeetingId(uuid4())
-
-    @pytest.mark.asyncio
-    async def test_raises_unavailable_when_package_missing(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Should raise ProviderUnavailableError when ollama not installed."""
-        # Remove ollama from sys.modules if present
-        monkeypatch.delitem(sys.modules, "ollama", raising=False)
-
-        # Make import fail
-        import builtins
-
-        original_import = builtins.__import__
-
-        def mock_import(name: str, *args: Any, **kwargs: Any) -> Any:
-            if name == "ollama":
-                raise ImportError("No module named 'ollama'")
-            return original_import(name, *args, **kwargs)
-
-        monkeypatch.setattr(builtins, "__import__", mock_import)
-
-        # Need to reload the module to trigger fresh import
-        from noteflow.infrastructure.summarization import ollama_provider
-
-        # Create fresh instance that will try to import
-        summarizer = ollama_provider.OllamaSummarizer()
-        summarizer._client = None  # Force re-import attempt
-
-        segments = [_segment(0, "Test")]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        with pytest.raises(ProviderUnavailableError, match="ollama package not installed"):
-            await summarizer.summarize(request)
-
-    @pytest.mark.asyncio
-    async def test_raises_unavailable_on_connection_error(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Should raise ProviderUnavailableError on connection failure."""
-
-        def raise_connection_error(**_: Any) -> None:
-            raise ConnectionRefusedError("Connection refused")
-
-        mock_client = types.SimpleNamespace(
-            list=lambda: {},
-            chat=raise_connection_error,
-        )
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        segments = [_segment(0, "Test")]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        with pytest.raises(ProviderUnavailableError, match="Cannot connect"):
-            await summarizer.summarize(request)
-
-    @pytest.mark.asyncio
-    async def test_raises_invalid_response_on_bad_json(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Should raise InvalidResponseError on malformed JSON."""
-        mock_client = types.SimpleNamespace(
-            list=lambda: {},
-            chat=lambda **_: {"message": {"content": "not valid json {{{"}},
-        )
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        segments = [_segment(0, "Test")]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        with pytest.raises(InvalidResponseError, match="Invalid JSON"):
-            await summarizer.summarize(request)
-
-    @pytest.mark.asyncio
-    async def test_raises_invalid_response_on_empty_content(
-        self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Should raise InvalidResponseError on empty response."""
-        mock_client = types.SimpleNamespace(
-            list=lambda: {},
-            chat=lambda **_: {"message": {"content": ""}},
-        )
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer()
-        segments = [_segment(0, "Test")]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        with pytest.raises(InvalidResponseError, match="Empty response"):
-            await summarizer.summarize(request)
-
-
-class TestOllamaSummarizerConfiguration:
-    """Tests for OllamaSummarizer configuration."""
-
-    @pytest.mark.asyncio
-    async def test_custom_model_name(self, monkeypatch: pytest.MonkeyPatch) -> None:
-        """Custom model name should be used."""
-        captured_model = None
-
-        def capture_chat(**kwargs: Any) -> dict[str, Any]:
-            nonlocal captured_model
-            captured_model = kwargs.get("model")
-            return {"message": {"content": _valid_json_response()}}
-
-        mock_client = types.SimpleNamespace(list=lambda: {}, chat=capture_chat)
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = lambda host: mock_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer(model="mistral")
-        meeting_id = MeetingId(uuid4())
-        segments = [_segment(0, "Test")]
-        request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
-
-        await summarizer.summarize(request)
-
-        assert captured_model == "mistral"
-
-    def test_custom_host(self, monkeypatch: pytest.MonkeyPatch) -> None:
-        """Custom host should be passed to client."""
-        captured_host = None
-
-        def capture_client(host: str) -> types.SimpleNamespace:
-            nonlocal captured_host
-            captured_host = host
-            return types.SimpleNamespace(
-                list=lambda: {},
-                chat=lambda **_: {"message": {"content": _valid_json_response()}},
-            )
-
-        mock_module = types.ModuleType("ollama")
-        mock_module.Client = capture_client
-        monkeypatch.setitem(sys.modules, "ollama", mock_module)
-
-        from noteflow.infrastructure.summarization import OllamaSummarizer
-
-        summarizer = OllamaSummarizer(host="http://custom:8080")
-        _ = summarizer.is_available
-
-        assert captured_host == "http://custom:8080"
-````
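Nearly every test above rebuilds the same five-line `ollama` stub. If the suite keeps growing, the pattern could be factored into a helper like the following; this is a hypothetical refactor, not part of this patch, and it reuses only the stubbing moves already shown in the tests:

````python
import sys
import types

import pytest


def install_fake_ollama(monkeypatch: pytest.MonkeyPatch, content: str) -> None:
    """Install a stub `ollama` module whose Client returns a canned chat reply."""
    client = types.SimpleNamespace(
        list=lambda: {},  # probed by is_available
        chat=lambda **_: {"message": {"content": content}},
    )
    module = types.ModuleType("ollama")
    module.Client = lambda host: client  # OllamaSummarizer constructs Client(host)
    monkeypatch.setitem(sys.modules, "ollama", module)
````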
-
-    Usage:
-        mock_pywinctl("Zoom Meeting")  # Window with title
-        mock_pywinctl(None)  # No active window
-    """
-
-    def _install(title: str | None) -> None:
-        window = DummyWindow(title) if title is not None else None
-        module = types.SimpleNamespace(getActiveWindow=lambda: window)
-        monkeypatch.setitem(sys.modules, "pywinctl", module)
-
-    return _install
-
-
-@pytest.fixture
-def mock_pywinctl_unavailable(monkeypatch: pytest.MonkeyPatch) -> None:
-    """Install pywinctl mock that raises ImportError."""
-
-    def raise_import_error() -> None:
-        msg = "No module named 'pywinctl'"
-        raise ImportError(msg)
-
-    monkeypatch.setitem(sys.modules, "pywinctl", None)
-
-
-@pytest.fixture
-def mock_pywinctl_raises(monkeypatch: pytest.MonkeyPatch) -> None:
-    """Install pywinctl mock that raises RuntimeError on getActiveWindow."""
-
-    def raise_runtime_error() -> None:
-        msg = "No display available"
-        raise RuntimeError(msg)
-
-    module = types.SimpleNamespace(getActiveWindow=raise_runtime_error)
-    monkeypatch.setitem(sys.modules, "pywinctl", module)
-````
-
-## File: tests/infrastructure/triggers/test_audio_activity.py
-````python
-"""Tests for audio activity trigger provider."""
-
-from __future__ import annotations
-
-import time
-
-import numpy as np
-import pytest
-
-from noteflow.infrastructure.audio import RmsLevelProvider
-from noteflow.infrastructure.triggers.audio_activity import (
-    AudioActivityProvider,
-    AudioActivitySettings,
-)
-
-
-def _settings(**overrides: object) -> AudioActivitySettings:
-    defaults: dict[str, object] = {
-        "enabled": True,
-        "threshold_db": -20.0,
-        "window_seconds": 10.0,
-        "min_active_ratio": 0.6,
-        "min_samples": 3,
-        "max_history": 10,
-        "weight": 0.3,
-    } | overrides
-    return AudioActivitySettings(**defaults)
-
-
-def test_audio_activity_settings_validation() -> None:
-    """Settings should reject min_samples greater than max_history."""
-    with pytest.raises(ValueError, match="min_samples"):
-        AudioActivitySettings(
-            enabled=True,
-            threshold_db=-20.0,
-            window_seconds=5.0,
-            min_active_ratio=0.5,
-            min_samples=11,
-            max_history=10,
-            weight=0.3,
-        )
-
-
-def test_audio_activity_provider_disabled_ignores_updates() -> None:
-    """Disabled provider should not emit signals."""
-    provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=False))
-    frames = np.ones(10, dtype=np.float32)
-
-    provider.update(frames, timestamp=1.0)
-
-    assert provider.get_signal() is None
-
-
-def test_audio_activity_provider_emits_signal(monkeypatch: pytest.MonkeyPatch) -> None:
-    """Provider emits a signal when sustained activity passes ratio threshold."""
-    provider = AudioActivityProvider(RmsLevelProvider(), _settings())
-    active = np.ones(10, dtype=np.float32)
-    inactive = np.zeros(10, dtype=np.float32)
-
-    provider.update(active, timestamp=1.0)
-    provider.update(active, timestamp=2.0)
-    provider.update(inactive, timestamp=3.0)
-
-    monkeypatch.setattr(time, "monotonic", lambda: 4.0)
-    signal = provider.get_signal()
-
-    assert signal is not None
-    assert signal.weight == pytest.approx(0.3)
-
-
-def test_audio_activity_provider_window_excludes_old_samples(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
-    """Samples outside the window should not contribute to activity ratio."""
-    provider = AudioActivityProvider(RmsLevelProvider(), _settings(window_seconds=2.0))
-    active = np.ones(10, dtype=np.float32)
-
-    provider.update(active, timestamp=1.0)
-    provider.update(active, timestamp=2.0)
-    provider.update(active, timestamp=3.0)
-
-    monkeypatch.setattr(time, "monotonic", lambda: 10.0)
-    assert provider.get_signal() is None
-
-
-def test_audio_activity_provider_source_property() -> None:
-    """Provider source should be AUDIO_ACTIVITY."""
-    from noteflow.domain.triggers.entities import TriggerSource
-
-    provider = AudioActivityProvider(RmsLevelProvider(), _settings())
-    assert provider.source == TriggerSource.AUDIO_ACTIVITY
-
-
-def test_audio_activity_provider_max_weight_property() -> None:
-    """Provider max_weight should reflect configured weight."""
-    provider = AudioActivityProvider(RmsLevelProvider(), _settings(weight=0.5))
-    assert provider.max_weight == pytest.approx(0.5)
-
-
-def test_audio_activity_provider_is_enabled_reflects_settings() -> None:
-    """is_enabled should reflect settings.enabled."""
-    enabled_provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=True))
-    disabled_provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=False))
-
-    assert enabled_provider.is_enabled() is True
-    assert disabled_provider.is_enabled() is False
-
-
-def test_audio_activity_provider_clear_history() -> None:
-    """clear_history should reset the activity history."""
-    provider = AudioActivityProvider(RmsLevelProvider(), _settings())
-    active = np.ones(10, dtype=np.float32)
-
-    provider.update(active, timestamp=1.0)
-    provider.update(active, timestamp=2.0)
-    provider.update(active, timestamp=3.0)
-
-    provider.clear_history()
-
-    # After clearing, signal should be None due to insufficient samples
-    assert provider.get_signal() is None
-
-
-def test_audio_activity_provider_insufficient_samples() -> None:
-    """Provider should return None when history has fewer than min_samples."""
-    provider = AudioActivityProvider(RmsLevelProvider(), _settings(min_samples=5))
-    active = np.ones(10, dtype=np.float32)
-
-    # Add only 3 samples (less than min_samples=5)
-    provider.update(active, timestamp=1.0)
-    provider.update(active, timestamp=2.0)
-    provider.update(active, timestamp=3.0)
-
-    assert provider.get_signal() is None
-
-
-def test_audio_activity_provider_below_activity_ratio() -> None:
-    """Provider should return None when active ratio < min_active_ratio."""
-    provider = AudioActivityProvider(RmsLevelProvider(), _settings(min_active_ratio=0.7))
-    active = np.ones(10, dtype=np.float32)
-    inactive = np.zeros(10, dtype=np.float32)
-
-    # Add 3 active, 7 inactive = 30% active ratio (below 70% threshold)
-    provider.update(active, timestamp=1.0)
-    provider.update(active, timestamp=2.0)
-    provider.update(active, timestamp=3.0)
-    provider.update(inactive, timestamp=4.0)
-    provider.update(inactive, timestamp=5.0)
-    provider.update(inactive, timestamp=6.0)
-    provider.update(inactive, timestamp=7.0)
-    provider.update(inactive, timestamp=8.0)
-    provider.update(inactive, timestamp=9.0)
-    provider.update(inactive, timestamp=10.0)
-
-    assert provider.get_signal() is None
-
-
-def test_audio_activity_provider_boundary_activity_ratio(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
-    """Provider should emit signal when ratio exactly equals min_active_ratio."""
-    provider = AudioActivityProvider(
-        RmsLevelProvider(),
-        _settings(min_active_ratio=0.6, min_samples=5, max_history=10),
-    )
-    active = np.ones(10, dtype=np.float32)
-    inactive = np.zeros(10, dtype=np.float32)
-
-    # Add 6 active, 4 inactive = 60% active ratio (exactly at threshold)
-    provider.update(active, timestamp=1.0)
-    provider.update(active, timestamp=2.0)
-    provider.update(active, timestamp=3.0)
-    provider.update(active, timestamp=4.0)
-    provider.update(active, timestamp=5.0)
-    provider.update(active, timestamp=6.0)
-    provider.update(inactive, timestamp=7.0)
-    provider.update(inactive, timestamp=8.0)
-    provider.update(inactive, timestamp=9.0)
-    provider.update(inactive, timestamp=10.0)
-
-    monkeypatch.setattr(time, "monotonic", lambda: 11.0)
-    signal = provider.get_signal()
-
-    assert signal is not None
-    assert signal.weight == pytest.approx(0.3)
-````
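Read together, these tests specify the provider's sliding-window rule: keep only samples newer than `window_seconds`, require at least `min_samples` of them, and fire once the active fraction reaches `min_active_ratio` (inclusive, per the boundary test). A minimal reimplementation of just that rule, for illustration; names and structure are mine, not the provider's internals:

````python
import time
from collections import deque


class ActivityWindow:
    """Sliding-window activity ratio matching the behavior the tests pin down."""

    def __init__(self, window_seconds: float, min_active_ratio: float, min_samples: int) -> None:
        self._window = window_seconds
        self._ratio = min_active_ratio
        self._min = min_samples
        self._samples: deque[tuple[float, bool]] = deque()

    def update(self, active: bool, timestamp: float) -> None:
        self._samples.append((timestamp, active))

    def should_fire(self, now: float | None = None) -> bool:
        now = time.monotonic() if now is None else now
        # Only samples inside the window count toward the ratio.
        recent = [a for (t, a) in self._samples if now - t <= self._window]
        if len(recent) < self._min:
            return False  # insufficient evidence, as in the min_samples test
        return sum(recent) / len(recent) >= self._ratio  # inclusive boundary
````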
assert "discord" in settings.suppressed_apps - # Original case should not be present - assert "ZOOM" not in settings.meeting_apps - assert "SLACK" not in settings.suppressed_apps - - -def test_foreground_app_provider_disabled_returns_none( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """Provider should return None when enabled=False.""" - _install_pywinctl(monkeypatch, "Zoom Meeting") - provider = ForegroundAppProvider(_settings(enabled=False)) - - assert provider.get_signal() is None - - -def test_foreground_app_provider_no_window_returns_none( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """Provider should return None when getActiveWindow() returns None.""" - _install_pywinctl(monkeypatch, None) - provider = ForegroundAppProvider(_settings()) - # Force availability check to succeed - provider._available = True - - assert provider.get_signal() is None - - -def test_foreground_app_provider_empty_title_returns_none( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """Provider should return None when window title is empty string.""" - _install_pywinctl(monkeypatch, "") - provider = ForegroundAppProvider(_settings()) - provider._available = True - - assert provider.get_signal() is None - - -def test_foreground_app_provider_non_meeting_app_returns_none( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """Provider should return None when foreground app is not a meeting app.""" - _install_pywinctl(monkeypatch, "Firefox Browser") - provider = ForegroundAppProvider(_settings(meeting_apps={"zoom", "teams"})) - provider._available = True - - assert provider.get_signal() is None - - -def test_foreground_app_provider_suppress_app() -> None: - """suppress_app should add lowercased app to suppressed_apps.""" - provider = ForegroundAppProvider(_settings(suppressed_apps=set())) - - provider.suppress_app("ZOOM") - provider.suppress_app("Teams") - - assert "zoom" in provider.suppressed_apps - assert "teams" in provider.suppressed_apps - - -def test_foreground_app_provider_unsuppress_app() -> None: - """unsuppress_app should remove app from suppressed_apps.""" - provider = ForegroundAppProvider(_settings(suppressed_apps={"zoom", "teams"})) - - provider.unsuppress_app("zoom") - - assert "zoom" not in provider.suppressed_apps - assert "teams" in provider.suppressed_apps - - -def test_foreground_app_provider_add_meeting_app() -> None: - """add_meeting_app should add lowercased app to meeting_apps.""" - provider = ForegroundAppProvider(_settings(meeting_apps={"zoom"})) - - provider.add_meeting_app("WEBEX") - provider.add_meeting_app("RingCentral") - - assert "webex" in provider._settings.meeting_apps - assert "ringcentral" in provider._settings.meeting_apps - - -def test_foreground_app_provider_suppressed_apps_property() -> None: - """suppressed_apps property should return frozenset.""" - provider = ForegroundAppProvider(_settings(suppressed_apps={"zoom", "teams"})) - - result = provider.suppressed_apps - - assert isinstance(result, frozenset) - assert "zoom" in result - assert "teams" in result - - -def test_foreground_app_provider_case_insensitive_matching( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """Provider should match meeting apps case-insensitively.""" - _install_pywinctl(monkeypatch, "ZOOM MEETING - Conference Room") - provider = ForegroundAppProvider(_settings(meeting_apps={"zoom"})) - provider._available = True - - signal = provider.get_signal() - - assert signal is not None - assert signal.app_name == "ZOOM MEETING - Conference Room" - - -def 
test_foreground_app_provider_is_enabled_when_enabled_and_available( - monkeypatch: pytest.MonkeyPatch, -) -> None: - """is_enabled should return True when both enabled and available.""" - _install_pywinctl(monkeypatch, "Some Window") - provider = ForegroundAppProvider(_settings(enabled=True)) - - assert provider.is_enabled() is True -```` - -## File: tests/infrastructure/__init__.py -````python -"""Infrastructure tests package.""" -```` - -## File: tests/infrastructure/test_converters.py -````python -"""Tests for infrastructure converters.""" - -from __future__ import annotations - -from noteflow.domain import entities -from noteflow.infrastructure.asr import dto -from noteflow.infrastructure.converters import AsrConverter, OrmConverter - - -class TestAsrConverter: - """Tests for AsrConverter.""" - - def test_word_timing_to_domain_maps_field_names(self) -> None: - """Test ASR start/end maps to domain start_time/end_time.""" - asr_word = dto.WordTiming(word="hello", start=1.5, end=2.0, probability=0.95) - - result = AsrConverter.word_timing_to_domain(asr_word) - - assert result.word == "hello" - assert result.start_time == 1.5 - assert result.end_time == 2.0 - assert result.probability == 0.95 - - def test_word_timing_to_domain_preserves_precision(self) -> None: - """Test timing values preserve floating point precision.""" - asr_word = dto.WordTiming( - word="test", - start=0.123456789, - end=0.987654321, - probability=0.999999, - ) - - result = AsrConverter.word_timing_to_domain(asr_word) - - assert result.start_time == 0.123456789 - assert result.end_time == 0.987654321 - assert result.probability == 0.999999 - - def test_word_timing_to_domain_returns_domain_type(self) -> None: - """Test converter returns domain WordTiming type.""" - asr_word = dto.WordTiming(word="test", start=1.0, end=2.0, probability=0.9) - - result = AsrConverter.word_timing_to_domain(asr_word) - - assert isinstance(result, entities.WordTiming) - - def test_result_to_domain_words_converts_all(self) -> None: - """Test batch conversion of ASR result words.""" - asr_result = dto.AsrResult( - text="hello world", - start=0.0, - end=2.0, - words=( - dto.WordTiming(word="hello", start=0.0, end=1.0, probability=0.9), - dto.WordTiming(word="world", start=1.0, end=2.0, probability=0.95), - ), - ) - - words = AsrConverter.result_to_domain_words(asr_result) - - assert len(words) == 2 - assert words[0].word == "hello" - assert words[0].start_time == 0.0 - assert words[1].word == "world" - assert words[1].start_time == 1.0 - - def test_result_to_domain_words_empty(self) -> None: - """Test conversion with empty words tuple.""" - asr_result = dto.AsrResult(text="", start=0.0, end=0.0, words=()) - - words = AsrConverter.result_to_domain_words(asr_result) - - assert words == [] - - -class TestOrmConverterToOrmKwargs: - """Tests for OrmConverter.word_timing_to_orm_kwargs.""" - - def test_converts_to_dict(self) -> None: - """Test domain to ORM kwargs conversion.""" - word = entities.WordTiming( - word="test", - start_time=1.5, - end_time=2.0, - probability=0.9, - ) - - result = OrmConverter.word_timing_to_orm_kwargs(word) - - assert result == { - "word": "test", - "start_time": 1.5, - "end_time": 2.0, - "probability": 0.9, - } - - def test_preserves_precision(self) -> None: - """Test floating point precision in kwargs.""" - word = entities.WordTiming( - word="precise", - start_time=0.123456789, - end_time=0.987654321, - probability=0.111111, - ) - - result = OrmConverter.word_timing_to_orm_kwargs(word) - - assert result["start_time"] == 
0.123456789 - assert result["end_time"] == 0.987654321 - assert result["probability"] == 0.111111 -```` - -## File: tests/integration/__init__.py -````python -"""Integration tests using testcontainers.""" -```` - -## File: tests/integration/test_unit_of_work.py -````python -"""Integration tests for SqlAlchemyUnitOfWork.""" - -from __future__ import annotations - -from datetime import UTC, datetime -from typing import TYPE_CHECKING - -import pytest - -from noteflow.domain.entities import Meeting, Segment, Summary -from noteflow.domain.value_objects import MeetingState -from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork - -if TYPE_CHECKING: - from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker - - -@pytest.mark.integration -class TestUnitOfWork: - """Integration tests for SqlAlchemyUnitOfWork.""" - - async def test_uow_context_manager( - self, session_factory: async_sessionmaker[AsyncSession] - ) -> None: - """Test UoW works as async context manager.""" - async with SqlAlchemyUnitOfWork(session_factory) as uow: - assert uow.meetings is not None - assert uow.segments is not None - assert uow.summaries is not None - - async def test_uow_commit(self, session_factory: async_sessionmaker[AsyncSession]) -> None: - """Test UoW commit persists changes.""" - meeting = Meeting.create(title="Commit Test") - - async with SqlAlchemyUnitOfWork(session_factory) as uow: - await uow.meetings.create(meeting) - await uow.commit() - - # Verify in new UoW - async with SqlAlchemyUnitOfWork(session_factory) as uow: - retrieved = await uow.meetings.get(meeting.id) - assert retrieved is not None - assert retrieved.title == "Commit Test" - - async def test_uow_rollback(self, session_factory: async_sessionmaker[AsyncSession]) -> None: - """Test UoW rollback discards changes.""" - meeting = Meeting.create(title="Rollback Test") - - async with SqlAlchemyUnitOfWork(session_factory) as uow: - await uow.meetings.create(meeting) - await uow.rollback() - - # Verify not persisted - async with SqlAlchemyUnitOfWork(session_factory) as uow: - retrieved = await uow.meetings.get(meeting.id) - assert retrieved is None - - async def test_uow_auto_rollback_on_exception( - self, session_factory: async_sessionmaker[AsyncSession] - ) -> None: - """Test UoW auto-rollbacks on exception.""" - meeting = Meeting.create(title="Exception Test") - - with pytest.raises(ValueError, match="Test exception"): - async with SqlAlchemyUnitOfWork(session_factory) as uow: - await uow.meetings.create(meeting) - raise ValueError("Test exception") - - # Verify not persisted - async with SqlAlchemyUnitOfWork(session_factory) as uow: - retrieved = await uow.meetings.get(meeting.id) - assert retrieved is None - - async def test_uow_transactional_consistency( - self, session_factory: async_sessionmaker[AsyncSession] - ) -> None: - """Test UoW provides transactional consistency across repos.""" - meeting = Meeting.create(title="Transactional Test") - segment = Segment( - segment_id=0, - text="Hello", - start_time=0.0, - end_time=1.0, - meeting_id=meeting.id, - ) - summary = Summary( - meeting_id=meeting.id, - executive_summary="Test summary", - generated_at=datetime.now(UTC), - ) - - # Create meeting, segment, and summary in same transaction - async with SqlAlchemyUnitOfWork(session_factory) as uow: - await uow.meetings.create(meeting) - await uow.segments.add(meeting.id, segment) - await uow.summaries.save(summary) - await uow.commit() - - # Verify all persisted - async with SqlAlchemyUnitOfWork(session_factory) as 
uow: - m = await uow.meetings.get(meeting.id) - segs = await uow.segments.get_by_meeting(meeting.id) - s = await uow.summaries.get_by_meeting(meeting.id) - - assert m is not None - assert len(segs) == 1 - assert s is not None - - async def test_uow_repository_caching( - self, session_factory: async_sessionmaker[AsyncSession] - ) -> None: - """Test UoW caches repository instances.""" - async with SqlAlchemyUnitOfWork(session_factory) as uow: - meetings1 = uow.meetings - meetings2 = uow.meetings - assert meetings1 is meetings2 - - segments1 = uow.segments - segments2 = uow.segments - assert segments1 is segments2 - - async def test_uow_multiple_operations( - self, session_factory: async_sessionmaker[AsyncSession] - ) -> None: - """Test UoW handles multiple operations in sequence.""" - meeting = Meeting.create(title="Multi-op Test") - - async with SqlAlchemyUnitOfWork(session_factory) as uow: - # Create - await uow.meetings.create(meeting) - await uow.commit() - - # Update - meeting.start_recording() - await uow.meetings.update(meeting) - await uow.commit() - - # Add segment - segment = Segment(segment_id=0, text="Test", start_time=0.0, end_time=1.0) - await uow.segments.add(meeting.id, segment) - await uow.commit() - - # Verify final state - async with SqlAlchemyUnitOfWork(session_factory) as uow: - m = await uow.meetings.get(meeting.id) - segs = await uow.segments.get_by_meeting(meeting.id) - - assert m is not None - assert m.state == MeetingState.RECORDING - assert len(segs) == 1 -```` - -## File: tests/__init__.py -````python -"""NoteFlow test suite.""" -```` - -## File: tests/conftest.py -````python -"""Global test fixtures to mock optional extra dependencies. - -These stubs allow running the suite without installing heavy/optional packages -like openai/anthropic/ollama/pywinctl, while individual tests can still -override with more specific monkeypatches when needed. 
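The commit/rollback tests above imply the usual explicit-commit discipline for this UoW: nothing persists unless `commit()` is called before the context exits, and exceptions roll back automatically. A typical call site might look like this sketch; the `title` mutation is illustrative, not an API documented by the patch:

````python
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork


async def rename_meeting(session_factory, meeting_id, title: str) -> None:
    async with SqlAlchemyUnitOfWork(session_factory) as uow:
        meeting = await uow.meetings.get(meeting_id)
        if meeting is None:
            return
        meeting.title = title  # hypothetical direct mutation, for illustration
        await uow.meetings.update(meeting)
        await uow.commit()  # without this, exit discards the change
````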
-""" - -from __future__ import annotations - -import sys -import types -from types import SimpleNamespace - -import pytest - - -@pytest.fixture(autouse=True, scope="session") -def mock_optional_extras() -> None: - """Install lightweight stubs for optional extra deps if absent.""" - - if "openai" not in sys.modules: - try: - import openai as _openai # noqa: F401 - except ImportError: - - def _default_create(**_: object) -> SimpleNamespace: - return SimpleNamespace( - choices=[SimpleNamespace(message=SimpleNamespace(content="{}"))], - usage=SimpleNamespace(total_tokens=0), - ) - - openai_module = types.ModuleType("openai") - openai_module.OpenAI = lambda **kwargs: SimpleNamespace( - chat=SimpleNamespace(completions=SimpleNamespace(create=_default_create)) - ) - sys.modules["openai"] = openai_module - - if "anthropic" not in sys.modules: - try: - import anthropic as _anthropic # noqa: F401 - except ImportError: - - def _default_messages_create(**_: object) -> SimpleNamespace: - return SimpleNamespace( - content=[SimpleNamespace(text="{}")], - usage=SimpleNamespace(input_tokens=0, output_tokens=0), - ) - - anthropic_module = types.ModuleType("anthropic") - anthropic_module.Anthropic = lambda **kwargs: SimpleNamespace( - messages=SimpleNamespace(create=_default_messages_create) - ) - sys.modules["anthropic"] = anthropic_module - - if "ollama" not in sys.modules: - try: - import ollama as _ollama # noqa: F401 - except ImportError: - - def _default_chat(**_: object) -> dict[str, object]: - return { - "message": { - "content": '{"executive_summary": "", "key_points": [], "action_items": []}' - }, - "eval_count": 0, - "prompt_eval_count": 0, - } - - ollama_module = types.ModuleType("ollama") - ollama_module.Client = lambda **kwargs: SimpleNamespace( - list=lambda: {}, chat=_default_chat - ) - sys.modules["ollama"] = ollama_module - - # pywinctl depends on pymonctl, which may fail in headless environments - # Mock both if not already present - if "pymonctl" not in sys.modules: - try: - import pymonctl as _pymonctl # noqa: F401 - except Exception: - # Mock pymonctl for headless environments (Xlib.error.DisplayNameError, etc.) - pymonctl_module = types.ModuleType("pymonctl") - pymonctl_module.getAllMonitors = lambda: [] - sys.modules["pymonctl"] = pymonctl_module - - if "pywinctl" not in sys.modules: - try: - import pywinctl as _pywinctl # noqa: F401 - except Exception: - # ImportError: package not installed - # OSError/Xlib errors: pywinctl may fail in headless environments - pywinctl_module = types.ModuleType("pywinctl") - pywinctl_module.getActiveWindow = lambda: None - pywinctl_module.getAllWindows = lambda: [] - pywinctl_module.getAllTitles = lambda: [] - sys.modules["pywinctl"] = pywinctl_module +from noteflow.grpc.proto.noteflow_pb2 import * ```` ## File: src/noteflow/application/services/meeting_service.py ````python -"""Meeting application service. - -Orchestrates meeting-related use cases with persistence. 
-""" - from __future__ import annotations - import logging import shutil from collections.abc import Sequence from datetime import UTC, datetime from pathlib import Path from typing import TYPE_CHECKING - from noteflow.domain.entities import ( ActionItem, Annotation, @@ -11532,64 +4026,27 @@ from noteflow.domain.entities import ( WordTiming, ) from noteflow.domain.value_objects import AnnotationId, AnnotationType - if TYPE_CHECKING: from collections.abc import Sequence as SequenceType - from noteflow.domain.ports.unit_of_work import UnitOfWork from noteflow.domain.value_objects import MeetingId, MeetingState - logger = logging.getLogger(__name__) - - class MeetingService: - """Application service for meeting operations. - - Provides use cases for managing meetings, segments, and summaries. - All methods are async and expect a UnitOfWork to be provided. - """ - def __init__(self, uow: UnitOfWork) -> None: - """Initialize the meeting service. - - Args: - uow: Unit of work for persistence. - """ self._uow = uow - async def create_meeting( self, title: str, metadata: dict[str, str] | None = None, ) -> Meeting: - """Create a new meeting. - - Args: - title: Meeting title. - metadata: Optional metadata. - - Returns: - Created meeting. - """ meeting = Meeting.create(title=title, metadata=metadata or {}) - async with self._uow: saved = await self._uow.meetings.create(meeting) await self._uow.commit() return saved - async def get_meeting(self, meeting_id: MeetingId) -> Meeting | None: - """Get a meeting by ID. - - Args: - meeting_id: Meeting identifier. - - Returns: - Meeting if found, None otherwise. - """ async with self._uow: return await self._uow.meetings.get(meeting_id) - async def list_meetings( self, states: list[MeetingState] | None = None, @@ -11597,17 +4054,6 @@ class MeetingService: offset: int = 0, sort_desc: bool = True, ) -> tuple[Sequence[Meeting], int]: - """List meetings with optional filtering. - - Args: - states: Optional list of states to filter by. - limit: Maximum number of meetings to return. - offset: Number of meetings to skip. - sort_desc: Sort by created_at descending if True. - - Returns: - Tuple of (meetings list, total count). - """ async with self._uow: return await self._uow.meetings.list_all( states=states, @@ -11615,92 +4061,43 @@ class MeetingService: offset=offset, sort_desc=sort_desc, ) - async def start_recording(self, meeting_id: MeetingId) -> Meeting | None: - """Start recording a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - Updated meeting, or None if not found. - """ async with self._uow: meeting = await self._uow.meetings.get(meeting_id) if meeting is None: return None - meeting.start_recording() await self._uow.meetings.update(meeting) await self._uow.commit() return meeting - async def stop_meeting(self, meeting_id: MeetingId) -> Meeting | None: - """Stop a meeting through graceful STOPPING state. - - Transitions: RECORDING -> STOPPING -> STOPPED - - Args: - meeting_id: Meeting identifier. - - Returns: - Updated meeting, or None if not found. - """ async with self._uow: meeting = await self._uow.meetings.get(meeting_id) if meeting is None: return None - - # Graceful shutdown: RECORDING -> STOPPING -> STOPPED meeting.begin_stopping() meeting.stop_recording() await self._uow.meetings.update(meeting) await self._uow.commit() return meeting - async def complete_meeting(self, meeting_id: MeetingId) -> Meeting | None: - """Mark a meeting as completed. - - Args: - meeting_id: Meeting identifier. 
- - Returns: - Updated meeting, or None if not found. - """ async with self._uow: meeting = await self._uow.meetings.get(meeting_id) if meeting is None: return None - meeting.complete() await self._uow.meetings.update(meeting) await self._uow.commit() return meeting - async def delete_meeting( self, meeting_id: MeetingId, meetings_dir: Path | None = None, ) -> bool: - """Delete meeting with complete cleanup. - - Removes: - 1. Filesystem assets (audio, manifest) if meetings_dir provided - 2. Database records (cascade deletes children) - - Args: - meeting_id: Meeting identifier. - meetings_dir: Base directory for meeting assets. - - Returns: - True if deleted, False if not found. - """ async with self._uow: meeting = await self._uow.meetings.get(meeting_id) if meeting is None: return False - - # Delete filesystem assets first (if directory provided) if meetings_dir is not None: meeting_dir = meetings_dir / str(meeting_id) if meeting_dir.exists(): @@ -11709,15 +4106,11 @@ class MeetingService: "Deleted meeting assets at %s", meeting_dir, ) - - # Delete DB record (cascade handles children) success = await self._uow.meetings.delete(meeting_id) if success: await self._uow.commit() logger.info("Deleted meeting %s", meeting_id) - return success - async def add_segment( self, meeting_id: MeetingId, @@ -11731,23 +4124,6 @@ class MeetingService: avg_logprob: float = 0.0, no_speech_prob: float = 0.0, ) -> Segment: - """Add a transcript segment to a meeting. - - Args: - meeting_id: Meeting identifier. - segment_id: Segment sequence number. - text: Transcript text. - start_time: Start time in seconds. - end_time: End time in seconds. - words: Optional word-level timing. - language: Detected language code. - language_confidence: Language detection confidence. - avg_logprob: Average log probability. - no_speech_prob: No-speech probability. - - Returns: - Added segment. - """ segment = Segment( segment_id=segment_id, text=text, @@ -11760,74 +4136,41 @@ class MeetingService: avg_logprob=avg_logprob, no_speech_prob=no_speech_prob, ) - async with self._uow: saved = await self._uow.segments.add(meeting_id, segment) await self._uow.commit() return saved - async def add_segments_batch( self, meeting_id: MeetingId, segments: Sequence[Segment], ) -> Sequence[Segment]: - """Add multiple segments in batch. - - Args: - meeting_id: Meeting identifier. - segments: Segments to add. - - Returns: - Added segments. - """ async with self._uow: saved = await self._uow.segments.add_batch(meeting_id, segments) await self._uow.commit() return saved - async def get_segments( self, meeting_id: MeetingId, include_words: bool = True, ) -> Sequence[Segment]: - """Get all segments for a meeting. - - Args: - meeting_id: Meeting identifier. - include_words: Include word-level timing. - - Returns: - List of segments ordered by segment_id. - """ async with self._uow: return await self._uow.segments.get_by_meeting( meeting_id, include_words=include_words, ) - async def search_segments( self, query_embedding: list[float], limit: int = 10, meeting_id: MeetingId | None = None, ) -> Sequence[tuple[Segment, float]]: - """Search segments by semantic similarity. - - Args: - query_embedding: Query embedding vector. - limit: Maximum number of results. - meeting_id: Optional meeting to restrict search to. - - Returns: - List of (segment, similarity_score) tuples. 
- """ async with self._uow: return await self._uow.segments.search_semantic( query_embedding=query_embedding, limit=limit, meeting_id=meeting_id, ) - async def save_summary( self, meeting_id: MeetingId, @@ -11836,18 +4179,6 @@ class MeetingService: action_items: list[ActionItem] | None = None, model_version: str = "", ) -> Summary: - """Save or update a meeting summary. - - Args: - meeting_id: Meeting identifier. - executive_summary: Executive summary text. - key_points: List of key points. - action_items: List of action items. - model_version: Model version that generated the summary. - - Returns: - Saved summary. - """ summary = Summary( meeting_id=meeting_id, executive_summary=executive_summary, @@ -11856,26 +4187,13 @@ class MeetingService: generated_at=datetime.now(UTC), model_version=model_version, ) - async with self._uow: saved = await self._uow.summaries.save(summary) await self._uow.commit() return saved - async def get_summary(self, meeting_id: MeetingId) -> Summary | None: - """Get summary for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - Summary if exists, None otherwise. - """ async with self._uow: return await self._uow.summaries.get_by_meeting(meeting_id) - - # Annotation methods - async def add_annotation( self, meeting_id: MeetingId, @@ -11885,21 +4203,7 @@ class MeetingService: end_time: float, segment_ids: list[int] | None = None, ) -> Annotation: - """Add an annotation to a meeting. - - Args: - meeting_id: Meeting identifier. - annotation_type: Type of annotation. - text: Annotation text. - start_time: Start time in seconds. - end_time: End time in seconds. - segment_ids: Optional list of linked segment IDs. - - Returns: - Added annotation. - """ from uuid import uuid4 - annotation = Annotation( id=AnnotationId(uuid4()), meeting_id=meeting_id, @@ -11909,84 +4213,33 @@ class MeetingService: end_time=end_time, segment_ids=segment_ids or [], ) - async with self._uow: saved = await self._uow.annotations.add(annotation) await self._uow.commit() return saved - async def get_annotation(self, annotation_id: AnnotationId) -> Annotation | None: - """Get an annotation by ID. - - Args: - annotation_id: Annotation identifier. - - Returns: - Annotation if found, None otherwise. - """ async with self._uow: return await self._uow.annotations.get(annotation_id) - async def get_annotations( self, meeting_id: MeetingId, ) -> SequenceType[Annotation]: - """Get all annotations for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - List of annotations ordered by start_time. - """ async with self._uow: return await self._uow.annotations.get_by_meeting(meeting_id) - async def get_annotations_in_range( self, meeting_id: MeetingId, start_time: float, end_time: float, ) -> SequenceType[Annotation]: - """Get annotations within a time range. - - Args: - meeting_id: Meeting identifier. - start_time: Start of time range in seconds. - end_time: End of time range in seconds. - - Returns: - List of annotations overlapping the time range. - """ async with self._uow: return await self._uow.annotations.get_by_time_range(meeting_id, start_time, end_time) - async def update_annotation(self, annotation: Annotation) -> Annotation: - """Update an existing annotation. - - Args: - annotation: Annotation with updated fields. - - Returns: - Updated annotation. - - Raises: - ValueError: If annotation does not exist. 
- """ async with self._uow: updated = await self._uow.annotations.update(annotation) await self._uow.commit() return updated - async def delete_annotation(self, annotation_id: AnnotationId) -> bool: - """Delete an annotation. - - Args: - annotation_id: Annotation identifier. - - Returns: - True if deleted, False if not found. - """ async with self._uow: success = await self._uow.annotations.delete(annotation_id) if success: @@ -11996,46 +4249,23 @@ class MeetingService: ## File: src/noteflow/application/services/retention_service.py ````python -"""Service for automatic meeting retention and cleanup.""" - from __future__ import annotations - import logging from collections.abc import Callable from dataclasses import dataclass from datetime import UTC, datetime, timedelta from pathlib import Path from typing import TYPE_CHECKING - if TYPE_CHECKING: from noteflow.domain.entities import Meeting from noteflow.domain.ports.unit_of_work import UnitOfWork - logger = logging.getLogger(__name__) - - @dataclass(frozen=True) class RetentionReport: - """Result of retention cleanup run. - - Attributes: - meetings_checked: Number of meetings that matched cutoff criteria. - meetings_deleted: Number of meetings successfully deleted. - errors: List of error messages for failed deletions. - """ - meetings_checked: int meetings_deleted: int errors: tuple[str, ...] - - class RetentionService: - """Manage automatic deletion of expired meetings. - - Find and delete meetings that have been completed longer than - the configured retention period. - """ - def __init__( self, uow_factory: Callable[[], UnitOfWork], @@ -12043,53 +4273,24 @@ class RetentionService: meetings_dir: Path | None = None, enabled: bool = False, ) -> None: - """Initialize retention service. - - Args: - uow_factory: Factory that returns a fresh UnitOfWork instance per call. - retention_days: Days to retain completed meetings. - meetings_dir: Base directory for meeting assets. - enabled: Whether retention is enabled. - """ self._uow_factory = uow_factory self._retention_days = retention_days self._meetings_dir = meetings_dir self._enabled = enabled - @property def is_enabled(self) -> bool: - """Check if retention is enabled.""" return self._enabled - @property def retention_days(self) -> int: - """Get configured retention days.""" return self._retention_days - @property def cutoff_date(self) -> datetime: - """Calculate cutoff date for retention.""" return datetime.now(UTC) - timedelta(days=self._retention_days) - async def find_expired_meetings(self) -> list[Meeting]: - """Find meetings older than retention period. - - Returns: - List of meetings eligible for deletion. - """ uow = self._uow_factory() async with uow: return list(await uow.meetings.find_older_than(self.cutoff_date)) - async def run_cleanup(self, dry_run: bool = False) -> RetentionReport: - """Execute retention cleanup. - - Args: - dry_run: If True, report but don't delete. - - Returns: - Report of cleanup results. 
- """ if not self._enabled and not dry_run: logger.info("Retention disabled, skipping cleanup") return RetentionReport( @@ -12097,18 +4298,15 @@ class RetentionService: meetings_deleted=0, errors=(), ) - cutoff = self.cutoff_date logger.info( "Running retention cleanup (dry_run=%s, cutoff=%s)", dry_run, cutoff.isoformat(), ) - expired = await self.find_expired_meetings() deleted = 0 errors: list[str] = [] - for meeting in expired: if dry_run: logger.info( @@ -12117,12 +4315,8 @@ class RetentionService: meeting.ended_at, ) continue - try: - # Import here to avoid circular imports from noteflow.application.services import MeetingService - - # Use a fresh UnitOfWork instance for each deletion meeting_svc = MeetingService(self._uow_factory()) success = await meeting_svc.delete_meeting( meeting.id, @@ -12138,14 +4332,12 @@ class RetentionService: error_msg = f"{meeting.id}: {e}" errors.append(error_msg) logger.warning("Failed to delete meeting %s: %s", meeting.id, e) - logger.info( "Retention cleanup complete: checked=%d, deleted=%d, errors=%d", len(expired), deleted, len(errors), ) - return RetentionReport( meetings_checked=len(expired), meetings_deleted=deleted, @@ -12155,60 +4347,30 @@ class RetentionService: ## File: src/noteflow/application/services/summarization_service.py ````python -"""Summarization orchestration service. - -Coordinate provider selection, consent handling, and citation verification. -""" - from __future__ import annotations - import logging from dataclasses import dataclass, field from enum import Enum from typing import TYPE_CHECKING - from noteflow.domain.summarization import ( CitationVerificationResult, ProviderUnavailableError, SummarizationRequest, SummarizationResult, ) - if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Sequence - from noteflow.domain.entities import Segment, Summary from noteflow.domain.summarization import CitationVerifier, SummarizerProvider from noteflow.domain.value_objects import MeetingId - - # Type alias for persistence callback PersistCallback = Callable[[Summary], Awaitable[None]] - logger = logging.getLogger(__name__) - - class SummarizationMode(Enum): - """Available summarization modes.""" - MOCK = "mock" - LOCAL = "local" # Ollama - CLOUD = "cloud" # OpenAI/Anthropic - - + LOCAL = "local" + CLOUD = "cloud" @dataclass class SummarizationServiceSettings: - """Configuration for summarization service. - - Attributes: - default_mode: Default summarization mode. - cloud_consent_granted: Whether user has consented to cloud processing. - fallback_to_local: Fall back to local if cloud unavailable. - verify_citations: Whether to verify citations after summarization. - filter_invalid_citations: Remove invalid citations from result. - max_key_points: Default maximum key points. - max_action_items: Default maximum action items. - """ - default_mode: SummarizationMode = SummarizationMode.LOCAL cloud_consent_granted: bool = False fallback_to_local: bool = True @@ -12216,74 +4378,31 @@ class SummarizationServiceSettings: filter_invalid_citations: bool = True max_key_points: int = 5 max_action_items: int = 10 - - @dataclass class SummarizationServiceResult: - """Result from summarization service. - - Attributes: - result: The raw summarization result from the provider. - verification: Citation verification result (if verification enabled). - filtered_summary: Summary with invalid citations removed (if filtering enabled). - provider_used: Which provider was actually used. - fallback_used: Whether a fallback provider was used. 
- """ - result: SummarizationResult verification: CitationVerificationResult | None = None filtered_summary: Summary | None = None provider_used: str = "" fallback_used: bool = False - @property def summary(self) -> Summary: - """Get the best available summary (filtered if available).""" return self.filtered_summary or self.result.summary - @property def has_invalid_citations(self) -> bool: - """Check if summary has invalid citations.""" return self.verification is not None and not self.verification.is_valid - - @dataclass class SummarizationService: - """Orchestrate summarization with provider selection and citation verification. - - Manages provider selection based on mode and availability, handles - cloud consent requirements, and verifies/filters citation integrity. - """ - providers: dict[SummarizationMode, SummarizerProvider] = field(default_factory=dict) verifier: CitationVerifier | None = None settings: SummarizationServiceSettings = field(default_factory=SummarizationServiceSettings) on_persist: PersistCallback | None = None - def register_provider(self, mode: SummarizationMode, provider: SummarizerProvider) -> None: - """Register a provider for a specific mode. - - Args: - mode: The mode this provider handles. - provider: The provider implementation. - """ self.providers[mode] = provider logger.debug("Registered %s provider: %s", mode.value, provider.provider_name) - def set_verifier(self, verifier: CitationVerifier) -> None: - """Set the citation verifier. - - Args: - verifier: Citation verifier implementation. - """ self.verifier = verifier - def get_available_modes(self) -> list[SummarizationMode]: - """Get list of currently available summarization modes. - - Returns: - List of available modes based on registered providers. - """ available = [] for mode, provider in self.providers.items(): if mode == SummarizationMode.CLOUD: @@ -12292,28 +4411,14 @@ class SummarizationService: elif provider.is_available: available.append(mode) return available - def is_mode_available(self, mode: SummarizationMode) -> bool: - """Check if a specific mode is available. - - Args: - mode: The mode to check. - - Returns: - True if mode is available. - """ return mode in self.get_available_modes() - def grant_cloud_consent(self) -> None: - """Grant consent for cloud processing.""" self.settings.cloud_consent_granted = True logger.info("Cloud consent granted") - def revoke_cloud_consent(self) -> None: - """Revoke consent for cloud processing.""" self.settings.cloud_consent_granted = False logger.info("Cloud consent revoked") - async def summarize( self, meeting_id: MeetingId, @@ -12322,26 +4427,8 @@ class SummarizationService: max_key_points: int | None = None, max_action_items: int | None = None, ) -> SummarizationServiceResult: - """Generate evidence-linked summary for meeting transcript. - - Args: - meeting_id: The meeting ID. - segments: Transcript segments to summarize. - mode: Override default mode (None uses settings default). - max_key_points: Override default max key points. - max_action_items: Override default max action items. - - Returns: - SummarizationServiceResult with summary and verification. - - Raises: - SummarizationError: If summarization fails and no fallback available. - ProviderUnavailableError: If no provider is available for the mode. 
- """ target_mode = mode or self.settings.default_mode fallback_used = False - - # Get provider, potentially with fallback provider, actual_mode = self._get_provider_with_fallback(target_mode) if actual_mode != target_mode: fallback_used = True @@ -12350,106 +4437,58 @@ class SummarizationService: target_mode.value, actual_mode.value, ) - - # Build request request = SummarizationRequest( meeting_id=meeting_id, segments=segments, max_key_points=max_key_points or self.settings.max_key_points, max_action_items=max_action_items or self.settings.max_action_items, ) - - # Execute summarization logger.info( "Summarizing %d segments with %s provider", len(segments), provider.provider_name, ) result = await provider.summarize(request) - - # Build service result service_result = SummarizationServiceResult( result=result, provider_used=provider.provider_name, fallback_used=fallback_used, ) - - # Verify citations if enabled if self.settings.verify_citations and self.verifier is not None: verification = self.verifier.verify_citations(result.summary, list(segments)) service_result.verification = verification - if not verification.is_valid: logger.warning( "Summary has %d invalid citations", verification.invalid_count, ) - - # Filter if enabled if self.settings.filter_invalid_citations: service_result.filtered_summary = self._filter_citations( result.summary, list(segments) ) - - # Persist summary if callback provided if self.on_persist is not None: await self.on_persist(service_result.summary) logger.debug("Summary persisted for meeting %s", meeting_id) - return service_result - def _get_provider_with_fallback( self, mode: SummarizationMode ) -> tuple[SummarizerProvider, SummarizationMode]: - """Get provider for mode, with fallback if unavailable. - - Args: - mode: Requested mode. - - Returns: - Tuple of (provider, actual_mode). - - Raises: - ProviderUnavailableError: If no provider available. - """ - # Check requested mode if mode in self.providers: provider = self.providers[mode] - - # Check cloud consent if mode == SummarizationMode.CLOUD and not self.settings.cloud_consent_granted: logger.warning("Cloud mode requested but consent not granted") if self.settings.fallback_to_local: return self._get_fallback_provider(mode) raise ProviderUnavailableError("Cloud consent not granted") - if provider.is_available: return provider, mode - - # Provider exists but unavailable if self.settings.fallback_to_local and mode != SummarizationMode.MOCK: return self._get_fallback_provider(mode) - raise ProviderUnavailableError(f"No provider available for mode: {mode.value}") - def _get_fallback_provider( self, original_mode: SummarizationMode ) -> tuple[SummarizerProvider, SummarizationMode]: - """Get fallback provider when primary unavailable. - - Fallback order: LOCAL -> MOCK - - Args: - original_mode: The mode that was unavailable. - - Returns: - Tuple of (provider, mode). - - Raises: - ProviderUnavailableError: If no fallback available. - """ fallback_order = [SummarizationMode.LOCAL, SummarizationMode.MOCK] - for fallback_mode in fallback_order: if fallback_mode == original_mode: continue @@ -12457,90 +4496,149 @@ class SummarizationService: provider = self.providers[fallback_mode] if provider.is_available: return provider, fallback_mode - raise ProviderUnavailableError("No fallback provider available") - def _filter_citations(self, summary: Summary, segments: list[Segment]) -> Summary: - """Filter invalid citations from summary. - - Args: - summary: Summary to filter. - segments: Available segments. 
+## File: src/noteflow/application/services/trigger_service.py
+````python
+from __future__ import annotations
+import logging
+import time
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from noteflow.domain.triggers.entities import TriggerAction, TriggerDecision, TriggerSignal
+if TYPE_CHECKING:
+    from noteflow.domain.triggers.ports import SignalProvider
+logger = logging.getLogger(__name__)
+@dataclass
+class TriggerServiceSettings:
+    enabled: bool
+    auto_start_enabled: bool
+    rate_limit_seconds: int
+    snooze_seconds: int
+    threshold_ignore: float
+    threshold_auto_start: float
+    def __post_init__(self) -> None:
+        if self.threshold_auto_start < self.threshold_ignore:
+            msg = "threshold_auto_start must be >= threshold_ignore"
+            raise ValueError(msg)
+class TriggerService:
+    def __init__(
+        self,
+        providers: list[SignalProvider],
+        settings: TriggerServiceSettings,
+    ) -> None:
+        self._providers = providers
+        self._settings = settings
+        self._last_prompt: float | None = None
+        self._snoozed_until: float | None = None
+    @property
+    def is_enabled(self) -> bool:
+        return self._settings.enabled
+    @property
+    def is_snoozed(self) -> bool:
+        if self._snoozed_until is None:
+            return False
+        return time.monotonic() < self._snoozed_until
+    @property
+    def snooze_remaining_seconds(self) -> float:
+        if self._snoozed_until is None:
+            return 0.0
+        remaining = self._snoozed_until - time.monotonic()
+        return max(0.0, remaining)
+    def evaluate(self) -> TriggerDecision:
+        now = time.monotonic()
+        if not self._settings.enabled:
+            return self._make_decision(TriggerAction.IGNORE, 0.0, ())
+        if self._snoozed_until is not None and now < self._snoozed_until:
+            return self._make_decision(TriggerAction.IGNORE, 0.0, ())
+        signals = []
+        for provider in self._providers:
+            if not provider.is_enabled():
+                continue
+            if signal := provider.get_signal():
+                signals.append(signal)
+        confidence = sum(s.weight for s in signals)
+        action = self._determine_action(confidence, now)
+        if action in (TriggerAction.NOTIFY, TriggerAction.AUTO_START):
+            self._last_prompt = now
+            logger.info(
+                "Trigger %s: confidence=%.2f, signals=%d",
+                action.value,
+                confidence,
+                len(signals),
+            )
+        return self._make_decision(action, confidence, tuple(signals))
+    def _determine_action(self, confidence: float, now: float) -> TriggerAction:
+        if confidence < self._settings.threshold_ignore:
+            return TriggerAction.IGNORE
+        if confidence >= self._settings.threshold_auto_start and self._settings.auto_start_enabled:
+            return TriggerAction.AUTO_START
+        if self._last_prompt is not None:
+            elapsed = now - self._last_prompt
+            if elapsed < self._settings.rate_limit_seconds:
+                return TriggerAction.IGNORE
+        return TriggerAction.NOTIFY
+    def _make_decision(
+        self,
+        action: TriggerAction,
+        confidence: float,
+        signals: tuple[TriggerSignal, ...],
+    ) -> TriggerDecision:
+        return TriggerDecision(
+            action=action,
+            confidence=confidence,
+            signals=signals,
+        )
+    def snooze(self, seconds: int | None = None) -> None:
+        duration = seconds if seconds is not None else self._settings.snooze_seconds
+        self._snoozed_until = time.monotonic() + duration
+        logger.info("Triggers snoozed for %d seconds", duration)
+    def clear_snooze(self) -> None:
+        if self._snoozed_until is not None:
+            self._snoozed_until = None
+            logger.info("Trigger snooze cleared")
+    def set_enabled(self, enabled: bool) -> None:
+        self._settings.enabled = enabled
+        logger.info("Triggers %s", "enabled" if enabled else "disabled")
+    def set_auto_start(self, enabled: bool) -> None:
+        self._settings.auto_start_enabled = enabled
+        logger.info("Auto-start %s", "enabled" if enabled else "disabled")
+````
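
The decision logic is: summed signal weight below `threshold_ignore` is ignored, at or above `threshold_auto_start` (with auto-start enabled) it auto-starts, and anything in between notifies subject to the rate limit. A sketch of driving it, with no providers registered (provider wiring is assumed to happen elsewhere):

````python
from noteflow.application.services.trigger_service import (
    TriggerService,
    TriggerServiceSettings,
)
from noteflow.domain.triggers.entities import TriggerAction

settings = TriggerServiceSettings(
    enabled=True,
    auto_start_enabled=False,
    rate_limit_seconds=300,
    snooze_seconds=600,
    threshold_ignore=0.3,
    threshold_auto_start=0.8,  # __post_init__ enforces >= threshold_ignore
)
service = TriggerService(providers=[], settings=settings)

decision = service.evaluate()  # IGNORE here: no providers, confidence 0.0
if decision.action is TriggerAction.NOTIFY:
    service.snooze()  # evaluate() then returns IGNORE until the snooze expires
````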
 
 ## File: src/noteflow/cli/retention.py
 ````python
-"""CLI command for retention cleanup.
-
-Usage:
-    python -m noteflow.cli.retention cleanup [--dry-run]
-    python -m noteflow.cli.retention status
-"""
-
 from __future__ import annotations
-
 import argparse
 import asyncio
 import logging
 import sys
-
 from noteflow.application.services import RetentionService
 from noteflow.config.settings import get_settings
 from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork
-
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
 )
 logger = logging.getLogger(__name__)
-
-
 async def _run_cleanup(dry_run: bool) -> int:
-    """Execute retention cleanup.
-
-    Args:
-        dry_run: If True, report but don't delete.
-
-    Returns:
-        Exit code (0 for success, 1 for errors).
-    """
     settings = get_settings()
-
     if not settings.retention_enabled and not dry_run:
         logger.warning(
             "Retention is disabled. Set NOTEFLOW_RETENTION_ENABLED=true or use --dry-run"
         )
         return 1
-
     uow_factory = SqlAlchemyUnitOfWork.factory_from_settings(settings)
     service = RetentionService(
         uow_factory=uow_factory,
@@ -12548,37 +4646,24 @@ async def _run_cleanup(dry_run: bool) -> int:
         meetings_dir=settings.meetings_dir,
         enabled=settings.retention_enabled,
     )
-
     logger.info(
         "Running retention cleanup (dry_run=%s, retention_days=%d, cutoff=%s)",
         dry_run,
         service.retention_days,
         service.cutoff_date.isoformat(),
     )
-
     report = await service.run_cleanup(dry_run=dry_run)
-
     print("\nRetention Cleanup Report:")
     print(f"  Meetings checked: {report.meetings_checked}")
     print(f"  Meetings deleted: {report.meetings_deleted}")
-
     if report.errors:
         print(f"  Errors: {len(report.errors)}")
         for err in report.errors:
             print(f"    - {err}")
         return 1
-
     return 0
-
-
 async def _show_status() -> int:
-    """Show retention status and pending deletions.
-
-    Returns:
-        Exit code (always 0).
-    """
     settings = get_settings()
-
     uow_factory = SqlAlchemyUnitOfWork.factory_from_settings(settings)
     service = RetentionService(
         uow_factory=uow_factory,
@@ -12586,52 +4671,37 @@ async def _show_status() -> int:
         meetings_dir=settings.meetings_dir,
         enabled=settings.retention_enabled,
     )
-
     expired = await service.find_expired_meetings()
-
     print("\nRetention Status:")
     print(f"  Enabled: {settings.retention_enabled}")
     print(f"  Retention days: {settings.retention_days}")
     print(f"  Check interval: {settings.retention_check_interval_hours} hours")
     print(f"  Cutoff date: {service.cutoff_date.isoformat()}")
     print(f"  Meetings pending deletion: {len(expired)}")
-
     if expired:
         print("\n  Pending deletions:")
-        for meeting in expired[:10]:  # Show first 10
+        for meeting in expired[:10]:
             print(f"    - {meeting.id}: {meeting.title} (ended: {meeting.ended_at})")
         if len(expired) > 10:
             print(f"    ... and {len(expired) - 10} more")
-
     return 0
-
-
 def main() -> None:
-    """Entry point for retention CLI."""
     parser = argparse.ArgumentParser(
         description="NoteFlow meeting retention management",
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
-
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
-
-    # cleanup command
     cleanup_parser = subparsers.add_parser("cleanup", help="Run retention cleanup")
     cleanup_parser.add_argument(
         "--dry-run",
         action="store_true",
         help="Report what would be deleted without deleting",
     )
-
-    # status command
     subparsers.add_parser("status", help="Show retention status")
-
     args = parser.parse_args()
-
     if not args.command:
         parser.print_help()
         sys.exit(1)
-
     if args.command == "cleanup":
         exit_code = asyncio.run(_run_cleanup(dry_run=args.dry_run))
     elif args.command == "status":
@@ -12639,46 +4709,21 @@ def main() -> None:
     else:
         parser.print_help()
         exit_code = 1
-
     sys.exit(exit_code)
-
-
 if __name__ == "__main__":
     main()
 ````
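
Since `main()` reads `argparse` arguments from `sys.argv` and exits via `sys.exit()`, the CLI can also be driven programmatically, e.g. from a scheduler. A minimal sketch, equivalent to `python -m noteflow.cli.retention cleanup --dry-run`:

````python
import sys

from noteflow.cli.retention import main

# argparse reads sys.argv[1:]; main() raises SystemExit with the run's exit code.
sys.argv = ["noteflow-retention", "cleanup", "--dry-run"]
main()
````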
- """ settings = get_settings() - uow_factory = SqlAlchemyUnitOfWork.factory_from_settings(settings) service = RetentionService( uow_factory=uow_factory, @@ -12586,52 +4671,37 @@ async def _show_status() -> int: meetings_dir=settings.meetings_dir, enabled=settings.retention_enabled, ) - expired = await service.find_expired_meetings() - print("\nRetention Status:") print(f" Enabled: {settings.retention_enabled}") print(f" Retention days: {settings.retention_days}") print(f" Check interval: {settings.retention_check_interval_hours} hours") print(f" Cutoff date: {service.cutoff_date.isoformat()}") print(f" Meetings pending deletion: {len(expired)}") - if expired: print("\n Pending deletions:") - for meeting in expired[:10]: # Show first 10 + for meeting in expired[:10]: print(f" - {meeting.id}: {meeting.title} (ended: {meeting.ended_at})") if len(expired) > 10: print(f" ... and {len(expired) - 10} more") - return 0 - - def main() -> None: - """Entry point for retention CLI.""" parser = argparse.ArgumentParser( description="NoteFlow meeting retention management", formatter_class=argparse.RawDescriptionHelpFormatter, ) - subparsers = parser.add_subparsers(dest="command", help="Available commands") - - # cleanup command cleanup_parser = subparsers.add_parser("cleanup", help="Run retention cleanup") cleanup_parser.add_argument( "--dry-run", action="store_true", help="Report what would be deleted without deleting", ) - - # status command subparsers.add_parser("status", help="Show retention status") - args = parser.parse_args() - if not args.command: parser.print_help() sys.exit(1) - if args.command == "cleanup": exit_code = asyncio.run(_run_cleanup(dry_run=args.dry_run)) elif args.command == "status": @@ -12639,46 +4709,21 @@ def main() -> None: else: parser.print_help() exit_code = 1 - sys.exit(exit_code) - - if __name__ == "__main__": main() ```` ## File: src/noteflow/client/components/_async_mixin.py ````python -"""Mixin for async operations with loading/error state management. - -Provides standardized handling for UI components that perform async operations, -including loading state, error handling, and UI thread dispatch. -""" - from __future__ import annotations - from collections.abc import Awaitable, Callable from typing import TYPE_CHECKING, TypeVar - if TYPE_CHECKING: import flet as ft - - T = TypeVar("T") - - class AsyncOperationMixin[T]: - """Mixin providing standardized async operation handling. - - Manages loading state, error handling, and UI thread dispatch for - Flet components that perform async operations. - - Components using this mixin must have: - - `_page: ft.Page | None` attribute for UI updates - """ - _page: ft.Page | None - async def run_async_operation( self, operation: Callable[[], Awaitable[T]], @@ -12686,146 +4731,78 @@ class AsyncOperationMixin[T]: on_error: Callable[[str], None], set_loading: Callable[[bool], None], ) -> T | None: - """Run async operation with standardized state management. - - Handles loading state, error catching, and UI thread dispatch. - All callbacks are dispatched to the UI thread. - - Args: - operation: Async callable to execute. - on_success: Callback with result on success (called on UI thread). - on_error: Callback with error message on failure (called on UI thread). - set_loading: Callback to set loading state (called on UI thread). - - Returns: - Result of operation on success, None on failure. 
- """ self._dispatch_ui(lambda: set_loading(True)) try: result = await operation() - # Capture result for closure - self._dispatch_ui(lambda r=result: on_success(r)) # type: ignore[misc] + self._dispatch_ui(lambda r=result: on_success(r)) return result except Exception as e: error_msg = str(e) - self._dispatch_ui(lambda msg=error_msg: on_error(msg)) # type: ignore[misc] + self._dispatch_ui(lambda msg=error_msg: on_error(msg)) return None finally: self._dispatch_ui(lambda: set_loading(False)) - def _dispatch_ui(self, callback: Callable[[], None]) -> None: - """Dispatch callback to UI thread. - - Safe to call even if page is None (no-op in that case). - - Args: - callback: Function to execute on UI thread. - """ if not self._page: return - async def _runner() -> None: callback() - - # Flet expects a coroutine function here; schedule it. self._page.run_task(_runner) ```` ## File: src/noteflow/client/components/annotation_display.py ````python -"""Annotation display component for meeting review. - -Display existing annotations during meeting review with type badges and clickable timestamps. -Reuses patterns from MeetingLibraryComponent (ListView) and SummaryPanelComponent (type badges). -""" - from __future__ import annotations - import logging from collections.abc import Callable from typing import TYPE_CHECKING - import flet as ft - -# REUSE existing formatting utility from noteflow.infrastructure.export._formatting import format_timestamp - if TYPE_CHECKING: from noteflow.client.state import AppState from noteflow.grpc.client import AnnotationInfo - logger = logging.getLogger(__name__) - -# Annotation type colors (reused pattern from summary_panel.py) ANNOTATION_TYPE_COLORS: dict[str, str] = { "action_item": ft.Colors.GREEN_400, "decision": ft.Colors.BLUE_400, "note": ft.Colors.GREY_400, "risk": ft.Colors.ORANGE_400, } - ANNOTATION_TYPE_ICONS: dict[str, str] = { "action_item": ft.Icons.CHECK_CIRCLE_OUTLINE, "decision": ft.Icons.GAVEL, "note": ft.Icons.NOTE, "risk": ft.Icons.WARNING, } - ANNOTATION_TYPE_LABELS: dict[str, str] = { "action_item": "Action", "decision": "Decision", "note": "Note", "risk": "Risk", } - - class AnnotationDisplayComponent: - """Display existing annotations during meeting review. - - Shows annotations sorted by start_time with type badges and clickable timestamps. - Reuses ListView pattern from MeetingLibraryComponent. - """ - def __init__( self, state: AppState, on_annotation_seek: Callable[[float], None] | None = None, ) -> None: - """Initialize annotation display. - - Args: - state: Centralized application state. - on_annotation_seek: Callback when annotation is clicked (seek to timestamp). - """ self._state = state self._on_annotation_seek = on_annotation_seek - - # UI elements self._list_view: ft.ListView | None = None self._header_text: ft.Text | None = None self._container: ft.Container | None = None - - # State self._annotations: list[AnnotationInfo] = [] - def build(self) -> ft.Container: - """Build annotation display UI. - - Returns: - Container with annotation list. - """ self._header_text = ft.Text( "Annotations (0)", size=14, weight=ft.FontWeight.BOLD, ) - self._list_view = ft.ListView( spacing=5, padding=10, height=150, ) - self._container = ft.Container( content=ft.Column( [ @@ -12838,22 +4815,13 @@ class AnnotationDisplayComponent: ], spacing=5, ), - visible=False, # Hidden until annotations loaded + visible=False, ) return self._container - def load_annotations(self, annotations: list[AnnotationInfo]) -> None: - """Load and display annotations. 
 
 ## File: src/noteflow/client/components/annotation_toolbar.py
 ````python
-"""Annotation toolbar component for adding action items, decisions, and notes.
-
-Uses AnnotationInfo from grpc.client and NoteFlowClient.add_annotation().
-Does not recreate any types - imports and uses existing ones.
-"""
-
 from __future__ import annotations
-
 import logging
 from collections.abc import Callable
 from typing import TYPE_CHECKING
-
 import flet as ft
-
 if TYPE_CHECKING:
     from noteflow.client.state import AppState
     from noteflow.grpc.client import NoteFlowClient
-
 logger = logging.getLogger(__name__)
-
-
 class AnnotationToolbarComponent:
-    """Toolbar for adding annotations during recording or playback.
-
-    Uses NoteFlowClient.add_annotation() to persist annotations.
-    """
-
     def __init__(
         self,
         state: AppState,
         get_client: Callable[[], NoteFlowClient | None],
     ) -> None:
-        """Initialize annotation toolbar.
-
-        Args:
-            state: Centralized application state.
-            get_client: Callable that returns current gRPC client or None.
-        """
         self._state = state
         self._get_client = get_client
-
-        # UI elements
         self._action_btn: ft.ElevatedButton | None = None
         self._decision_btn: ft.ElevatedButton | None = None
         self._note_btn: ft.ElevatedButton | None = None
         self._risk_btn: ft.ElevatedButton | None = None
         self._row: ft.Row | None = None
-
-        # Dialog elements
         self._dialog: ft.AlertDialog | None = None
         self._text_field: ft.TextField | None = None
         self._current_annotation_type: str = ""
-
     def build(self) -> ft.Row:
-        """Build annotation toolbar UI.
-
-        Returns:
-            Row containing annotation buttons.
-        """
         self._action_btn = ft.ElevatedButton(
             "Action Item",
             icon=ft.Icons.CHECK_CIRCLE_OUTLINE,
@@ -13033,19 +4937,12 @@ class AnnotationToolbarComponent:
             on_click=lambda e: self._show_annotation_dialog("risk"),
             disabled=True,
         )
-
         self._row = ft.Row(
             [self._action_btn, self._decision_btn, self._note_btn, self._risk_btn],
             visible=False,
         )
         return self._row
-
     def set_enabled(self, enabled: bool) -> None:
-        """Enable or disable annotation buttons.
-
-        Args:
-            enabled: Whether buttons should be enabled.
-        """
         if self._action_btn:
             self._action_btn.disabled = not enabled
         if self._decision_btn:
@@ -13055,28 +4952,13 @@ class AnnotationToolbarComponent:
         if self._risk_btn:
             self._risk_btn.disabled = not enabled
         self._state.request_update()
-
     def set_visible(self, visible: bool) -> None:
-        """Set visibility of annotation toolbar.
-
-        Args:
-            visible: Whether toolbar should be visible.
-        """
         if self._row:
             self._row.visible = visible
         self._state.request_update()
-
     def _show_annotation_dialog(self, annotation_type: str) -> None:
-        """Show dialog for entering annotation text.
-
-        Args:
-            annotation_type: Type of annotation (action_item, decision, note).
-        """
         self._current_annotation_type = annotation_type
-
-        # Format type for display
         type_display = annotation_type.replace("_", " ").title()
-
         self._text_field = ft.TextField(
             label=f"{type_display} Text",
             multiline=True,
@@ -13085,7 +4967,6 @@ class AnnotationToolbarComponent:
             width=400,
             autofocus=True,
         )
-
         self._dialog = ft.AlertDialog(
             title=ft.Text(f"Add {type_display}"),
             content=self._text_field,
@@ -13095,51 +4976,37 @@ class AnnotationToolbarComponent:
             ],
             actions_alignment=ft.MainAxisAlignment.END,
         )
-
-        # Show dialog
         if self._state._page:
             self._state._page.dialog = self._dialog
             self._dialog.open = True
             self._state.request_update()
-
     def _close_dialog(self, e: ft.ControlEvent | None = None) -> None:
-        """Close the annotation dialog."""
         if self._dialog:
             self._dialog.open = False
             self._state.request_update()
-
     def _submit_annotation(self, e: ft.ControlEvent) -> None:
-        """Submit the annotation to the server."""
         if not self._text_field:
             return
-
         text = self._text_field.value or ""
         if not text.strip():
             return
-
         self._close_dialog()
-
-        # Get current timestamp
         timestamp = self._get_current_timestamp()
-
-        # Submit to server
         client = self._get_client()
         if not client:
             logger.warning("No gRPC client available for annotation")
             return
-
         meeting = self._state.current_meeting
         if not meeting:
             logger.warning("No current meeting for annotation")
             return
-
         try:
             if annotation := client.add_annotation(
                 meeting_id=meeting.id,
                 annotation_type=self._current_annotation_type,
                 text=text.strip(),
                 start_time=timestamp,
-                end_time=timestamp,  # Point annotation
+                end_time=timestamp,
             ):
                 self._state.annotations.append(annotation)
                 logger.info(
@@ -13149,325 +5016,655 @@ class AnnotationToolbarComponent:
                 logger.error("Failed to add annotation")
         except Exception as exc:
             logger.error("Error adding annotation: %s", exc)
-
     def _get_current_timestamp(self) -> float:
-        """Get current timestamp for annotation.
-
-        Returns timestamp from playback position (during playback) or
-        recording elapsed time (during recording).
-
-        Returns:
-            Current timestamp in seconds.
-        """
-        # During playback, use playback position
         if self._state.playback_position > 0:
             return self._state.playback_position
-
-        # During recording, use elapsed seconds
         return float(self._state.elapsed_seconds)
 ````
-## File: src/noteflow/client/components/playback_controls.py
+## File: src/noteflow/client/components/meeting_library.py
 ````python
-"""Playback controls component with play/pause/stop and timeline.
-
-Uses SoundDevicePlayback from infrastructure.audio and format_timestamp from _formatting.
-Does not recreate any types - imports and uses existing ones.
-"""
-
 from __future__ import annotations
-
 import logging
+import threading
+import time
 from collections.abc import Callable
-from typing import TYPE_CHECKING, Final
-
+from datetime import datetime
+from typing import TYPE_CHECKING
 import flet as ft
-
-from noteflow.client.components._thread_mixin import BackgroundWorkerMixin
-
-# REUSE existing types - do not recreate
-from noteflow.infrastructure.audio import PlaybackState
-from noteflow.infrastructure.export._formatting import format_timestamp
-
+from noteflow.infrastructure.export._formatting import format_datetime
 if TYPE_CHECKING:
     from noteflow.client.state import AppState
-
+    from noteflow.grpc.client import MeetingInfo, NoteFlowClient
 logger = logging.getLogger(__name__)
-
-POSITION_POLL_INTERVAL: Final[float] = 0.1  # 100ms for smooth timeline updates
-
-
-class PlaybackControlsComponent(BackgroundWorkerMixin):
-    """Audio playback controls with play/pause/stop and timeline.
-
-    Uses SoundDevicePlayback from state and format_timestamp from _formatting.
-    """
-
+class MeetingLibraryComponent:
+    DIARIZATION_POLL_INTERVAL_SECONDS: float = 2.0
     def __init__(
         self,
         state: AppState,
-        on_position_change: Callable[[float], None] | None = None,
+        get_client: Callable[[], NoteFlowClient | None],
+        on_meeting_selected: Callable[[MeetingInfo], None] | None = None,
     ) -> None:
-        """Initialize playback controls component.
-
-        Args:
-            state: Centralized application state.
-            on_position_change: Callback when playback position changes.
-        """
         self._state = state
-        self._on_position_change = on_position_change
-        self._init_worker()
-
-        # UI elements
-        self._play_btn: ft.IconButton | None = None
-        self._stop_btn: ft.IconButton | None = None
-        self._position_label: ft.Text | None = None
-        self._duration_label: ft.Text | None = None
-        self._timeline_slider: ft.Slider | None = None
-        self._row: ft.Row | None = None
-
-    def build(self) -> ft.Row:
-        """Build playback controls UI.
-
-        Returns:
-            Row containing playback buttons and timeline.
-        """
-        self._play_btn = ft.IconButton(
-            icon=ft.Icons.PLAY_ARROW,
-            icon_color=ft.Colors.GREEN,
-            tooltip="Play",
-            on_click=self._on_play_click,
-            disabled=True,
-        )
-        self._stop_btn = ft.IconButton(
-            icon=ft.Icons.STOP,
-            icon_color=ft.Colors.RED,
-            tooltip="Stop",
-            on_click=self._on_stop_click,
-            disabled=True,
-        )
-        self._position_label = ft.Text("00:00", size=12, width=50)
-        self._duration_label = ft.Text("00:00", size=12, width=50)
-        self._timeline_slider = ft.Slider(
-            min=0,
-            max=100,
-            value=0,
-            expand=True,
-            on_change=self._on_slider_change,
-            disabled=True,
-        )
-
-        self._row = ft.Row(
-            [
-                self._play_btn,
-                self._stop_btn,
-                self._position_label,
-                self._timeline_slider,
-                self._duration_label,
-            ],
-            visible=False,
-        )
-        return self._row
-
-    def set_visible(self, visible: bool) -> None:
-        """Set visibility of playback controls.
-
-        Args:
-            visible: Whether controls should be visible.
-        """
-        if self._row:
-            self._row.visible = visible
-        self._state.request_update()
-
-    def load_audio(self) -> None:
-        """Load session audio buffer for playback."""
-        buffer = self._state.session_audio_buffer
-        if not buffer:
-            logger.warning("No audio in session buffer")
-            return
-
-        # Play through SoundDevicePlayback
-        self._state.playback.play(buffer)
-        self._state.playback.pause()  # Load but don't start
-
-        # Update UI state
-        duration = self._state.playback.total_duration
-        self._state.playback_position = 0.0
-
-        self._state.run_on_ui_thread(lambda: self._update_loaded_state(duration))
-
-    def _update_loaded_state(self, duration: float) -> None:
-        """Update UI after audio is loaded (UI thread only)."""
-        if self._play_btn:
-            self._play_btn.disabled = False
-        if self._stop_btn:
-            self._stop_btn.disabled = False
-        if self._timeline_slider:
-            self._timeline_slider.disabled = False
-            self._timeline_slider.max = max(duration, 0.1)
-            self._timeline_slider.value = 0
-        if self._duration_label:
-            self._duration_label.value = format_timestamp(duration)
-        if self._position_label:
-            self._position_label.value = "00:00"
-
-        self.set_visible(True)
-        self._state.request_update()
-
-    def seek(self, position: float) -> None:
-        """Seek to a specific position.
-
-        Args:
-            position: Position in seconds.
-        """
-        if self._state.playback.seek(position):
-            self._state.playback_position = position
-            self._state.run_on_ui_thread(self._update_position_display)
-
-    def _on_play_click(self, e: ft.ControlEvent) -> None:
-        """Handle play/pause button click."""
-        playback = self._state.playback
-
-        if playback.state == PlaybackState.PLAYING:
-            playback.pause()
-            self._stop_polling()
-            self._update_play_button(playing=False)
-        elif playback.state == PlaybackState.PAUSED:
-            playback.resume()
-            self._start_polling()
-            self._update_play_button(playing=True)
-        elif buffer := self._state.session_audio_buffer:
-            playback.play(buffer)
-            self._start_polling()
-            self._update_play_button(playing=True)
-
-    def _on_stop_click(self, e: ft.ControlEvent) -> None:
-        """Handle stop button click."""
-        self._stop_polling()
-        self._state.playback.stop()
-        self._state.playback_position = 0.0
-        self._update_play_button(playing=False)
-        self._state.run_on_ui_thread(self._update_position_display)
-
-    def _on_slider_change(self, e: ft.ControlEvent) -> None:
-        """Handle timeline slider change."""
-        if self._timeline_slider:
-            position = float(self._timeline_slider.value or 0)
-            self.seek(position)
-
-    def _update_play_button(self, *, playing: bool) -> None:
-        """Update play button icon based on state."""
-        if self._play_btn:
-            if playing:
-                self._play_btn.icon = ft.Icons.PAUSE
-                self._play_btn.tooltip = "Pause"
-            else:
-                self._play_btn.icon = ft.Icons.PLAY_ARROW
-                self._play_btn.tooltip = "Play"
-            self._state.request_update()
-
-    def _start_polling(self) -> None:
-        """Start position polling thread."""
-        self._start_worker(self._poll_loop, "PlaybackPositionPoll")
-
-    def _stop_polling(self) -> None:
-        """Stop position polling thread."""
-        self._stop_worker()
-
-    def _poll_loop(self) -> None:
-        """Background polling loop for position updates."""
-        while self._should_run():
-            playback = self._state.playback
-
-            if playback.state == PlaybackState.PLAYING:
-                position = playback.current_position
-                self._state.playback_position = position
-                self._state.run_on_ui_thread(self._update_position_display)
-
-                # Notify callback
-                if self._on_position_change:
-                    try:
-                        self._on_position_change(position)
-                    except Exception as e:
-                        logger.error("Position change callback error: %s", e)
-
-            elif playback.state == PlaybackState.STOPPED:
-                # Playback finished - update UI and stop polling
-                self._state.run_on_ui_thread(self._on_playback_finished)
-                break
-
-            self._wait_interval(POSITION_POLL_INTERVAL)
-
-    def _update_position_display(self) -> None:
-        """Update position display elements (UI thread only)."""
-        position = self._state.playback_position
-
-        if self._position_label:
-            self._position_label.value = format_timestamp(position)
-
-        if self._timeline_slider and not self._timeline_slider.disabled:
-            # Only update if user isn't dragging
-            self._timeline_slider.value = position
-
-        self._state.request_update()
+        self._get_client = get_client
+        self._on_meeting_selected = on_meeting_selected
+        self._search_field: ft.TextField | None = None
+        self._list_view: ft.ListView | None = None
+        self._export_btn: ft.ElevatedButton | None = None
+        self._analyze_btn: ft.ElevatedButton | None = None
+        self._rename_btn: ft.ElevatedButton | None = None
+        self._refresh_btn: ft.IconButton | None = None
+        self._column: ft.Column | None = None
+        self._export_dialog: ft.AlertDialog | None = None
+        self._format_dropdown: ft.Dropdown | None = None
+        self._analyze_dialog: ft.AlertDialog | None = None
+        self._num_speakers_field: ft.TextField | None = None
+        self._rename_dialog: ft.AlertDialog | None = None
+        self._rename_fields: dict[str, ft.TextField] = {}
+    def build(self) -> ft.Column:
+        self._search_field = ft.TextField(
+            label="Search meetings",
+            prefix_icon=ft.Icons.SEARCH,
+            on_change=self._on_search_change,
+            expand=True,
+        )
+        self._refresh_btn = ft.IconButton(
+            icon=ft.Icons.REFRESH,
+            tooltip="Refresh meetings",
+            on_click=self._on_refresh_click,
+        )
+        self._export_btn = ft.ElevatedButton(
+            "Export",
+            icon=ft.Icons.DOWNLOAD,
+            on_click=self._show_export_dialog,
+            disabled=True,
+        )
+        self._analyze_btn = ft.ElevatedButton(
+            "Refine Speakers",
+            icon=ft.Icons.RECORD_VOICE_OVER,
+            on_click=self._show_analyze_dialog,
+            disabled=True,
+        )
+        self._rename_btn = ft.ElevatedButton(
+            "Rename Speakers",
+            icon=ft.Icons.EDIT,
+            on_click=self._show_rename_dialog,
+            disabled=True,
+        )
+        self._list_view = ft.ListView(
+            spacing=5,
+            padding=10,
+            height=200,
+        )
+        self._column = ft.Column(
+            [
+                ft.Row([self._search_field, self._refresh_btn]),
+                ft.Container(
+                    content=self._list_view,
+                    border=ft.border.all(1, ft.Colors.GREY_400),
+                    border_radius=8,
+                ),
+                ft.Row(
+                    [self._analyze_btn, self._rename_btn, self._export_btn],
+                    alignment=ft.MainAxisAlignment.END,
+                    spacing=10,
+                ),
+            ],
+            spacing=10,
+        )
+        return self._column
+    def refresh_meetings(self) -> None:
+        client = self._get_client()
+        if not client:
+            logger.warning("No gRPC client available")
+            return
+        try:
+            meetings = client.list_meetings(limit=50)
+            self._state.meetings = meetings
+            self._state.run_on_ui_thread(self._render_meetings)
+        except Exception as exc:
+            logger.error("Error fetching meetings: %s", exc)
+    def _on_search_change(self, e: ft.ControlEvent) -> None:
+        self._render_meetings()
+    def _on_refresh_click(self, e: ft.ControlEvent) -> None:
+        self.refresh_meetings()
+    def _render_meetings(self) -> None:
+        if not self._list_view:
+            return
+        self._list_view.controls.clear()
+        search_query = (self._search_field.value or "").lower() if self._search_field else ""
+        filtered_meetings = [m for m in self._state.meetings if search_query in m.title.lower()]
+        for meeting in filtered_meetings:
+            self._list_view.controls.append(self._create_meeting_row(meeting))
+        self._state.request_update()
+    def _create_meeting_row(self, meeting: MeetingInfo) -> ft.Container:
+        created_dt = datetime.fromtimestamp(meeting.created_at) if meeting.created_at else None
+        date_str = format_datetime(created_dt)
+        duration = meeting.duration_seconds
+        duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else "--:--"
+        is_selected = self._state.selected_meeting and self._state.selected_meeting.id == meeting.id
+        row = ft.Row(
+            [
+                ft.Column(
+                    [
+                        ft.Text(meeting.title, weight=ft.FontWeight.BOLD, size=14),
+                        ft.Text(
+                            f"{date_str} | {meeting.state} | {meeting.segment_count} segments | {duration_str}",
+                            size=11,
+                            color=ft.Colors.GREY_600,
+                        ),
+                    ],
+                    spacing=2,
+                    expand=True,
+                ),
+            ]
+        )
+        return ft.Container(
+            content=row,
+            padding=10,
+            border_radius=4,
+            bgcolor=ft.Colors.BLUE_50 if is_selected else None,
+            on_click=lambda e, m=meeting: self._on_meeting_click(m),
+            ink=True,
+        )
+    def _on_meeting_click(self, meeting: MeetingInfo) -> None:
+        self._state.selected_meeting = meeting
+        if self._export_btn:
+            self._export_btn.disabled = False
+        if self._analyze_btn:
+            self._analyze_btn.disabled = not self._can_refine_speakers(meeting)
+        if self._rename_btn:
+            self._rename_btn.disabled = not self._can_refine_speakers(meeting)
+        self._render_meetings()
+        if self._on_meeting_selected:
+            self._on_meeting_selected(meeting)
+    def _show_export_dialog(self, e: ft.ControlEvent) -> None:
+        if not self._state.selected_meeting:
+            return
+        self._format_dropdown = ft.Dropdown(
+            label="Export Format",
+            options=[
+                ft.dropdown.Option("markdown", "Markdown (.md)"),
+                ft.dropdown.Option("html", "HTML (.html)"),
+            ],
+            value="markdown",
+            width=200,
+        )
+        self._export_dialog = ft.AlertDialog(
+            title=ft.Text("Export Transcript"),
+            content=ft.Column(
+                [
+                    ft.Text(f"Meeting: {self._state.selected_meeting.title}"),
+                    self._format_dropdown,
+                ],
+                spacing=10,
+                tight=True,
+            ),
+            actions=[
+                ft.TextButton("Cancel", on_click=self._close_export_dialog),
+                ft.ElevatedButton("Export", on_click=self._do_export),
+            ],
+            actions_alignment=ft.MainAxisAlignment.END,
+        )
+        if self._state._page:
+            self._state._page.dialog = self._export_dialog
+            self._export_dialog.open = True
+            self._state.request_update()
+    def _close_export_dialog(self, e: ft.ControlEvent | None = None) -> None:
+        if self._export_dialog:
+            self._export_dialog.open = False
+            self._state.request_update()
+    def _do_export(self, e: ft.ControlEvent) -> None:
+        if not self._state.selected_meeting or not self._format_dropdown:
+            return
+        format_name = self._format_dropdown.value or "markdown"
+        meeting_id = self._state.selected_meeting.id
+        self._close_export_dialog()
+        client = self._get_client()
+        if not client:
+            logger.warning("No gRPC client available for export")
+            return
+        try:
+            if result := client.export_transcript(meeting_id, format_name):
+                self._save_export(result.content, result.file_extension)
+            else:
+                logger.error("Export failed - no result returned")
+        except Exception as exc:
+            logger.error("Error exporting transcript: %s", exc)
+    def _save_export(self, content: str, extension: str) -> None:
+        if not self._state.selected_meeting:
+            return
+        safe_title = "".join(
+            c if c.isalnum() or c in " -_" else "_" for c in self._state.selected_meeting.title
+        )
+        filename = f"{safe_title}.{extension}"
+        if self._state._page:
+            def on_save(e: ft.FilePickerResultEvent) -> None:
+                if e.path:
+                    try:
+                        with open(e.path, "w", encoding="utf-8") as f:
+                            f.write(content)
+                        logger.info("Exported to: %s", e.path)
+                    except OSError as exc:
+                        logger.error("Error saving export: %s", exc)
+            picker = ft.FilePicker(on_result=on_save)
+            self._state._page.overlay.append(picker)
+            self._state._page.update()
+            picker.save_file(
+                file_name=filename,
+                allowed_extensions=[extension],
+            )
+    def _show_analyze_dialog(self, e: ft.ControlEvent) -> None:
+        if not self._state.selected_meeting:
+            return
+        if not self._can_refine_speakers(self._state.selected_meeting):
+            self._show_simple_dialog(
+                "Meeting still active",
+                ft.Text("Stop the meeting before refining speakers."),
+            )
+            return
+        self._num_speakers_field = ft.TextField(
+            label="Number of speakers (optional)",
+            hint_text="Leave empty for auto-detect",
+            width=200,
+            keyboard_type=ft.KeyboardType.NUMBER,
+        )
+        self._analyze_dialog = ft.AlertDialog(
+            title=ft.Text("Refine Speakers"),
+            content=ft.Column(
+                [
+                    ft.Text(f"Meeting: {self._state.selected_meeting.title}"),
+                    ft.Text(
+                        "Refine speaker labels using offline diarization.",
+                        size=12,
+                        color=ft.Colors.GREY_600,
+                    ),
+                    self._num_speakers_field,
+                ],
+                spacing=10,
+                tight=True,
+            ),
+            actions=[
+                ft.TextButton("Cancel", on_click=self._close_analyze_dialog),
+                ft.ElevatedButton("Analyze", on_click=self._do_analyze),
+            ],
+            actions_alignment=ft.MainAxisAlignment.END,
+        )
+        if self._state._page:
+            self._state._page.dialog = self._analyze_dialog
+            self._analyze_dialog.open = True
+            self._state.request_update()
+    def _close_analyze_dialog(self, e: ft.ControlEvent | None = None) -> None:
+        if self._analyze_dialog:
+            self._analyze_dialog.open = False
+            self._state.request_update()
+    def _do_analyze(self, e: ft.ControlEvent) -> None:
+        if not self._state.selected_meeting:
+            return
+        num_speakers: int | None = None
+        if self._num_speakers_field and self._num_speakers_field.value:
+            try:
+                num_speakers = int(self._num_speakers_field.value)
+                if num_speakers < 1:
+                    num_speakers = None
+            except ValueError:
+                logger.debug("Invalid speaker count input '%s', using auto-detection", self._num_speakers_field.value)
+        meeting_id = self._state.selected_meeting.id
+        self._close_analyze_dialog()
+        client = self._get_client()
+        if not client:
+            logger.warning("No gRPC client available for analysis")
+            return
+        self._show_analysis_progress("Starting...")
+        try:
+            result = client.refine_speaker_diarization(meeting_id, num_speakers)
+        except Exception as exc:
+            logger.error("Error analyzing speakers: %s", exc)
+            self._show_analysis_error(str(exc))
+            return
+        if not result:
+            self._show_analysis_error("Analysis failed - no response from server")
+            return
+        if result.is_terminal:
+            if result.success:
+                self._show_analysis_result(result.segments_updated, result.speaker_ids)
+            else:
+                self._show_analysis_error(result.error_message or "Analysis failed")
+            return
+        if not result.job_id:
+            self._show_analysis_error(result.error_message or "Server did not return job ID")
+            return
+        self._show_analysis_progress(self._format_job_status(result.status))
+        self._start_diarization_poll(result.job_id)
+    def _show_analysis_progress(self, status: str = "Refining...") -> None:
+        if self._analyze_btn:
+            self._analyze_btn.disabled = True
+            self._analyze_btn.text = status
+        self._state.request_update()
+    def _show_analysis_result(self, segments_updated: int, speaker_ids: list[str]) -> None:
+        if self._analyze_btn:
+            self._analyze_btn.disabled = False
+            self._analyze_btn.text = "Refine Speakers"
+        speaker_list = ", ".join(speaker_ids) if speaker_ids else "None found"
+        result_dialog = ft.AlertDialog(
+            title=ft.Text("Refinement Complete"),
+            content=ft.Column(
+                [
+                    ft.Text(f"Segments updated: {segments_updated}"),
+                    ft.Text(f"Speakers found: {speaker_list}"),
+                    ft.Text(
+                        "Reload the meeting to see speaker labels.",
+                        size=12,
+                        color=ft.Colors.GREY_600,
+                        italic=True,
+                    ),
+                ],
+                spacing=5,
+                tight=True,
+            ),
+            actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))],
+        )
+        if self._state._page:
+            self._state._page.dialog = result_dialog
+            result_dialog.open = True
+            self._state.request_update()
+    def _show_analysis_error(self, error_message: str) -> None:
+        if self._analyze_btn:
+            self._analyze_btn.disabled = False
+            self._analyze_btn.text = "Refine Speakers"
+        self._show_simple_dialog("Refinement Failed",
ft.Text(error_message)) + def _close_result_dialog(self, e: ft.ControlEvent) -> None: + if self._state._page and self._state._page.dialog: + self._state._page.dialog.open = False + self._state.request_update() + def _start_diarization_poll(self, job_id: str) -> None: + page = self._state._page + if page and hasattr(page, "run_thread"): + page.run_thread(lambda: self._poll_diarization_job(job_id)) + return + threading.Thread( + target=self._poll_diarization_job, + args=(job_id,), + daemon=True, + name="diarization-poll", + ).start() + def _poll_diarization_job(self, job_id: str) -> None: + client = self._get_client() + if not client: + self._state.run_on_ui_thread( + lambda: self._show_analysis_error("No gRPC client available for polling") + ) + return + while True: + result = client.get_diarization_job_status(job_id) + if not result: + self._state.run_on_ui_thread( + lambda: self._show_analysis_error("Failed to fetch diarization status") + ) + return + if result.is_terminal: + if result.success: + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_result( + r.segments_updated, + r.speaker_ids, + ) + ) + else: + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_error( + r.error_message or "Diarization failed" + ) + ) + return + self._state.run_on_ui_thread( + lambda r=result: self._show_analysis_progress(self._format_job_status(r.status)) + ) + time.sleep(self.DIARIZATION_POLL_INTERVAL_SECONDS) + @staticmethod + def _format_job_status(status: str) -> str: + return { + "queued": "Queued...", + "running": "Refining...", + }.get(status, "Refining...") + def _show_simple_dialog(self, title: str, content: ft.Control) -> None: + dialog = ft.AlertDialog( + title=ft.Text(title), + content=content, + actions=[ft.TextButton("OK", on_click=self._close_result_dialog)], + ) + if self._state._page: + self._state._page.dialog = dialog + dialog.open = True + self._state.request_update() + def _show_rename_dialog(self, e: ft.ControlEvent) -> None: + if not self._state.selected_meeting: + return + if not self._can_refine_speakers(self._state.selected_meeting): + self._show_simple_dialog( + "Meeting still active", + ft.Text("Stop the meeting before renaming speakers."), + ) + return + client = self._get_client() + if not client: + logger.warning("No gRPC client available") + return + meeting_id = self._state.selected_meeting.id + segments = client.get_meeting_segments(meeting_id) + speaker_ids = sorted({s.speaker_id for s in segments if s.speaker_id}) + if not speaker_ids: + self._show_no_speakers_message() + return + self._rename_fields.clear() + speaker_controls: list[ft.Control] = [] + for speaker_id in speaker_ids: + field = ft.TextField( + label=f"{speaker_id}", + hint_text="Enter new name", + width=200, + ) + self._rename_fields[speaker_id] = field + speaker_controls.append( + ft.Row( + [ + ft.Text(speaker_id, width=120, size=12), + ft.Icon(ft.Icons.ARROW_RIGHT, size=16), + field, + ], + alignment=ft.MainAxisAlignment.START, + ) + ) + self._rename_dialog = ft.AlertDialog( + title=ft.Text("Rename Speakers"), + content=ft.Column( + [ + ft.Text(f"Meeting: {self._state.selected_meeting.title}"), + ft.Text( + "Enter new names for speakers (leave blank to keep current):", + size=12, + color=ft.Colors.GREY_600, + ), + ft.Divider(), + *speaker_controls, + ], + spacing=10, + scroll=ft.ScrollMode.AUTO, + height=300, + ), + actions=[ + ft.TextButton("Cancel", on_click=self._close_rename_dialog), + ft.ElevatedButton("Apply", on_click=self._do_rename), + ], + 
actions_alignment=ft.MainAxisAlignment.END, + ) + if self._state._page: + self._state._page.dialog = self._rename_dialog + self._rename_dialog.open = True + self._state.request_update() + def _close_rename_dialog(self, e: ft.ControlEvent | None = None) -> None: + if self._rename_dialog: + self._rename_dialog.open = False + self._state.request_update() + def _show_no_speakers_message(self) -> None: + self._show_simple_dialog( + "No Speakers Found", + ft.Text( + "This meeting has no speaker labels. " + "Run 'Refine Speakers' first to identify speakers." + ), + ) + def _do_rename(self, e: ft.ControlEvent) -> None: + if not self._state.selected_meeting: + return + client = self._get_client() + if not client: + logger.warning("No gRPC client available") + return + meeting_id = self._state.selected_meeting.id + self._close_rename_dialog() + renames: list[tuple[str, str]] = [] + for old_id, field in self._rename_fields.items(): + new_name = (field.value or "").strip() + if new_name and new_name != old_id: + renames.append((old_id, new_name)) + if not renames: + return + total_updated = 0 + errors: list[str] = [] + for old_id, new_name in renames: + try: + result = client.rename_speaker(meeting_id, old_id, new_name) + if result and result.success: + total_updated += result.segments_updated + else: + errors.append(f"{old_id}: rename failed") + except Exception as exc: + logger.error("Error renaming speaker %s: %s", old_id, exc) + errors.append(f"{old_id}: {exc}") + if errors: + self._show_rename_errors(errors) + else: + self._show_rename_success(total_updated, len(renames)) + def _show_rename_success(self, segments_updated: int, speakers_renamed: int) -> None: + success_dialog = ft.AlertDialog( + title=ft.Text("Rename Complete"), + content=ft.Column( + [ + ft.Text(f"Renamed {speakers_renamed} speaker(s)"), + ft.Text(f"Updated {segments_updated} segment(s)"), + ft.Text( + "Reload the meeting to see the new speaker names.", + size=12, + color=ft.Colors.GREY_600, + italic=True, + ), + ], + spacing=5, + tight=True, + ), + actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))], + ) + if self._state._page: + self._state._page.dialog = success_dialog + success_dialog.open = True + self._state.request_update() + def _show_rename_errors(self, errors: list[str]) -> None: + self._show_simple_dialog("Rename Errors", ft.Text("\n".join(errors))) + @staticmethod + def _can_refine_speakers(meeting: MeetingInfo) -> bool: + return meeting.state in {"stopped", "completed", "error"} +```` - def _on_playback_finished(self) -> None: - """Handle playback completion (UI thread only).""" - self._update_play_button(playing=False) - self._state.playback_position = 0.0 - self._update_position_display() +## File: src/noteflow/client/components/playback_sync.py +````python +from __future__ import annotations +import logging +from collections.abc import Callable +from typing import TYPE_CHECKING +from noteflow.infrastructure.audio import PlaybackState +if TYPE_CHECKING: + from noteflow.client.state import AppState +logger = logging.getLogger(__name__) +class PlaybackSyncController: + def __init__( + self, + state: AppState, + on_highlight_change: Callable[[int | None], None] | None = None, + ) -> None: + self._state = state + self._on_highlight_change = on_highlight_change + self._active = False + def start(self) -> None: + if self._active: + return + self._active = True + self._state.playback.add_position_callback(self._on_position_update) + logger.debug("Started playback sync controller") + def 
stop(self) -> None: + if not self._active: + return + self._active = False + self._state.playback.remove_position_callback(self._on_position_update) + if self._state.highlighted_segment_index is not None: + self._state.highlighted_segment_index = None + self._state.run_on_ui_thread(self._notify_highlight_change) + logger.debug("Stopped playback sync controller") + def _on_position_update(self, position: float) -> None: + if not self._active: + return + if self._state.playback.state == PlaybackState.STOPPED: + self.stop() + return + self._update_position(position) + def _update_position(self, position: float) -> None: + self._state.playback_position = position + new_index = self._state.find_segment_at_position(position) + old_index = self._state.highlighted_segment_index + if new_index != old_index: + self._state.highlighted_segment_index = new_index + self._state.run_on_ui_thread(self._notify_highlight_change) + def _notify_highlight_change(self) -> None: + if self._on_highlight_change: + try: + self._on_highlight_change(self._state.highlighted_segment_index) + except Exception as e: + logger.error("Highlight change callback error: %s", e) + self._state.request_update() + def seek_to_segment(self, segment_index: int) -> bool: + segments = self._state.transcript_segments + if not (0 <= segment_index < len(segments)): + logger.warning("Invalid segment index: %d", segment_index) + return False + playback = self._state.playback + segment = segments[segment_index] + if playback.seek(segment.start_time): + self._state.highlighted_segment_index = segment_index + self._state.playback_position = segment.start_time + self._state.run_on_ui_thread(self._notify_highlight_change) + return True + return False ```` ## File: src/noteflow/client/components/recording_timer.py ````python -"""Recording timer component with background thread. - -Uses format_timestamp() from infrastructure/export/_formatting.py (not local implementation). -""" - from __future__ import annotations - import time from typing import TYPE_CHECKING, Final - import flet as ft - from noteflow.client.components._thread_mixin import BackgroundWorkerMixin - -# REUSE existing formatting utility - do not recreate from noteflow.infrastructure.export._formatting import format_timestamp - if TYPE_CHECKING: from noteflow.client.state import AppState - TIMER_UPDATE_INTERVAL: Final[float] = 1.0 - - class RecordingTimerComponent(BackgroundWorkerMixin): - """Recording duration timer with background thread. - - Uses format_timestamp() from export._formatting (not local implementation). - """ - def __init__(self, state: AppState) -> None: - """Initialize timer component. - - Args: - state: Centralized application state. - """ self._state = state self._init_worker() - self._dot: ft.Icon | None = None self._label: ft.Text | None = None self._row: ft.Row | None = None - def build(self) -> ft.Row: - """Build timer UI elements. - - Returns: - Row containing recording dot and time label. 
- """ self._dot = ft.Icon( ft.Icons.FIBER_MANUAL_RECORD, color=ft.Colors.RED, @@ -13484,122 +5681,75 @@ class RecordingTimerComponent(BackgroundWorkerMixin): visible=False, ) return self._row - def start(self) -> None: - """Start the recording timer.""" self._state.recording_start_time = time.time() self._state.elapsed_seconds = 0 - if self._row: self._row.visible = True if self._label: self._label.value = "00:00" - self._start_worker(self._timer_loop, "RecordingTimer") self._state.request_update() - def stop(self) -> None: - """Stop the recording timer.""" self._stop_worker(timeout=2.0) - if self._row: self._row.visible = False - self._state.recording_start_time = None self._state.request_update() - def _timer_loop(self) -> None: - """Background timer loop.""" while self._should_run(): if self._state.recording_start_time is not None: self._state.elapsed_seconds = int(time.time() - self._state.recording_start_time) self._state.run_on_ui_thread(self._update_display) self._wait_interval(TIMER_UPDATE_INTERVAL) - def _update_display(self) -> None: - """Update timer display (UI thread only).""" if not self._label: return - - # REUSE existing format_timestamp from _formatting.py self._label.value = format_timestamp(float(self._state.elapsed_seconds)) self._state.request_update() ```` ## File: src/noteflow/client/components/summary_panel.py ````python -"""Summary panel component for evidence-linked meeting summaries. - -Uses existing patterns from MeetingLibraryComponent and TranscriptComponent. -Does not recreate any types - imports and uses existing domain entities. -""" - from __future__ import annotations - import logging from collections.abc import Callable from typing import TYPE_CHECKING from uuid import UUID - import flet as ft - if TYPE_CHECKING: from noteflow.application.services import SummarizationService from noteflow.client.state import AppState from noteflow.domain.entities import ActionItem, KeyPoint, Summary - from noteflow.domain.value_objects import MeetingId - logger = logging.getLogger(__name__) - -# Priority color mapping PRIORITY_COLORS: dict[int, str] = { - 0: ft.Colors.GREY_400, # Unspecified - 1: ft.Colors.BLUE_400, # Low - 2: ft.Colors.ORANGE_400, # Medium - 3: ft.Colors.RED_400, # High + 0: ft.Colors.GREY_400, + 1: ft.Colors.BLUE_400, + 2: ft.Colors.ORANGE_400, + 3: ft.Colors.RED_400, } - PRIORITY_LABELS: dict[int, str] = { 0: "—", 1: "Low", 2: "Med", 3: "High", } - - class SummaryPanelComponent: - """Summary panel with evidence-linked key points and action items. - - Displays executive summary, key points with citations, and action items - with priority badges. Citation chips link back to transcript segments. - """ - def __init__( self, state: AppState, get_service: Callable[[], SummarizationService | None], on_citation_click: Callable[[int], None] | None = None, ) -> None: - """Initialize summary panel. - - Args: - state: Centralized application state. - get_service: Callable to get summarization service. - on_citation_click: Callback when citation chip is clicked (segment_id). 
- """ self._state = state self._get_service = get_service self._on_citation_click = on_citation_click - - # Uncited drafts tracking self._show_uncited: bool = False self._original_summary: Summary | None = None self._filtered_summary: Summary | None = None self._uncited_key_points: int = 0 self._uncited_action_items: int = 0 - - # UI references (set in build) self._container: ft.Container | None = None self._summary_text: ft.Text | None = None self._key_points_list: ft.ListView | None = None @@ -13609,43 +5759,28 @@ class SummaryPanelComponent: self._error_text: ft.Text | None = None self._uncited_toggle: ft.Switch | None = None self._uncited_count_text: ft.Text | None = None - def build(self) -> ft.Container: - """Build the summary panel UI. - - Returns: - Container with summary panel content. - """ - # Executive summary section self._summary_text = ft.Text( "", size=14, selectable=True, ) - - # Key points list with citation chips self._key_points_list = ft.ListView( spacing=5, height=150, padding=5, ) - - # Action items list with priority badges self._action_items_list = ft.ListView( spacing=5, height=150, padding=5, ) - - # Generate button self._generate_btn = ft.ElevatedButton( "Generate Summary", icon=ft.Icons.AUTO_AWESOME, on_click=self._on_generate_click, disabled=True, ) - - # Loading/error states self._loading_indicator = ft.ProgressRing( visible=False, width=20, @@ -13657,8 +5792,6 @@ class SummaryPanelComponent: visible=False, size=12, ) - - # Uncited drafts toggle self._uncited_count_text = ft.Text( "", size=11, @@ -13672,14 +5805,12 @@ class SummaryPanelComponent: visible=False, scale=0.8, ) - summary_container = ft.Container( content=self._summary_text, padding=10, bgcolor=ft.Colors.GREY_100, border_radius=4, ) - self._container = ft.Container( content=ft.Column( [ @@ -13688,7 +5819,7 @@ class SummaryPanelComponent: ft.Text("Summary", size=16, weight=ft.FontWeight.BOLD), self._generate_btn, self._loading_indicator, - ft.Container(expand=True), # Spacer + ft.Container(expand=True), self._uncited_count_text, self._uncited_toggle, ], @@ -13715,55 +5846,32 @@ class SummaryPanelComponent: visible=False, ) return self._container - def set_visible(self, visible: bool) -> None: - """Set panel visibility. - - Args: - visible: Whether panel should be visible. - """ if self._container: self._container.visible = visible self._state.request_update() - def set_enabled(self, enabled: bool) -> None: - """Set generate button enabled state. - - Args: - enabled: Whether generate button should be enabled. 
- """ if self._generate_btn: self._generate_btn.disabled = not enabled self._state.request_update() - def _on_generate_click(self, e: ft.ControlEvent) -> None: - """Handle generate button click.""" if self._state._page: self._state._page.run_task(self._generate_summary) - async def _generate_summary(self) -> None: - """Generate summary asynchronously.""" service = self._get_service() if not service: self._show_error("Summarization service not available") return - if not self._state.current_meeting: self._show_error("No meeting selected") return - if not self._state.transcript_segments: self._show_error("No transcript segments to summarize") return - - # Convert TranscriptSegment to domain Segment segments = self._convert_segments() - self._state.summary_loading = True self._state.summary_error = None self._update_loading_state() - - # Convert meeting id string to MeetingId try: meeting_uuid = UUID(str(self._state.current_meeting.id)) except (AttributeError, ValueError) as exc: @@ -13772,25 +5880,17 @@ class SummaryPanelComponent: self._state.summary_loading = False self._state.run_on_ui_thread(self._update_loading_state) return - meeting_id = MeetingId(meeting_uuid) - try: result = await service.summarize( meeting_id=meeting_id, segments=segments, ) - # Track original and filtered summaries for toggle self._original_summary = result.result.summary self._filtered_summary = result.filtered_summary self._state.current_summary = result.summary - - # Calculate uncited counts self._calculate_uncited_counts() - self._state.run_on_ui_thread(self._render_summary) - - # Log provider info logger.info( "Summary generated by %s (fallback=%s)", result.provider_used, @@ -13804,15 +5904,8 @@ class SummaryPanelComponent: finally: self._state.summary_loading = False self._state.run_on_ui_thread(self._update_loading_state) - def _convert_segments(self) -> list: - """Convert TranscriptSegment to domain Segment for service call. - - Returns: - List of domain Segment entities. - """ from noteflow.domain.entities import Segment - segments = [] for ts in self._state.transcript_segments: seg = Segment( @@ -13824,85 +5917,49 @@ class SummaryPanelComponent: ) segments.append(seg) return segments - def _update_loading_state(self) -> None: - """Update loading indicator visibility.""" if self._loading_indicator: self._loading_indicator.visible = self._state.summary_loading if self._generate_btn: self._generate_btn.disabled = self._state.summary_loading self._state.request_update() - def _show_error(self, message: str) -> None: - """Show error message. - - Args: - message: Error message to display. - """ if self._error_text: self._error_text.value = message self._error_text.visible = True self._state.request_update() - def _clear_error(self) -> None: - """Clear error message.""" if self._error_text: self._error_text.value = "" self._error_text.visible = False self._state.request_update() - def _render_summary(self) -> None: - """Render summary content (UI thread only).""" summary = self._get_display_summary() if not summary: return - self._clear_error() - - # Update uncited toggle visibility self._update_uncited_ui() - - # Executive summary if self._summary_text: self._summary_text.value = summary.executive_summary or "No summary generated." 
- - # Key points if self._key_points_list: self._key_points_list.controls.clear() for idx, kp in enumerate(summary.key_points): self._key_points_list.controls.append(self._create_key_point_row(kp, idx)) - - # Action items if self._action_items_list: self._action_items_list.controls.clear() for idx, ai in enumerate(summary.action_items): self._action_items_list.controls.append(self._create_action_item_row(ai, idx)) - self._state.request_update() - def _create_key_point_row(self, kp: KeyPoint, index: int) -> ft.Container: - """Create a row for a key point. - - Args: - kp: Key point to display. - index: Index in the list. - - Returns: - Container with key point content. - """ - # Citation chips citation_chips = ft.Row( [self._create_citation_chip(sid) for sid in kp.segment_ids], spacing=4, ) - - # Evidence indicator evidence_icon = ft.Icon( ft.Icons.CHECK_CIRCLE if kp.has_evidence() else ft.Icons.HELP_OUTLINE, size=16, color=ft.Colors.GREEN_400 if kp.has_evidence() else ft.Colors.GREY_400, ) - row = ft.Row( [ ft.Text(f"{index + 1}.", size=12, color=ft.Colors.GREY_600, width=20), @@ -13913,47 +5970,28 @@ class SummaryPanelComponent: spacing=8, vertical_alignment=ft.CrossAxisAlignment.START, ) - return ft.Container( content=row, padding=ft.padding.symmetric(horizontal=8, vertical=4), border_radius=4, ) - def _create_action_item_row(self, ai: ActionItem, index: int) -> ft.Container: - """Create a row for an action item. - - Args: - ai: Action item to display. - index: Index in the list. - - Returns: - Container with action item content. - """ - # Priority badge priority_badge = self._create_priority_badge(ai.priority) - - # Assignee assignee_text = ft.Text( ai.assignee if ai.is_assigned() else "Unassigned", size=11, color=ft.Colors.BLUE_700 if ai.is_assigned() else ft.Colors.GREY_500, italic=not ai.is_assigned(), ) - - # Citation chips citation_chips = ft.Row( [self._create_citation_chip(sid) for sid in ai.segment_ids], spacing=4, ) - - # Evidence indicator evidence_icon = ft.Icon( ft.Icons.CHECK_CIRCLE if ai.has_evidence() else ft.Icons.HELP_OUTLINE, size=16, color=ft.Colors.GREEN_400 if ai.has_evidence() else ft.Colors.GREY_400, ) - row = ft.Row( [ ft.Text(f"{index + 1}.", size=12, color=ft.Colors.GREY_600, width=20), @@ -13972,22 +6010,12 @@ class SummaryPanelComponent: spacing=8, vertical_alignment=ft.CrossAxisAlignment.START, ) - return ft.Container( content=row, padding=ft.padding.symmetric(horizontal=8, vertical=4), border_radius=4, ) - def _create_priority_badge(self, priority: int) -> ft.Container: - """Create priority indicator badge. - - Args: - priority: Priority level (0-3). - - Returns: - Container with priority badge. - """ return ft.Container( content=ft.Text( PRIORITY_LABELS.get(priority, "—"), @@ -14000,16 +6028,7 @@ class SummaryPanelComponent: width=35, alignment=ft.alignment.center, ) - def _create_citation_chip(self, segment_id: int) -> ft.Container: - """Create clickable citation chip. - - Args: - segment_id: Segment ID to link to. - - Returns: - Container with citation chip. - """ return ft.Container( content=ft.Text( f"[#{segment_id}]", @@ -14022,47 +6041,29 @@ class SummaryPanelComponent: on_click=lambda _: self._handle_citation_click(segment_id), ink=True, ) - def _handle_citation_click(self, segment_id: int) -> None: - """Handle citation chip click. - - Args: - segment_id: Segment ID that was clicked. 
- """ if self._on_citation_click: self._on_citation_click(segment_id) - def _calculate_uncited_counts(self) -> None: - """Calculate number of uncited items filtered out.""" if not self._original_summary or not self._filtered_summary: self._uncited_key_points = 0 self._uncited_action_items = 0 return - original_kp = len(self._original_summary.key_points) filtered_kp = len(self._filtered_summary.key_points) self._uncited_key_points = original_kp - filtered_kp - original_ai = len(self._original_summary.action_items) filtered_ai = len(self._filtered_summary.action_items) self._uncited_action_items = original_ai - filtered_ai - def _has_uncited_items(self) -> bool: - """Check if any uncited items exist.""" return self._uncited_key_points > 0 or self._uncited_action_items > 0 - def _on_uncited_toggle(self, e: ft.ControlEvent) -> None: - """Handle uncited drafts toggle change.""" self._show_uncited = e.control.value self._render_summary() - def _update_uncited_ui(self) -> None: - """Update uncited toggle visibility and count text.""" has_uncited = self._has_uncited_items() - if self._uncited_toggle: self._uncited_toggle.visible = has_uncited - if self._uncited_count_text: if has_uncited: total_uncited = self._uncited_key_points + self._uncited_action_items @@ -14070,76 +6071,173 @@ class SummaryPanelComponent: self._uncited_count_text.visible = not self._show_uncited else: self._uncited_count_text.visible = False - def _get_display_summary(self) -> Summary | None: - """Get summary to display based on toggle state. - - Returns: - Original summary if showing uncited, filtered otherwise. - """ if self._show_uncited and self._original_summary: return self._original_summary return self._state.current_summary ```` -## File: src/noteflow/config/__init__.py +## File: src/noteflow/client/_trigger_mixin.py ````python -"""NoteFlow configuration module.""" - -from .constants import DEFAULT_GRPC_PORT, DEFAULT_SAMPLE_RATE, MAX_GRPC_MESSAGE_SIZE -from .settings import Settings, TriggerSettings, get_settings, get_trigger_settings - -__all__ = [ - "DEFAULT_GRPC_PORT", - "DEFAULT_SAMPLE_RATE", - "MAX_GRPC_MESSAGE_SIZE", - "Settings", - "TriggerSettings", - "get_settings", - "get_trigger_settings", -] +from __future__ import annotations +import asyncio +import logging +from typing import TYPE_CHECKING, Protocol +import flet as ft +from noteflow.application.services import TriggerService, TriggerServiceSettings +from noteflow.config.settings import TriggerSettings, get_trigger_settings +from noteflow.domain.triggers import TriggerAction, TriggerDecision +from noteflow.infrastructure.triggers import ( + AppAudioProvider, + AppAudioSettings, + CalendarProvider, + CalendarSettings, +) +from noteflow.infrastructure.triggers.calendar import parse_calendar_events +if TYPE_CHECKING: + from noteflow.client.state import AppState +logger = logging.getLogger(__name__) +class TriggerHost(Protocol): + _state: AppState + _trigger_settings: TriggerSettings | None + _trigger_service: TriggerService | None + _app_audio: AppAudioProvider | None + _calendar_provider: CalendarProvider | None + _trigger_poll_interval: float + _trigger_task: asyncio.Task | None + def _start_recording(self) -> None: + ... + def _ensure_audio_capture(self) -> bool: + ... 
+class TriggerMixin: + def _initialize_triggers(self: TriggerHost) -> None: + self._trigger_settings = get_trigger_settings() + self._state.trigger_enabled = self._trigger_settings.trigger_enabled + self._trigger_poll_interval = self._trigger_settings.trigger_poll_interval_seconds + meeting_apps = {app.lower() for app in self._trigger_settings.trigger_meeting_apps} + suppressed_apps = {app.lower() for app in self._trigger_settings.trigger_suppressed_apps} + app_audio_settings = AppAudioSettings( + enabled=self._trigger_settings.trigger_audio_enabled, + threshold_db=self._trigger_settings.trigger_audio_threshold_db, + window_seconds=self._trigger_settings.trigger_audio_window_seconds, + min_active_ratio=self._trigger_settings.trigger_audio_min_active_ratio, + min_samples=self._trigger_settings.trigger_audio_min_samples, + max_history=self._trigger_settings.trigger_audio_max_history, + weight=self._trigger_settings.trigger_weight_audio, + meeting_apps=meeting_apps, + suppressed_apps=suppressed_apps, + ) + calendar_settings = CalendarSettings( + enabled=self._trigger_settings.trigger_calendar_enabled, + weight=self._trigger_settings.trigger_weight_calendar, + lookahead_minutes=self._trigger_settings.trigger_calendar_lookahead_minutes, + lookbehind_minutes=self._trigger_settings.trigger_calendar_lookbehind_minutes, + events=parse_calendar_events(self._trigger_settings.trigger_calendar_events), + ) + self._app_audio = AppAudioProvider(app_audio_settings) + self._calendar_provider = CalendarProvider(calendar_settings) + self._trigger_service = TriggerService( + providers=[self._app_audio, self._calendar_provider], + settings=TriggerServiceSettings( + enabled=self._trigger_settings.trigger_enabled, + auto_start_enabled=self._trigger_settings.trigger_auto_start, + rate_limit_seconds=self._trigger_settings.trigger_rate_limit_minutes * 60, + snooze_seconds=self._trigger_settings.trigger_snooze_minutes * 60, + threshold_ignore=self._trigger_settings.trigger_confidence_ignore, + threshold_auto_start=self._trigger_settings.trigger_confidence_auto, + ), + ) + def _should_keep_capture_running(self: TriggerHost) -> bool: + return False + async def _trigger_check_loop(self: TriggerHost) -> None: + check_interval = self._trigger_poll_interval + try: + while True: + await asyncio.sleep(check_interval) + if self._state.recording or self._state.trigger_pending: + continue + if not self._state.trigger_enabled or not self._trigger_service: + continue + decision = self._trigger_service.evaluate() + self._state.trigger_decision = decision + if decision.action == TriggerAction.IGNORE: + continue + if decision.action == TriggerAction.AUTO_START: + if self._state.connected: + logger.info( + "Auto-starting recording (confidence=%.2f)", decision.confidence + ) + self._start_recording() + elif decision.action == TriggerAction.NOTIFY: + self._show_trigger_prompt(decision) + except asyncio.CancelledError: + logger.debug("Trigger loop cancelled") + raise + def _show_trigger_prompt(self: TriggerHost, decision: TriggerDecision) -> None: + self._state.trigger_pending = True + signal_desc = ", ".join(s.app_name or s.source.value for s in decision.signals) + def handle_start(_: ft.ControlEvent) -> None: + self._state.trigger_pending = False + if dialog.open: + dialog.open = False + self._state.request_update() + if self._state.connected: + self._start_recording() + def handle_snooze(_: ft.ControlEvent) -> None: + self._state.trigger_pending = False + if self._trigger_service: + self._trigger_service.snooze() + if dialog.open: + 
dialog.open = False + self._state.request_update() + def handle_dismiss(_: ft.ControlEvent) -> None: + self._state.trigger_pending = False + if dialog.open: + dialog.open = False + self._state.request_update() + dialog = ft.AlertDialog( + title=ft.Text("Meeting Detected"), + content=ft.Text( + "Detected: " + f"{signal_desc}\n" + f"Confidence: {decision.confidence:.0%}\n\n" + "Start recording?" + ), + actions=[ + ft.TextButton("Start", on_click=handle_start), + ft.TextButton("Snooze", on_click=handle_snooze), + ft.TextButton("Dismiss", on_click=handle_dismiss), + ], + actions_alignment=ft.MainAxisAlignment.END, + ) + if self._state._page: + self._state._page.dialog = dialog + dialog.open = True + self._state.request_update() ```` ## File: src/noteflow/domain/entities/segment.py ````python -"""Segment entity for transcript segments.""" - from __future__ import annotations - from dataclasses import dataclass, field from typing import TYPE_CHECKING - if TYPE_CHECKING: from noteflow.domain.value_objects import MeetingId - - @dataclass class WordTiming: - """Word-level timing information within a segment.""" - word: str start_time: float end_time: float probability: float - def __post_init__(self) -> None: - """Validate word timing.""" if self.end_time < self.start_time: raise ValueError( f"end_time ({self.end_time}) must be >= start_time ({self.start_time})" ) if not 0.0 <= self.probability <= 1.0: raise ValueError(f"probability must be between 0 and 1, got {self.probability}") - - @dataclass class Segment: - """Transcript segment entity. - - Represents a finalized segment of transcribed speech with optional - word-level timing information and language detection. - """ - segment_id: int text: str start_time: float @@ -14151,103 +6249,44 @@ class Segment: avg_logprob: float = 0.0 no_speech_prob: float = 0.0 embedding: list[float] | None = None - - # Speaker diarization (populated by diarization engine) speaker_id: str | None = None speaker_confidence: float = 0.0 - - # Database primary key (set after persistence) db_id: int | None = None - def __post_init__(self) -> None: - """Validate segment data.""" if self.end_time < self.start_time: raise ValueError( f"end_time ({self.end_time}) must be >= start_time ({self.start_time})" ) if self.segment_id < 0: raise ValueError(f"segment_id must be non-negative, got {self.segment_id}") - @property def duration(self) -> float: - """Segment duration in seconds.""" return self.end_time - self.start_time - @property def word_count(self) -> int: - """Number of words in segment.""" return len(self.words) if self.words else len(self.text.split()) - def has_embedding(self) -> bool: - """Check if segment has a computed embedding.""" return self.embedding is not None and len(self.embedding) > 0 ```` ## File: src/noteflow/domain/ports/repositories.py ````python -"""Repository protocol interfaces for persistence.""" - from __future__ import annotations - from collections.abc import Sequence from datetime import datetime from typing import TYPE_CHECKING, Protocol - if TYPE_CHECKING: from noteflow.domain.entities import Annotation, Meeting, Segment, Summary from noteflow.domain.value_objects import AnnotationId, MeetingId, MeetingState - - class MeetingRepository(Protocol): - """Repository protocol for Meeting aggregate operations.""" - async def create(self, meeting: Meeting) -> Meeting: - """Persist a new meeting. - - Args: - meeting: Meeting to create. - - Returns: - Created meeting with any generated fields populated. - """ ... 
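+    # These are typing.Protocol stubs, so the bare `...` bodies are
+    # intentional: concrete repositories (e.g. persistence/repositories/
+    # meeting_repo.py) satisfy the interface structurally, without inheriting
+    # from this class.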
- async def get(self, meeting_id: MeetingId) -> Meeting | None: - """Retrieve a meeting by ID. - - Args: - meeting_id: Meeting identifier. - - Returns: - Meeting if found, None otherwise. - """ ... - async def update(self, meeting: Meeting) -> Meeting: - """Update an existing meeting. - - Args: - meeting: Meeting with updated fields. - - Returns: - Updated meeting. - - Raises: - ValueError: If meeting does not exist. - """ ... - async def delete(self, meeting_id: MeetingId) -> bool: - """Delete a meeting and all associated data. - - Args: - meeting_id: Meeting identifier. - - Returns: - True if deleted, False if not found. - """ ... - async def list_all( self, states: list[MeetingState] | None = None, @@ -14255,403 +6294,97 @@ class MeetingRepository(Protocol): offset: int = 0, sort_desc: bool = True, ) -> tuple[Sequence[Meeting], int]: - """List meetings with optional filtering. - - Args: - states: Optional list of states to filter by. - limit: Maximum number of meetings to return. - offset: Number of meetings to skip. - sort_desc: Sort by created_at descending if True. - - Returns: - Tuple of (meetings list, total count matching filter). - """ ... - async def count_by_state(self, state: MeetingState) -> int: - """Count meetings in a specific state. - - Args: - state: Meeting state to count. - - Returns: - Number of meetings in the specified state. - """ ... - async def find_older_than(self, cutoff: datetime) -> Sequence[Meeting]: - """Find completed meetings older than cutoff date. - - Args: - cutoff: Cutoff datetime; meetings ended before this are returned. - - Returns: - Sequence of meetings with ended_at before cutoff. - """ ... - - class SegmentRepository(Protocol): - """Repository protocol for Segment operations.""" - async def add(self, meeting_id: MeetingId, segment: Segment) -> Segment: - """Add a segment to a meeting. - - Args: - meeting_id: Meeting identifier. - segment: Segment to add. - - Returns: - Added segment with db_id populated. - - Raises: - ValueError: If meeting does not exist. - """ ... - async def add_batch( self, meeting_id: MeetingId, segments: Sequence[Segment], ) -> Sequence[Segment]: - """Add multiple segments to a meeting in batch. - - Args: - meeting_id: Meeting identifier. - segments: Segments to add. - - Returns: - Added segments with db_ids populated. - - Raises: - ValueError: If meeting does not exist. - """ ... - async def get_by_meeting( self, meeting_id: MeetingId, include_words: bool = True, ) -> Sequence[Segment]: - """Get all segments for a meeting. - - Args: - meeting_id: Meeting identifier. - include_words: Include word-level timing. - - Returns: - List of segments ordered by segment_id. - """ ... - async def search_semantic( self, query_embedding: list[float], limit: int = 10, meeting_id: MeetingId | None = None, ) -> Sequence[tuple[Segment, float]]: - """Search segments by semantic similarity. - - Args: - query_embedding: Query embedding vector. - limit: Maximum number of results. - meeting_id: Optional meeting to restrict search to. - - Returns: - List of (segment, similarity_score) tuples. - """ ... - async def update_embedding( self, segment_db_id: int, embedding: list[float], ) -> None: - """Update the embedding for a segment. - - Args: - segment_db_id: Segment database primary key. - embedding: New embedding vector. - """ ... - - class SummaryRepository(Protocol): - """Repository protocol for Summary operations.""" - async def save(self, summary: Summary) -> Summary: - """Save or update a meeting summary. 
- - Args: - summary: Summary to save. - - Returns: - Saved summary with db_id populated. - """ ... - async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None: - """Get summary for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - Summary if exists, None otherwise. - """ ... - async def delete_by_meeting(self, meeting_id: MeetingId) -> bool: - """Delete summary for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - True if deleted, False if not found. - """ ... - - class AnnotationRepository(Protocol): - """Repository protocol for Annotation operations.""" - async def add(self, annotation: Annotation) -> Annotation: - """Add an annotation to a meeting. - - Args: - annotation: Annotation to add. - - Returns: - Added annotation with db_id populated. - - Raises: - ValueError: If meeting does not exist. - """ ... - async def get(self, annotation_id: AnnotationId) -> Annotation | None: - """Retrieve an annotation by ID. - - Args: - annotation_id: Annotation identifier. - - Returns: - Annotation if found, None otherwise. - """ ... - async def get_by_meeting( self, meeting_id: MeetingId, ) -> Sequence[Annotation]: - """Get all annotations for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - List of annotations ordered by start_time. - """ ... - async def get_by_time_range( self, meeting_id: MeetingId, start_time: float, end_time: float, ) -> Sequence[Annotation]: - """Get annotations within a time range. - - Args: - meeting_id: Meeting identifier. - start_time: Start of time range in seconds. - end_time: End of time range in seconds. - - Returns: - List of annotations overlapping the time range. - """ ... - async def update(self, annotation: Annotation) -> Annotation: - """Update an existing annotation. - - Args: - annotation: Annotation with updated fields. - - Returns: - Updated annotation. - - Raises: - ValueError: If annotation does not exist. - """ ... - async def delete(self, annotation_id: AnnotationId) -> bool: - """Delete an annotation. - - Args: - annotation_id: Annotation identifier. - - Returns: - True if deleted, False if not found. - """ ... ```` -## File: src/noteflow/domain/triggers/entities.py -````python -"""Trigger domain entities and value objects. - -Define trigger signals, decisions, and actions for meeting detection. -""" - -from __future__ import annotations - -import time -from dataclasses import dataclass, field -from enum import Enum - - -class TriggerSource(Enum): - """Source of a trigger signal.""" - - AUDIO_ACTIVITY = "audio_activity" - FOREGROUND_APP = "foreground_app" - CALENDAR = "calendar" # Deferred - optional connector - - -class TriggerAction(Enum): - """Action determined by trigger evaluation.""" - - IGNORE = "ignore" # Confidence < 0.40 - NOTIFY = "notify" # Confidence 0.40-0.79 - AUTO_START = "auto_start" # Confidence >= 0.80 (if enabled) - - -@dataclass(frozen=True) -class TriggerSignal: - """A signal from a single trigger source. - - Attributes: - source: The source that generated this signal. - weight: Confidence contribution (0.0-1.0). - app_name: For foreground app signals, the detected app name. - timestamp: When the signal was generated (monotonic time). 
- """ - - source: TriggerSource - weight: float - app_name: str | None = None - timestamp: float = field(default_factory=time.monotonic) - - def __post_init__(self) -> None: - """Validate weight is in valid range.""" - if not 0.0 <= self.weight <= 1.0: - msg = f"Weight must be 0.0-1.0, got {self.weight}" - raise ValueError(msg) - - -@dataclass(frozen=True) -class TriggerDecision: - """Result of trigger evaluation. - - Attributes: - action: The determined action (ignore, notify, auto_start). - confidence: Total confidence score from all signals. - signals: The signals that contributed to this decision. - timestamp: When the decision was made (monotonic time). - """ - - action: TriggerAction - confidence: float - signals: tuple[TriggerSignal, ...] - timestamp: float = field(default_factory=time.monotonic) - - @property - def primary_signal(self) -> TriggerSignal | None: - """Get the signal with highest weight contribution.""" - return max(self.signals, key=lambda s: s.weight) if self.signals else None - - @property - def detected_app(self) -> str | None: - """Get the detected app name from any signal if present.""" - return next((signal.app_name for signal in self.signals if signal.app_name), None) -```` - ## File: src/noteflow/domain/value_objects.py ````python -"""Domain value objects for NoteFlow.""" - from __future__ import annotations - from enum import Enum, IntEnum from typing import NewType from uuid import UUID - -# Type-safe identifiers MeetingId = NewType("MeetingId", UUID) AnnotationId = NewType("AnnotationId", UUID) - - class AnnotationType(Enum): - """User annotation type. - - Used to categorize user-created annotations during recording. - Distinct from LLM-extracted ActionItem/KeyPoint in summaries. - """ - ACTION_ITEM = "action_item" DECISION = "decision" NOTE = "note" RISK = "risk" - - class MeetingState(IntEnum): - """Meeting lifecycle state. - - State transitions: - CREATED -> RECORDING -> STOPPING -> STOPPED -> COMPLETED - Any state -> ERROR (on failure) - - The STOPPING state allows graceful shutdown with audio flush operations. - """ - UNSPECIFIED = 0 CREATED = 1 RECORDING = 2 STOPPED = 3 COMPLETED = 4 ERROR = 5 - STOPPING = 6 # Intermediate state for graceful shutdown - + STOPPING = 6 @classmethod def from_int(cls, value: int) -> MeetingState: - """Convert integer to MeetingState. - - Args: - value: Integer value. - - Returns: - Corresponding MeetingState. - - Raises: - ValueError: If value is not a valid state. - """ try: return cls(value) except ValueError as e: raise ValueError(f"Invalid meeting state: {value}") from e - def can_transition_to(self, target: MeetingState) -> bool: - """Check if transition to target state is valid. - - Args: - target: Target state. - - Returns: - True if transition is valid. 
- """ valid_transitions: dict[MeetingState, set[MeetingState]] = { MeetingState.UNSPECIFIED: {MeetingState.CREATED}, MeetingState.CREATED: {MeetingState.RECORDING, MeetingState.ERROR}, @@ -14659,11 +6392,2535 @@ class MeetingState(IntEnum): MeetingState.STOPPING: {MeetingState.STOPPED, MeetingState.ERROR}, MeetingState.STOPPED: {MeetingState.COMPLETED, MeetingState.ERROR}, MeetingState.COMPLETED: {MeetingState.ERROR}, - MeetingState.ERROR: set(), # Terminal state + MeetingState.ERROR: set(), } return target in valid_transitions.get(self, set()) ```` +## File: src/noteflow/grpc/__init__.py +````python +from noteflow.domain.value_objects import MeetingState +from .client import ( + AnnotationInfo, + DiarizationResult, + ExportResult, + MeetingInfo, + NoteFlowClient, + RenameSpeakerResult, + ServerInfo, + TranscriptSegment, +) +from .meeting_store import MeetingStore +from .service import NoteFlowServicer +__all__ = [ + "AnnotationInfo", + "DiarizationResult", + "ExportResult", + "MeetingInfo", + "MeetingState", + "MeetingStore", + "NoteFlowClient", + "NoteFlowServicer", + "RenameSpeakerResult", + "ServerInfo", + "TranscriptSegment", +] +```` + +## File: src/noteflow/infrastructure/asr/engine.py +````python +from __future__ import annotations +import asyncio +import logging +from collections.abc import Iterator +from functools import partial +from typing import TYPE_CHECKING, Final +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray +from noteflow.infrastructure.asr.dto import AsrResult, WordTiming +logger = logging.getLogger(__name__) +VALID_MODEL_SIZES: Final[tuple[str, ...]] = ( + "tiny", + "tiny.en", + "base", + "base.en", + "small", + "small.en", + "medium", + "medium.en", + "large-v1", + "large-v2", + "large-v3", +) +class FasterWhisperEngine: + def __init__( + self, + compute_type: str = "int8", + device: str = "cpu", + num_workers: int = 1, + ) -> None: + self._compute_type = compute_type + self._device = device + self._num_workers = num_workers + self._model = None + self._model_size: str | None = None + def load_model(self, model_size: str = "base") -> None: + from faster_whisper import WhisperModel + if model_size not in VALID_MODEL_SIZES: + raise ValueError( + f"Invalid model size: {model_size}. Valid sizes: {', '.join(VALID_MODEL_SIZES)}" + ) + logger.info( + "Loading Whisper model '%s' on %s with %s compute...", + model_size, + self._device, + self._compute_type, + ) + try: + self._model = WhisperModel( + model_size, + device=self._device, + compute_type=self._compute_type, + num_workers=self._num_workers, + ) + self._model_size = model_size + logger.info("Model loaded successfully") + except Exception as e: + raise RuntimeError(f"Failed to load model: {e}") from e + def transcribe( + self, + audio: NDArray[np.float32], + language: str | None = None, + ) -> Iterator[AsrResult]: + if self._model is None: + raise RuntimeError("Model not loaded. 
Call load_model() first.") + segments, info = self._model.transcribe( + audio, + language=language, + word_timestamps=True, + beam_size=5, + vad_filter=True, + ) + logger.debug( + "Detected language: %s (prob: %.2f)", + info.language, + info.language_probability, + ) + for segment in segments: + words: list[WordTiming] = [] + if segment.words: + words = [ + WordTiming( + word=word.word, + start=word.start, + end=word.end, + probability=word.probability, + ) + for word in segment.words + ] + yield AsrResult( + text=segment.text.strip(), + start=segment.start, + end=segment.end, + words=tuple(words), + language=info.language, + language_probability=info.language_probability, + avg_logprob=segment.avg_logprob, + no_speech_prob=segment.no_speech_prob, + ) + async def transcribe_async( + self, + audio: NDArray[np.float32], + language: str | None = None, + ) -> list[AsrResult]: + loop = asyncio.get_running_loop() + return await loop.run_in_executor( + None, + partial(lambda a, lang: list(self.transcribe(a, lang)), audio, language), + ) + @property + def is_loaded(self) -> bool: + return self._model is not None + @property + def model_size(self) -> str | None: + return self._model_size + def unload(self) -> None: + self._model = None + self._model_size = None + logger.info("Model unloaded") + @property + def compute_type(self) -> str: + return self._compute_type + @property + def device(self) -> str: + return self._device +```` + +## File: src/noteflow/infrastructure/audio/capture.py +````python +from __future__ import annotations +import logging +import time +from typing import TYPE_CHECKING +import numpy as np +import sounddevice as sd +from noteflow.config.constants import DEFAULT_SAMPLE_RATE +from noteflow.infrastructure.audio.dto import AudioDeviceInfo, AudioFrameCallback +if TYPE_CHECKING: + from numpy.typing import NDArray +logger = logging.getLogger(__name__) +class SoundDeviceCapture: + def __init__(self) -> None: + self._stream: sd.InputStream | None = None + self._callback: AudioFrameCallback | None = None + self._device_id: int | None = None + self._sample_rate: int = DEFAULT_SAMPLE_RATE + self._channels: int = 1 + def list_devices(self) -> list[AudioDeviceInfo]: + devices: list[AudioDeviceInfo] = [] + device_list = sd.query_devices() + try: + default_input = sd.default.device[0] + except (TypeError, IndexError): + default_input = -1 + devices.extend( + AudioDeviceInfo( + device_id=idx, + name=dev["name"], + channels=int(dev["max_input_channels"]), + sample_rate=int(dev["default_samplerate"]), + is_default=(idx == default_input), + ) + for idx, dev in enumerate(device_list) + if int(dev["max_input_channels"]) > 0 + ) + return devices + def get_default_device(self) -> AudioDeviceInfo | None: + devices = self.list_devices() + for dev in devices: + if dev.is_default: + return dev + return devices[0] if devices else None + def start( + self, + device_id: int | None, + on_frames: AudioFrameCallback, + sample_rate: int = DEFAULT_SAMPLE_RATE, + channels: int = 1, + chunk_duration_ms: int = 100, + ) -> None: + if self._stream is not None: + raise RuntimeError("Already capturing audio") + self._callback = on_frames + self._device_id = device_id + self._sample_rate = sample_rate + self._channels = channels + blocksize = int(sample_rate * chunk_duration_ms / 1000) + def _stream_callback( + indata: NDArray[np.float32], + frames: int, + time_info: object, + status: sd.CallbackFlags, + ) -> None: + _ = frames, time_info + if status: + logger.warning("Audio stream status: %s", status) + if 
self._callback is not None: + audio_data = indata.copy().flatten().astype(np.float32) + timestamp = time.monotonic() + self._callback(audio_data, timestamp) + try: + self._stream = sd.InputStream( + device=device_id, + channels=channels, + samplerate=sample_rate, + blocksize=blocksize, + dtype=np.float32, + callback=_stream_callback, + ) + self._stream.start() + logger.info( + "Started audio capture: device=%s, rate=%d, channels=%d, blocksize=%d", + device_id, + sample_rate, + channels, + blocksize, + ) + except sd.PortAudioError as e: + self._stream = None + self._callback = None + raise RuntimeError(f"Failed to start audio capture: {e}") from e + def stop(self) -> None: + if self._stream is not None: + try: + self._stream.stop() + self._stream.close() + except sd.PortAudioError as e: + logger.warning("Error stopping audio stream: %s", e) + finally: + self._stream = None + self._callback = None + logger.info("Stopped audio capture") + def is_capturing(self) -> bool: + return self._stream is not None and self._stream.active + @property + def current_device_id(self) -> int | None: + return self._device_id + @property + def sample_rate(self) -> int: + return self._sample_rate + @property + def channels(self) -> int: + return self._channels +```` + +## File: src/noteflow/infrastructure/audio/levels.py +````python +from __future__ import annotations +import math +from typing import Final +import numpy as np +from numpy.typing import NDArray +def compute_rms(frames: NDArray[np.float32]) -> float: + if len(frames) == 0: + return 0.0 + return float(np.sqrt(np.mean(frames.astype(np.float64) ** 2))) +class RmsLevelProvider: + MIN_DB: Final[float] = -60.0 + def get_rms(self, frames: NDArray[np.float32]) -> float: + rms = compute_rms(frames) + return min(1.0, max(0.0, rms)) + def get_db(self, frames: NDArray[np.float32]) -> float: + rms = self.get_rms(frames) + if rms <= 0: + return self.MIN_DB + db = 20.0 * math.log10(rms) + return max(self.MIN_DB, min(0.0, db)) + def rms_to_db(self, rms: float) -> float: + if rms <= 0: + return self.MIN_DB + db = 20.0 * math.log10(rms) + return max(self.MIN_DB, min(0.0, db)) + def db_to_rms(self, db: float) -> float: + return 0.0 if db <= self.MIN_DB else 10.0 ** (db / 20.0) +```` + +## File: src/noteflow/infrastructure/audio/playback.py +````python +from __future__ import annotations +import logging +import threading +from collections.abc import Callable +from enum import Enum, auto +from typing import TYPE_CHECKING +import numpy as np +import sounddevice as sd +from numpy.typing import NDArray +from noteflow.config.constants import DEFAULT_SAMPLE_RATE, POSITION_UPDATE_INTERVAL +if TYPE_CHECKING: + from noteflow.infrastructure.audio.dto import TimestampedAudio +logger = logging.getLogger(__name__) +class PlaybackState(Enum): + STOPPED = auto() + PLAYING = auto() + PAUSED = auto() +class SoundDevicePlayback: + def __init__( + self, + sample_rate: int = DEFAULT_SAMPLE_RATE, + channels: int = 1, + on_position_update: Callable[[float], None] | None = None, + ) -> None: + self._sample_rate = sample_rate + self._channels = channels + self._position_callbacks: list[Callable[[float], None]] = [] + if on_position_update is not None: + self._position_callbacks.append(on_position_update) + self._state = PlaybackState.STOPPED + self._lock = threading.Lock() + self._audio_data: NDArray[np.float32] | None = None + self._total_samples: int = 0 + self._current_sample: int = 0 + self._callback_interval_samples = int(sample_rate * POSITION_UPDATE_INTERVAL) + 
self._last_callback_sample: int = 0 + self._stream: sd.OutputStream | None = None + def play(self, audio: list[TimestampedAudio]) -> None: + if not audio: + logger.warning("No audio chunks to play") + return + with self._lock: + self._stop_internal() + frames = [chunk.frames for chunk in audio] + self._audio_data = np.concatenate(frames).astype(np.float32) + self._total_samples = len(self._audio_data) + self._current_sample = 0 + self._last_callback_sample = 0 + self._start_stream() + self._state = PlaybackState.PLAYING + logger.info( + "Started playback: %d samples (%.2f seconds)", + self._total_samples, + self.total_duration, + ) + def pause(self) -> None: + with self._lock: + if self._state == PlaybackState.PLAYING and self._stream is not None: + self._stream.stop() + self._state = PlaybackState.PAUSED + logger.debug("Paused playback at %.2f seconds", self.current_position) + def resume(self) -> None: + with self._lock: + if self._state == PlaybackState.PAUSED and self._stream is not None: + self._stream.start() + self._state = PlaybackState.PLAYING + logger.debug("Resumed playback from %.2f seconds", self.current_position) + def stop(self) -> None: + position = 0.0 + with self._lock: + if self._audio_data is not None: + position = self._current_sample / self._sample_rate + self._stop_internal() + self._notify_position_callbacks(position) + def _stop_internal(self) -> None: + if self._stream is not None: + try: + self._stream.stop() + self._stream.close() + except sd.PortAudioError as e: + logger.warning("Error stopping playback stream: %s", e) + finally: + self._stream = None + self._state = PlaybackState.STOPPED + self._current_sample = 0 + self._audio_data = None + self._total_samples = 0 + self._last_callback_sample = 0 + logger.debug("Stopped playback") + def _start_stream(self) -> None: + def _stream_callback( + outdata: NDArray[np.float32], + frames: int, + time_info: object, + status: sd.CallbackFlags, + ) -> None: + _ = time_info + if status: + logger.warning("Playback stream status: %s", status) + fire_callback = False + position = 0.0 + with self._lock: + if self._audio_data is None or self._state != PlaybackState.PLAYING: + outdata.fill(0) + return + available = self._total_samples - self._current_sample + to_copy = min(frames, available) + if to_copy > 0: + outdata[:to_copy, 0] = self._audio_data[ + self._current_sample : self._current_sample + to_copy + ] + self._current_sample += to_copy + if to_copy < frames: + outdata[to_copy:] = 0 + elapsed = self._current_sample - self._last_callback_sample + if elapsed >= self._callback_interval_samples: + fire_callback = True + position = self._current_sample / self._sample_rate + self._last_callback_sample = self._current_sample + if self._current_sample >= self._total_samples: + threading.Thread(target=self._on_playback_complete, daemon=True).start() + if fire_callback: + self._notify_position_callbacks(position) + try: + self._stream = sd.OutputStream( + channels=self._channels, + samplerate=self._sample_rate, + dtype=np.float32, + callback=_stream_callback, + ) + self._stream.start() + except sd.PortAudioError as e: + self._stream = None + raise RuntimeError(f"Failed to start playback stream: {e}") from e + def _on_playback_complete(self) -> None: + logger.info("Playback completed") + self.stop() + def seek(self, position: float) -> bool: + with self._lock: + if self._audio_data is None: + logger.warning("Cannot seek: no audio loaded") + return False + max_position = self._total_samples / self._sample_rate + clamped_position = 
max(0.0, min(position, max_position)) + self._current_sample = int(clamped_position * self._sample_rate) + self._last_callback_sample = self._current_sample + logger.debug( + "Seeked to %.2f seconds (sample %d)", + clamped_position, + self._current_sample, + ) + position_seconds = clamped_position + self._notify_position_callbacks(position_seconds) + return True + def is_playing(self) -> bool: + with self._lock: + return self._state == PlaybackState.PLAYING + @property + def current_position(self) -> float: + with self._lock: + return self._current_sample / self._sample_rate + @property + def total_duration(self) -> float: + with self._lock: + return self._total_samples / self._sample_rate + @property + def state(self) -> PlaybackState: + with self._lock: + return self._state + @property + def sample_rate(self) -> int: + return self._sample_rate + @property + def channels(self) -> int: + return self._channels + def add_position_callback( + self, + callback: Callable[[float], None], + ) -> None: + if callback not in self._position_callbacks: + self._position_callbacks.append(callback) + def _notify_position_callbacks(self, position: float) -> None: + for callback in list(self._position_callbacks): + try: + callback(position) + except Exception as e: + logger.debug("Position update callback error: %s", e) + def remove_position_callback( + self, + callback: Callable[[float], None], + ) -> None: + if callback in self._position_callbacks: + self._position_callbacks.remove(callback) + def set_position_callback( + self, + callback: Callable[[float], None] | None, + ) -> None: + self._position_callbacks.clear() + if callback is not None: + self._position_callbacks.append(callback) +```` + +## File: src/noteflow/infrastructure/audio/protocols.py +````python +from __future__ import annotations +from typing import TYPE_CHECKING, Protocol +from noteflow.config.constants import DEFAULT_SAMPLE_RATE +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + from noteflow.infrastructure.audio.dto import ( + AudioDeviceInfo, + AudioFrameCallback, + TimestampedAudio, + ) +class AudioCapture(Protocol): + def list_devices(self) -> list[AudioDeviceInfo]: + ... + def start( + self, + device_id: int | None, + on_frames: AudioFrameCallback, + sample_rate: int = DEFAULT_SAMPLE_RATE, + channels: int = 1, + chunk_duration_ms: int = 100, + ) -> None: + ... + def stop(self) -> None: + ... + def is_capturing(self) -> bool: + ... +class AudioLevelProvider(Protocol): + def get_rms(self, frames: NDArray[np.float32]) -> float: + ... + def get_db(self, frames: NDArray[np.float32]) -> float: + ... +class RingBuffer(Protocol): + def push(self, audio: TimestampedAudio) -> None: + ... + def get_window(self, duration_seconds: float) -> list[TimestampedAudio]: + ... + def clear(self) -> None: + ... + @property + def duration(self) -> float: + ... + @property + def max_duration(self) -> float: + ... +class AudioPlayback(Protocol): + def play(self, audio: list[TimestampedAudio]) -> None: + ... + def pause(self) -> None: + ... + def resume(self) -> None: + ... + def stop(self) -> None: + ... + def is_playing(self) -> bool: + ... + @property + def current_position(self) -> float: + ... + @property + def total_duration(self) -> float: + ... 
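+# --- Illustrative usage sketch (not part of the original module) --- +# Shows how the capture and level protocols compose. This helper is +# hypothetical, added for clarity; it assumes the frame callback shape +# Callable[[NDArray[np.float32], float], None] that SoundDeviceCapture uses. +def _example_meter_loop(capture: AudioCapture, levels: AudioLevelProvider) -> None: + def on_frames(frames: NDArray[np.float32], timestamp: float) -> None: + _ = timestamp + print(f"mic level: {levels.get_db(frames):.1f} dB") + capture.start(device_id=None, on_frames=on_frames)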
+```` + +## File: src/noteflow/infrastructure/audio/writer.py +````python +from __future__ import annotations +import json +import logging +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING +import numpy as np +from noteflow.config.constants import DEFAULT_SAMPLE_RATE +from noteflow.infrastructure.security.crypto import ChunkedAssetWriter +if TYPE_CHECKING: + from numpy.typing import NDArray + from noteflow.infrastructure.security.crypto import AesGcmCryptoBox +logger = logging.getLogger(__name__) +class MeetingAudioWriter: + def __init__( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + self._crypto = crypto + self._meetings_dir = meetings_dir + self._asset_writer: ChunkedAssetWriter | None = None + self._meeting_dir: Path | None = None + self._sample_rate: int = DEFAULT_SAMPLE_RATE + self._chunk_count: int = 0 + def open( + self, + meeting_id: str, + dek: bytes, + wrapped_dek: bytes, + sample_rate: int = DEFAULT_SAMPLE_RATE, + ) -> None: + if self._asset_writer is not None: + raise RuntimeError("Writer already open") + self._meeting_dir = self._meetings_dir / meeting_id + self._meeting_dir.mkdir(parents=True, exist_ok=True) + manifest = { + "meeting_id": meeting_id, + "created_at": datetime.now(UTC).isoformat(), + "sample_rate": sample_rate, + "channels": 1, + "format": "pcm16", + "wrapped_dek": wrapped_dek.hex(), + } + manifest_path = self._meeting_dir / "manifest.json" + manifest_path.write_text(json.dumps(manifest, indent=2)) + audio_path = self._meeting_dir / "audio.enc" + self._asset_writer = ChunkedAssetWriter(self._crypto) + self._asset_writer.open(audio_path, dek) + self._sample_rate = sample_rate + self._chunk_count = 0 + logger.info( + "Opened audio writer: meeting=%s, dir=%s", + meeting_id, + self._meeting_dir, + ) + def write_chunk(self, audio: NDArray[np.float32]) -> None: + if self._asset_writer is None or not self._asset_writer.is_open: + raise RuntimeError("Writer not open") + audio_clamped = np.clip(audio, -1.0, 1.0) + pcm16 = (audio_clamped * 32767.0).astype(np.int16) + self._asset_writer.write_chunk(pcm16.tobytes()) + self._chunk_count += 1 + def close(self) -> None: + if self._asset_writer is not None: + bytes_written = self._asset_writer.bytes_written + self._asset_writer.close() + self._asset_writer = None + logger.info( + "Closed audio writer: dir=%s, chunks=%d, bytes=%d", + self._meeting_dir, + self._chunk_count, + bytes_written, + ) + self._meeting_dir = None + self._chunk_count = 0 + @property + def is_open(self) -> bool: + return self._asset_writer is not None and self._asset_writer.is_open + @property + def bytes_written(self) -> int: + return 0 if self._asset_writer is None else self._asset_writer.bytes_written + @property + def chunk_count(self) -> int: + return self._chunk_count + @property + def meeting_dir(self) -> Path | None: + return self._meeting_dir +```` + +## File: src/noteflow/infrastructure/diarization/engine.py +````python +from __future__ import annotations +import logging +from typing import TYPE_CHECKING +from noteflow.config.constants import DEFAULT_SAMPLE_RATE +from noteflow.infrastructure.diarization.dto import SpeakerTurn +if TYPE_CHECKING: + from collections.abc import Sequence + import numpy as np + from numpy.typing import NDArray + from pyannote.core import Annotation +logger = logging.getLogger(__name__) +class DiarizationEngine: + def __init__( + self, + device: str = "auto", + hf_token: str | None = None, + streaming_latency: float = 0.5, + min_speakers: int = 1, + 
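# Bounds for the offline speaker-count search when num_speakers is not given. +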
max_speakers: int = 10, + ) -> None: + self._device_preference = device + self._device: str | None = None + self._hf_token = hf_token + self._streaming_latency = streaming_latency + self._min_speakers = min_speakers + self._max_speakers = max_speakers + self._streaming_pipeline = None + self._offline_pipeline = None + def _resolve_device(self) -> str: + if self._device is not None: + return self._device + import torch + if self._device_preference == "auto": + if torch.cuda.is_available(): + self._device = "cuda" + elif torch.backends.mps.is_available(): + self._device = "mps" + else: + self._device = "cpu" + else: + self._device = self._device_preference + logger.info("Diarization device resolved to: %s", self._device) + return self._device + def load_streaming_model(self) -> None: + if self._streaming_pipeline is not None: + logger.debug("Streaming model already loaded") + return + if not self._hf_token: + raise ValueError("HuggingFace token required for pyannote models") + device = self._resolve_device() + logger.info( + "Loading streaming diarization model on %s with latency %.2fs...", + device, + self._streaming_latency, + ) + try: + from diart import SpeakerDiarization, SpeakerDiarizationConfig + from diart.models import EmbeddingModel, SegmentationModel + segmentation = SegmentationModel.from_pretrained( + "pyannote/segmentation-3.0", + use_hf_token=self._hf_token, + ) + embedding = EmbeddingModel.from_pretrained( + "pyannote/wespeaker-voxceleb-resnet34-LM", + use_hf_token=self._hf_token, + ) + config = SpeakerDiarizationConfig( + segmentation=segmentation, + embedding=embedding, + step=self._streaming_latency, + latency=self._streaming_latency, + device=device, + ) + self._streaming_pipeline = SpeakerDiarization(config) + logger.info("Streaming diarization model loaded successfully") + except Exception as e: + raise RuntimeError(f"Failed to load streaming diarization model: {e}") from e + def load_offline_model(self) -> None: + if self._offline_pipeline is not None: + logger.debug("Offline model already loaded") + return + if not self._hf_token: + raise ValueError("HuggingFace token required for pyannote models") + device = self._resolve_device() + logger.info("Loading offline diarization model on %s...", device) + try: + import torch + from pyannote.audio import Pipeline + self._offline_pipeline = Pipeline.from_pretrained( + "pyannote/speaker-diarization-3.1", + use_auth_token=self._hf_token, + ) + torch_device = torch.device(device) + self._offline_pipeline.to(torch_device) + logger.info("Offline diarization model loaded successfully") + except Exception as e: + raise RuntimeError(f"Failed to load offline diarization model: {e}") from e + def process_chunk( + self, + audio: NDArray[np.float32], + sample_rate: int = DEFAULT_SAMPLE_RATE, + ) -> Sequence[SpeakerTurn]: + if self._streaming_pipeline is None: + raise RuntimeError("Streaming model not loaded. 
Call load_streaming_model() first.") + from pyannote.core import SlidingWindowFeature + if audio.ndim == 1: + audio = audio.reshape(1, -1) + from pyannote.core import SlidingWindow + duration = audio.shape[1] / sample_rate + window = SlidingWindow(start=0.0, duration=duration, step=duration) + waveform = SlidingWindowFeature(audio, window) + results = self._streaming_pipeline([waveform]) + turns: list[SpeakerTurn] = [] + for annotation, _ in results: + turns.extend(self._annotation_to_turns(annotation)) + return turns + def diarize_full( + self, + audio: NDArray[np.float32], + sample_rate: int = DEFAULT_SAMPLE_RATE, + num_speakers: int | None = None, + ) -> Sequence[SpeakerTurn]: + if self._offline_pipeline is None: + raise RuntimeError("Offline model not loaded. Call load_offline_model() first.") + import torch + if audio.ndim == 1: + audio_tensor = torch.from_numpy(audio).unsqueeze(0) + else: + audio_tensor = torch.from_numpy(audio) + waveform = {"waveform": audio_tensor, "sample_rate": sample_rate} + logger.debug( + "Running offline diarization on %.2fs audio", + audio_tensor.shape[1] / sample_rate, + ) + if num_speakers is not None: + annotation = self._offline_pipeline(waveform, num_speakers=num_speakers) + else: + annotation = self._offline_pipeline( + waveform, + min_speakers=self._min_speakers, + max_speakers=self._max_speakers, + ) + return self._annotation_to_turns(annotation) + def _annotation_to_turns(self, annotation: Annotation) -> list[SpeakerTurn]: + turns: list[SpeakerTurn] = [] + for track in annotation.itertracks(yield_label=True): + if len(track) == 3: + segment, _, speaker = track + turns.append( + SpeakerTurn( + speaker=str(speaker), + start=segment.start, + end=segment.end, + ) + ) + return turns + def reset_streaming(self) -> None: + if self._streaming_pipeline is not None: + self._streaming_pipeline.reset() + logger.debug("Streaming pipeline state reset") + def unload(self) -> None: + self._streaming_pipeline = None + self._offline_pipeline = None + self._device = None + logger.info("Diarization models unloaded") + @property + def is_streaming_loaded(self) -> bool: + return self._streaming_pipeline is not None + @property + def is_offline_loaded(self) -> bool: + return self._offline_pipeline is not None + @property + def device(self) -> str | None: + return self._device +```` + +## File: src/noteflow/infrastructure/export/markdown.py +````python +from __future__ import annotations +from datetime import datetime +from typing import TYPE_CHECKING +from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp +if TYPE_CHECKING: + from collections.abc import Sequence + from noteflow.domain.entities.meeting import Meeting + from noteflow.domain.entities.segment import Segment +class MarkdownExporter: + @property + def format_name(self) -> str: + return "Markdown" + @property + def file_extension(self) -> str: + return ".md" + def export( + self, + meeting: Meeting, + segments: Sequence[Segment], + ) -> str: + lines: list[str] = [ + f"# {meeting.title}", + "", + "## Meeting Info", + "", + f"- **Date:** {format_datetime(meeting.created_at)}", + ] + if meeting.started_at: + lines.append(f"- **Started:** {format_datetime(meeting.started_at)}") + if meeting.ended_at: + lines.append(f"- **Ended:** {format_datetime(meeting.ended_at)}") + lines.append(f"- **Duration:** {format_timestamp(meeting.duration_seconds)}") + lines.extend((f"- **Segments:** {len(segments)}", "", "## Transcript", "")) + for segment in segments: + timestamp = 
format_timestamp(segment.start_time) + lines.extend((f"**[{timestamp}]** {segment.text}", "")) + if meeting.summary: + lines.extend(("## Summary", "")) + if meeting.summary.executive_summary: + lines.extend((meeting.summary.executive_summary, "")) + if meeting.summary.key_points: + lines.extend(("### Key Points", "")) + lines.extend(f"- {point.text}" for point in meeting.summary.key_points) + lines.append("") + if meeting.summary.action_items: + lines.extend(("### Action Items", "")) + for item in meeting.summary.action_items: + assignee = f" (@{item.assignee})" if item.assignee else "" + lines.append(f"- [ ] {item.text}{assignee}") + lines.append("") + lines.append("---") + lines.append(f"*Exported from NoteFlow on {format_datetime(datetime.now())}*") + return "\n".join(lines) +```` + +## File: src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py +````python +from collections.abc import Sequence +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql +revision: str = "6a9d9f408f40" +down_revision: str | Sequence[str] | None = None +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None +EMBEDDING_DIM = 1536 +def upgrade() -> None: + op.execute("CREATE SCHEMA IF NOT EXISTS noteflow") + try: + op.execute("CREATE EXTENSION IF NOT EXISTS vector") + except sa.exc.ProgrammingError as e: + raise RuntimeError( + f"Failed to create pgvector extension: {e}. " + "Ensure the database user has CREATE EXTENSION privileges, or " + "install pgvector manually: CREATE EXTENSION vector;" + ) from e + op.create_table( + "meetings", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("title", sa.String(255), nullable=False), + sa.Column("state", sa.Integer(), nullable=False, server_default="1"), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True), + sa.Column( + "metadata", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="{}", + ), + sa.Column("wrapped_dek", sa.LargeBinary(), nullable=True), + schema="noteflow", + ) + op.create_table( + "segments", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "meeting_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("segment_id", sa.Integer(), nullable=False), + sa.Column("text", sa.Text(), nullable=False), + sa.Column("start_time", sa.Float(), nullable=False), + sa.Column("end_time", sa.Float(), nullable=False), + sa.Column("language", sa.String(10), nullable=False, server_default="en"), + sa.Column("language_confidence", sa.Float(), nullable=False, server_default="0.0"), + sa.Column("avg_logprob", sa.Float(), nullable=False, server_default="0.0"), + sa.Column("no_speech_prob", sa.Float(), nullable=False, server_default="0.0"), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + schema="noteflow", + ) + op.execute(f"ALTER TABLE noteflow.segments ADD COLUMN embedding vector({EMBEDDING_DIM})") + op.execute( + "CREATE INDEX IF NOT EXISTS ix_segments_embedding " + "ON noteflow.segments USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)" + ) + op.create_index( + "ix_segments_meeting_id", + "segments", + 
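# Index the FK column for meeting-scoped segment lookups. +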
["meeting_id"], + schema="noteflow", + ) + op.create_table( + "word_timings", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "segment_pk", + sa.Integer(), + sa.ForeignKey("noteflow.segments.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("word", sa.String(255), nullable=False), + sa.Column("start_time", sa.Float(), nullable=False), + sa.Column("end_time", sa.Float(), nullable=False), + sa.Column("probability", sa.Float(), nullable=False), + schema="noteflow", + ) + op.create_index( + "ix_word_timings_segment_pk", + "word_timings", + ["segment_pk"], + schema="noteflow", + ) + op.create_table( + "summaries", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "meeting_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + unique=True, + ), + sa.Column("executive_summary", sa.Text(), nullable=True), + sa.Column( + "generated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column("model_version", sa.String(50), nullable=True), + schema="noteflow", + ) + op.create_table( + "key_points", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "summary_id", + sa.Integer(), + sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("text", sa.Text(), nullable=False), + sa.Column("start_time", sa.Float(), nullable=False, server_default="0.0"), + sa.Column("end_time", sa.Float(), nullable=False, server_default="0.0"), + sa.Column( + "segment_ids", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + schema="noteflow", + ) + op.create_index( + "ix_key_points_summary_id", + "key_points", + ["summary_id"], + schema="noteflow", + ) + op.create_table( + "action_items", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "summary_id", + sa.Integer(), + sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("text", sa.Text(), nullable=False), + sa.Column("assignee", sa.String(255), nullable=False, server_default=""), + sa.Column("due_date", sa.DateTime(timezone=True), nullable=True), + sa.Column("priority", sa.Integer(), nullable=False, server_default="0"), + sa.Column( + "segment_ids", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + schema="noteflow", + ) + op.create_index( + "ix_action_items_summary_id", + "action_items", + ["summary_id"], + schema="noteflow", + ) +def downgrade() -> None: + op.drop_table("action_items", schema="noteflow") + op.drop_table("key_points", schema="noteflow") + op.drop_table("summaries", schema="noteflow") + op.drop_table("word_timings", schema="noteflow") + op.drop_table("segments", schema="noteflow") + op.drop_table("meetings", schema="noteflow") + op.execute("DROP SCHEMA IF EXISTS noteflow CASCADE") +```` + +## File: src/noteflow/infrastructure/persistence/migrations/env.py +````python +from __future__ import annotations +import asyncio +import os +from logging.config import fileConfig +from alembic import context +from sqlalchemy import pool +from sqlalchemy.engine import Connection +from sqlalchemy.ext.asyncio import async_engine_from_config +from noteflow.infrastructure.persistence.models import Base +config = context.config +if config.config_file_name is not None: + fileConfig(config.config_file_name) +target_metadata = Base.metadata +if database_url := 
os.environ.get("NOTEFLOW_DATABASE_URL"): + if database_url.startswith("postgres://"): + database_url = database_url.replace("postgres://", "postgresql+asyncpg://", 1) + elif database_url.startswith("postgresql://"): + database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1) + config.set_main_option("sqlalchemy.url", database_url) +def include_object( + obj: object, + name: str | None, + type_: str, + reflected: bool, + compare_to: object | None, +) -> bool: + if type_ == "table": + schema = getattr(obj, "schema", None) + return schema == "noteflow" + return True +def run_migrations_offline() -> None: + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + include_schemas=True, + include_object=include_object, + version_table_schema="noteflow", + ) + with context.begin_transaction(): + context.run_migrations() +def do_run_migrations(connection: Connection) -> None: + context.configure( + connection=connection, + target_metadata=target_metadata, + include_schemas=True, + include_object=include_object, + version_table_schema="noteflow", + ) + with context.begin_transaction(): + context.run_migrations() +async def run_async_migrations() -> None: + connectable = async_engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + await connectable.dispose() +def run_migrations_online() -> None: + asyncio.run(run_async_migrations()) +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() +```` + +## File: src/noteflow/infrastructure/persistence/repositories/annotation_repo.py +````python +from __future__ import annotations +from collections.abc import Sequence +from typing import TYPE_CHECKING +from uuid import UUID +from sqlalchemy import and_, delete, or_, select +from noteflow.domain.entities import Annotation +from noteflow.domain.value_objects import AnnotationId +from noteflow.infrastructure.converters import OrmConverter +from noteflow.infrastructure.persistence.models import AnnotationModel +from noteflow.infrastructure.persistence.repositories._base import BaseRepository +if TYPE_CHECKING: + from noteflow.domain.value_objects import MeetingId +class SqlAlchemyAnnotationRepository(BaseRepository): + async def add(self, annotation: Annotation) -> Annotation: + model = AnnotationModel( + annotation_id=UUID(str(annotation.id)), + meeting_id=UUID(str(annotation.meeting_id)), + annotation_type=annotation.annotation_type.value, + text=annotation.text, + start_time=annotation.start_time, + end_time=annotation.end_time, + segment_ids=annotation.segment_ids, + created_at=annotation.created_at, + ) + self._session.add(model) + await self._session.flush() + annotation.db_id = model.id + return annotation + async def get(self, annotation_id: AnnotationId) -> Annotation | None: + stmt = select(AnnotationModel).where( + AnnotationModel.annotation_id == UUID(str(annotation_id)) + ) + model = await self._execute_scalar(stmt) + return None if model is None else OrmConverter.annotation_to_domain(model) + async def get_by_meeting( + self, + meeting_id: MeetingId, + ) -> Sequence[Annotation]: + stmt = ( + select(AnnotationModel) + .where(AnnotationModel.meeting_id == UUID(str(meeting_id))) + .order_by(AnnotationModel.start_time) + ) + models = await 
self._execute_scalars(stmt) + return [OrmConverter.annotation_to_domain(model) for model in models] + async def get_by_time_range( + self, + meeting_id: MeetingId, + start_time: float, + end_time: float, + ) -> Sequence[Annotation]: + stmt = ( + select(AnnotationModel) + .where( + and_( + AnnotationModel.meeting_id == UUID(str(meeting_id)), + or_( + and_( + AnnotationModel.start_time >= start_time, + AnnotationModel.start_time <= end_time, + ), + and_( + AnnotationModel.end_time >= start_time, + AnnotationModel.end_time <= end_time, + ), + and_( + AnnotationModel.start_time <= start_time, + AnnotationModel.end_time >= end_time, + ), + ), + ) + ) + .order_by(AnnotationModel.start_time) + ) + models = await self._execute_scalars(stmt) + return [OrmConverter.annotation_to_domain(model) for model in models] + async def update(self, annotation: Annotation) -> Annotation: + stmt = select(AnnotationModel).where( + AnnotationModel.annotation_id == UUID(str(annotation.id)) + ) + model = await self._execute_scalar(stmt) + if model is None: + raise ValueError(f"Annotation {annotation.id} not found") + model.annotation_type = annotation.annotation_type.value + model.text = annotation.text + model.start_time = annotation.start_time + model.end_time = annotation.end_time + model.segment_ids = annotation.segment_ids + await self._session.flush() + return annotation + async def delete(self, annotation_id: AnnotationId) -> bool: + stmt = select(AnnotationModel).where( + AnnotationModel.annotation_id == UUID(str(annotation_id)) + ) + model = await self._execute_scalar(stmt) + if model is None: + return False + await self._session.execute(delete(AnnotationModel).where(AnnotationModel.id == model.id)) + await self._session.flush() + return True +```` + +## File: src/noteflow/infrastructure/persistence/repositories/summary_repo.py +````python +from __future__ import annotations +from typing import TYPE_CHECKING +from uuid import UUID +from sqlalchemy import delete, select +from noteflow.domain.entities import ActionItem, KeyPoint, Summary +from noteflow.infrastructure.converters import OrmConverter +from noteflow.infrastructure.persistence.models import ( + ActionItemModel, + KeyPointModel, + SummaryModel, +) +from noteflow.infrastructure.persistence.repositories._base import BaseRepository +if TYPE_CHECKING: + from noteflow.domain.value_objects import MeetingId +class SqlAlchemySummaryRepository(BaseRepository): + async def save(self, summary: Summary) -> Summary: + stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(summary.meeting_id))) + result = await self._session.execute(stmt) + if existing := result.scalar_one_or_none(): + existing.executive_summary = summary.executive_summary + if summary.generated_at is not None: + existing.generated_at = summary.generated_at + existing.model_version = summary.model_version + await self._session.execute( + delete(KeyPointModel).where(KeyPointModel.summary_id == existing.id) + ) + await self._session.execute( + delete(ActionItemModel).where(ActionItemModel.summary_id == existing.id) + ) + kp_models: list[tuple[KeyPointModel, KeyPoint]] = [] + for kp in summary.key_points: + kp_model = KeyPointModel( + summary_id=existing.id, + text=kp.text, + start_time=kp.start_time, + end_time=kp.end_time, + segment_ids=kp.segment_ids, + ) + self._session.add(kp_model) + kp_models.append((kp_model, kp)) + ai_models: list[tuple[ActionItemModel, ActionItem]] = [] + for ai in summary.action_items: + ai_model = ActionItemModel( + summary_id=existing.id, + text=ai.text, 
+ assignee=ai.assignee, + due_date=ai.due_date, + priority=ai.priority, + segment_ids=ai.segment_ids, + ) + self._session.add(ai_model) + ai_models.append((ai_model, ai)) + await self._session.flush() + for kp_model, kp in kp_models: + kp.db_id = kp_model.id + for ai_model, ai in ai_models: + ai.db_id = ai_model.id + summary.db_id = existing.id + else: + model = SummaryModel( + meeting_id=UUID(str(summary.meeting_id)), + executive_summary=summary.executive_summary, + generated_at=summary.generated_at, + model_version=summary.model_version, + ) + self._session.add(model) + await self._session.flush() + for kp in summary.key_points: + kp_model = KeyPointModel( + summary_id=model.id, + text=kp.text, + start_time=kp.start_time, + end_time=kp.end_time, + segment_ids=kp.segment_ids, + ) + self._session.add(kp_model) + await self._session.flush() + kp.db_id = kp_model.id + for ai in summary.action_items: + ai_model = ActionItemModel( + summary_id=model.id, + text=ai.text, + assignee=ai.assignee, + due_date=ai.due_date, + priority=ai.priority, + segment_ids=ai.segment_ids, + ) + self._session.add(ai_model) + await self._session.flush() + ai.db_id = ai_model.id + summary.db_id = model.id + return summary + async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None: + stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(meeting_id))) + model = await self._execute_scalar(stmt) + return None if model is None else OrmConverter.summary_to_domain(model, meeting_id) + async def delete_by_meeting(self, meeting_id: MeetingId) -> bool: + stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(meeting_id))) + model = await self._execute_scalar(stmt) + if model is None: + return False + await self._delete_and_flush(model) + return True +```` + +## File: src/noteflow/infrastructure/persistence/models.py +````python +from __future__ import annotations +from datetime import datetime +from typing import ClassVar +from uuid import uuid4 +from pgvector.sqlalchemy import Vector +from sqlalchemy import ( + DateTime, + Float, + ForeignKey, + Integer, + LargeBinary, + String, + Text, +) +from sqlalchemy.dialects.postgresql import JSONB, UUID +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship +EMBEDDING_DIM = 1536 +class Base(DeclarativeBase): + pass +class MeetingModel(Base): + __tablename__ = "meetings" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + primary_key=True, + default=uuid4, + ) + title: Mapped[str] = mapped_column(String(255), nullable=False) + state: Mapped[int] = mapped_column(Integer, nullable=False, default=1) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + started_at: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), + nullable=True, + ) + ended_at: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), + nullable=True, + ) + metadata_: Mapped[dict[str, str]] = mapped_column( + "metadata", + JSONB, + nullable=False, + default=dict, + ) + wrapped_dek: Mapped[bytes | None] = mapped_column( + LargeBinary, + nullable=True, + ) + segments: Mapped[list[SegmentModel]] = relationship( + "SegmentModel", + back_populates="meeting", + cascade="all, delete-orphan", + lazy="selectin", + ) + summary: Mapped[SummaryModel | None] = relationship( + "SummaryModel", + back_populates="meeting", + cascade="all, delete-orphan", + uselist=False, + lazy="selectin", + ) + 
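# Child collections cascade-delete with the meeting; selectin loading avoids N+1 queries. +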
annotations: Mapped[list[AnnotationModel]] = relationship( + "AnnotationModel", + back_populates="meeting", + cascade="all, delete-orphan", + lazy="selectin", + ) +class SegmentModel(Base): + __tablename__ = "segments" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + meeting_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ) + segment_id: Mapped[int] = mapped_column(Integer, nullable=False) + text: Mapped[str] = mapped_column(Text, nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False) + end_time: Mapped[float] = mapped_column(Float, nullable=False) + language: Mapped[str] = mapped_column(String(10), nullable=False, default="en") + language_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + avg_logprob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + no_speech_prob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + embedding: Mapped[list[float] | None] = mapped_column( + Vector(EMBEDDING_DIM), + nullable=True, + ) + speaker_id: Mapped[str | None] = mapped_column(String(50), nullable=True) + speaker_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + meeting: Mapped[MeetingModel] = relationship( + "MeetingModel", + back_populates="segments", + ) + words: Mapped[list[WordTimingModel]] = relationship( + "WordTimingModel", + back_populates="segment", + cascade="all, delete-orphan", + lazy="selectin", + ) +class WordTimingModel(Base): + __tablename__ = "word_timings" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + segment_pk: Mapped[int] = mapped_column( + Integer, + ForeignKey("noteflow.segments.id", ondelete="CASCADE"), + nullable=False, + ) + word: Mapped[str] = mapped_column(String(255), nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False) + end_time: Mapped[float] = mapped_column(Float, nullable=False) + probability: Mapped[float] = mapped_column(Float, nullable=False) + segment: Mapped[SegmentModel] = relationship( + "SegmentModel", + back_populates="words", + ) +class SummaryModel(Base): + __tablename__ = "summaries" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + meeting_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + unique=True, + ) + executive_summary: Mapped[str | None] = mapped_column(Text, nullable=True) + generated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + model_version: Mapped[str | None] = mapped_column(String(50), nullable=True) + meeting: Mapped[MeetingModel] = relationship( + "MeetingModel", + back_populates="summary", + ) + key_points: Mapped[list[KeyPointModel]] = relationship( + "KeyPointModel", + back_populates="summary", + cascade="all, delete-orphan", + lazy="selectin", + ) + action_items: Mapped[list[ActionItemModel]] = relationship( + "ActionItemModel", + back_populates="summary", + cascade="all, delete-orphan", + lazy="selectin", + ) +class 
KeyPointModel(Base): + __tablename__ = "key_points" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + summary_id: Mapped[int] = mapped_column( + Integer, + ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), + nullable=False, + ) + text: Mapped[str] = mapped_column(Text, nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + segment_ids: Mapped[list[int]] = mapped_column( + JSONB, + nullable=False, + default=list, + ) + summary: Mapped[SummaryModel] = relationship( + "SummaryModel", + back_populates="key_points", + ) +class ActionItemModel(Base): + __tablename__ = "action_items" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + summary_id: Mapped[int] = mapped_column( + Integer, + ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), + nullable=False, + ) + text: Mapped[str] = mapped_column(Text, nullable=False) + assignee: Mapped[str] = mapped_column(String(255), nullable=False, default="") + due_date: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), + nullable=True, + ) + priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + segment_ids: Mapped[list[int]] = mapped_column( + JSONB, + nullable=False, + default=list, + ) + summary: Mapped[SummaryModel] = relationship( + "SummaryModel", + back_populates="action_items", + ) +class AnnotationModel(Base): + __tablename__ = "annotations" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + annotation_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + nullable=False, + unique=True, + default=uuid4, + ) + meeting_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ) + annotation_type: Mapped[str] = mapped_column(String(50), nullable=False) + text: Mapped[str] = mapped_column(Text, nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False) + end_time: Mapped[float] = mapped_column(Float, nullable=False) + segment_ids: Mapped[list[int]] = mapped_column( + JSONB, + nullable=False, + default=list, + ) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + meeting: Mapped[MeetingModel] = relationship( + "MeetingModel", + back_populates="annotations", + ) +```` + +## File: src/noteflow/infrastructure/security/keystore.py +````python +from __future__ import annotations +import base64 +import binascii +import logging +import os +import secrets +from typing import Final +import keyring +logger = logging.getLogger(__name__) +KEY_SIZE: Final[int] = 32 +SERVICE_NAME: Final[str] = "noteflow" +KEY_NAME: Final[str] = "master_key" +ENV_VAR_NAME: Final[str] = "NOTEFLOW_MASTER_KEY" +class KeyringKeyStore: + def __init__( + self, + service_name: str = SERVICE_NAME, + key_name: str = KEY_NAME, + ) -> None: + self._service_name = service_name + self._key_name = key_name + def get_or_create_master_key(self) -> bytes: + if env_key := os.environ.get(ENV_VAR_NAME): + logger.debug("Using master key from environment variable") + try: + decoded = base64.b64decode(env_key, validate=True) + except (binascii.Error, ValueError) as exc: + 
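# A malformed env override fails hard rather than silently falling back to the keyring. +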
raise RuntimeError( + f"{ENV_VAR_NAME} must be base64-encoded {KEY_SIZE}-byte key" + ) from exc + if len(decoded) != KEY_SIZE: + raise RuntimeError( + f"{ENV_VAR_NAME} must decode to {KEY_SIZE} bytes, got {len(decoded)}" + ) + return decoded + try: + stored = keyring.get_password(self._service_name, self._key_name) + if stored is not None: + logger.debug("Retrieved existing master key from keyring") + return base64.b64decode(stored) + new_key = secrets.token_bytes(KEY_SIZE) + encoded = base64.b64encode(new_key).decode("ascii") + keyring.set_password(self._service_name, self._key_name, encoded) + logger.info("Generated and stored new master key in keyring") + return new_key + except keyring.errors.KeyringError as e: + raise RuntimeError( + f"Keyring unavailable: {e}. " + f"Set {ENV_VAR_NAME} environment variable for headless mode." + ) from e + def delete_master_key(self) -> None: + try: + keyring.delete_password(self._service_name, self._key_name) + logger.info("Deleted master key") + except keyring.errors.PasswordDeleteError: + logger.debug("Master key not found, nothing to delete") + except keyring.errors.KeyringError as e: + logger.warning("Failed to delete master key: %s", e) + def has_master_key(self) -> bool: + try: + stored = keyring.get_password(self._service_name, self._key_name) + return stored is not None + except keyring.errors.KeyringError: + return False + @property + def service_name(self) -> str: + return self._service_name + @property + def key_name(self) -> str: + return self._key_name +class InMemoryKeyStore: + def __init__(self) -> None: + self._key: bytes | None = None + def get_or_create_master_key(self) -> bytes: + if self._key is None: + self._key = secrets.token_bytes(KEY_SIZE) + logger.debug("Generated in-memory master key") + return self._key + def delete_master_key(self) -> None: + self._key = None + logger.debug("Deleted in-memory master key") + def has_master_key(self) -> bool: + return self._key is not None +```` + +## File: src/noteflow/infrastructure/summarization/__init__.py +````python +from noteflow.infrastructure.summarization.citation_verifier import ( + SegmentCitationVerifier, +) +from noteflow.infrastructure.summarization.cloud_provider import ( + CloudBackend, + CloudSummarizer, +) +from noteflow.infrastructure.summarization.factory import create_summarization_service +from noteflow.infrastructure.summarization.mock_provider import MockSummarizer +from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer +__all__ = [ + "CloudBackend", + "CloudSummarizer", + "MockSummarizer", + "OllamaSummarizer", + "SegmentCitationVerifier", + "create_summarization_service", +] +```` + +## File: src/noteflow/infrastructure/summarization/_parsing.py +````python +from __future__ import annotations +import json +from datetime import UTC, datetime +from typing import TYPE_CHECKING +from noteflow.domain.entities import ActionItem, KeyPoint, Summary +from noteflow.domain.summarization import InvalidResponseError +if TYPE_CHECKING: + from noteflow.domain.summarization import SummarizationRequest +SYSTEM_PROMPT = """You are a meeting summarization assistant. Analyze the transcript and produce structured output. +OUTPUT FORMAT (JSON): +{ + "executive_summary": "2-3 sentence high-level overview", + "key_points": [ + {"text": "Key insight or decision", "segment_ids": [0, 1]} + ], + "action_items": [ + {"text": "Action to take", "assignee": "Person name or empty string", "priority": 0, "segment_ids": [2]} + ] +} +RULES: +1. 
Each key_point and action_item MUST have at least one segment_id referencing the source +2. segment_ids are integers matching the [N] markers in the transcript +3. priority: 0=unspecified, 1=low, 2=medium, 3=high +4. Only extract action items that clearly indicate tasks to be done +5. Output ONLY valid JSON, no markdown or explanation""" +def build_transcript_prompt(request: SummarizationRequest) -> str: + lines = [f"[{seg.segment_id}] {seg.text}" for seg in request.segments] + constraints = "" + if request.segments: + valid_ids = ", ".join(str(seg.segment_id) for seg in request.segments) + constraints = ( + "\n\nCONSTRAINTS:\n" + f"- Maximum {request.max_key_points} key points\n" + f"- Maximum {request.max_action_items} action items\n" + f"- Valid segment_ids: {valid_ids}" + ) + return f"TRANSCRIPT:\n{chr(10).join(lines)}{constraints}" +def parse_llm_response(response_text: str, request: SummarizationRequest) -> Summary: + text = response_text.strip() + if text.startswith("```"): + lines = text.split("\n") + if lines[0].startswith("```"): + lines = lines[1:] + if lines and lines[-1].strip() == "```": + lines = lines[:-1] + text = "\n".join(lines) + try: + data = json.loads(text) + except json.JSONDecodeError as e: + raise InvalidResponseError(f"Invalid JSON response: {e}") from e + valid_ids = {seg.segment_id for seg in request.segments} + key_points: list[KeyPoint] = [] + for kp_data in data.get("key_points", [])[: request.max_key_points]: + seg_ids = [sid for sid in kp_data.get("segment_ids", []) if sid in valid_ids] + start_time = 0.0 + end_time = 0.0 + if seg_ids and (refs := [s for s in request.segments if s.segment_id in seg_ids]): + start_time = min(s.start_time for s in refs) + end_time = max(s.end_time for s in refs) + key_points.append( + KeyPoint( + text=str(kp_data.get("text", "")), + segment_ids=seg_ids, + start_time=start_time, + end_time=end_time, + ) + ) + action_items: list[ActionItem] = [] + for ai_data in data.get("action_items", [])[: request.max_action_items]: + seg_ids = [sid for sid in ai_data.get("segment_ids", []) if sid in valid_ids] + priority = ai_data.get("priority", 0) + if not isinstance(priority, int) or priority not in range(4): + priority = 0 + action_items.append( + ActionItem( + text=str(ai_data.get("text", "")), + assignee=str(ai_data.get("assignee", "")), + priority=priority, + segment_ids=seg_ids, + ) + ) + return Summary( + meeting_id=request.meeting_id, + executive_summary=str(data.get("executive_summary", "")), + key_points=key_points, + action_items=action_items, + generated_at=datetime.now(UTC), + ) +```` + +## File: src/noteflow/infrastructure/summarization/cloud_provider.py +````python +from __future__ import annotations +import asyncio +import os +import time +from datetime import UTC, datetime +from enum import Enum +from typing import TYPE_CHECKING, cast +from noteflow.domain.entities import Summary +from noteflow.domain.summarization import ( + InvalidResponseError, + ProviderUnavailableError, + SummarizationRequest, + SummarizationResult, + SummarizationTimeoutError, +) +from noteflow.infrastructure.summarization._parsing import ( + SYSTEM_PROMPT, + build_transcript_prompt, + parse_llm_response, +) +if TYPE_CHECKING: + import anthropic + import openai +class CloudBackend(Enum): + OPENAI = "openai" + ANTHROPIC = "anthropic" +class CloudSummarizer: + def __init__( + self, + backend: CloudBackend = CloudBackend.OPENAI, + api_key: str | None = None, + model: str | None = None, + timeout_seconds: float = 60.0, + base_url: str | None = None, + 
) -> None: + self._backend = backend + self._api_key = api_key + self._timeout = timeout_seconds + self._client: openai.OpenAI | anthropic.Anthropic | None = None + self._openai_base_url = ( + base_url + if base_url is not None + else os.environ.get("OPENAI_BASE_URL") + if backend == CloudBackend.OPENAI + else None + ) + if model is None: + self._model = ( + "gpt-4o-mini" if backend == CloudBackend.OPENAI else "claude-3-haiku-20240307" + ) + else: + self._model = model + def _get_openai_client(self) -> openai.OpenAI: + if self._client is None: + try: + import openai + self._client = openai.OpenAI( + api_key=self._api_key, + timeout=self._timeout, + base_url=self._openai_base_url, + ) + except ImportError as e: + raise ProviderUnavailableError( + "openai package not installed. Install with: pip install openai" + ) from e + return cast(openai.OpenAI, self._client) + def _get_anthropic_client(self) -> anthropic.Anthropic: + if self._client is None: + try: + import anthropic + self._client = anthropic.Anthropic(api_key=self._api_key, timeout=self._timeout) + except ImportError as e: + raise ProviderUnavailableError( + "anthropic package not installed. Install with: pip install anthropic" + ) from e + return cast(anthropic.Anthropic, self._client) + @property + def provider_name(self) -> str: + return self._backend.value + @property + def is_available(self) -> bool: + if self._api_key: + return True + if self._backend == CloudBackend.OPENAI: + return bool(os.environ.get("OPENAI_API_KEY")) + return bool(os.environ.get("ANTHROPIC_API_KEY")) + @property + def requires_cloud_consent(self) -> bool: + return True + async def summarize(self, request: SummarizationRequest) -> SummarizationResult: + start = time.monotonic() + if not request.segments: + return SummarizationResult( + summary=Summary( + meeting_id=request.meeting_id, + executive_summary="No transcript segments to summarize.", + key_points=[], + action_items=[], + generated_at=datetime.now(UTC), + model_version=self._model, + ), + model_name=self._model, + provider_name=self.provider_name, + tokens_used=None, + latency_ms=0.0, + ) + user_prompt = build_transcript_prompt(request) + if self._backend == CloudBackend.OPENAI: + content, tokens_used = await asyncio.to_thread(self._call_openai, user_prompt) + else: + content, tokens_used = await asyncio.to_thread(self._call_anthropic, user_prompt) + summary = parse_llm_response(content, request) + summary = Summary( + meeting_id=summary.meeting_id, + executive_summary=summary.executive_summary, + key_points=summary.key_points, + action_items=summary.action_items, + generated_at=summary.generated_at, + model_version=self._model, + ) + elapsed_ms = (time.monotonic() - start) * 1000 + return SummarizationResult( + summary=summary, + model_name=self._model, + provider_name=self.provider_name, + tokens_used=tokens_used, + latency_ms=elapsed_ms, + ) + def _call_openai(self, user_prompt: str) -> tuple[str, int | None]: + try: + client = self._get_openai_client() + except ProviderUnavailableError: + raise + try: + response = client.chat.completions.create( + model=self._model, + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.3, + response_format={"type": "json_object"}, + ) + except TimeoutError as e: + raise SummarizationTimeoutError(f"OpenAI request timed out: {e}") from e + except Exception as e: + err_str = str(e).lower() + if "api key" in err_str or "authentication" in err_str: + raise 
ProviderUnavailableError(f"OpenAI authentication failed: {e}") from e + if "rate limit" in err_str: + raise SummarizationTimeoutError(f"OpenAI rate limited: {e}") from e + raise InvalidResponseError(f"OpenAI error: {e}") from e + content = response.choices[0].message.content or "" + if not content: + raise InvalidResponseError("Empty response from OpenAI") + tokens_used = response.usage.total_tokens if response.usage else None + return content, tokens_used + def _call_anthropic(self, user_prompt: str) -> tuple[str, int | None]: + try: + client = self._get_anthropic_client() + except ProviderUnavailableError: + raise + try: + response = client.messages.create( + model=self._model, + max_tokens=4096, + system=SYSTEM_PROMPT, + messages=[{"role": "user", "content": user_prompt}], + ) + except TimeoutError as e: + raise SummarizationTimeoutError(f"Anthropic request timed out: {e}") from e + except Exception as e: + err_str = str(e).lower() + if "api key" in err_str or "authentication" in err_str: + raise ProviderUnavailableError(f"Anthropic authentication failed: {e}") from e + if "rate limit" in err_str: + raise SummarizationTimeoutError(f"Anthropic rate limited: {e}") from e + raise InvalidResponseError(f"Anthropic error: {e}") from e + content = "".join(block.text for block in response.content if hasattr(block, "text")) + if not content: + raise InvalidResponseError("Empty response from Anthropic") + tokens_used = None + if hasattr(response, "usage"): + tokens_used = response.usage.input_tokens + response.usage.output_tokens + return content, tokens_used +```` + +## File: src/noteflow/infrastructure/summarization/mock_provider.py +````python +from __future__ import annotations +import time +from datetime import UTC, datetime +from noteflow.domain.entities import ActionItem, KeyPoint, Summary +from noteflow.domain.summarization import ( + SummarizationRequest, + SummarizationResult, +) +class MockSummarizer: + def __init__(self, latency_ms: float = 10.0) -> None: + self._latency_ms = latency_ms + @property + def provider_name(self) -> str: + return "mock" + @property + def is_available(self) -> bool: + return True + @property + def requires_cloud_consent(self) -> bool: + return False + async def summarize(self, request: SummarizationRequest) -> SummarizationResult: + start = time.monotonic() + segment_count = request.segment_count + total_duration = request.total_duration + executive_summary = ( + f"Meeting with {segment_count} segments spanning {total_duration:.1f} seconds." + ) + key_points: list[KeyPoint] = [] + for i, segment in enumerate(request.segments[: request.max_key_points]): + text = f"{segment.text[:100]}..." 
if len(segment.text) > 100 else segment.text + key_points.append( + KeyPoint( + text=f"Point {i + 1}: {text}", + segment_ids=[segment.segment_id], + start_time=segment.start_time, + end_time=segment.end_time, + ) + ) + action_items: list[ActionItem] = [] + action_keywords = {"todo", "action", "will", "should", "must", "need to"} + for segment in request.segments: + text_lower = segment.text.lower() + if any(kw in text_lower for kw in action_keywords): + if len(action_items) >= request.max_action_items: + break + action_items.append( + ActionItem( + text=f"Action: {segment.text[:80]}", + assignee="", + segment_ids=[segment.segment_id], + ) + ) + summary = Summary( + meeting_id=request.meeting_id, + executive_summary=executive_summary, + key_points=key_points, + action_items=action_items, + generated_at=datetime.now(UTC), + model_version="mock-1.0", + ) + elapsed = (time.monotonic() - start) * 1000 + self._latency_ms + return SummarizationResult( + summary=summary, + model_name="mock-1.0", + provider_name=self.provider_name, + tokens_used=None, + latency_ms=elapsed, + ) +```` + +## File: src/noteflow/infrastructure/triggers/__init__.py +````python +from noteflow.infrastructure.triggers.app_audio import AppAudioProvider, AppAudioSettings +from noteflow.infrastructure.triggers.audio_activity import ( + AudioActivityProvider, + AudioActivitySettings, +) +from noteflow.infrastructure.triggers.calendar import CalendarProvider, CalendarSettings +from noteflow.infrastructure.triggers.foreground_app import ( + ForegroundAppProvider, + ForegroundAppSettings, +) +__all__ = [ + "AppAudioProvider", + "AppAudioSettings", + "AudioActivityProvider", + "AudioActivitySettings", + "CalendarProvider", + "CalendarSettings", + "ForegroundAppProvider", + "ForegroundAppSettings", +] +```` + +## File: src/noteflow/infrastructure/triggers/audio_activity.py +````python +from __future__ import annotations +import threading +import time +from collections import deque +from dataclasses import dataclass +from typing import TYPE_CHECKING +from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + from noteflow.infrastructure.audio import RmsLevelProvider +@dataclass +class AudioActivitySettings: + enabled: bool + threshold_db: float + window_seconds: float + min_active_ratio: float + min_samples: int + max_history: int + weight: float + def __post_init__(self) -> None: + if self.min_samples > self.max_history: + msg = "min_samples must be <= max_history" + raise ValueError(msg) +class AudioActivityProvider: + def __init__( + self, + level_provider: RmsLevelProvider, + settings: AudioActivitySettings, + ) -> None: + self._level_provider = level_provider + self._settings = settings + self._history: deque[tuple[float, bool]] = deque(maxlen=self._settings.max_history) + self._lock = threading.Lock() + @property + def source(self) -> TriggerSource: + return TriggerSource.AUDIO_ACTIVITY + @property + def max_weight(self) -> float: + return self._settings.weight + def update(self, frames: NDArray[np.float32], timestamp: float) -> None: + if not self._settings.enabled: + return + db = self._level_provider.get_db(frames) + is_active = db >= self._settings.threshold_db + with self._lock: + self._history.append((timestamp, is_active)) + def get_signal(self) -> TriggerSignal | None: + if not self._settings.enabled: + return None + with self._lock: + history = list(self._history) + if len(history) < self._settings.min_samples: + return None + 
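# Score only samples inside the sliding window; signal when the active ratio clears the threshold. +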
now = time.monotonic() + cutoff = now - self._settings.window_seconds + recent = [(ts, active) for ts, active in history if ts >= cutoff] + if len(recent) < self._settings.min_samples: + return None + active_count = sum(bool(active) for _, active in recent) + ratio = active_count / len(recent) + if ratio < self._settings.min_active_ratio: + return None + return TriggerSignal(source=self.source, weight=self.max_weight) + def is_enabled(self) -> bool: + return self._settings.enabled + def clear_history(self) -> None: + with self._lock: + self._history.clear() +```` + +## File: src/noteflow/infrastructure/__init__.py +````python + +```` + +## File: src/noteflow/application/services/__init__.py +````python +from noteflow.application.services.export_service import ExportFormat, ExportService +from noteflow.application.services.meeting_service import MeetingService +from noteflow.application.services.recovery_service import RecoveryService +from noteflow.application.services.retention_service import RetentionReport, RetentionService +from noteflow.application.services.summarization_service import ( + SummarizationMode, + SummarizationService, + SummarizationServiceResult, + SummarizationServiceSettings, +) +from noteflow.application.services.trigger_service import TriggerService, TriggerServiceSettings +__all__ = [ + "ExportFormat", + "ExportService", + "MeetingService", + "RecoveryService", + "RetentionReport", + "RetentionService", + "SummarizationMode", + "SummarizationService", + "SummarizationServiceResult", + "SummarizationServiceSettings", + "TriggerService", + "TriggerServiceSettings", +] +```` + +## File: src/noteflow/client/components/playback_controls.py +````python +from __future__ import annotations +import logging +from collections.abc import Callable +from typing import TYPE_CHECKING +import flet as ft +from noteflow.infrastructure.audio import PlaybackState +from noteflow.infrastructure.export._formatting import format_timestamp +if TYPE_CHECKING: + from noteflow.client.state import AppState +logger = logging.getLogger(__name__) +class PlaybackControlsComponent: + def __init__( + self, + state: AppState, + on_position_change: Callable[[float], None] | None = None, + ) -> None: + self._state = state + self._on_position_change = on_position_change + self._active = False + self._play_btn: ft.IconButton | None = None + self._stop_btn: ft.IconButton | None = None + self._position_label: ft.Text | None = None + self._duration_label: ft.Text | None = None + self._timeline_slider: ft.Slider | None = None + self._row: ft.Row | None = None + def build(self) -> ft.Row: + self._play_btn = ft.IconButton( + icon=ft.Icons.PLAY_ARROW, + icon_color=ft.Colors.GREEN, + tooltip="Play", + on_click=self._on_play_click, + disabled=True, + ) + self._stop_btn = ft.IconButton( + icon=ft.Icons.STOP, + icon_color=ft.Colors.RED, + tooltip="Stop", + on_click=self._on_stop_click, + disabled=True, + ) + self._position_label = ft.Text("00:00", size=12, width=50) + self._duration_label = ft.Text("00:00", size=12, width=50) + self._timeline_slider = ft.Slider( + min=0, + max=100, + value=0, + expand=True, + on_change=self._on_slider_change, + disabled=True, + ) + self._row = ft.Row( + [ + self._play_btn, + self._stop_btn, + self._position_label, + self._timeline_slider, + self._duration_label, + ], + visible=False, + ) + return self._row + def set_visible(self, visible: bool) -> None: + if self._row: + self._row.visible = visible + self._state.request_update() + def load_audio(self) -> None: + buffer = 
self._state.session_audio_buffer + if not buffer: + logger.warning("No audio in session buffer") + return + self._state.playback.play(buffer) + self._state.playback.pause() + # Update UI state + duration = self._state.playback.total_duration + self._state.playback_position = 0.0 + self._state.run_on_ui_thread(lambda: self._update_loaded_state(duration)) + def _update_loaded_state(self, duration: float) -> None: + if self._play_btn: + self._play_btn.disabled = False + if self._stop_btn: + self._stop_btn.disabled = False + if self._timeline_slider: + self._timeline_slider.disabled = False + self._timeline_slider.max = max(duration, 0.1) + self._timeline_slider.value = 0 + if self._duration_label: + self._duration_label.value = format_timestamp(duration) + if self._position_label: + self._position_label.value = "00:00" + self.set_visible(True) + self._state.request_update() + def seek(self, position: float) -> None: + if self._state.playback.seek(position): + self._state.playback_position = position + self._state.run_on_ui_thread(self._update_position_display) + def _on_play_click(self, e: ft.ControlEvent) -> None: + playback = self._state.playback + if playback.state == PlaybackState.PLAYING: + playback.pause() + self._stop_position_updates() + self._update_play_button(playing=False) + elif playback.state == PlaybackState.PAUSED: + playback.resume() + self._start_position_updates() + self._update_play_button(playing=True) + elif buffer := self._state.session_audio_buffer: + playback.play(buffer) + self._start_position_updates() + self._update_play_button(playing=True) + def _on_stop_click(self, e: ft.ControlEvent) -> None: + self._stop_position_updates() + self._state.playback.stop() + self._state.playback_position = 0.0 + self._update_play_button(playing=False) + self._state.run_on_ui_thread(self._update_position_display) + def _on_slider_change(self, e: ft.ControlEvent) -> None: + if self._timeline_slider: + position = float(self._timeline_slider.value or 0) + self.seek(position) + def _update_play_button(self, *, playing: bool) -> None: + if self._play_btn: + if playing: + self._play_btn.icon = ft.Icons.PAUSE + self._play_btn.tooltip = "Pause" + else: + self._play_btn.icon = ft.Icons.PLAY_ARROW + self._play_btn.tooltip = "Play" + self._state.request_update() + def _start_position_updates(self) -> None: + if self._active: + return + self._active = True + self._state.playback.add_position_callback(self._on_position_update) + def _stop_position_updates(self) -> None: + if not self._active: + return + self._active = False + self._state.playback.remove_position_callback(self._on_position_update) + def _on_position_update(self, position: float) -> None: + if not self._active: + return + playback = self._state.playback + # Check if playback stopped + if playback.state == PlaybackState.STOPPED: + self._active = False + self._state.playback.remove_position_callback(self._on_position_update) + self._state.run_on_ui_thread(self._on_playback_finished) + return + # Update position state + self._state.playback_position = position + self._state.run_on_ui_thread(self._update_position_display) + # Notify external callback + if self._on_position_change: + try: + self._on_position_change(position) + except Exception as e: + logger.error("Position change callback error: %s", e) + def _update_position_display(self) -> None: + position = self._state.playback_position + if self._position_label: + self._position_label.value = format_timestamp(position) + if self._timeline_slider and not 
self._timeline_slider.disabled: + # Only update if user isn't dragging + self._timeline_slider.value = position + self._state.request_update() + def _on_playback_finished(self) -> None: + self._update_play_button(playing=False) + self._state.playback_position = 0.0 + self._update_position_display() +```` + +## File: src/noteflow/client/state.py +````python +from __future__ import annotations +import logging +from collections.abc import Callable +from dataclasses import dataclass, field +import flet as ft +from noteflow.domain.entities import Summary +from noteflow.domain.triggers import TriggerDecision +from noteflow.grpc.client import AnnotationInfo, MeetingInfo, ServerInfo, TranscriptSegment +from noteflow.infrastructure.audio import ( + RmsLevelProvider, + SoundDevicePlayback, + TimestampedAudio, +) +logger = logging.getLogger(__name__) +OnTranscriptCallback = Callable[[TranscriptSegment], None] +OnConnectionCallback = Callable[[bool, str], None] +@dataclass +class AppState: + server_address: str = "localhost:50051" + connected: bool = False + server_info: ServerInfo | None = None + recording: bool = False + current_meeting: MeetingInfo | None = None + recording_start_time: float | None = None + elapsed_seconds: int = 0 + level_provider: RmsLevelProvider = field(default_factory=RmsLevelProvider) + current_db_level: float = -60.0 + transcript_segments: list[TranscriptSegment] = field(default_factory=list) + current_partial_text: str = "" + playback: SoundDevicePlayback = field(default_factory=SoundDevicePlayback) + playback_position: float = 0.0 + session_audio_buffer: list[TimestampedAudio] = field(default_factory=list) + highlighted_segment_index: int | None = None + annotations: list[AnnotationInfo] = field(default_factory=list) + meetings: list[MeetingInfo] = field(default_factory=list) + selected_meeting: MeetingInfo | None = None + trigger_enabled: bool = True + trigger_pending: bool = False + trigger_decision: TriggerDecision | None = None + current_summary: Summary | None = None + summary_loading: bool = False + summary_error: str | None = None + _page: ft.Page | None = field(default=None, repr=False) + def set_page(self, page: ft.Page) -> None: + self._page = page + def request_update(self) -> None: + if self._page: + self._page.update() + def run_on_ui_thread(self, callback: Callable[[], None]) -> None: + if not self._page: + return + try: + if hasattr(self._page, "run_task"): + async def _run() -> None: + callback() + self._page.run_task(_run) + else: + self._page.run_thread(callback) + except Exception as e: + logger.error("UI thread callback error: %s", e) + def clear_transcript(self) -> None: + self.transcript_segments.clear() + self.current_partial_text = "" + def reset_recording_state(self) -> None: + self.recording = False + self.current_meeting = None + self.recording_start_time = None + self.elapsed_seconds = 0 + def clear_session_audio(self) -> None: + self.session_audio_buffer.clear() + self.playback_position = 0.0 + def find_segment_at_position(self, position: float) -> int | None: + segments = self.transcript_segments + if not segments: + return None + left, right = 0, len(segments) - 1 + while left <= right: + mid = (left + right) // 2 + segment = segments[mid] + if segment.start_time <= position <= segment.end_time: + return mid + if position < segment.start_time: + right = mid - 1 + else: + left = mid + 1 + return None +```` + +## File: src/noteflow/config/__init__.py +````python +from .constants import DEFAULT_GRPC_PORT, DEFAULT_SAMPLE_RATE, 
MAX_GRPC_MESSAGE_SIZE +from .settings import Settings, TriggerSettings, get_settings, get_trigger_settings +__all__ = [ + "DEFAULT_GRPC_PORT", + "DEFAULT_SAMPLE_RATE", + "MAX_GRPC_MESSAGE_SIZE", + "Settings", + "TriggerSettings", + "get_settings", + "get_trigger_settings", +] +```` + +## File: src/noteflow/domain/triggers/entities.py +````python +from __future__ import annotations +import time +from dataclasses import dataclass, field +from enum import Enum +class TriggerSource(Enum): + AUDIO_ACTIVITY = "audio_activity" + FOREGROUND_APP = "foreground_app" + CALENDAR = "calendar" +class TriggerAction(Enum): + IGNORE = "ignore" + NOTIFY = "notify" + AUTO_START = "auto_start" +@dataclass(frozen=True) +class TriggerSignal: + source: TriggerSource + weight: float + app_name: str | None = None + timestamp: float = field(default_factory=time.monotonic) + def __post_init__(self) -> None: + if not 0.0 <= self.weight <= 1.0: + msg = f"Weight must be 0.0-1.0, got {self.weight}" + raise ValueError(msg) +@dataclass(frozen=True) +class TriggerDecision: + action: TriggerAction + confidence: float + signals: tuple[TriggerSignal, ...] + timestamp: float = field(default_factory=time.monotonic) + @property + def primary_signal(self) -> TriggerSignal | None: + return max(self.signals, key=lambda s: s.weight) if self.signals else None + @property + def detected_app(self) -> str | None: + return next((signal.app_name for signal in self.signals if signal.app_name), None) +```` + ## File: src/noteflow/grpc/proto/noteflow_pb2.pyi ```` from google.protobuf.internal import containers as _containers @@ -15161,66 +9418,30 @@ class DiarizationJobStatus(_message.Message): ## File: src/noteflow/grpc/meeting_store.py ````python -"""In-memory meeting storage for the NoteFlow gRPC server. - -Provides thread-safe in-memory storage using domain entities directly. -Used as fallback when no database is configured. -""" - from __future__ import annotations - import threading from typing import TYPE_CHECKING - from noteflow.domain.entities import Meeting, Segment, Summary from noteflow.domain.value_objects import MeetingState - if TYPE_CHECKING: from collections.abc import Sequence from datetime import datetime - - class MeetingStore: - """Thread-safe in-memory meeting storage using domain entities.""" - def __init__(self) -> None: - """Initialize the store.""" self._meetings: dict[str, Meeting] = {} self._lock = threading.RLock() - def create( self, title: str = "", metadata: dict[str, str] | None = None, ) -> Meeting: - """Create a new meeting. - - Args: - title: Optional meeting title. - metadata: Optional metadata. - - Returns: - Created meeting. - """ meeting = Meeting.create(title=title or "Untitled Meeting", metadata=metadata or {}) - with self._lock: self._meetings[str(meeting.id)] = meeting - return meeting - def get(self, meeting_id: str) -> Meeting | None: - """Get a meeting by ID. - - Args: - meeting_id: Meeting ID string. - - Returns: - Meeting or None if not found. - """ with self._lock: return self._meetings.get(meeting_id) - def list_all( self, states: Sequence[MeetingState] | None = None, @@ -15228,153 +9449,62 @@ class MeetingStore: offset: int = 0, sort_desc: bool = True, ) -> tuple[list[Meeting], int]: - """List meetings with optional filtering. - - Args: - states: Optional list of states to filter by. - limit: Maximum number of meetings to return. - offset: Number of meetings to skip. - sort_desc: Sort by created_at descending if True. - - Returns: - Tuple of (meetings list, total count). 
- """ with self._lock: meetings = list(self._meetings.values()) - - # Filter by state if states: state_set = set(states) meetings = [m for m in meetings if m.state in state_set] - total = len(meetings) - - # Sort meetings.sort(key=lambda m: m.created_at, reverse=sort_desc) - - # Paginate meetings = meetings[offset : offset + limit] - return meetings, total - def update(self, meeting: Meeting) -> Meeting: - """Update a meeting in the store. - - Args: - meeting: Meeting with updated fields. - - Returns: - Updated meeting. - """ with self._lock: self._meetings[str(meeting.id)] = meeting return meeting - def add_segment(self, meeting_id: str, segment: Segment) -> Meeting | None: - """Add a segment to a meeting. - - Args: - meeting_id: Meeting ID. - segment: Segment to add. - - Returns: - Updated meeting or None if not found. - """ with self._lock: meeting = self._meetings.get(meeting_id) if meeting is None: return None - meeting.add_segment(segment) return meeting - def set_summary(self, meeting_id: str, summary: Summary) -> Meeting | None: - """Set meeting summary. - - Args: - meeting_id: Meeting ID. - summary: Summary to set. - - Returns: - Updated meeting or None if not found. - """ with self._lock: meeting = self._meetings.get(meeting_id) if meeting is None: return None - meeting.summary = summary return meeting - def update_state(self, meeting_id: str, state: MeetingState) -> bool: - """Atomically update meeting state. - - Args: - meeting_id: Meeting ID. - state: New state. - - Returns: - True if updated, False if meeting not found. - """ with self._lock: meeting = self._meetings.get(meeting_id) if meeting is None: return False meeting.state = state return True - def update_title(self, meeting_id: str, title: str) -> bool: - """Atomically update meeting title. - - Args: - meeting_id: Meeting ID. - title: New title. - - Returns: - True if updated, False if meeting not found. - """ with self._lock: meeting = self._meetings.get(meeting_id) if meeting is None: return False meeting.title = title return True - def update_end_time(self, meeting_id: str, end_time: datetime) -> bool: - """Atomically update meeting end time. - - Args: - meeting_id: Meeting ID. - end_time: New end time. - - Returns: - True if updated, False if meeting not found. - """ with self._lock: meeting = self._meetings.get(meeting_id) if meeting is None: return False meeting.end_time = end_time return True - def delete(self, meeting_id: str) -> bool: - """Delete a meeting. - - Args: - meeting_id: Meeting ID. - - Returns: - True if deleted, False if not found. 
- """ with self._lock: if meeting_id in self._meetings: del self._meetings[meeting_id] return True return False - @property def active_count(self) -> int: - """Count of meetings in RECORDING or STOPPING state.""" with self._lock: return sum( m.state in (MeetingState.RECORDING, MeetingState.STOPPING) @@ -15384,20 +9514,15 @@ class MeetingStore: ## File: src/noteflow/grpc/server.py ````python -"""NoteFlow gRPC server entry point (async).""" - from __future__ import annotations - import argparse import asyncio import logging import signal import time -from typing import TYPE_CHECKING, Final - +from typing import TYPE_CHECKING, Any, Final import grpc.aio from pydantic import ValidationError - from noteflow.application.services import RecoveryService from noteflow.application.services.summarization_service import SummarizationService from noteflow.config.settings import get_settings @@ -15407,22 +9532,14 @@ from noteflow.infrastructure.diarization import DiarizationEngine from noteflow.infrastructure.persistence.database import create_async_session_factory from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork from noteflow.infrastructure.summarization import create_summarization_service - from .proto import noteflow_pb2_grpc from .service import NoteFlowServicer - if TYPE_CHECKING: from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker - logger = logging.getLogger(__name__) - DEFAULT_PORT: Final[int] = 50051 DEFAULT_MODEL: Final[str] = "base" - - class NoteFlowServer: - """Async gRPC server for NoteFlow.""" - def __init__( self, port: int = DEFAULT_PORT, @@ -15432,18 +9549,8 @@ class NoteFlowServer: session_factory: async_sessionmaker[AsyncSession] | None = None, summarization_service: SummarizationService | None = None, diarization_engine: DiarizationEngine | None = None, + diarization_refinement_enabled: bool = True, ) -> None: - """Initialize the server. - - Args: - port: Port to listen on. - asr_model: ASR model size. - asr_device: Device for ASR ("cpu" or "cuda"). - asr_compute_type: ASR compute type. - session_factory: Optional async session factory for database. - summarization_service: Optional summarization service for generating summaries. - diarization_engine: Optional diarization engine for speaker identification. 
- """ self._port = port self._asr_model = asr_model self._asr_device = asr_device @@ -15451,14 +9558,11 @@ class NoteFlowServer: self._session_factory = session_factory self._summarization_service = summarization_service self._diarization_engine = diarization_engine + self._diarization_refinement_enabled = diarization_refinement_enabled self._server: grpc.aio.Server | None = None self._servicer: NoteFlowServicer | None = None - async def start(self) -> None: - """Start the async gRPC server.""" logger.info("Starting NoteFlow gRPC server (async)...") - - # Create ASR engine logger.info( "Loading ASR model '%s' on %s (%s)...", self._asr_model, @@ -15466,68 +9570,45 @@ class NoteFlowServer: self._asr_compute_type, ) start_time = time.perf_counter() - asr_engine = FasterWhisperEngine( compute_type=self._asr_compute_type, device=self._asr_device, ) asr_engine.load_model(self._asr_model) - load_time = time.perf_counter() - start_time logger.info("ASR model loaded in %.2f seconds", load_time) - - # Lazy-create summarization service if not provided if self._summarization_service is None: self._summarization_service = create_summarization_service() logger.info("Summarization service initialized (default factory)") - - # Create servicer with session factory, summarization, and diarization self._servicer = NoteFlowServicer( asr_engine=asr_engine, session_factory=self._session_factory, summarization_service=self._summarization_service, diarization_engine=self._diarization_engine, + diarization_refinement_enabled=self._diarization_refinement_enabled, ) - - # Create async gRPC server self._server = grpc.aio.server( options=[ - ("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB + ("grpc.max_send_message_length", 100 * 1024 * 1024), ("grpc.max_receive_message_length", 100 * 1024 * 1024), ], ) - - # Register service noteflow_pb2_grpc.add_NoteFlowServiceServicer_to_server( self._servicer, self._server, ) - - # Bind to port address = f"[::]:{self._port}" self._server.add_insecure_port(address) - - # Start server await self._server.start() logger.info("Server listening on %s", address) - async def stop(self, grace_period: float = 5.0) -> None: - """Stop the server gracefully. - - Args: - grace_period: Time to wait for in-flight RPCs. - """ if self._server: logger.info("Stopping server (grace period: %.1fs)...", grace_period) await self._server.stop(grace_period) logger.info("Server stopped") - async def wait_for_termination(self) -> None: - """Block until server is terminated.""" if self._server: await self._server.wait_for_termination() - - async def run_server( port: int, asr_model: str, @@ -15537,27 +9618,16 @@ async def run_server( diarization_enabled: bool = False, diarization_hf_token: str | None = None, diarization_device: str = "auto", + diarization_streaming_latency: float | None = None, + diarization_min_speakers: int | None = None, + diarization_max_speakers: int | None = None, + diarization_refinement_enabled: bool = True, ) -> None: - """Run the async gRPC server. - - Args: - port: Port to listen on. - asr_model: ASR model size. - asr_device: Device for ASR. - asr_compute_type: ASR compute type. - database_url: Optional database URL for persistence. - diarization_enabled: Whether to enable speaker diarization. - diarization_hf_token: HuggingFace token for pyannote models. - diarization_device: Device for diarization ("auto", "cpu", "cuda", "mps"). 
- """ - # Create session factory if database URL provided session_factory = None if database_url: logger.info("Connecting to database...") session_factory = create_async_session_factory(database_url) logger.info("Database connection pool ready") - - # Run crash recovery on startup uow = SqlAlchemyUnitOfWork(session_factory) recovery_service = RecoveryService(uow) recovered = await recovery_service.recover_crashed_meetings() @@ -15566,12 +9636,8 @@ async def run_server( "Recovered %d crashed meetings on startup", len(recovered), ) - - # Create summarization service - auto-detects LOCAL/MOCK providers summarization_service = create_summarization_service() logger.info("Summarization service initialized") - - # Create diarization engine if enabled diarization_engine: DiarizationEngine | None = None if diarization_enabled: if not diarization_hf_token: @@ -15581,12 +9647,18 @@ async def run_server( ) else: logger.info("Initializing diarization engine on %s...", diarization_device) - diarization_engine = DiarizationEngine( - device=diarization_device, - hf_token=diarization_hf_token, - ) + diarization_kwargs: dict[str, Any] = { + "device": diarization_device, + "hf_token": diarization_hf_token, + } + if diarization_streaming_latency is not None: + diarization_kwargs["streaming_latency"] = diarization_streaming_latency + if diarization_min_speakers is not None: + diarization_kwargs["min_speakers"] = diarization_min_speakers + if diarization_max_speakers is not None: + diarization_kwargs["max_speakers"] = diarization_max_speakers + diarization_engine = DiarizationEngine(**diarization_kwargs) logger.info("Diarization engine initialized (models loaded on demand)") - server = NoteFlowServer( port=port, asr_model=asr_model, @@ -15595,19 +9667,15 @@ async def run_server( session_factory=session_factory, summarization_service=summarization_service, diarization_engine=diarization_engine, + diarization_refinement_enabled=diarization_refinement_enabled, ) - - # Set up graceful shutdown loop = asyncio.get_running_loop() shutdown_event = asyncio.Event() - def signal_handler() -> None: logger.info("Received shutdown signal...") shutdown_event.set() - for sig in (signal.SIGINT, signal.SIGTERM): loop.add_signal_handler(sig, signal_handler) - try: await server.start() print(f"\nNoteFlow server running on port {port}") @@ -15621,15 +9689,10 @@ async def run_server( else: print("Diarization: Disabled") print("Press Ctrl+C to stop\n") - - # Wait for shutdown signal or server termination await shutdown_event.wait() finally: await server.stop() - - def main() -> None: - """Entry point for NoteFlow gRPC server.""" parser = argparse.ArgumentParser(description="NoteFlow gRPC Server") parser.add_argument( "-p", @@ -15693,40 +9756,39 @@ def main() -> None: help="Device for diarization (default: auto)", ) args = parser.parse_args() - - # Configure logging log_level = logging.DEBUG if args.verbose else logging.INFO logging.basicConfig( level=log_level, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", ) - - # Get settings try: settings = get_settings() except (OSError, ValueError, ValidationError) as exc: logger.warning("Failed to load settings: %s", exc) settings = None - - # Get database URL from args or settings database_url = args.database_url if not database_url and settings: database_url = str(settings.database_url) if not database_url: logger.warning("No database URL configured, running in-memory mode") - - # Get diarization config from args or settings diarization_enabled = args.diarization 
diarization_hf_token = args.diarization_hf_token diarization_device = args.diarization_device + diarization_streaming_latency: float | None = None + diarization_min_speakers: int | None = None + diarization_max_speakers: int | None = None + diarization_refinement_enabled = True if settings and not diarization_enabled: diarization_enabled = settings.diarization_enabled if settings and not diarization_hf_token: diarization_hf_token = settings.diarization_hf_token if settings and diarization_device == "auto": diarization_device = settings.diarization_device - - # Run server + if settings: + diarization_streaming_latency = settings.diarization_streaming_latency + diarization_min_speakers = settings.diarization_min_speakers + diarization_max_speakers = settings.diarization_max_speakers + diarization_refinement_enabled = settings.diarization_refinement_enabled asyncio.run( run_server( port=args.port, @@ -15737,103 +9799,61 @@ def main() -> None: diarization_enabled=diarization_enabled, diarization_hf_token=diarization_hf_token, diarization_device=diarization_device, + diarization_streaming_latency=diarization_streaming_latency, + diarization_min_speakers=diarization_min_speakers, + diarization_max_speakers=diarization_max_speakers, + diarization_refinement_enabled=diarization_refinement_enabled, ) ) - - if __name__ == "__main__": main() ```` ## File: src/noteflow/infrastructure/asr/segmenter.py ````python -"""Audio segmenter with VAD-driven state machine. - -Manages speech segment boundaries using Voice Activity Detection. -""" - from __future__ import annotations - from dataclasses import dataclass, field from enum import Enum, auto from typing import TYPE_CHECKING - import numpy as np from numpy.typing import NDArray - +from noteflow.config.constants import DEFAULT_SAMPLE_RATE if TYPE_CHECKING: from collections.abc import Iterator - - class SegmenterState(Enum): - """Segmenter state machine states.""" - - IDLE = auto() # Waiting for speech - SPEECH = auto() # Speech detected, accumulating audio - TRAILING = auto() # Speech ended, collecting trailing audio - - + IDLE = auto() + SPEECH = auto() + TRAILING = auto() @dataclass class SegmenterConfig: - """Configuration for segmenter behavior.""" - - # Minimum speech duration to consider valid (seconds) min_speech_duration: float = 0.3 - # Maximum segment duration before forced split (seconds) max_segment_duration: float = 30.0 - # Trailing silence to include after speech ends (seconds) trailing_silence: float = 0.5 - # Leading audio to include before speech starts (seconds) leading_buffer: float = 0.2 - # Sample rate for audio processing - sample_rate: int = 16000 - - + sample_rate: int = DEFAULT_SAMPLE_RATE @dataclass class AudioSegment: - """A completed audio segment ready for transcription.""" - audio: NDArray[np.float32] start_time: float end_time: float - @property def duration(self) -> float: - """Segment duration in seconds.""" return self.end_time - self.start_time - - @dataclass class Segmenter: - """VAD-driven audio segmenter with state machine. - - Accumulates audio during speech and emits complete segments - when speech ends or max duration is reached. 
- """ - config: SegmenterConfig = field(default_factory=SegmenterConfig) - - # State machine _state: SegmenterState = field(default=SegmenterState.IDLE, init=False) - - # Timing tracking _stream_time: float = field(default=0.0, init=False) _speech_start_time: float = field(default=0.0, init=False) _leading_duration: float = field(default=0.0, init=False) - - # Audio buffers _leading_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False) _speech_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False) _trailing_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False) _trailing_duration: float = field(default=0.0, init=False) - @property def state(self) -> SegmenterState: - """Get current segmenter state.""" return self._state - def reset(self) -> None: - """Reset segmenter to initial state.""" self._state = SegmenterState.IDLE self._stream_time = 0.0 self._speech_start_time = 0.0 @@ -15842,72 +9862,43 @@ class Segmenter: self._speech_buffer.clear() self._trailing_buffer.clear() self._trailing_duration = 0.0 - def process_audio( self, audio: NDArray[np.float32], is_speech: bool, ) -> Iterator[AudioSegment]: - """Process audio chunk with VAD decision. - - Args: - audio: Audio samples (float32, mono). - is_speech: VAD decision for this chunk. - - Yields: - Complete AudioSegment when speech ends or max duration reached. - """ chunk_duration = len(audio) / self.config.sample_rate chunk_start = self._stream_time self._stream_time += chunk_duration - if self._state == SegmenterState.IDLE: yield from self._handle_idle(audio, is_speech, chunk_start) elif self._state == SegmenterState.SPEECH: yield from self._handle_speech(audio, is_speech, chunk_start, chunk_duration) elif self._state == SegmenterState.TRAILING: yield from self._handle_trailing(audio, is_speech, chunk_start, chunk_duration) - def flush(self) -> AudioSegment | None: - """Flush any pending audio as a segment. - - Call when stream ends to get final segment. - - Returns: - Remaining audio segment if valid, None otherwise. - """ if self._state in (SegmenterState.SPEECH, SegmenterState.TRAILING): segment = self._emit_segment() self._state = SegmenterState.IDLE return segment return None - def _handle_idle( self, audio: NDArray[np.float32], is_speech: bool, chunk_start: float, ) -> Iterator[AudioSegment]: - """Handle audio in IDLE state.""" if is_speech: - # Speech started - transition to SPEECH state self._state = SegmenterState.SPEECH self._speech_start_time = chunk_start - - # Capture how much pre-speech audio we are including. 
leading_samples = sum(len(chunk) for chunk in self._leading_buffer) self._leading_duration = leading_samples / self.config.sample_rate - - # Include leading buffer (pre-speech audio) self._speech_buffer = list(self._leading_buffer) self._speech_buffer.append(audio) self._leading_buffer.clear() else: - # Still idle - maintain leading buffer self._update_leading_buffer(audio) - - yield from () # No segments emitted in IDLE - + yield from () def _handle_speech( self, audio: NDArray[np.float32], @@ -15915,34 +9906,25 @@ class Segmenter: chunk_start: float, chunk_duration: float, ) -> Iterator[AudioSegment]: - """Handle audio in SPEECH state.""" if is_speech: self._speech_buffer.append(audio) current_duration = self._stream_time - self._speech_start_time - - # Check max duration limit if current_duration >= self.config.max_segment_duration: segment = self._emit_segment() if segment is not None: yield segment - # Start a fresh segment at the end of this chunk self._speech_start_time = self._stream_time self._leading_duration = 0.0 self._speech_buffer = [] else: - # Speech ended - transition to TRAILING - # Start trailing buffer with this silent chunk self._state = SegmenterState.TRAILING self._trailing_buffer = [audio] self._trailing_duration = chunk_duration - - # Check if already past trailing threshold if self._trailing_duration >= self.config.trailing_silence: segment = self._emit_segment() if segment is not None: yield segment self._state = SegmenterState.IDLE - def _handle_trailing( self, audio: NDArray[np.float32], @@ -15950,59 +9932,37 @@ class Segmenter: chunk_start: float, chunk_duration: float, ) -> Iterator[AudioSegment]: - """Handle audio in TRAILING state.""" if is_speech: - # Speech resumed - merge trailing back and continue self._speech_buffer.extend(self._trailing_buffer) self._speech_buffer.append(audio) self._trailing_buffer.clear() self._trailing_duration = 0.0 self._state = SegmenterState.SPEECH else: - # Still silence - accumulate trailing self._trailing_buffer.append(audio) self._trailing_duration += chunk_duration - if self._trailing_duration >= self.config.trailing_silence: - # Enough trailing silence - emit segment segment = self._emit_segment() if segment is not None: yield segment self._state = SegmenterState.IDLE - def _update_leading_buffer(self, audio: NDArray[np.float32]) -> None: - """Maintain rolling leading buffer.""" self._leading_buffer.append(audio) - - # Calculate total buffer duration total_samples = sum(len(chunk) for chunk in self._leading_buffer) total_duration = total_samples / self.config.sample_rate - - # Trim to configured leading buffer size while total_duration > self.config.leading_buffer and self._leading_buffer: removed = self._leading_buffer.pop(0) total_samples -= len(removed) total_duration = total_samples / self.config.sample_rate - def _emit_segment(self) -> AudioSegment | None: - """Create and emit completed segment.""" - # Combine speech + trailing audio all_audio = self._speech_buffer + self._trailing_buffer - - # Calculate actual start time (account for leading buffer) actual_start = max(0.0, self._speech_start_time - self._leading_duration) - - # Concatenate audio audio = np.concatenate(all_audio) if all_audio else np.array([], dtype=np.float32) - - # If we only have silence/trailing audio, don't emit a segment. 
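# Illustrative arithmetic for the minimum-duration gate below (using the
# SegmenterConfig defaults, assuming DEFAULT_SAMPLE_RATE is 16000):
# min_speech_duration=0.3 at 16 kHz means a speech buffer of fewer than
# 0.3 * 16000 = 4800 samples (leading buffer excluded) is discarded --
# the buffers are cleared and None is returned instead of an AudioSegment.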
if not self._speech_buffer: self._trailing_buffer.clear() self._trailing_duration = 0.0 self._leading_duration = 0.0 return None - - # Check minimum speech duration (excluding leading buffer) speech_samples = sum(len(chunk) for chunk in self._speech_buffer) speech_duration = speech_samples / self.config.sample_rate if speech_duration < self.config.min_speech_duration: @@ -16011,105 +9971,48 @@ class Segmenter: self._trailing_duration = 0.0 self._leading_duration = 0.0 return None - segment = AudioSegment( audio=audio, start_time=actual_start, end_time=self._stream_time, ) - - # Clear buffers self._speech_buffer.clear() self._trailing_buffer.clear() self._trailing_duration = 0.0 self._leading_duration = 0.0 - return segment ```` ## File: src/noteflow/infrastructure/asr/streaming_vad.py ````python -"""Streaming Voice Activity Detection. - -Provides real-time speech detection for audio streams. -""" - from __future__ import annotations - from dataclasses import dataclass, field from typing import TYPE_CHECKING, Protocol - +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.infrastructure.audio import compute_rms - if TYPE_CHECKING: import numpy as np from numpy.typing import NDArray - - class VadEngine(Protocol): - """Protocol for VAD engine implementations.""" - def process(self, audio: NDArray[np.float32]) -> bool: - """Process audio chunk and return speech detection result. - - Args: - audio: Audio samples (float32, mono). - - Returns: - True if speech detected, False otherwise. - """ ... - def reset(self) -> None: - """Reset VAD state.""" ... - - @dataclass class EnergyVadConfig: - """Configuration for energy-based VAD.""" - - # Speech detection threshold (RMS energy) speech_threshold: float = 0.01 - # Silence threshold (lower than speech for hysteresis) silence_threshold: float = 0.005 - # Minimum consecutive speech frames to confirm speech min_speech_frames: int = 2 - # Minimum consecutive silence frames to confirm silence min_silence_frames: int = 3 - - @dataclass class EnergyVad: - """Simple energy-based Voice Activity Detection. - - Uses RMS energy with hysteresis for robust detection. - Suitable for clean audio; use silero-vad for noisy environments. - """ - config: EnergyVadConfig = field(default_factory=EnergyVadConfig) - - # Internal state _is_speech: bool = field(default=False, init=False) _speech_frame_count: int = field(default=0, init=False) _silence_frame_count: int = field(default=0, init=False) - def process(self, audio: NDArray[np.float32]) -> bool: - """Process audio chunk and detect speech. - - Uses RMS energy with hysteresis to detect speech. - State transitions require consecutive frames above/below threshold. - - Args: - audio: Audio samples (float32, mono, normalized to [-1, 1]). - - Returns: - True if speech detected, False for silence. - """ energy = compute_rms(audio) - if self._is_speech: - # Currently in speech - check for silence if energy < self.config.silence_threshold: self._silence_frame_count += 1 self._speech_frame_count = 0 @@ -16124,271 +10027,118 @@ class EnergyVad: self._is_speech = True else: self._speech_frame_count = 0 - return self._is_speech - def reset(self) -> None: - """Reset VAD state to initial values.""" self._is_speech = False self._speech_frame_count = 0 self._silence_frame_count = 0 - - @dataclass class StreamingVad: - """Streaming VAD wrapper with configurable backend. - - Wraps VAD engines to provide a unified streaming interface. 
- """ - engine: VadEngine = field(default_factory=EnergyVad) - sample_rate: int = 16000 - + sample_rate: int = DEFAULT_SAMPLE_RATE def process_chunk(self, audio: NDArray[np.float32]) -> bool: - """Process audio chunk through VAD engine. - - Args: - audio: Audio samples (float32, mono). - - Returns: - True if speech detected, False otherwise. - """ return self.engine.process(audio) - def reset(self) -> None: - """Reset VAD state.""" self.engine.reset() ```` -## File: src/noteflow/infrastructure/audio/levels.py +## File: src/noteflow/infrastructure/audio/__init__.py ````python -"""Audio level computation implementation. - -Provide RMS and dB level calculation for VU meter display. -""" - -from __future__ import annotations - -import math -from typing import Final - -import numpy as np -from numpy.typing import NDArray - - -def compute_rms(frames: NDArray[np.float32]) -> float: - """Calculate Root Mean Square of audio samples. - - Args: - frames: Audio samples as float32 array. - - Returns: - RMS level as float (0.0 for empty array). - """ - if len(frames) == 0: - return 0.0 - # Use float64 for precision during squaring to avoid overflow - return float(np.sqrt(np.mean(frames.astype(np.float64) ** 2))) - - -class RmsLevelProvider: - """RMS-based audio level provider. - - Compute RMS (Root Mean Square) level from audio frames for VU meter display. - """ - - # Minimum dB value to report (silence threshold) - MIN_DB: Final[float] = -60.0 - - def get_rms(self, frames: NDArray[np.float32]) -> float: - """Calculate RMS level from audio frames. - - Args: - frames: Audio samples as float32 array (normalized -1.0 to 1.0). - - Returns: - RMS level normalized to 0.0-1.0 range. - """ - rms = compute_rms(frames) - # Clamp to 0.0-1.0 range for VU meter display - return min(1.0, max(0.0, rms)) - - def get_db(self, frames: NDArray[np.float32]) -> float: - """Calculate dB level from audio frames. - - Args: - frames: Audio samples as float32 array (normalized -1.0 to 1.0). - - Returns: - Level in dB (MIN_DB to 0 range). - """ - rms = self.get_rms(frames) - - if rms <= 0: - return self.MIN_DB - - # Convert to dB: 20 * log10(rms) - db = 20.0 * math.log10(rms) - - # Clamp to MIN_DB to 0 range - return max(self.MIN_DB, min(0.0, db)) - - def rms_to_db(self, rms: float) -> float: - """Convert RMS value to dB. - - Args: - rms: RMS level (0.0-1.0). - - Returns: - Level in dB (MIN_DB to 0 range). - """ - if rms <= 0: - return self.MIN_DB - - db = 20.0 * math.log10(rms) - return max(self.MIN_DB, min(0.0, db)) - - def db_to_rms(self, db: float) -> float: - """Convert dB value to RMS. - - Args: - db: Level in dB. - - Returns: - RMS level (0.0-1.0). 
- """ - return 0.0 if db <= self.MIN_DB else 10.0 ** (db / 20.0) +from noteflow.infrastructure.audio.capture import SoundDeviceCapture +from noteflow.infrastructure.audio.dto import ( + AudioDeviceInfo, + AudioFrameCallback, + TimestampedAudio, +) +from noteflow.infrastructure.audio.levels import RmsLevelProvider, compute_rms +from noteflow.infrastructure.audio.playback import PlaybackState, SoundDevicePlayback +from noteflow.infrastructure.audio.protocols import ( + AudioCapture, + AudioLevelProvider, + AudioPlayback, + RingBuffer, +) +from noteflow.infrastructure.audio.reader import MeetingAudioReader +from noteflow.infrastructure.audio.ring_buffer import TimestampedRingBuffer +from noteflow.infrastructure.audio.writer import MeetingAudioWriter +__all__ = [ + "AudioCapture", + "AudioDeviceInfo", + "AudioFrameCallback", + "AudioLevelProvider", + "AudioPlayback", + "MeetingAudioReader", + "MeetingAudioWriter", + "PlaybackState", + "RingBuffer", + "RmsLevelProvider", + "SoundDeviceCapture", + "SoundDevicePlayback", + "TimestampedAudio", + "TimestampedRingBuffer", + "compute_rms", +] ```` ## File: src/noteflow/infrastructure/audio/reader.py ````python -"""Read encrypted audio from archived meetings. - -Mirror of MeetingAudioWriter - reads encrypted PCM16 chunks and converts to float32. -Reuses ChunkedAssetReader from security/crypto.py for decryption. -""" - from __future__ import annotations - import json import logging from pathlib import Path from typing import TYPE_CHECKING - import numpy as np - +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.infrastructure.audio.dto import TimestampedAudio from noteflow.infrastructure.security.crypto import ChunkedAssetReader - if TYPE_CHECKING: from noteflow.infrastructure.security.crypto import AesGcmCryptoBox - logger = logging.getLogger(__name__) - - class MeetingAudioReader: - """Read audio chunks from encrypted meeting file. - - Mirror of MeetingAudioWriter - handles manifest parsing, DEK unwrapping, - and encrypted audio decryption. - - Directory structure (as created by MeetingAudioWriter): - ~/.noteflow/meetings// - ├── manifest.json # Meeting metadata + wrapped DEK - └── audio.enc # Encrypted PCM16 chunks (NFAE format) - """ - def __init__( self, crypto: AesGcmCryptoBox, meetings_dir: Path, ) -> None: - """Initialize audio reader. - - Args: - crypto: CryptoBox instance for decryption and DEK unwrapping. - meetings_dir: Root directory for all meetings (e.g., ~/.noteflow/meetings). - """ self._crypto = crypto self._meetings_dir = meetings_dir self._meeting_dir: Path | None = None - self._sample_rate: int = 16000 - + self._sample_rate: int = DEFAULT_SAMPLE_RATE def load_meeting_audio( self, meeting_id: str, ) -> list[TimestampedAudio]: - """Load all audio from an archived meeting. - - Reads manifest, unwraps DEK, decrypts audio chunks, converts to float32. - - Args: - meeting_id: Meeting UUID string. - - Returns: - List of TimestampedAudio chunks (or empty list if not found/failed). - - Raises: - FileNotFoundError: If meeting directory or audio file not found. - ValueError: If manifest is invalid or audio format unsupported. 
- """ meeting_dir = self._meetings_dir / meeting_id self._meeting_dir = meeting_dir - - # Load and parse manifest manifest_path = meeting_dir / "manifest.json" if not manifest_path.exists(): raise FileNotFoundError(f"Manifest not found: {manifest_path}") - manifest = json.loads(manifest_path.read_text()) - self._sample_rate = manifest.get("sample_rate", 16000) + self._sample_rate = manifest.get("sample_rate", DEFAULT_SAMPLE_RATE) wrapped_dek_hex = manifest.get("wrapped_dek") - if not wrapped_dek_hex: raise ValueError("Manifest missing wrapped_dek") - - # Unwrap DEK wrapped_dek = bytes.fromhex(wrapped_dek_hex) dek = self._crypto.unwrap_dek(wrapped_dek) - - # Open encrypted audio file audio_path = meeting_dir / "audio.enc" if not audio_path.exists(): raise FileNotFoundError(f"Audio file not found: {audio_path}") - reader = ChunkedAssetReader(self._crypto) reader.open(audio_path, dek) - try: return self._read_all_chunks(reader) finally: reader.close() - def _read_all_chunks( self, reader: ChunkedAssetReader, ) -> list[TimestampedAudio]: - """Read and convert all audio chunks. - - Args: - reader: Open ChunkedAssetReader. - - Returns: - List of TimestampedAudio chunks. - """ chunks: list[TimestampedAudio] = [] current_time = 0.0 - for chunk_bytes in reader.read_chunks(): - # Convert PCM16 bytes back to int16 array pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16) - - # Convert int16 [-32768, 32767] to float32 [-1.0, 1.0] audio_float = pcm16.astype(np.float32) / 32767.0 - - # Calculate duration based on sample rate duration = len(audio_float) / self._sample_rate - chunks.append( TimestampedAudio( frames=audio_float, @@ -16396,1480 +10146,587 @@ class MeetingAudioReader: duration=duration, ) ) - current_time += duration - logger.info( "Loaded audio: meeting_dir=%s, chunks=%d, total_duration=%.2fs", self._meeting_dir, len(chunks), current_time, ) - return chunks - def get_manifest(self, meeting_id: str) -> dict[str, object] | None: - """Get manifest metadata for a meeting. - - Args: - meeting_id: Meeting UUID string. - - Returns: - Manifest dict or None if not found. - """ manifest_path = self._meetings_dir / meeting_id / "manifest.json" if not manifest_path.exists(): return None - return dict(json.loads(manifest_path.read_text())) - def audio_exists(self, meeting_id: str) -> bool: - """Check if audio file exists for a meeting. - - Args: - meeting_id: Meeting UUID string. - - Returns: - True if audio.enc exists. - """ meeting_dir = self._meetings_dir / meeting_id audio_path = meeting_dir / "audio.enc" manifest_path = meeting_dir / "manifest.json" return audio_path.exists() and manifest_path.exists() - @property def sample_rate(self) -> int: - """Return the sample rate from the last loaded manifest.""" return self._sample_rate ```` -## File: src/noteflow/infrastructure/export/markdown.py +## File: src/noteflow/infrastructure/converters/orm_converters.py ````python -"""Markdown exporter implementation. - -Export meeting transcripts to Markdown format. 
-""" - from __future__ import annotations +from typing import TYPE_CHECKING +from noteflow.domain.entities import ( + ActionItem, + Annotation, + KeyPoint, + Meeting, + Segment, + Summary, +) +from noteflow.domain.entities import ( + WordTiming as DomainWordTiming, +) +from noteflow.domain.value_objects import ( + AnnotationId, + AnnotationType, + MeetingId, + MeetingState, +) +if TYPE_CHECKING: + from noteflow.infrastructure.persistence.models import ( + ActionItemModel, + AnnotationModel, + KeyPointModel, + MeetingModel, + SegmentModel, + SummaryModel, + WordTimingModel, + ) +class OrmConverter: + @staticmethod + def word_timing_to_domain(model: WordTimingModel) -> DomainWordTiming: + return DomainWordTiming( + word=model.word, + start_time=model.start_time, + end_time=model.end_time, + probability=model.probability, + ) + @staticmethod + def word_timing_to_orm_kwargs(word: DomainWordTiming) -> dict[str, str | float]: + return { + "word": word.word, + "start_time": word.start_time, + "end_time": word.end_time, + "probability": word.probability, + } + @staticmethod + def meeting_to_domain(model: MeetingModel) -> Meeting: + return Meeting( + id=MeetingId(model.id), + title=model.title, + state=MeetingState(model.state), + created_at=model.created_at, + started_at=model.started_at, + ended_at=model.ended_at, + metadata=model.metadata_, + wrapped_dek=model.wrapped_dek, + ) + @staticmethod + def segment_to_domain(model: SegmentModel, include_words: bool = True) -> Segment: + words: list[DomainWordTiming] = [] + if include_words: + words = [OrmConverter.word_timing_to_domain(w) for w in model.words] + embedding = list(model.embedding) if model.embedding is not None else None + return Segment( + segment_id=model.segment_id, + text=model.text, + start_time=model.start_time, + end_time=model.end_time, + meeting_id=MeetingId(model.meeting_id), + words=words, + language=model.language, + language_confidence=model.language_confidence, + avg_logprob=model.avg_logprob, + no_speech_prob=model.no_speech_prob, + embedding=embedding, + speaker_id=model.speaker_id, + speaker_confidence=model.speaker_confidence, + db_id=model.id, + ) + @staticmethod + def annotation_to_domain(model: AnnotationModel) -> Annotation: + return Annotation( + id=AnnotationId(model.annotation_id), + meeting_id=MeetingId(model.meeting_id), + annotation_type=AnnotationType(model.annotation_type), + text=model.text, + start_time=model.start_time, + end_time=model.end_time, + segment_ids=model.segment_ids, + created_at=model.created_at, + db_id=model.id, + ) + @staticmethod + def key_point_to_domain(model: KeyPointModel) -> KeyPoint: + return KeyPoint( + text=model.text, + segment_ids=model.segment_ids, + start_time=model.start_time, + end_time=model.end_time, + db_id=model.id, + ) + @staticmethod + def action_item_to_domain(model: ActionItemModel) -> ActionItem: + return ActionItem( + text=model.text, + assignee=model.assignee, + due_date=model.due_date, + priority=model.priority, + segment_ids=model.segment_ids, + db_id=model.id, + ) + @staticmethod + def summary_to_domain(model: SummaryModel, meeting_id: MeetingId) -> Summary: + return Summary( + meeting_id=meeting_id, + executive_summary=model.executive_summary or "", + key_points=[OrmConverter.key_point_to_domain(kp) for kp in model.key_points], + action_items=[OrmConverter.action_item_to_domain(ai) for ai in model.action_items], + generated_at=model.generated_at, + model_version=model.model_version or "", + db_id=model.id, + ) +```` +## File: 
src/noteflow/infrastructure/export/html.py
+````python
+from __future__ import annotations
+import html
 from datetime import datetime
 from typing import TYPE_CHECKING
-
 from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp
-
 if TYPE_CHECKING:
     from collections.abc import Sequence
-
     from noteflow.domain.entities.meeting import Meeting
     from noteflow.domain.entities.segment import Segment
-
-
-class MarkdownExporter:
-    """Export meeting transcripts to Markdown format.
-
-    Produces clean, readable Markdown with meeting metadata header,
-    transcript sections with timestamps, and optional summary section.
-    """
-
+def _escape(text: str) -> str:
+    return html.escape(text)
+_HTML_TEMPLATE = """<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>{title}</title>
+</head>
+<body>
+{content}
+</body>
+</html>
+"""
+class HtmlExporter:
     @property
     def format_name(self) -> str:
-        """Human-readable format name."""
-        return "Markdown"
-
+        return "HTML"
     @property
     def file_extension(self) -> str:
-        """File extension for Markdown."""
-        return ".md"
-
+        return ".html"
     def export(
         self,
         meeting: Meeting,
         segments: Sequence[Segment],
     ) -> str:
-        """Export meeting transcript to Markdown.
-
-        Args:
-            meeting: Meeting entity with metadata.
-            segments: Ordered list of transcript segments.
-
-        Returns:
-            Markdown-formatted transcript string.
-        """
-        lines: list[str] = [
-            f"# {meeting.title}",
-            "",
-            "## Meeting Info",
-            "",
-            f"- **Date:** {format_datetime(meeting.created_at)}",
+        content_parts: list[str] = [
+            f"<h1>{_escape(meeting.title)}</h1>",
+        ]
+        content_parts.extend(
+            (
+                "<h2>Transcript</h2>",
+                '<div class="transcript">',
+            )
+        )
         for segment in segments:
             timestamp = format_timestamp(segment.start_time)
-            lines.extend((f"**[{timestamp}]** {segment.text}", ""))
-        # Summary section (if available)
+            content_parts.append('<div class="segment">')
+            content_parts.append(f'<span class="timestamp">[{timestamp}]</span>')
+            content_parts.extend((f"<p>{_escape(segment.text)}</p>", "</div>"))
+        content_parts.append("</div>")
         if meeting.summary:
-            lines.extend(("## Summary", ""))
+            content_parts.extend(('<div class="summary">', "<h2>Summary</h2>"))
             if meeting.summary.executive_summary:
-                lines.extend((meeting.summary.executive_summary, ""))
+                content_parts.append(f"<p>{_escape(meeting.summary.executive_summary)}</p>")
             if meeting.summary.key_points:
-                lines.extend(("### Key Points", ""))
-                lines.extend(f"- {point.text}" for point in meeting.summary.key_points)
-                lines.append("")
-
+                content_parts.extend(("<h3>Key Points</h3>", '<ul class="key-points">'))
+                content_parts.extend(
+                    f"<li>{_escape(point.text)}</li>" for point in meeting.summary.key_points
+                )
+                content_parts.append("</ul>")
             if meeting.summary.action_items:
-                lines.extend(("### Action Items", ""))
+                content_parts.extend(("<h3>Action Items</h3>", '<ul class="action-items">'))
                 for item in meeting.summary.action_items:
-                    assignee = f" (@{item.assignee})" if item.assignee else ""
-                    lines.append(f"- [ ] {item.text}{assignee}")
-                lines.append("")
-
-        # Footer
-        lines.append("---")
-        lines.append(f"*Exported from NoteFlow on {format_datetime(datetime.now())}*")
-
-        return "\n".join(lines)
+                    assignee = (
+                        f' <span class="assignee">@{_escape(item.assignee)}</span>'
+                        if item.assignee
+                        else ""
+                    )
+                    content_parts.append(f"<li>{_escape(item.text)}{assignee}</li>")
+                content_parts.append("</ul>")
+            content_parts.append("</div>")
+        content_parts.append("<footer>")
+        content_parts.extend(
+            (
+                f"Exported from NoteFlow on {_escape(format_datetime(datetime.now()))}",
+                "</footer>
", + ) + ) + content = "\n".join(content_parts) + return _HTML_TEMPLATE.format(title=_escape(meeting.title), content=content) ```` -## File: src/noteflow/infrastructure/persistence/migrations/env.py +## File: src/noteflow/infrastructure/persistence/repositories/meeting_repo.py ````python -"""Alembic migration environment configuration.""" - from __future__ import annotations - -import asyncio -import os -from logging.config import fileConfig - -from alembic import context -from sqlalchemy import pool -from sqlalchemy.engine import Connection -from sqlalchemy.ext.asyncio import async_engine_from_config - -from noteflow.infrastructure.persistence.models import Base - -# this is the Alembic Config object, which provides -# access to the values within the .ini file in use. -config = context.config - -# Interpret the config file for Python logging. -# This line sets up loggers basically. -if config.config_file_name is not None: - fileConfig(config.config_file_name) - -# Import all models to ensure they're registered with Base.metadata -target_metadata = Base.metadata - -if database_url := os.environ.get("NOTEFLOW_DATABASE_URL"): - # Convert postgres:// to postgresql+asyncpg:// - if database_url.startswith("postgres://"): - database_url = database_url.replace("postgres://", "postgresql+asyncpg://", 1) - elif database_url.startswith("postgresql://"): - database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1) - config.set_main_option("sqlalchemy.url", database_url) - - -def include_object( - obj: object, - name: str | None, - type_: str, - reflected: bool, - compare_to: object | None, -) -> bool: - """Filter objects for autogenerate.""" - # Only include objects in the noteflow schema - if type_ == "table": - schema = getattr(obj, "schema", None) - return schema == "noteflow" - return True - - -def run_migrations_offline() -> None: - """Run migrations in 'offline' mode. - - This configures the context with just a URL - and not an Engine, though an Engine is acceptable - here as well. By skipping the Engine creation - we don't even need a DBAPI to be available. - - Calls to context.execute() here emit the given string to the - script output. - """ - url = config.get_main_option("sqlalchemy.url") - context.configure( - url=url, - target_metadata=target_metadata, - literal_binds=True, - dialect_opts={"paramstyle": "named"}, - include_schemas=True, - include_object=include_object, - version_table_schema="noteflow", - ) - - with context.begin_transaction(): - context.run_migrations() - - -def do_run_migrations(connection: Connection) -> None: - """Execute migrations with the provided connection.""" - context.configure( - connection=connection, - target_metadata=target_metadata, - include_schemas=True, - include_object=include_object, - version_table_schema="noteflow", - ) - - with context.begin_transaction(): - context.run_migrations() - - -async def run_async_migrations() -> None: - """Run migrations in async mode. - - Create an Engine and associate a connection with the context. 
- """ - connectable = async_engine_from_config( - config.get_section(config.config_ini_section, {}), - prefix="sqlalchemy.", - poolclass=pool.NullPool, - ) - - async with connectable.connect() as connection: - await connection.run_sync(do_run_migrations) - - await connectable.dispose() - - -def run_migrations_online() -> None: - """Run migrations in 'online' mode.""" - asyncio.run(run_async_migrations()) - - -if context.is_offline_mode(): - run_migrations_offline() -else: - run_migrations_online() -```` - -## File: src/noteflow/infrastructure/persistence/repositories/annotation_repo.py -````python -"""SQLAlchemy implementation of AnnotationRepository.""" - -from __future__ import annotations - from collections.abc import Sequence -from typing import TYPE_CHECKING +from datetime import datetime from uuid import UUID - -from sqlalchemy import and_, delete, or_, select - -from noteflow.domain.entities import Annotation -from noteflow.domain.value_objects import AnnotationId +from sqlalchemy import func, select +from noteflow.domain.entities import Meeting +from noteflow.domain.value_objects import MeetingId, MeetingState from noteflow.infrastructure.converters import OrmConverter -from noteflow.infrastructure.persistence.models import AnnotationModel +from noteflow.infrastructure.persistence.models import MeetingModel from noteflow.infrastructure.persistence.repositories._base import BaseRepository - -if TYPE_CHECKING: - from noteflow.domain.value_objects import MeetingId - - -class SqlAlchemyAnnotationRepository(BaseRepository): - """SQLAlchemy implementation of AnnotationRepository.""" - - async def add(self, annotation: Annotation) -> Annotation: - """Add an annotation to a meeting. - - Args: - annotation: Annotation to add. - - Returns: - Added annotation with db_id populated. - - Raises: - ValueError: If meeting does not exist. - """ - model = AnnotationModel( - annotation_id=UUID(str(annotation.id)), - meeting_id=UUID(str(annotation.meeting_id)), - annotation_type=annotation.annotation_type.value, - text=annotation.text, - start_time=annotation.start_time, - end_time=annotation.end_time, - segment_ids=annotation.segment_ids, - created_at=annotation.created_at, +class SqlAlchemyMeetingRepository(BaseRepository): + async def create(self, meeting: Meeting) -> Meeting: + model = MeetingModel( + id=UUID(str(meeting.id)), + title=meeting.title, + state=int(meeting.state), + created_at=meeting.created_at, + started_at=meeting.started_at, + ended_at=meeting.ended_at, + metadata_=meeting.metadata, + wrapped_dek=meeting.wrapped_dek, ) self._session.add(model) await self._session.flush() - annotation.db_id = model.id - return annotation - - async def get(self, annotation_id: AnnotationId) -> Annotation | None: - """Retrieve an annotation by ID. - - Args: - annotation_id: Annotation identifier. - - Returns: - Annotation if found, None otherwise. 
- """ - stmt = select(AnnotationModel).where( - AnnotationModel.annotation_id == UUID(str(annotation_id)) - ) + return meeting + async def get(self, meeting_id: MeetingId) -> Meeting | None: + stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting_id))) model = await self._execute_scalar(stmt) + return None if model is None else OrmConverter.meeting_to_domain(model) + async def update(self, meeting: Meeting) -> Meeting: + stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting.id))) + model = await self._execute_scalar(stmt) + if model is None: + raise ValueError(f"Meeting {meeting.id} not found") + model.title = meeting.title + model.state = int(meeting.state) + model.started_at = meeting.started_at + model.ended_at = meeting.ended_at + model.metadata_ = meeting.metadata + model.wrapped_dek = meeting.wrapped_dek + await self._session.flush() + return meeting + async def delete(self, meeting_id: MeetingId) -> bool: + stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting_id))) + model = await self._execute_scalar(stmt) + if model is None: + return False + await self._delete_and_flush(model) + return True + async def list_all( + self, + states: list[MeetingState] | None = None, + limit: int = 100, + offset: int = 0, + sort_desc: bool = True, + ) -> tuple[Sequence[Meeting], int]: + stmt = select(MeetingModel) + if states: + state_values = [int(s) for s in states] + stmt = stmt.where(MeetingModel.state.in_(state_values)) + count_stmt = select(func.count()).select_from(stmt.subquery()) + total_result = await self._session.execute(count_stmt) + total = total_result.scalar() or 0 + order_col = MeetingModel.created_at.desc() if sort_desc else MeetingModel.created_at.asc() + stmt = stmt.order_by(order_col).offset(offset).limit(limit) + result = await self._session.execute(stmt) + models = result.scalars().all() + meetings = [OrmConverter.meeting_to_domain(m) for m in models] + return meetings, total + async def count_by_state(self, state: MeetingState) -> int: + stmt = ( + select(func.count()).select_from(MeetingModel).where(MeetingModel.state == int(state)) + ) + result = await self._session.execute(stmt) + return result.scalar() or 0 + async def find_older_than(self, cutoff: datetime) -> Sequence[Meeting]: + stmt = ( + select(MeetingModel) + .where(MeetingModel.ended_at.isnot(None)) + .where(MeetingModel.ended_at < cutoff) + .order_by(MeetingModel.ended_at.asc()) + ) + result = await self._session.execute(stmt) + models = result.scalars().all() + return [OrmConverter.meeting_to_domain(m) for m in models] +```` - return None if model is None else OrmConverter.annotation_to_domain(model) - +## File: src/noteflow/infrastructure/persistence/repositories/segment_repo.py +````python +from __future__ import annotations +from collections.abc import Sequence +from uuid import UUID +from sqlalchemy import func, select +from noteflow.domain.entities import Segment +from noteflow.domain.value_objects import MeetingId +from noteflow.infrastructure.converters import OrmConverter +from noteflow.infrastructure.persistence.models import SegmentModel, WordTimingModel +from noteflow.infrastructure.persistence.repositories._base import BaseRepository +class SqlAlchemySegmentRepository(BaseRepository): + async def add(self, meeting_id: MeetingId, segment: Segment) -> Segment: + model = SegmentModel( + meeting_id=UUID(str(meeting_id)), + segment_id=segment.segment_id, + text=segment.text, + start_time=segment.start_time, + end_time=segment.end_time, + 
language=segment.language, + language_confidence=segment.language_confidence, + avg_logprob=segment.avg_logprob, + no_speech_prob=segment.no_speech_prob, + embedding=segment.embedding, + speaker_id=segment.speaker_id, + speaker_confidence=segment.speaker_confidence, + ) + for word in segment.words: + word_kwargs = OrmConverter.word_timing_to_orm_kwargs(word) + word_model = WordTimingModel(**word_kwargs) + model.words.append(word_model) + self._session.add(model) + await self._session.flush() + segment.db_id = model.id + segment.meeting_id = meeting_id + return segment + async def add_batch( + self, + meeting_id: MeetingId, + segments: Sequence[Segment], + ) -> Sequence[Segment]: + result_segments: list[Segment] = [] + for segment in segments: + added = await self.add(meeting_id, segment) + result_segments.append(added) + return result_segments async def get_by_meeting( self, meeting_id: MeetingId, - ) -> Sequence[Annotation]: - """Get all annotations for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - List of annotations ordered by start_time. - """ + include_words: bool = True, + ) -> Sequence[Segment]: stmt = ( - select(AnnotationModel) - .where(AnnotationModel.meeting_id == UUID(str(meeting_id))) - .order_by(AnnotationModel.start_time) + select(SegmentModel) + .where(SegmentModel.meeting_id == UUID(str(meeting_id))) + .order_by(SegmentModel.segment_id) ) models = await self._execute_scalars(stmt) - - return [OrmConverter.annotation_to_domain(model) for model in models] - - async def get_by_time_range( + return [OrmConverter.segment_to_domain(m, include_words) for m in models] + async def search_semantic( self, - meeting_id: MeetingId, - start_time: float, - end_time: float, - ) -> Sequence[Annotation]: - """Get annotations within a time range. - - Args: - meeting_id: Meeting identifier. - start_time: Start of time range in seconds. - end_time: End of time range in seconds. - - Returns: - List of annotations overlapping the time range. - """ - # Find annotations that overlap with the given time range - stmt = ( - select(AnnotationModel) - .where( - and_( - AnnotationModel.meeting_id == UUID(str(meeting_id)), - or_( - # Annotation starts within range - and_( - AnnotationModel.start_time >= start_time, - AnnotationModel.start_time <= end_time, - ), - # Annotation ends within range - and_( - AnnotationModel.end_time >= start_time, - AnnotationModel.end_time <= end_time, - ), - # Annotation spans entire range - and_( - AnnotationModel.start_time <= start_time, - AnnotationModel.end_time >= end_time, - ), - ), - ) - ) - .order_by(AnnotationModel.start_time) + query_embedding: list[float], + limit: int = 10, + meeting_id: MeetingId | None = None, + ) -> Sequence[tuple[Segment, float]]: + similarity = SegmentModel.embedding.cosine_distance(query_embedding) + stmt = select(SegmentModel, similarity.label("distance")).where( + SegmentModel.embedding.is_not(None) ) - models = await self._execute_scalars(stmt) - - return [OrmConverter.annotation_to_domain(model) for model in models] - - async def update(self, annotation: Annotation) -> Annotation: - """Update an existing annotation. - - Args: - annotation: Annotation with updated fields. - - Returns: - Updated annotation. - - Raises: - ValueError: If annotation does not exist. 
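# The three OR branches in get_by_time_range above cover every interval
# overlap case (starts inside, ends inside, or spans the range); they are
# equivalent to the compact predicate
#   start_time <= range_end AND end_time >= range_start.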
- """ - stmt = select(AnnotationModel).where( - AnnotationModel.annotation_id == UUID(str(annotation.id)) - ) - model = await self._execute_scalar(stmt) - - if model is None: - raise ValueError(f"Annotation {annotation.id} not found") - - model.annotation_type = annotation.annotation_type.value - model.text = annotation.text - model.start_time = annotation.start_time - model.end_time = annotation.end_time - model.segment_ids = annotation.segment_ids - - await self._session.flush() - return annotation - - async def delete(self, annotation_id: AnnotationId) -> bool: - """Delete an annotation. - - Args: - annotation_id: Annotation identifier. - - Returns: - True if deleted, False if not found. - """ - stmt = select(AnnotationModel).where( - AnnotationModel.annotation_id == UUID(str(annotation_id)) - ) - model = await self._execute_scalar(stmt) - - if model is None: - return False - - await self._session.execute(delete(AnnotationModel).where(AnnotationModel.id == model.id)) - await self._session.flush() - return True -```` - -## File: src/noteflow/infrastructure/persistence/repositories/summary_repo.py -````python -"""SQLAlchemy implementation of SummaryRepository.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING -from uuid import UUID - -from sqlalchemy import delete, select - -from noteflow.domain.entities import ActionItem, KeyPoint, Summary -from noteflow.infrastructure.converters import OrmConverter -from noteflow.infrastructure.persistence.models import ( - ActionItemModel, - KeyPointModel, - SummaryModel, -) -from noteflow.infrastructure.persistence.repositories._base import BaseRepository - -if TYPE_CHECKING: - from noteflow.domain.value_objects import MeetingId - - -class SqlAlchemySummaryRepository(BaseRepository): - """SQLAlchemy implementation of SummaryRepository.""" - - async def save(self, summary: Summary) -> Summary: - """Save or update a meeting summary. - - Args: - summary: Summary to save. - - Returns: - Saved summary with db_id populated. 
- """ - # Check if summary exists for this meeting - stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(summary.meeting_id))) + if meeting_id: + stmt = stmt.where(SegmentModel.meeting_id == UUID(str(meeting_id))) + stmt = stmt.order_by(similarity).limit(limit) result = await self._session.execute(stmt) - if existing := result.scalar_one_or_none(): - # Update existing summary - existing.executive_summary = summary.executive_summary - if summary.generated_at is not None: - existing.generated_at = summary.generated_at - existing.model_version = summary.model_version - - # Delete old key points and action items - await self._session.execute( - delete(KeyPointModel).where(KeyPointModel.summary_id == existing.id) - ) - await self._session.execute( - delete(ActionItemModel).where(ActionItemModel.summary_id == existing.id) - ) - - # Add new key points - kp_models: list[tuple[KeyPointModel, KeyPoint]] = [] - for kp in summary.key_points: - kp_model = KeyPointModel( - summary_id=existing.id, - text=kp.text, - start_time=kp.start_time, - end_time=kp.end_time, - segment_ids=kp.segment_ids, - ) - self._session.add(kp_model) - kp_models.append((kp_model, kp)) - - # Add new action items - ai_models: list[tuple[ActionItemModel, ActionItem]] = [] - for ai in summary.action_items: - ai_model = ActionItemModel( - summary_id=existing.id, - text=ai.text, - assignee=ai.assignee, - due_date=ai.due_date, - priority=ai.priority, - segment_ids=ai.segment_ids, - ) - self._session.add(ai_model) - ai_models.append((ai_model, ai)) - - await self._session.flush() - for kp_model, kp in kp_models: - kp.db_id = kp_model.id - for ai_model, ai in ai_models: - ai.db_id = ai_model.id - summary.db_id = existing.id - else: - # Create new summary - model = SummaryModel( - meeting_id=UUID(str(summary.meeting_id)), - executive_summary=summary.executive_summary, - generated_at=summary.generated_at, - model_version=summary.model_version, - ) - self._session.add(model) - await self._session.flush() - - # Add key points - for kp in summary.key_points: - kp_model = KeyPointModel( - summary_id=model.id, - text=kp.text, - start_time=kp.start_time, - end_time=kp.end_time, - segment_ids=kp.segment_ids, - ) - self._session.add(kp_model) - await self._session.flush() - kp.db_id = kp_model.id - - # Add action items - for ai in summary.action_items: - ai_model = ActionItemModel( - summary_id=model.id, - text=ai.text, - assignee=ai.assignee, - due_date=ai.due_date, - priority=ai.priority, - segment_ids=ai.segment_ids, - ) - self._session.add(ai_model) - await self._session.flush() - ai.db_id = ai_model.id - - summary.db_id = model.id - - return summary - - async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None: - """Get summary for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - Summary if exists, None otherwise. - """ - stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(meeting_id))) - model = await self._execute_scalar(stmt) - - return None if model is None else OrmConverter.summary_to_domain(model, meeting_id) - - async def delete_by_meeting(self, meeting_id: MeetingId) -> bool: - """Delete summary for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - True if deleted, False if not found. 
- """ - stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(meeting_id))) - model = await self._execute_scalar(stmt) - - if model is None: - return False - - await self._delete_and_flush(model) - return True -```` - -## File: src/noteflow/infrastructure/persistence/models.py -````python -"""SQLAlchemy ORM models for NoteFlow.""" - -from __future__ import annotations - -from datetime import datetime -from typing import ClassVar -from uuid import uuid4 - -from pgvector.sqlalchemy import Vector -from sqlalchemy import ( - DateTime, - Float, - ForeignKey, - Integer, - LargeBinary, - String, - Text, -) -from sqlalchemy.dialects.postgresql import JSONB, UUID -from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship - -# Vector dimension for embeddings (OpenAI compatible) -EMBEDDING_DIM = 1536 - - -class Base(DeclarativeBase): - """Base class for all ORM models.""" - - pass - - -class MeetingModel(Base): - """SQLAlchemy model for meetings table.""" - - __tablename__ = "meetings" - __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} - - id: Mapped[UUID] = mapped_column( - UUID(as_uuid=True), - primary_key=True, - default=uuid4, - ) - title: Mapped[str] = mapped_column(String(255), nullable=False) - state: Mapped[int] = mapped_column(Integer, nullable=False, default=1) - created_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), - nullable=False, - default=datetime.now, - ) - started_at: Mapped[datetime | None] = mapped_column( - DateTime(timezone=True), - nullable=True, - ) - ended_at: Mapped[datetime | None] = mapped_column( - DateTime(timezone=True), - nullable=True, - ) - metadata_: Mapped[dict[str, str]] = mapped_column( - "metadata", - JSONB, - nullable=False, - default=dict, - ) - wrapped_dek: Mapped[bytes | None] = mapped_column( - LargeBinary, - nullable=True, - ) - - # Relationships - segments: Mapped[list[SegmentModel]] = relationship( - "SegmentModel", - back_populates="meeting", - cascade="all, delete-orphan", - lazy="selectin", - ) - summary: Mapped[SummaryModel | None] = relationship( - "SummaryModel", - back_populates="meeting", - cascade="all, delete-orphan", - uselist=False, - lazy="selectin", - ) - annotations: Mapped[list[AnnotationModel]] = relationship( - "AnnotationModel", - back_populates="meeting", - cascade="all, delete-orphan", - lazy="selectin", - ) - - -class SegmentModel(Base): - """SQLAlchemy model for segments table.""" - - __tablename__ = "segments" - __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} - - id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - meeting_id: Mapped[UUID] = mapped_column( - UUID(as_uuid=True), - ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), - nullable=False, - ) - segment_id: Mapped[int] = mapped_column(Integer, nullable=False) - text: Mapped[str] = mapped_column(Text, nullable=False) - start_time: Mapped[float] = mapped_column(Float, nullable=False) - end_time: Mapped[float] = mapped_column(Float, nullable=False) - language: Mapped[str] = mapped_column(String(10), nullable=False, default="en") - language_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) - avg_logprob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) - no_speech_prob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) - embedding: Mapped[list[float] | None] = mapped_column( - Vector(EMBEDDING_DIM), - nullable=True, - ) - speaker_id: Mapped[str | None] = mapped_column(String(50), nullable=True) 
- speaker_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) - created_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), - nullable=False, - default=datetime.now, - ) - - # Relationships - meeting: Mapped[MeetingModel] = relationship( - "MeetingModel", - back_populates="segments", - ) - words: Mapped[list[WordTimingModel]] = relationship( - "WordTimingModel", - back_populates="segment", - cascade="all, delete-orphan", - lazy="selectin", - ) - - -class WordTimingModel(Base): - """SQLAlchemy model for word_timings table.""" - - __tablename__ = "word_timings" - __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} - - id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - segment_pk: Mapped[int] = mapped_column( - Integer, - ForeignKey("noteflow.segments.id", ondelete="CASCADE"), - nullable=False, - ) - word: Mapped[str] = mapped_column(String(255), nullable=False) - start_time: Mapped[float] = mapped_column(Float, nullable=False) - end_time: Mapped[float] = mapped_column(Float, nullable=False) - probability: Mapped[float] = mapped_column(Float, nullable=False) - - # Relationships - segment: Mapped[SegmentModel] = relationship( - "SegmentModel", - back_populates="words", - ) - - -class SummaryModel(Base): - """SQLAlchemy model for summaries table.""" - - __tablename__ = "summaries" - __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} - - id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - meeting_id: Mapped[UUID] = mapped_column( - UUID(as_uuid=True), - ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), - nullable=False, - unique=True, - ) - executive_summary: Mapped[str | None] = mapped_column(Text, nullable=True) - generated_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), - nullable=False, - default=datetime.now, - ) - model_version: Mapped[str | None] = mapped_column(String(50), nullable=True) - - # Relationships - meeting: Mapped[MeetingModel] = relationship( - "MeetingModel", - back_populates="summary", - ) - key_points: Mapped[list[KeyPointModel]] = relationship( - "KeyPointModel", - back_populates="summary", - cascade="all, delete-orphan", - lazy="selectin", - ) - action_items: Mapped[list[ActionItemModel]] = relationship( - "ActionItemModel", - back_populates="summary", - cascade="all, delete-orphan", - lazy="selectin", - ) - - -class KeyPointModel(Base): - """SQLAlchemy model for key_points table.""" - - __tablename__ = "key_points" - __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} - - id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - summary_id: Mapped[int] = mapped_column( - Integer, - ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), - nullable=False, - ) - text: Mapped[str] = mapped_column(Text, nullable=False) - start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) - end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) - segment_ids: Mapped[list[int]] = mapped_column( - JSONB, - nullable=False, - default=list, - ) - - # Relationships - summary: Mapped[SummaryModel] = relationship( - "SummaryModel", - back_populates="key_points", - ) - - -class ActionItemModel(Base): - """SQLAlchemy model for action_items table.""" - - __tablename__ = "action_items" - __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} - - id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - summary_id: Mapped[int] = 
mapped_column( - Integer, - ForeignKey("noteflow.summaries.id", ondelete="CASCADE"), - nullable=False, - ) - text: Mapped[str] = mapped_column(Text, nullable=False) - assignee: Mapped[str] = mapped_column(String(255), nullable=False, default="") - due_date: Mapped[datetime | None] = mapped_column( - DateTime(timezone=True), - nullable=True, - ) - priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0) - segment_ids: Mapped[list[int]] = mapped_column( - JSONB, - nullable=False, - default=list, - ) - - # Relationships - summary: Mapped[SummaryModel] = relationship( - "SummaryModel", - back_populates="action_items", - ) - - -class AnnotationModel(Base): - """SQLAlchemy model for annotations table. - - User-created annotations during recording. Distinct from LLM-extracted - ActionItem/KeyPoint which belong to Summary. Annotations belong directly - to Meeting and are created in real-time. - """ - - __tablename__ = "annotations" - __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} - - id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) - annotation_id: Mapped[UUID] = mapped_column( - UUID(as_uuid=True), - nullable=False, - unique=True, - default=uuid4, - ) - meeting_id: Mapped[UUID] = mapped_column( - UUID(as_uuid=True), - ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), - nullable=False, - ) - annotation_type: Mapped[str] = mapped_column(String(50), nullable=False) - text: Mapped[str] = mapped_column(Text, nullable=False) - start_time: Mapped[float] = mapped_column(Float, nullable=False) - end_time: Mapped[float] = mapped_column(Float, nullable=False) - segment_ids: Mapped[list[int]] = mapped_column( - JSONB, - nullable=False, - default=list, - ) - created_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), - nullable=False, - default=datetime.now, - ) - - # Relationships - meeting: Mapped[MeetingModel] = relationship( - "MeetingModel", - back_populates="annotations", - ) -```` - -## File: src/noteflow/infrastructure/summarization/__init__.py -````python -"""Summarization infrastructure module. - -Provides summarization provider implementations and citation verification. -""" - -from noteflow.infrastructure.summarization.citation_verifier import ( - SegmentCitationVerifier, -) -from noteflow.infrastructure.summarization.cloud_provider import ( - CloudBackend, - CloudSummarizer, -) -from noteflow.infrastructure.summarization.factory import create_summarization_service -from noteflow.infrastructure.summarization.mock_provider import MockSummarizer -from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer - -__all__ = [ - "CloudBackend", - "CloudSummarizer", - "MockSummarizer", - "OllamaSummarizer", - "SegmentCitationVerifier", - "create_summarization_service", -] -```` - -## File: src/noteflow/infrastructure/summarization/_parsing.py -````python -"""Shared parsing utilities for summarization providers.""" - -from __future__ import annotations - -import json -from datetime import UTC, datetime -from typing import TYPE_CHECKING - -from noteflow.domain.entities import ActionItem, KeyPoint, Summary -from noteflow.domain.summarization import InvalidResponseError - -if TYPE_CHECKING: - from noteflow.domain.summarization import SummarizationRequest - - -# System prompt for structured summarization -SYSTEM_PROMPT = """You are a meeting summarization assistant. Analyze the transcript and produce structured output. 
- -OUTPUT FORMAT (JSON): -{ - "executive_summary": "2-3 sentence high-level overview", - "key_points": [ - {"text": "Key insight or decision", "segment_ids": [0, 1]} - ], - "action_items": [ - {"text": "Action to take", "assignee": "Person name or empty string", "priority": 0, "segment_ids": [2]} - ] -} - -RULES: -1. Each key_point and action_item MUST have at least one segment_id referencing the source -2. segment_ids are integers matching the [N] markers in the transcript -3. priority: 0=unspecified, 1=low, 2=medium, 3=high -4. Only extract action items that clearly indicate tasks to be done -5. Output ONLY valid JSON, no markdown or explanation""" - - -def build_transcript_prompt(request: SummarizationRequest) -> str: - """Build transcript prompt with segment markers. - - Args: - request: Summarization request with segments. - - Returns: - Formatted prompt string with transcript and constraints. - """ - lines = [f"[{seg.segment_id}] {seg.text}" for seg in request.segments] - constraints = "" - if request.segments: - valid_ids = ", ".join(str(seg.segment_id) for seg in request.segments) - constraints = ( - "\n\nCONSTRAINTS:\n" - f"- Maximum {request.max_key_points} key points\n" - f"- Maximum {request.max_action_items} action items\n" - f"- Valid segment_ids: {valid_ids}" - ) - - return f"TRANSCRIPT:\n{chr(10).join(lines)}{constraints}" - - -def parse_llm_response(response_text: str, request: SummarizationRequest) -> Summary: - """Parse JSON response into Summary entity. - - Args: - response_text: Raw JSON response from LLM. - request: Original request for validation context. - - Returns: - Summary entity with parsed data. - - Raises: - InvalidResponseError: If JSON is malformed. - """ - # Strip markdown code fences if present - text = response_text.strip() - if text.startswith("```"): - lines = text.split("\n") - if lines[0].startswith("```"): - lines = lines[1:] - if lines and lines[-1].strip() == "```": - lines = lines[:-1] - text = "\n".join(lines) - - try: - data = json.loads(text) - except json.JSONDecodeError as e: - raise InvalidResponseError(f"Invalid JSON response: {e}") from e - - valid_ids = {seg.segment_id for seg in request.segments} - - # Parse key points - key_points: list[KeyPoint] = [] - for kp_data in data.get("key_points", [])[: request.max_key_points]: - seg_ids = [sid for sid in kp_data.get("segment_ids", []) if sid in valid_ids] - start_time = 0.0 - end_time = 0.0 - if seg_ids and (refs := [s for s in request.segments if s.segment_id in seg_ids]): - start_time = min(s.start_time for s in refs) - end_time = max(s.end_time for s in refs) - key_points.append( - KeyPoint( - text=str(kp_data.get("text", "")), - segment_ids=seg_ids, - start_time=start_time, - end_time=end_time, - ) - ) - - # Parse action items - action_items: list[ActionItem] = [] - for ai_data in data.get("action_items", [])[: request.max_action_items]: - seg_ids = [sid for sid in ai_data.get("segment_ids", []) if sid in valid_ids] - priority = ai_data.get("priority", 0) - if not isinstance(priority, int) or priority not in range(4): - priority = 0 - action_items.append( - ActionItem( - text=str(ai_data.get("text", "")), - assignee=str(ai_data.get("assignee", "")), - priority=priority, - segment_ids=seg_ids, - ) - ) - - return Summary( - meeting_id=request.meeting_id, - executive_summary=str(data.get("executive_summary", "")), - key_points=key_points, - action_items=action_items, - generated_at=datetime.now(UTC), - ) -```` - -## File: src/noteflow/infrastructure/summarization/cloud_provider.py 
-````python -"""Cloud summarization provider for OpenAI/Anthropic APIs.""" - -from __future__ import annotations - -import asyncio -import os -import time -from datetime import UTC, datetime -from enum import Enum -from typing import TYPE_CHECKING, cast - -from noteflow.domain.entities import Summary -from noteflow.domain.summarization import ( - InvalidResponseError, - ProviderUnavailableError, - SummarizationRequest, - SummarizationResult, - SummarizationTimeoutError, -) -from noteflow.infrastructure.summarization._parsing import ( - SYSTEM_PROMPT, - build_transcript_prompt, - parse_llm_response, -) - -if TYPE_CHECKING: - import anthropic - import openai - - -class CloudBackend(Enum): - """Supported cloud LLM backends.""" - - OPENAI = "openai" - ANTHROPIC = "anthropic" - - -class CloudSummarizer: - """Cloud-based LLM summarizer using OpenAI or Anthropic. - - Requires explicit user consent as data is sent to external services. - """ - - def __init__( + rows = result.all() + results: list[tuple[Segment, float]] = [] + for row in rows: + model = row[0] + distance = row[1] + similarity_score = 1.0 - float(distance) + segment = OrmConverter.segment_to_domain(model, include_words=False) + results.append((segment, similarity_score)) + return results + async def update_embedding( self, - backend: CloudBackend = CloudBackend.OPENAI, - api_key: str | None = None, - model: str | None = None, - timeout_seconds: float = 60.0, - base_url: str | None = None, + segment_db_id: int, + embedding: list[float], ) -> None: - """Initialize cloud summarizer. - - Args: - backend: Cloud provider backend (OpenAI or Anthropic). - api_key: API key (defaults to env var if not provided). - model: Model name (defaults per backend if not provided). - timeout_seconds: Request timeout in seconds. - base_url: Optional base URL (OpenAI only; defaults to OpenAI API). - """ - self._backend = backend - self._api_key = api_key - self._timeout = timeout_seconds - self._client: openai.OpenAI | anthropic.Anthropic | None = None - # Only used for OpenAI - self._openai_base_url = ( - base_url - if base_url is not None - else os.environ.get("OPENAI_BASE_URL") - if backend == CloudBackend.OPENAI - else None + stmt = select(SegmentModel).where(SegmentModel.id == segment_db_id) + result = await self._session.execute(stmt) + if model := result.scalar_one_or_none(): + model.embedding = embedding + await self._session.flush() + async def update_speaker( + self, + segment_db_id: int, + speaker_id: str | None, + speaker_confidence: float, + ) -> None: + stmt = select(SegmentModel).where(SegmentModel.id == segment_db_id) + result = await self._session.execute(stmt) + if model := result.scalar_one_or_none(): + model.speaker_id = speaker_id + model.speaker_confidence = speaker_confidence + await self._session.flush() + async def get_next_segment_id(self, meeting_id: MeetingId) -> int: + stmt = select(func.max(SegmentModel.segment_id)).where( + SegmentModel.meeting_id == UUID(str(meeting_id)) ) - - # Set default models per backend - if model is None: - self._model = ( - "gpt-4o-mini" if backend == CloudBackend.OPENAI else "claude-3-haiku-20240307" - ) - else: - self._model = model - - def _get_openai_client(self) -> openai.OpenAI: - """Get or create OpenAI client.""" - if self._client is None: - try: - import openai - - self._client = openai.OpenAI( - api_key=self._api_key, - timeout=self._timeout, - base_url=self._openai_base_url, - ) - except ImportError as e: - raise ProviderUnavailableError( - "openai package not installed. 
Install with: pip install openai" - ) from e - return cast(openai.OpenAI, self._client) - - def _get_anthropic_client(self) -> anthropic.Anthropic: - """Get or create Anthropic client.""" - if self._client is None: - try: - import anthropic - - self._client = anthropic.Anthropic(api_key=self._api_key, timeout=self._timeout) - except ImportError as e: - raise ProviderUnavailableError( - "anthropic package not installed. Install with: pip install anthropic" - ) from e - return cast(anthropic.Anthropic, self._client) - - @property - def provider_name(self) -> str: - """Provider identifier.""" - return self._backend.value - - @property - def is_available(self) -> bool: - """Check if cloud provider is configured with an API key.""" - import os - - if self._api_key: - return True - - # Check environment variables - if self._backend == CloudBackend.OPENAI: - return bool(os.environ.get("OPENAI_API_KEY")) - return bool(os.environ.get("ANTHROPIC_API_KEY")) - - @property - def requires_cloud_consent(self) -> bool: - """Cloud providers require explicit user consent.""" - return True - - async def summarize(self, request: SummarizationRequest) -> SummarizationResult: - """Generate evidence-linked summary using cloud LLM. - - Args: - request: Summarization request with segments. - - Returns: - SummarizationResult with generated summary. - - Raises: - ProviderUnavailableError: If provider not configured. - SummarizationTimeoutError: If request times out. - InvalidResponseError: If response cannot be parsed. - """ - start = time.monotonic() - - # Handle empty segments - if not request.segments: - return SummarizationResult( - summary=Summary( - meeting_id=request.meeting_id, - executive_summary="No transcript segments to summarize.", - key_points=[], - action_items=[], - generated_at=datetime.now(UTC), - model_version=self._model, - ), - model_name=self._model, - provider_name=self.provider_name, - tokens_used=None, - latency_ms=0.0, - ) - - user_prompt = build_transcript_prompt(request) - - if self._backend == CloudBackend.OPENAI: - content, tokens_used = await asyncio.to_thread(self._call_openai, user_prompt) - else: - content, tokens_used = await asyncio.to_thread(self._call_anthropic, user_prompt) - - # Parse into Summary - summary = parse_llm_response(content, request) - summary = Summary( - meeting_id=summary.meeting_id, - executive_summary=summary.executive_summary, - key_points=summary.key_points, - action_items=summary.action_items, - generated_at=summary.generated_at, - model_version=self._model, - ) - - elapsed_ms = (time.monotonic() - start) * 1000 - - return SummarizationResult( - summary=summary, - model_name=self._model, - provider_name=self.provider_name, - tokens_used=tokens_used, - latency_ms=elapsed_ms, - ) - - def _call_openai(self, user_prompt: str) -> tuple[str, int | None]: - """Call OpenAI API and return (content, tokens_used).""" - try: - client = self._get_openai_client() - except ProviderUnavailableError: - raise - - try: - response = client.chat.completions.create( - model=self._model, - messages=[ - {"role": "system", "content": SYSTEM_PROMPT}, - {"role": "user", "content": user_prompt}, - ], - temperature=0.3, - response_format={"type": "json_object"}, - ) - except TimeoutError as e: - raise SummarizationTimeoutError(f"OpenAI request timed out: {e}") from e - except Exception as e: - err_str = str(e).lower() - if "api key" in err_str or "authentication" in err_str: - raise ProviderUnavailableError(f"OpenAI authentication failed: {e}") from e - if "rate limit" in err_str: - 
raise SummarizationTimeoutError(f"OpenAI rate limited: {e}") from e - raise InvalidResponseError(f"OpenAI error: {e}") from e - - content = response.choices[0].message.content or "" - if not content: - raise InvalidResponseError("Empty response from OpenAI") - - tokens_used = response.usage.total_tokens if response.usage else None - return content, tokens_used - - def _call_anthropic(self, user_prompt: str) -> tuple[str, int | None]: - """Call Anthropic API and return (content, tokens_used).""" - try: - client = self._get_anthropic_client() - except ProviderUnavailableError: - raise - - try: - response = client.messages.create( - model=self._model, - max_tokens=4096, - system=SYSTEM_PROMPT, - messages=[{"role": "user", "content": user_prompt}], - ) - except TimeoutError as e: - raise SummarizationTimeoutError(f"Anthropic request timed out: {e}") from e - except Exception as e: - err_str = str(e).lower() - if "api key" in err_str or "authentication" in err_str: - raise ProviderUnavailableError(f"Anthropic authentication failed: {e}") from e - if "rate limit" in err_str: - raise SummarizationTimeoutError(f"Anthropic rate limited: {e}") from e - raise InvalidResponseError(f"Anthropic error: {e}") from e - - content = "".join(block.text for block in response.content if hasattr(block, "text")) - if not content: - raise InvalidResponseError("Empty response from Anthropic") - - tokens_used = None - if hasattr(response, "usage"): - tokens_used = response.usage.input_tokens + response.usage.output_tokens - - return content, tokens_used + result = await self._session.execute(stmt) + max_segment_id = result.scalar_one_or_none() + return 0 if max_segment_id is None else int(max_segment_id) + 1 ```` -## File: src/noteflow/infrastructure/summarization/mock_provider.py +## File: src/noteflow/infrastructure/persistence/unit_of_work.py ````python -"""Mock summarization provider for testing.""" - from __future__ import annotations - -import time -from datetime import UTC, datetime - -from noteflow.domain.entities import ActionItem, KeyPoint, Summary -from noteflow.domain.summarization import ( - SummarizationRequest, - SummarizationResult, +from collections.abc import Callable +from typing import Self +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker +from noteflow.config.settings import Settings +from noteflow.infrastructure.persistence.database import ( + create_async_engine, + get_async_session_factory, ) - - -class MockSummarizer: - """Deterministic mock summarizer for testing. - - Generates predictable summaries based on input segments without - requiring an actual LLM. Useful for unit tests and development. - """ - - def __init__(self, latency_ms: float = 10.0) -> None: - """Initialize mock summarizer. - - Args: - latency_ms: Simulated latency in milliseconds. 
- """ - self._latency_ms = latency_ms - +from .repositories import ( + SqlAlchemyAnnotationRepository, + SqlAlchemyMeetingRepository, + SqlAlchemySegmentRepository, + SqlAlchemySummaryRepository, +) +class SqlAlchemyUnitOfWork: + def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None: + self._session_factory = session_factory + self._session: AsyncSession | None = None + self._annotations_repo: SqlAlchemyAnnotationRepository | None = None + self._meetings_repo: SqlAlchemyMeetingRepository | None = None + self._segments_repo: SqlAlchemySegmentRepository | None = None + self._summaries_repo: SqlAlchemySummaryRepository | None = None + @classmethod + def from_settings(cls, settings: Settings) -> SqlAlchemyUnitOfWork: + engine = create_async_engine(settings) + session_factory = get_async_session_factory(engine) + return cls(session_factory) + @classmethod + def factory_from_settings(cls, settings: Settings) -> Callable[[], SqlAlchemyUnitOfWork]: + engine = create_async_engine(settings) + session_factory = get_async_session_factory(engine) + def _factory() -> SqlAlchemyUnitOfWork: + return cls(session_factory) + return _factory @property - def provider_name(self) -> str: - """Provider identifier.""" - return "mock" - + def annotations(self) -> SqlAlchemyAnnotationRepository: + if self._annotations_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._annotations_repo @property - def is_available(self) -> bool: - """Mock provider is always available.""" - return True - + def meetings(self) -> SqlAlchemyMeetingRepository: + if self._meetings_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._meetings_repo @property - def requires_cloud_consent(self) -> bool: - """Mock provider does not send data externally.""" - return False - - async def summarize(self, request: SummarizationRequest) -> SummarizationResult: - """Generate deterministic mock summary. - - Creates key points and action items based on segment content, - with proper evidence linking to segment_ids. - - Args: - request: Summarization request with segments. - - Returns: - SummarizationResult with mock summary. - """ - start = time.monotonic() - - # Generate executive summary - segment_count = request.segment_count - total_duration = request.total_duration - executive_summary = ( - f"Meeting with {segment_count} segments spanning {total_duration:.1f} seconds." - ) - - # Generate key points from segments (up to max_key_points) - key_points: list[KeyPoint] = [] - for i, segment in enumerate(request.segments[: request.max_key_points]): - # Truncate text for key point - text = f"{segment.text[:100]}..." 
if len(segment.text) > 100 else segment.text - key_points.append( - KeyPoint( - text=f"Point {i + 1}: {text}", - segment_ids=[segment.segment_id], - start_time=segment.start_time, - end_time=segment.end_time, - ) - ) - - # Generate action items from segments containing action words - action_items: list[ActionItem] = [] - action_keywords = {"todo", "action", "will", "should", "must", "need to"} - for segment in request.segments: - text_lower = segment.text.lower() - if any(kw in text_lower for kw in action_keywords): - if len(action_items) >= request.max_action_items: - break - action_items.append( - ActionItem( - text=f"Action: {segment.text[:80]}", - assignee="", # Mock doesn't extract assignees - segment_ids=[segment.segment_id], - ) - ) - - summary = Summary( - meeting_id=request.meeting_id, - executive_summary=executive_summary, - key_points=key_points, - action_items=action_items, - generated_at=datetime.now(UTC), - model_version="mock-1.0", - ) - - elapsed = (time.monotonic() - start) * 1000 + self._latency_ms - - return SummarizationResult( - summary=summary, - model_name="mock-1.0", - provider_name=self.provider_name, - tokens_used=None, - latency_ms=elapsed, - ) + def segments(self) -> SqlAlchemySegmentRepository: + if self._segments_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._segments_repo + @property + def summaries(self) -> SqlAlchemySummaryRepository: + if self._summaries_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._summaries_repo + async def __aenter__(self) -> Self: + self._session = self._session_factory() + self._annotations_repo = SqlAlchemyAnnotationRepository(self._session) + self._meetings_repo = SqlAlchemyMeetingRepository(self._session) + self._segments_repo = SqlAlchemySegmentRepository(self._session) + self._summaries_repo = SqlAlchemySummaryRepository(self._session) + return self + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: object, + ) -> None: + if self._session is None: + return + if exc_type is not None: + await self.rollback() + await self._session.close() + self._session = None + self._annotations_repo = None + self._meetings_repo = None + self._segments_repo = None + self._summaries_repo = None + async def commit(self) -> None: + if self._session is None: + raise RuntimeError("UnitOfWork not in context") + await self._session.commit() + async def rollback(self) -> None: + if self._session is None: + raise RuntimeError("UnitOfWork not in context") + await self._session.rollback() ```` ## File: src/noteflow/infrastructure/summarization/ollama_provider.py ````python -"""Ollama summarization provider for local LLM inference.""" - from __future__ import annotations - import asyncio import os import time from datetime import UTC, datetime from typing import TYPE_CHECKING - from noteflow.domain.entities import Summary from noteflow.domain.summarization import ( InvalidResponseError, @@ -17883,87 +10740,45 @@ from noteflow.infrastructure.summarization._parsing import ( build_transcript_prompt, parse_llm_response, ) - if TYPE_CHECKING: import ollama - - class OllamaSummarizer: - """Ollama-based local LLM summarizer. - - Uses a local Ollama server for privacy-preserving summarization. - No data is sent to external cloud services. - """ - def __init__( self, model: str | None = None, host: str | None = None, timeout_seconds: float = 120.0, ) -> None: - """Initialize Ollama summarizer. 
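# Configuration sketch: model and host fall back to the OLLAMA_MODEL and
# OLLAMA_HOST environment variables (defaults "llama3.2" and
# "http://localhost:11434"), so a bare constructor targets a local server
# and never requires cloud consent.
from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer

summarizer = OllamaSummarizer(timeout_seconds=60.0)
assert summarizer.provider_name == "ollama"
assert summarizer.requires_cloud_consent is False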
- - Args: - model: Ollama model name (e.g., 'llama3.2', 'mistral'). - host: Ollama server URL. - timeout_seconds: Request timeout in seconds. - """ self._model = model or os.environ.get("OLLAMA_MODEL", "llama3.2") self._host = host or os.environ.get("OLLAMA_HOST", "http://localhost:11434") self._timeout = timeout_seconds self._client: ollama.Client | None = None - def _get_client(self) -> ollama.Client: - """Lazy-load Ollama client.""" if self._client is None: try: import ollama - self._client = ollama.Client(host=self._host) except ImportError as e: raise ProviderUnavailableError( "ollama package not installed. Install with: pip install ollama" ) from e return self._client - @property def provider_name(self) -> str: - """Provider identifier.""" return "ollama" - @property def is_available(self) -> bool: - """Check if Ollama server is reachable.""" try: client = self._get_client() - # Try to list models to verify connectivity client.list() return True except (ConnectionError, TimeoutError, RuntimeError, OSError): return False - @property def requires_cloud_consent(self) -> bool: - """Ollama runs locally, no cloud consent required.""" return False - async def summarize(self, request: SummarizationRequest) -> SummarizationResult: - """Generate evidence-linked summary using Ollama. - - Args: - request: Summarization request with segments. - - Returns: - SummarizationResult with generated summary. - - Raises: - ProviderUnavailableError: If Ollama is not accessible. - SummarizationTimeoutError: If request times out. - InvalidResponseError: If response cannot be parsed. - """ start = time.monotonic() - - # Handle empty segments if not request.segments: return SummarizationResult( summary=Summary( @@ -17979,16 +10794,12 @@ class OllamaSummarizer: tokens_used=None, latency_ms=0.0, ) - try: client = self._get_client() except ProviderUnavailableError: raise - user_prompt = build_transcript_prompt(request) - try: - # Offload blocking call to a worker thread to avoid blocking the event loop response = await asyncio.to_thread( client.chat, model=self._model, @@ -18006,13 +10817,9 @@ class OllamaSummarizer: if "connection" in err_str or "refused" in err_str: raise ProviderUnavailableError(f"Cannot connect to Ollama: {e}") from e raise InvalidResponseError(f"Ollama error: {e}") from e - - # Extract response text content = response.get("message", {}).get("content", "") if not content: raise InvalidResponseError("Empty response from Ollama") - - # Parse into Summary summary = parse_llm_response(content, request) summary = Summary( meeting_id=summary.meeting_id, @@ -18022,14 +10829,10 @@ class OllamaSummarizer: generated_at=summary.generated_at, model_version=self._model, ) - elapsed_ms = (time.monotonic() - start) * 1000 - - # Extract token usage if available tokens_used = None if "eval_count" in response: tokens_used = response.get("eval_count", 0) + response.get("prompt_eval_count", 0) - return SummarizationResult( summary=summary, model_name=self._model, @@ -18039,1920 +10842,19 @@ class OllamaSummarizer: ) ```` -## File: src/noteflow/infrastructure/triggers/audio_activity.py -````python -"""Audio activity signal provider. - -Detect sustained audio activity using existing RmsLevelProvider. 
-""" - -from __future__ import annotations - -import threading -import time -from collections import deque -from dataclasses import dataclass -from typing import TYPE_CHECKING - -from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource - -if TYPE_CHECKING: - import numpy as np - from numpy.typing import NDArray - - from noteflow.infrastructure.audio import RmsLevelProvider - - -@dataclass -class AudioActivitySettings: - """Configuration for audio activity detection. - - Attributes: - enabled: Whether audio activity detection is enabled. - threshold_db: Minimum dB level to consider as activity (default -40 dB). - window_seconds: Time window for sustained activity detection. - min_active_ratio: Minimum ratio of active samples in window (0.0-1.0). - min_samples: Minimum samples required before evaluation. - max_history: Maximum samples retained in history. - weight: Confidence weight contributed by this provider. - """ - - enabled: bool - threshold_db: float - window_seconds: float - min_active_ratio: float - min_samples: int - max_history: int - weight: float - - def __post_init__(self) -> None: - if self.min_samples > self.max_history: - msg = "min_samples must be <= max_history" - raise ValueError(msg) - - -class AudioActivityProvider: - """Detect sustained audio activity using existing RmsLevelProvider. - - Reuses RmsLevelProvider from infrastructure/audio for dB calculation. - Tracks activity history over a sliding window and generates signals - when sustained speech activity is detected. - """ - - def __init__( - self, - level_provider: RmsLevelProvider, - settings: AudioActivitySettings, - ) -> None: - """Initialize audio activity provider. - - Args: - level_provider: Existing RmsLevelProvider instance to reuse. - settings: Configuration settings for audio activity detection. - """ - self._level_provider = level_provider - self._settings = settings - self._history: deque[tuple[float, bool]] = deque(maxlen=self._settings.max_history) - self._lock = threading.Lock() - - @property - def source(self) -> TriggerSource: - """Get the source type for this provider.""" - return TriggerSource.AUDIO_ACTIVITY - - @property - def max_weight(self) -> float: - """Get the maximum weight this provider can contribute.""" - return self._settings.weight - - def update(self, frames: NDArray[np.float32], timestamp: float) -> None: - """Update activity history with new audio frames. - - Call this from the audio capture callback to feed new samples. - - Args: - frames: Audio samples as float32 array. - timestamp: Monotonic timestamp of the audio chunk. - """ - if not self._settings.enabled: - return - - db = self._level_provider.get_db(frames) - is_active = db >= self._settings.threshold_db - with self._lock: - self._history.append((timestamp, is_active)) - - def get_signal(self) -> TriggerSignal | None: - """Get current signal if sustained activity detected. - - Returns: - TriggerSignal if activity ratio exceeds threshold, None otherwise. 
- """ - if not self._settings.enabled: - return None - - # Need minimum samples before we can evaluate - with self._lock: - history = list(self._history) - - if len(history) < self._settings.min_samples: - return None - - # Prune old samples outside window - now = time.monotonic() - cutoff = now - self._settings.window_seconds - recent = [(ts, active) for ts, active in history if ts >= cutoff] - - if len(recent) < self._settings.min_samples: - return None - - # Calculate activity ratio - active_count = sum(bool(active) for _, active in recent) - ratio = active_count / len(recent) - - if ratio < self._settings.min_active_ratio: - return None - - return TriggerSignal(source=self.source, weight=self.max_weight) - - def is_enabled(self) -> bool: - """Check if this provider is enabled.""" - return self._settings.enabled - - def clear_history(self) -> None: - """Clear activity history. Useful when recording starts.""" - with self._lock: - self._history.clear() -```` - -## File: src/noteflow/infrastructure/__init__.py -````python -"""NoteFlow infrastructure layer. - -Contains implementations of ports and adapters for external systems: -- asr: Speech-to-text transcription (faster-whisper) -- diarization: Speaker diarization (pyannote.audio + diart) -- persistence: Database access (SQLAlchemy + PostgreSQL) -- security: Encryption and key management (AES-GCM + OS keychain) -""" -```` - -## File: tests/application/test_export_service.py -````python -"""Tests for ExportService application service.""" - -from __future__ import annotations - -from pathlib import Path -from unittest.mock import AsyncMock, MagicMock -from uuid import uuid4 - -import pytest - -from noteflow.application.services.export_service import ExportService -from noteflow.domain.entities import Meeting, Segment -from noteflow.domain.value_objects import MeetingId - - -def _uow_with_meeting(meeting: Meeting | None, segments: list[Segment] | None = None) -> MagicMock: - """Build a minimal async UnitOfWork double.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock(get=AsyncMock(return_value=meeting)) - uow.segments = MagicMock(get_by_meeting=AsyncMock(return_value=segments or [])) - return uow - - -@pytest.mark.asyncio -async def test_export_transcript_meeting_not_found() -> None: - """export_transcript should raise when meeting is missing.""" - meeting_id = MeetingId(uuid4()) - service = ExportService(_uow_with_meeting(meeting=None)) - - with pytest.raises(ValueError, match="not found"): - await service.export_transcript(meeting_id) - - -@pytest.mark.asyncio -async def test_export_to_file_infers_format_and_writes(tmp_path: Path) -> None: - """export_to_file infers markdown from extension and writes content.""" - meeting = Meeting.create(title="Demo") - segments = [ - Segment( - segment_id=0, - text="Hello world", - start_time=0.0, - end_time=1.0, - meeting_id=meeting.id, - ) - ] - uow = _uow_with_meeting(meeting, segments) - service = ExportService(uow) - - output = await service.export_to_file(meeting.id, tmp_path / "export.markdown") - - assert output.suffix == ".md" - assert output.exists() - content = output.read_text(encoding="utf-8") - assert "Hello world" in content - - -def test_infer_format_rejects_unknown_extension() -> None: - """_infer_format_from_extension should raise for unknown suffix.""" - service = ExportService(_uow_with_meeting(None)) - - with pytest.raises(ValueError, match="Cannot infer format"): 
- service._infer_format_from_extension(".txt") # type: ignore[arg-type] - - -def test_get_exporter_raises_for_unknown_format() -> None: - """_get_exporter should guard against unsupported enums.""" - service = ExportService(_uow_with_meeting(None)) - - class FakeFormat: - HTML = "html" - - with pytest.raises(ValueError, match="Unsupported"): - service._get_exporter(FakeFormat.HTML) # type: ignore[arg-type] - - -def test_get_supported_formats_returns_names_and_extensions() -> None: - """get_supported_formats should expose format metadata.""" - service = ExportService(_uow_with_meeting(None)) - - formats = {name.lower(): ext for name, ext in service.get_supported_formats()} - - assert formats["markdown"] == ".md" - assert formats["html"] == ".html" -```` - -## File: tests/application/test_retention_service.py -````python -"""Tests for RetentionService.""" - -from __future__ import annotations - -from datetime import UTC, datetime, timedelta -from pathlib import Path -from unittest.mock import AsyncMock, MagicMock - -import pytest - -from noteflow.application.services.retention_service import RetentionReport, RetentionService -from noteflow.domain.entities import Meeting - - -def _create_meeting(ended_at: datetime | None = None) -> Meeting: - """Create a test meeting with optional ended_at.""" - meeting = Meeting.create(title="Test Meeting") - if ended_at: - meeting._ended_at = ended_at - return meeting - - -class TestRetentionServiceProperties: - """Tests for RetentionService properties.""" - - def test_is_enabled_reflects_init(self) -> None: - """is_enabled should reflect constructor parameter.""" - uow = MagicMock() - - def factory() -> MagicMock: - return uow - - enabled_service = RetentionService(factory, retention_days=30, enabled=True) - disabled_service = RetentionService(factory, retention_days=30, enabled=False) - - assert enabled_service.is_enabled is True - assert disabled_service.is_enabled is False - - def test_retention_days_property(self) -> None: - """retention_days should return configured value.""" - uow = MagicMock() - service = RetentionService(lambda: uow, retention_days=45) - - assert service.retention_days == 45 - - def test_cutoff_date_calculation(self) -> None: - """cutoff_date should be retention_days in the past.""" - uow = MagicMock() - service = RetentionService(lambda: uow, retention_days=30) - - cutoff = service.cutoff_date - expected = datetime.now(UTC) - timedelta(days=30) - - # Allow 1 second tolerance - assert abs((cutoff - expected).total_seconds()) < 1 - - -class TestRetentionServiceFindExpired: - """Tests for find_expired_meetings method.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.meetings = MagicMock() - return uow - - @pytest.mark.asyncio - async def test_find_expired_returns_meetings(self, mock_uow: MagicMock) -> None: - """find_expired_meetings should return meetings from repository.""" - old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100)) - mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting]) - - service = RetentionService(lambda: mock_uow, retention_days=30) - result = await service.find_expired_meetings() - - assert len(result) == 1 - mock_uow.meetings.find_older_than.assert_awaited_once() - - @pytest.mark.asyncio - async def test_find_expired_returns_empty_list(self, mock_uow: MagicMock) -> None: - """find_expired_meetings should 
return empty list when none found.""" - mock_uow.meetings.find_older_than = AsyncMock(return_value=[]) - - service = RetentionService(lambda: mock_uow, retention_days=30) - result = await service.find_expired_meetings() - - assert result == [] - - -class TestRetentionServiceRunCleanup: - """Tests for run_cleanup method.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.meetings = MagicMock() - uow.commit = AsyncMock() - return uow - - @pytest.mark.asyncio - async def test_run_cleanup_disabled_returns_empty_report(self, mock_uow: MagicMock) -> None: - """run_cleanup should return empty report when disabled.""" - service = RetentionService(lambda: mock_uow, retention_days=30, enabled=False) - - report = await service.run_cleanup() - - assert report.meetings_checked == 0 - assert report.meetings_deleted == 0 - assert report.errors == () - - @pytest.mark.asyncio - async def test_run_cleanup_dry_run_does_not_delete(self, mock_uow: MagicMock) -> None: - """run_cleanup with dry_run should not delete meetings.""" - old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100)) - mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting]) - - service = RetentionService(lambda: mock_uow, retention_days=30, enabled=False) - report = await service.run_cleanup(dry_run=True) - - # Should report meeting was checked but not deleted - assert report.meetings_checked == 1 - assert report.meetings_deleted == 0 - assert report.errors == () - - @pytest.mark.asyncio - async def test_run_cleanup_deletes_expired_meetings( - self, mock_uow: MagicMock, tmp_path: Path - ) -> None: - """run_cleanup should delete expired meetings when enabled.""" - old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100)) - mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting]) - mock_uow.meetings.get = AsyncMock(return_value=old_meeting) - mock_uow.meetings.delete = AsyncMock(return_value=True) - - service = RetentionService( - lambda: mock_uow, - retention_days=30, - meetings_dir=tmp_path, - enabled=True, - ) - report = await service.run_cleanup() - - assert report.meetings_checked == 1 - assert report.meetings_deleted == 1 - assert report.errors == () - - @pytest.mark.asyncio - async def test_run_cleanup_handles_errors_gracefully(self, mock_uow: MagicMock) -> None: - """run_cleanup should capture errors without failing.""" - old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100)) - mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting]) - mock_uow.meetings.get = AsyncMock(side_effect=RuntimeError("DB error")) - - service = RetentionService(lambda: mock_uow, retention_days=30, enabled=True) - report = await service.run_cleanup() - - assert report.meetings_checked == 1 - assert report.meetings_deleted == 0 - assert len(report.errors) == 1 - assert "DB error" in report.errors[0] - - -class TestRetentionReport: - """Tests for RetentionReport dataclass.""" - - def test_retention_report_is_immutable(self) -> None: - """RetentionReport should be frozen.""" - report = RetentionReport( - meetings_checked=5, - meetings_deleted=3, - errors=("error1",), - ) - - with pytest.raises(AttributeError): - report.meetings_checked = 10 # type: ignore[misc] - - def test_retention_report_stores_values(self) -> None: - """RetentionReport should store all values correctly.""" - report = 
RetentionReport( - meetings_checked=10, - meetings_deleted=8, - errors=("err1", "err2"), - ) - - assert report.meetings_checked == 10 - assert report.meetings_deleted == 8 - assert report.errors == ("err1", "err2") -```` - -## File: tests/domain/test_meeting.py -````python -"""Tests for Meeting entity.""" - -from __future__ import annotations - -from datetime import datetime, timedelta - -import pytest - -from noteflow.domain.entities.meeting import Meeting -from noteflow.domain.entities.segment import Segment -from noteflow.domain.entities.summary import Summary -from noteflow.domain.value_objects import MeetingState - - -class TestMeetingCreation: - """Tests for Meeting creation methods.""" - - def test_create_with_default_title(self) -> None: - """Test factory method generates default title.""" - meeting = Meeting.create() - assert meeting.title.startswith("Meeting ") - assert meeting.state == MeetingState.CREATED - assert meeting.started_at is None - assert meeting.ended_at is None - assert meeting.segments == [] - assert meeting.summary is None - - def test_create_with_custom_title(self) -> None: - """Test factory method accepts custom title.""" - meeting = Meeting.create(title="Team Standup") - assert meeting.title == "Team Standup" - - def test_create_with_metadata(self) -> None: - """Test factory method accepts metadata.""" - metadata = {"project": "NoteFlow", "team": "Engineering"} - meeting = Meeting.create(title="Sprint Planning", metadata=metadata) - assert meeting.metadata == metadata - - def test_from_uuid_str(self) -> None: - """Test creation from existing UUID string.""" - uuid_str = "12345678-1234-5678-1234-567812345678" - meeting = Meeting.from_uuid_str( - uuid_str=uuid_str, - title="Restored Meeting", - state=MeetingState.STOPPED, - ) - assert str(meeting.id) == uuid_str - assert meeting.title == "Restored Meeting" - assert meeting.state == MeetingState.STOPPED - - -class TestMeetingStateTransitions: - """Tests for Meeting state machine transitions.""" - - def test_start_recording_from_created(self) -> None: - """Test starting recording from CREATED state.""" - meeting = Meeting.create() - meeting.start_recording() - assert meeting.state == MeetingState.RECORDING - assert meeting.started_at is not None - - def test_start_recording_invalid_state_raises(self) -> None: - """Test starting recording from invalid state raises.""" - meeting = Meeting.create() - meeting.start_recording() - meeting.begin_stopping() - meeting.stop_recording() - with pytest.raises(ValueError, match="Cannot start recording"): - meeting.start_recording() - - def test_begin_stopping_from_recording(self) -> None: - """Test transitioning to STOPPING from RECORDING state.""" - meeting = Meeting.create() - meeting.start_recording() - meeting.begin_stopping() - assert meeting.state == MeetingState.STOPPING - - def test_begin_stopping_invalid_state_raises(self) -> None: - """Test begin_stopping from invalid state raises.""" - meeting = Meeting.create() - with pytest.raises(ValueError, match="Cannot begin stopping"): - meeting.begin_stopping() - - def test_stop_recording_from_stopping(self) -> None: - """Test stopping recording from STOPPING state.""" - meeting = Meeting.create() - meeting.start_recording() - meeting.begin_stopping() - meeting.stop_recording() - assert meeting.state == MeetingState.STOPPED - assert meeting.ended_at is not None - - def test_stop_recording_from_recording_raises(self) -> None: - """Test stopping recording directly from RECORDING raises. 
- - Must go through STOPPING state for graceful shutdown. - """ - meeting = Meeting.create() - meeting.start_recording() - with pytest.raises(ValueError, match="Cannot stop recording"): - meeting.stop_recording() - - def test_stop_recording_from_created_raises(self) -> None: - """Test stopping recording from CREATED state raises.""" - meeting = Meeting.create() - with pytest.raises(ValueError, match="Cannot stop recording"): - meeting.stop_recording() - - def test_complete_from_stopped(self) -> None: - """Test completing meeting from STOPPED state.""" - meeting = Meeting.create() - meeting.start_recording() - meeting.begin_stopping() - meeting.stop_recording() - meeting.complete() - assert meeting.state == MeetingState.COMPLETED - - def test_complete_invalid_state_raises(self) -> None: - """Test completing from invalid state raises.""" - meeting = Meeting.create() - with pytest.raises(ValueError, match="Cannot complete"): - meeting.complete() - - def test_mark_error(self) -> None: - """Test marking meeting as error state.""" - meeting = Meeting.create() - meeting.mark_error() - assert meeting.state == MeetingState.ERROR - - def test_stopping_to_recording_invalid(self) -> None: - """Test cannot transition from STOPPING back to RECORDING.""" - meeting = Meeting.create() - meeting.start_recording() - meeting.begin_stopping() - with pytest.raises(ValueError, match="Cannot start recording"): - meeting.start_recording() - - -class TestMeetingSegments: - """Tests for Meeting segment management.""" - - def test_add_segment(self) -> None: - """Test adding a segment to meeting.""" - meeting = Meeting.create() - segment = Segment(segment_id=0, text="Hello world", start_time=0.0, end_time=1.0) - meeting.add_segment(segment) - assert meeting.segment_count == 1 - assert meeting.segments[0] == segment - - def test_next_segment_id_empty(self) -> None: - """Test next segment ID when no segments exist.""" - meeting = Meeting.create() - assert meeting.next_segment_id == 0 - - def test_next_segment_id_with_segments(self) -> None: - """Test next segment ID increments correctly.""" - meeting = Meeting.create() - meeting.add_segment(Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0)) - meeting.add_segment(Segment(segment_id=1, text="Second", start_time=1.0, end_time=2.0)) - assert meeting.next_segment_id == 2 - - def test_next_segment_id_non_contiguous(self) -> None: - """Test next segment ID uses max + 1 for non-contiguous IDs.""" - meeting = Meeting.create() - meeting.add_segment(Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0)) - meeting.add_segment(Segment(segment_id=5, text="Sixth", start_time=1.0, end_time=2.0)) - assert meeting.next_segment_id == 6 - - def test_full_transcript(self) -> None: - """Test concatenating all segment text.""" - meeting = Meeting.create() - meeting.add_segment(Segment(segment_id=0, text="Hello", start_time=0.0, end_time=1.0)) - meeting.add_segment(Segment(segment_id=1, text="world", start_time=1.0, end_time=2.0)) - assert meeting.full_transcript == "Hello world" - - def test_full_transcript_empty(self) -> None: - """Test full_transcript is empty when there are no segments.""" - meeting = Meeting.create() - assert meeting.full_transcript == "" - - -class TestMeetingProperties: - """Tests for Meeting computed properties.""" - - def test_duration_seconds_not_started(self) -> None: - """Test duration is 0 when not started.""" - meeting = Meeting.create() - assert meeting.duration_seconds == 0.0 - - def test_duration_seconds_with_times(self) -> None: - 
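# The transitions exercised by the state-machine tests above, collected in one
# place. This summary is inferred from the test assertions, not a structure
# defined by the Meeting entity itself:
#
#   CREATED   --start_recording()--> RECORDING
#   RECORDING --begin_stopping()-->  STOPPING
#   STOPPING  --stop_recording()-->  STOPPED
#   STOPPED   --complete()-->        COMPLETED
#   (any)     --mark_error()-->      ERROR
#
# Every other transition raises ValueError, e.g. stop_recording() straight
# from RECORDING, or start_recording() once STOPPED.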
"""Test duration calculation with start and end times.""" - meeting = Meeting.create() - meeting.started_at = datetime(2024, 1, 1, 10, 0, 0) - meeting.ended_at = datetime(2024, 1, 1, 10, 30, 0) - assert meeting.duration_seconds == 1800.0 - - def test_duration_seconds_in_progress(self) -> None: - """Test duration is > 0 when started but not ended.""" - meeting = Meeting.create() - meeting.started_at = datetime.now() - timedelta(seconds=5) - assert meeting.duration_seconds >= 5.0 - - def test_is_active_created(self) -> None: - """Test is_active returns True for CREATED state.""" - meeting = Meeting.create() - assert meeting.is_active() is True - - def test_is_active_recording(self) -> None: - """Test is_active returns True for RECORDING state.""" - meeting = Meeting.create() - meeting.start_recording() - assert meeting.is_active() is True - - def test_is_active_stopping(self) -> None: - """Test is_active returns False for STOPPING state.""" - meeting = Meeting.create() - meeting.start_recording() - meeting.begin_stopping() - assert meeting.is_active() is False - - def test_is_active_stopped(self) -> None: - """Test is_active returns False for STOPPED state.""" - meeting = Meeting.create() - meeting.start_recording() - meeting.begin_stopping() - meeting.stop_recording() - assert meeting.is_active() is False - - def test_has_summary_false(self) -> None: - """Test has_summary returns False when no summary.""" - meeting = Meeting.create() - assert meeting.has_summary() is False - - def test_has_summary_true(self) -> None: - """Test has_summary returns True when summary set.""" - meeting = Meeting.create() - summary = Summary(meeting_id=meeting.id) - meeting.set_summary(summary) - assert meeting.has_summary() is True -```` - -## File: tests/domain/test_triggers.py -````python -"""Tests for trigger domain entities.""" - -from __future__ import annotations - -import pytest - -from noteflow.domain.triggers import TriggerAction, TriggerDecision, TriggerSignal, TriggerSource - - -def test_trigger_signal_weight_bounds() -> None: - """TriggerSignal enforces weight bounds.""" - with pytest.raises(ValueError, match=r"Weight must be 0\.0-1\.0"): - TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=-0.1) - - with pytest.raises(ValueError, match=r"Weight must be 0\.0-1\.0"): - TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=1.1) - - signal = TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=0.5) - assert signal.weight == 0.5 - - -def test_trigger_decision_primary_signal_and_detected_app() -> None: - """TriggerDecision exposes primary signal and detected app.""" - audio = TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=0.2) - foreground = TriggerSignal( - source=TriggerSource.FOREGROUND_APP, - weight=0.4, - app_name="Zoom Meeting", - ) - decision = TriggerDecision( - action=TriggerAction.NOTIFY, - confidence=0.6, - signals=(audio, foreground), - ) - - assert decision.primary_signal == foreground - assert decision.detected_app == "Zoom Meeting" - - empty = TriggerDecision(action=TriggerAction.IGNORE, confidence=0.0, signals=()) - assert empty.primary_signal is None - assert empty.detected_app is None -```` - -## File: tests/grpc/test_generate_summary.py -````python -"""Tests for GenerateSummary RPC fallback behavior.""" - -from __future__ import annotations - -import pytest - -from noteflow.domain.entities import Segment -from noteflow.domain.summarization import ProviderUnavailableError -from noteflow.grpc.proto import noteflow_pb2 -from noteflow.grpc.service import 
NoteFlowServicer - - -class _DummyContext: - """Minimal gRPC context that raises if abort is invoked.""" - - async def abort(self, code, details): # type: ignore[override] - raise AssertionError(f"abort called: {code} - {details}") - - -@pytest.mark.asyncio -async def test_generate_summary_uses_placeholder_when_service_missing() -> None: - """Ensure RPC returns a placeholder when no summarization service is configured.""" - - servicer = NoteFlowServicer() - store = servicer._get_memory_store() - - meeting = store.create("Test Meeting") - store.add_segment( - str(meeting.id), - Segment(segment_id=0, text="Hello world", start_time=0.0, end_time=1.0, language="en"), - ) - - response = await servicer.GenerateSummary( - noteflow_pb2.GenerateSummaryRequest(meeting_id=str(meeting.id)), - _DummyContext(), - ) - - assert response.executive_summary != "" - assert response.model_version == "placeholder-v0" - retrieved_meeting = store.get(str(meeting.id)) - assert retrieved_meeting is not None, "Meeting should exist after creation" - assert retrieved_meeting.summary is not None - - -class _FailingSummarizationService: - """Summarization service that always reports provider unavailability.""" - - async def summarize(self, meeting_id, segments): # type: ignore[override] - raise ProviderUnavailableError("LLM unavailable") - - -@pytest.mark.asyncio -async def test_generate_summary_falls_back_when_provider_unavailable() -> None: - """Provider errors should fall back to placeholder instead of failing the RPC.""" - - servicer = NoteFlowServicer(summarization_service=_FailingSummarizationService()) - store = servicer._get_memory_store() - - meeting = store.create("Test Meeting") - store.add_segment( - str(meeting.id), - Segment(segment_id=1, text="Action item noted", start_time=0.0, end_time=2.0, language="en"), - ) - - response = await servicer.GenerateSummary( - noteflow_pb2.GenerateSummaryRequest(meeting_id=str(meeting.id)), - _DummyContext(), - ) - - assert response.executive_summary != "" - assert response.model_version == "placeholder-v0" -```` - -## File: tests/infrastructure/asr/test_dto.py -````python -"""Tests for ASR DTO validation and properties.""" - -from __future__ import annotations - -from dataclasses import FrozenInstanceError - -import pytest - -from noteflow.infrastructure.asr.dto import ( - AsrResult, - PartialUpdate, - VadEvent, - VadEventType, - WordTiming, -) - - -class TestWordTimingDto: - """Tests for WordTiming DTO.""" - - def test_word_timing_valid(self) -> None: - word = WordTiming(word="hello", start=0.0, end=0.5, probability=0.75) - assert word.word == "hello" - assert word.start == 0.0 - assert word.end == 0.5 - assert word.probability == 0.75 - - def test_word_timing_invalid_times_raises(self) -> None: - with pytest.raises(ValueError, match=r"Word end .* < start"): - WordTiming(word="bad", start=1.0, end=0.5, probability=0.5) - - @pytest.mark.parametrize("prob", [-0.1, 1.1]) - def test_word_timing_invalid_probability_raises(self, prob: float) -> None: - with pytest.raises(ValueError, match=r"Probability must be 0\.0-1\.0"): - WordTiming(word="bad", start=0.0, end=0.1, probability=prob) - - def test_word_timing_frozen(self) -> None: - word = WordTiming(word="hello", start=0.0, end=0.5, probability=0.9) - with pytest.raises(FrozenInstanceError): - word.word = "mutate" # type: ignore[misc] - - -class TestAsrResultDto: - """Tests for AsrResult DTO.""" - - def test_asr_result_duration(self) -> None: - result = AsrResult(text="hello", start=1.0, end=3.5) - assert result.duration == 
2.5 - - def test_asr_result_invalid_times_raises(self) -> None: - with pytest.raises(ValueError, match=r"Segment end .* < start"): - AsrResult(text="bad", start=2.0, end=1.0) - - -class TestPartialUpdateDto: - """Tests for PartialUpdate DTO.""" - - def test_partial_update_invalid_times_raises(self) -> None: - with pytest.raises(ValueError, match=r"Partial end .* < start"): - PartialUpdate(text="partial", start=2.0, end=1.0) - - -class TestVadEventDto: - """Tests for VadEvent DTO.""" - - def test_vad_event_invalid_timestamp_raises(self) -> None: - with pytest.raises(ValueError, match="Timestamp must be non-negative"): - VadEvent(event_type=VadEventType.SPEECH_START, timestamp=-1.0) - - @pytest.mark.parametrize("confidence", [-0.1, 1.1]) - def test_vad_event_invalid_confidence_raises(self, confidence: float) -> None: - with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"): - VadEvent(event_type=VadEventType.SPEECH_END, timestamp=0.5, confidence=confidence) -```` - -## File: tests/infrastructure/asr/test_segmenter.py -````python -"""Tests for Segmenter state machine.""" - -from __future__ import annotations - -import numpy as np -import pytest - -from noteflow.infrastructure.asr.segmenter import ( - AudioSegment, - Segmenter, - SegmenterConfig, - SegmenterState, -) - - -class TestSegmenterInitialization: - """Tests for Segmenter initialization.""" - - def test_default_config(self) -> None: - """Segmenter uses default config when not provided.""" - segmenter = Segmenter() - - assert segmenter.config.sample_rate == 16000 - assert segmenter.config.min_speech_duration == 0.3 - - def test_custom_config(self) -> None: - """Segmenter accepts custom configuration.""" - config = SegmenterConfig(sample_rate=44100, max_segment_duration=60.0) - segmenter = Segmenter(config=config) - - assert segmenter.config.sample_rate == 44100 - assert segmenter.config.max_segment_duration == 60.0 - - def test_initial_state_is_idle(self) -> None: - """Segmenter starts in IDLE state.""" - segmenter = Segmenter() - - assert segmenter.state == SegmenterState.IDLE - - -class TestSegmenterStateTransitions: - """Tests for Segmenter state machine transitions.""" - - @pytest.fixture - def segmenter(self) -> Segmenter: - """Create segmenter with test-friendly config.""" - return Segmenter( - config=SegmenterConfig( - sample_rate=16000, - trailing_silence=0.1, - leading_buffer=0.1, - min_speech_duration=0.1, - ) - ) - - @staticmethod - def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray: - """Create test audio of specified duration.""" - return np.zeros(int(duration * sample_rate), dtype=np.float32) - - def test_idle_to_speech_on_voice_detected(self, segmenter: Segmenter) -> None: - """Transition from IDLE to SPEECH when voice detected.""" - audio = self.make_audio(0.1) - - list(segmenter.process_audio(audio, is_speech=True)) - - assert segmenter.state == SegmenterState.SPEECH - - def test_idle_stays_idle_on_silence(self, segmenter: Segmenter) -> None: - """Stay in IDLE state when no speech detected.""" - audio = self.make_audio(0.1) - - list(segmenter.process_audio(audio, is_speech=False)) - - assert segmenter.state == SegmenterState.IDLE - - def test_speech_to_trailing_on_silence(self, segmenter: Segmenter) -> None: - """Transition from SPEECH to TRAILING when speech ends.""" - speech_audio = self.make_audio(0.1) - short_silence = self.make_audio(0.05) # Less than trailing_silence threshold - - list(segmenter.process_audio(speech_audio, is_speech=True)) - 
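# Driving pattern for the Segmenter under test (an illustrative sketch;
# `vad_labelled_chunks` and `handle` are hypothetical names):
#
#     segmenter = Segmenter(config=SegmenterConfig(sample_rate=16000))
#     for chunk, is_speech in vad_labelled_chunks:
#         for segment in segmenter.process_audio(chunk, is_speech=is_speech):
#             handle(segment)       # emitted on trailing silence or max duration
#     tail = segmenter.flush()      # pending AudioSegment, or None when idle
#
# State flow: IDLE -> SPEECH on voice; SPEECH -> TRAILING on silence;
# TRAILING -> IDLE (emit) once trailing_silence elapses; TRAILING -> SPEECH
# if voice resumes before the threshold.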
list(segmenter.process_audio(short_silence, is_speech=False)) - - assert segmenter.state == SegmenterState.TRAILING - - def test_trailing_to_idle_after_silence_threshold(self, segmenter: Segmenter) -> None: - """Transition from TRAILING to IDLE after enough silence.""" - audio = self.make_audio(0.1) - - list(segmenter.process_audio(audio, is_speech=True)) - list(segmenter.process_audio(audio, is_speech=False)) - list(segmenter.process_audio(audio, is_speech=False)) - - assert segmenter.state == SegmenterState.IDLE - - def test_trailing_to_speech_if_voice_resumes(self, segmenter: Segmenter) -> None: - """Transition from TRAILING back to SPEECH if voice resumes.""" - audio = self.make_audio(0.05) - - list(segmenter.process_audio(audio, is_speech=True)) - list(segmenter.process_audio(audio, is_speech=False)) - assert segmenter.state == SegmenterState.TRAILING - - list(segmenter.process_audio(audio, is_speech=True)) - - assert segmenter.state == SegmenterState.SPEECH - - -class TestSegmenterEmission: - """Tests for segment emission behavior.""" - - @pytest.fixture - def segmenter(self) -> Segmenter: - """Create segmenter with test-friendly config.""" - return Segmenter( - config=SegmenterConfig( - sample_rate=16000, - trailing_silence=0.1, - leading_buffer=0.1, - min_speech_duration=0.0, - ) - ) - - @staticmethod - def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray: - """Create test audio of specified duration.""" - return np.ones(int(duration * sample_rate), dtype=np.float32) - - def test_emits_segment_after_trailing_silence(self, segmenter: Segmenter) -> None: - """Emit segment when trailing silence threshold is reached.""" - audio = self.make_audio(0.2) - - segments_speech = list(segmenter.process_audio(audio, is_speech=True)) - segments_silence = list(segmenter.process_audio(audio, is_speech=False)) - - assert not segments_speech - assert len(segments_silence) == 1 - assert isinstance(segments_silence[0], AudioSegment) - - def test_emitted_segment_has_correct_timing(self, segmenter: Segmenter) -> None: - """Emitted segment has correct start and end times.""" - audio = self.make_audio(0.2) - - list(segmenter.process_audio(audio, is_speech=True)) - segments = list(segmenter.process_audio(audio, is_speech=False)) - - segment = segments[0] - assert segment.start_time >= 0.0 - assert segment.end_time > segment.start_time - assert segment.duration > 0 - - def test_emitted_segment_contains_audio(self, segmenter: Segmenter) -> None: - """Emitted segment contains concatenated audio.""" - audio = self.make_audio(0.2) - - list(segmenter.process_audio(audio, is_speech=True)) - segments = list(segmenter.process_audio(audio, is_speech=False)) - - assert len(segments[0].audio) > 0 - - def test_emits_on_max_duration(self) -> None: - """Force emit segment when max duration is reached.""" - segmenter = Segmenter( - config=SegmenterConfig( - sample_rate=16000, - max_segment_duration=0.3, - ) - ) - audio = self.make_audio(0.2) - - segments_1 = list(segmenter.process_audio(audio, is_speech=True)) - segments_2 = list(segmenter.process_audio(audio, is_speech=True)) - - assert not segments_1 - assert len(segments_2) == 1 - - def test_min_speech_duration_filters_short_segments(self) -> None: - """Segments shorter than min_speech_duration should be ignored.""" - segmenter = Segmenter( - config=SegmenterConfig( - sample_rate=16000, - min_speech_duration=0.5, - trailing_silence=0.1, - ) - ) - short_speech = self.make_audio(0.1) - silence = self.make_audio(0.1) - - 
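# The EnergyVad tests below hinge on dual-threshold hysteresis: the state only
# flips once RMS energy crosses the outer threshold for enough consecutive
# frames, while energies between the two thresholds hold the current state.
# A self-contained sketch of that scheme (the class is illustrative, not the
# implementation under test; defaults mirror the EnergyVadConfig values shown):
import numpy as np

class SketchEnergyVad:
    def __init__(
        self,
        speech_threshold: float = 0.01,
        silence_threshold: float = 0.005,
        min_speech_frames: int = 1,
        min_silence_frames: int = 1,
    ) -> None:
        self.speech_threshold = speech_threshold
        self.silence_threshold = silence_threshold
        self.min_speech_frames = min_speech_frames
        self.min_silence_frames = min_silence_frames
        self.is_speech = False
        self._speech_run = 0
        self._silence_run = 0

    def process(self, frames: np.ndarray) -> bool:
        energy = float(np.sqrt(np.mean(np.square(frames)))) if frames.size else 0.0
        if energy >= self.speech_threshold:
            self._speech_run += 1
            self._silence_run = 0
            if self._speech_run >= self.min_speech_frames:
                self.is_speech = True
        elif energy <= self.silence_threshold:
            self._silence_run += 1
            self._speech_run = 0
            if self._silence_run >= self.min_silence_frames:
                self.is_speech = False
        else:
            # between thresholds: hysteresis band, hold the current state
            self._speech_run = 0
            self._silence_run = 0
        return self.is_speech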
list(segmenter.process_audio(short_speech, is_speech=True)) - emitted = list(segmenter.process_audio(silence, is_speech=False)) - - assert not emitted - - -class TestSegmenterFlush: - """Tests for flush behavior.""" - - @pytest.fixture - def segmenter(self) -> Segmenter: - """Create segmenter with test-friendly config.""" - return Segmenter( - config=SegmenterConfig( - sample_rate=16000, - trailing_silence=0.5, - min_speech_duration=0.0, - ) - ) - - @staticmethod - def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray: - """Create test audio of specified duration.""" - return np.ones(int(duration * sample_rate), dtype=np.float32) - - def test_flush_returns_none_when_idle(self, segmenter: Segmenter) -> None: - """Flush returns None when no pending audio.""" - result = segmenter.flush() - - assert result is None - - def test_flush_returns_segment_when_in_speech(self, segmenter: Segmenter) -> None: - """Flush returns pending segment when in SPEECH state.""" - audio = self.make_audio(0.2) - list(segmenter.process_audio(audio, is_speech=True)) - - result = segmenter.flush() - - assert result is not None - assert isinstance(result, AudioSegment) - - def test_flush_returns_segment_when_in_trailing(self, segmenter: Segmenter) -> None: - """Flush returns pending segment when in TRAILING state.""" - audio = self.make_audio(0.1) - list(segmenter.process_audio(audio, is_speech=True)) - list(segmenter.process_audio(audio, is_speech=False)) - assert segmenter.state == SegmenterState.TRAILING - - result = segmenter.flush() - - assert result is not None - assert isinstance(result, AudioSegment) - - def test_flush_resets_to_idle(self, segmenter: Segmenter) -> None: - """Flush resets state to IDLE.""" - audio = self.make_audio(0.2) - list(segmenter.process_audio(audio, is_speech=True)) - - segmenter.flush() - - assert segmenter.state == SegmenterState.IDLE - - -class TestSegmenterReset: - """Tests for reset behavior.""" - - def test_reset_clears_state(self) -> None: - """Reset returns segmenter to initial state.""" - segmenter = Segmenter() - audio = np.ones(1600, dtype=np.float32) - - list(segmenter.process_audio(audio, is_speech=True)) - assert segmenter.state == SegmenterState.SPEECH - - segmenter.reset() - - assert segmenter.state == SegmenterState.IDLE - - -class TestAudioSegmentDataclass: - """Tests for AudioSegment dataclass.""" - - def test_duration_property(self) -> None: - """Duration property calculates correctly.""" - segment = AudioSegment( - audio=np.zeros(1600, dtype=np.float32), - start_time=1.0, - end_time=2.5, - ) - - assert segment.duration == 1.5 -```` - -## File: tests/infrastructure/asr/test_streaming_vad.py -````python -"""Tests for StreamingVad and EnergyVad.""" - -from __future__ import annotations - -import numpy as np - -from noteflow.infrastructure.asr.streaming_vad import ( - EnergyVad, - EnergyVadConfig, - StreamingVad, -) - - -class TestEnergyVadBasics: - """Basic tests for EnergyVad.""" - - def test_default_config(self) -> None: - """EnergyVad uses default config when not provided.""" - vad = EnergyVad() - - assert vad.config.speech_threshold == 0.01 - assert vad.config.silence_threshold == 0.005 - - def test_custom_config(self) -> None: - """EnergyVad accepts custom configuration.""" - config = EnergyVadConfig(speech_threshold=0.02, min_speech_frames=5) - vad = EnergyVad(config=config) - - assert vad.config.speech_threshold == 0.02 - assert vad.config.min_speech_frames == 5 - - def test_initial_state_is_silence(self) -> None: - """EnergyVad starts in 
silence state.""" - vad = EnergyVad() - - assert vad._is_speech is False - - -class TestEnergyVadDetection: - """Tests for EnergyVad speech detection.""" - - def test_detects_silence_for_zeros(self) -> None: - """Silent audio detected as non-speech.""" - vad = EnergyVad() - audio = np.zeros(1600, dtype=np.float32) - - result = vad.process(audio) - - assert result is False - - def test_detects_speech_for_high_energy(self) -> None: - """High energy audio eventually detected as speech.""" - vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=2)) - # Audio with energy above threshold - audio = np.ones(1600, dtype=np.float32) * 0.1 - - vad.process(audio) - result = vad.process(audio) - - assert result is True - - def test_speech_requires_consecutive_frames(self) -> None: - """Speech detection requires min_speech_frames consecutive frames.""" - vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=3)) - audio = np.ones(1600, dtype=np.float32) * 0.1 - - assert vad.process(audio) is False - assert vad.process(audio) is False - assert vad.process(audio) is True - - def test_silence_after_speech_requires_frames(self) -> None: - """Transition to silence requires min_silence_frames.""" - config = EnergyVadConfig(min_speech_frames=1, min_silence_frames=2) - vad = EnergyVad(config=config) - speech = np.ones(1600, dtype=np.float32) * 0.1 - silence = np.zeros(1600, dtype=np.float32) - - vad.process(speech) - assert vad._is_speech is True - - vad.process(silence) - assert vad._is_speech is True - - vad.process(silence) - assert vad._is_speech is False - - def test_hysteresis_prevents_chatter(self) -> None: - """Hysteresis prevents rapid speech/silence toggling.""" - config = EnergyVadConfig( - speech_threshold=0.01, - silence_threshold=0.005, - min_speech_frames=1, - min_silence_frames=1, - ) - vad = EnergyVad(config=config) - - # Just above speech threshold -> speech - high = np.ones(1600, dtype=np.float32) * 0.015 - vad.process(high) - assert vad._is_speech is True - - # Between thresholds (below speech, above silence) -> stays speech - mid = np.ones(1600, dtype=np.float32) * 0.007 - vad.process(mid) - assert vad._is_speech is True - - # Below silence threshold -> silence - low = np.ones(1600, dtype=np.float32) * 0.003 - vad.process(low) - assert vad._is_speech is False - - -class TestEnergyVadReset: - """Tests for EnergyVad reset behavior.""" - - def test_reset_clears_state(self) -> None: - """Reset returns VAD to initial state.""" - vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=1)) - audio = np.ones(1600, dtype=np.float32) * 0.1 - vad.process(audio) - - vad.reset() - - assert vad._is_speech is False - assert vad._speech_frame_count == 0 - assert vad._silence_frame_count == 0 - - -class TestStreamingVad: - """Tests for StreamingVad wrapper.""" - - def test_default_engine_is_energy_vad(self) -> None: - """StreamingVad uses EnergyVad by default.""" - vad = StreamingVad() - - assert isinstance(vad.engine, EnergyVad) - - def test_process_chunk_delegates_to_engine(self) -> None: - """process_chunk delegates to underlying engine.""" - vad = StreamingVad() - silence = np.zeros(1600, dtype=np.float32) - - result = vad.process_chunk(silence) - - assert result is False - - def test_reset_delegates_to_engine(self) -> None: - """reset delegates to underlying engine.""" - vad = StreamingVad() - speech = np.ones(1600, dtype=np.float32) * 0.1 - - vad.process_chunk(speech) - vad.process_chunk(speech) - vad.reset() - - assert vad.engine._is_speech is False -```` - -## File: 
tests/infrastructure/audio/test_levels.py -````python -"""Tests for RmsLevelProvider and compute_rms.""" - -from __future__ import annotations - -import math -from typing import TYPE_CHECKING - -import numpy as np -import pytest - -from noteflow.infrastructure.audio import RmsLevelProvider, compute_rms - -if TYPE_CHECKING: - from numpy.typing import NDArray - - -class TestComputeRms: - """Tests for compute_rms function.""" - - def test_empty_array_returns_zero(self) -> None: - """RMS of empty array is zero.""" - frames = np.array([], dtype=np.float32) - assert compute_rms(frames) == 0.0 - - def test_zeros_returns_zero(self) -> None: - """RMS of zeros is zero.""" - frames = np.zeros(100, dtype=np.float32) - assert compute_rms(frames) == 0.0 - - def test_ones_returns_one(self) -> None: - """RMS of all ones is one.""" - frames = np.ones(100, dtype=np.float32) - assert compute_rms(frames) == 1.0 - - def test_half_amplitude_returns_half(self) -> None: - """RMS of constant 0.5 is 0.5.""" - frames = np.full(100, 0.5, dtype=np.float32) - assert compute_rms(frames) == 0.5 - - def test_sine_wave_returns_sqrt_half(self) -> None: - """RMS of sine wave is approximately 1/sqrt(2).""" - t = np.linspace(0, 2 * np.pi, 1000, dtype=np.float32) - frames = np.sin(t).astype(np.float32) - result = compute_rms(frames) - assert 0.7 < result < 0.72 # ~0.707 - - -class TestRmsLevelProvider: - """Tests for RmsLevelProvider class.""" - - @pytest.fixture - def provider(self) -> RmsLevelProvider: - """Create RmsLevelProvider instance.""" - return RmsLevelProvider() - - def test_get_rms_empty_array_returns_zero(self, provider: RmsLevelProvider) -> None: - """Test RMS of empty array is zero.""" - frames = np.array([], dtype=np.float32) - assert provider.get_rms(frames) == 0.0 - - def test_get_rms_silence_returns_zero( - self, provider: RmsLevelProvider, silence_audio: NDArray[np.float32] - ) -> None: - """Test RMS of silence is zero.""" - assert provider.get_rms(silence_audio) == 0.0 - - def test_get_rms_full_scale_returns_one( - self, provider: RmsLevelProvider, full_scale_audio: NDArray[np.float32] - ) -> None: - """Test RMS of full scale signal is one.""" - assert provider.get_rms(full_scale_audio) == 1.0 - - def test_get_rms_half_scale_returns_half( - self, provider: RmsLevelProvider, half_scale_audio: NDArray[np.float32] - ) -> None: - """Test RMS of half scale signal is 0.5.""" - assert provider.get_rms(half_scale_audio) == 0.5 - - def test_get_rms_normalized_range(self, provider: RmsLevelProvider) -> None: - """Test RMS is always in 0.0-1.0 range.""" - # Test with values > 1.0 (should clamp) - frames = np.full(100, 2.0, dtype=np.float32) - rms = provider.get_rms(frames) - assert 0.0 <= rms <= 1.0 - - def test_get_db_silence_returns_min_db( - self, provider: RmsLevelProvider, silence_audio: NDArray[np.float32] - ) -> None: - """Test dB of silence returns MIN_DB.""" - assert provider.get_db(silence_audio) == provider.MIN_DB - - def test_get_db_full_scale_returns_zero( - self, provider: RmsLevelProvider, full_scale_audio: NDArray[np.float32] - ) -> None: - """Test dB of full scale signal is 0 dB.""" - assert provider.get_db(full_scale_audio) == 0.0 - - def test_get_db_half_scale_is_negative_six( - self, provider: RmsLevelProvider, half_scale_audio: NDArray[np.float32] - ) -> None: - """Test dB of half scale is approximately -6 dB.""" - db = provider.get_db(half_scale_audio) - # -6.02 dB for half amplitude - assert -7.0 < db < -5.0 - - def test_rms_to_db_zero_returns_min_db(self, provider: RmsLevelProvider) -> None: - 
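# The dB conversions asserted below follow the standard amplitude-decibel
# relation (a sketch; MIN_DB is the provider's silence floor):
#
#     db  = 20 * log10(rms)    for rms > 0   # 1.0 -> 0 dB, 0.5 -> ~-6.02 dB
#     rms = 10 ** (db / 20)                  # inverse, hence the roundtrip test
#     rms == 0.0 maps to MIN_DB rather than -inf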
"""Test rms_to_db(0) returns MIN_DB.""" - assert provider.rms_to_db(0.0) == provider.MIN_DB - - def test_rms_to_db_one_returns_zero(self, provider: RmsLevelProvider) -> None: - """Test rms_to_db(1.0) returns 0 dB.""" - assert provider.rms_to_db(1.0) == 0.0 - - def test_db_to_rms_min_db_returns_zero(self, provider: RmsLevelProvider) -> None: - """Test db_to_rms(MIN_DB) returns 0.""" - assert provider.db_to_rms(provider.MIN_DB) == 0.0 - - def test_db_to_rms_zero_returns_one(self, provider: RmsLevelProvider) -> None: - """Test db_to_rms(0) returns 1.0.""" - assert provider.db_to_rms(0.0) == 1.0 - - @pytest.mark.parametrize("rms", [0.1, 0.25, 0.5, 0.75, 1.0]) - def test_rms_db_roundtrip(self, provider: RmsLevelProvider, rms: float) -> None: - """Test RMS -> dB -> RMS roundtrip preserves value.""" - db = provider.rms_to_db(rms) - recovered = provider.db_to_rms(db) - assert math.isclose(recovered, rms, rel_tol=1e-9) -```` - -## File: tests/infrastructure/security/test_keystore.py -````python -"""Tests for KeyringKeyStore and InMemoryKeyStore.""" - -from __future__ import annotations - -import types -from typing import Any - -import pytest - -from noteflow.infrastructure.security import keystore - - -def _install_fake_keyring(monkeypatch: pytest.MonkeyPatch) -> dict[tuple[str, str], str]: - """Install a fake keyring backend backed by a dictionary.""" - storage: dict[tuple[str, str], str] = {} - - class DummyErrors: - class KeyringError(Exception): ... - - class PasswordDeleteError(KeyringError): ... - - def get_password(service: str, key: str) -> str | None: - return storage.get((service, key)) - - def set_password(service: str, key: str, value: str) -> None: - storage[(service, key)] = value - - def delete_password(service: str, key: str) -> None: - storage.pop((service, key), None) - - monkeypatch.setattr( - keystore, - "keyring", - types.SimpleNamespace( - get_password=get_password, - set_password=set_password, - delete_password=delete_password, - errors=DummyErrors, - ), - ) - return storage - - -def test_get_or_create_master_key_creates_and_reuses(monkeypatch: pytest.MonkeyPatch) -> None: - """Master key should be created once and then reused.""" - storage = _install_fake_keyring(monkeypatch) - ks = keystore.KeyringKeyStore(service_name="svc", key_name="key") - - first = ks.get_or_create_master_key() - second = ks.get_or_create_master_key() - - assert len(first) == keystore.KEY_SIZE - assert first == second - assert ("svc", "key") in storage - - -def test_get_or_create_master_key_wraps_keyring_errors(monkeypatch: pytest.MonkeyPatch) -> None: - """Keyring errors should surface as RuntimeError.""" - - class DummyErrors: - class KeyringError(Exception): ... - - def raise_error(*_: Any, **__: Any) -> None: - raise DummyErrors.KeyringError("unavailable") - - monkeypatch.setattr( - keystore, - "keyring", - types.SimpleNamespace( - get_password=raise_error, - set_password=raise_error, - errors=DummyErrors, - delete_password=raise_error, - ), - ) - - ks = keystore.KeyringKeyStore() - with pytest.raises(RuntimeError, match="Keyring unavailable"): - ks.get_or_create_master_key() - - -def test_delete_master_key_handles_missing(monkeypatch: pytest.MonkeyPatch) -> None: - """delete_master_key should swallow missing-key errors.""" - storage = _install_fake_keyring(monkeypatch) - - class DummyErrors: - class KeyringError(Exception): ... - - class PasswordDeleteError(KeyringError): ... 
- - # Reinstall with errors that raise on delete to exercise branch - def delete_password(*_: Any, **__: Any) -> None: - raise DummyErrors.PasswordDeleteError("not found") - - monkeypatch.setattr( - keystore, - "keyring", - types.SimpleNamespace( - get_password=lambda s, k: storage.get((s, k)), - set_password=lambda s, k, v: storage.setdefault((s, k), v), - delete_password=delete_password, - errors=DummyErrors, - ), - ) - - ks = keystore.KeyringKeyStore() - # Should not raise even when delete_password errors - ks.delete_master_key() - - -def test_has_master_key_false_on_errors(monkeypatch: pytest.MonkeyPatch) -> None: - """has_master_key should return False when keyring raises.""" - - class DummyErrors: - class KeyringError(Exception): ... - - def raise_error(*_: Any, **__: Any) -> None: - raise DummyErrors.KeyringError("oops") - - monkeypatch.setattr( - keystore, - "keyring", - types.SimpleNamespace( - get_password=raise_error, - errors=DummyErrors, - delete_password=lambda *a, **k: None, - set_password=lambda *a, **k: None, - ), - ) - - ks = keystore.KeyringKeyStore() - assert ks.has_master_key() is False -```` - -## File: tests/integration/test_trigger_settings.py -````python -"""Integration tests for trigger and retention settings loading.""" - -from __future__ import annotations - -import pytest - -from noteflow.config.settings import Settings, get_settings, get_trigger_settings - -pytestmark = pytest.mark.integration - - -@pytest.fixture(autouse=True) -def _clear_settings_cache() -> None: - get_trigger_settings.cache_clear() - get_settings.cache_clear() - - -def test_trigger_settings_env_parsing(monkeypatch: pytest.MonkeyPatch) -> None: - """TriggerSettings should parse CSV lists from environment variables.""" - monkeypatch.setenv("NOTEFLOW_TRIGGER_MEETING_APPS", "zoom, teams") - monkeypatch.setenv("NOTEFLOW_TRIGGER_SUPPRESSED_APPS", "spotify") - monkeypatch.setenv("NOTEFLOW_TRIGGER_AUDIO_MIN_SAMPLES", "5") - monkeypatch.setenv("NOTEFLOW_TRIGGER_POLL_INTERVAL_SECONDS", "1.5") - - settings = get_trigger_settings() - - assert settings.trigger_meeting_apps == ["zoom", "teams"] - assert settings.trigger_suppressed_apps == ["spotify"] - assert settings.trigger_audio_min_samples == 5 - assert settings.trigger_poll_interval_seconds == pytest.approx(1.5) - - -class TestRetentionSettings: - """Tests for retention settings.""" - - def test_retention_defaults(self) -> None: - """Retention settings should have correct defaults.""" - # Access via class to check field defaults without loading from env - assert Settings.model_fields["retention_enabled"].default is False - assert Settings.model_fields["retention_days"].default == 90 - assert Settings.model_fields["retention_check_interval_hours"].default == 24 - - def test_retention_env_parsing(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Retention settings should parse from environment variables.""" - monkeypatch.setenv("NOTEFLOW_DATABASE_URL", "postgresql+asyncpg://user:pass@localhost/db") - monkeypatch.setenv("NOTEFLOW_RETENTION_ENABLED", "true") - monkeypatch.setenv("NOTEFLOW_RETENTION_DAYS", "30") - monkeypatch.setenv("NOTEFLOW_RETENTION_CHECK_INTERVAL_HOURS", "12") - - settings = get_settings() - - assert settings.retention_enabled is True - assert settings.retention_days == 30 - assert settings.retention_check_interval_hours == 12 - - def test_retention_days_validation(self) -> None: - """Retention days should be validated within range.""" - from pydantic import ValidationError - - # ge=1, le=3650 - with 
pytest.raises(ValidationError): - Settings.model_validate( - {"database_url": "postgresql+asyncpg://x:x@x/x", "retention_days": 0} - ) - with pytest.raises(ValidationError): - Settings.model_validate( - {"database_url": "postgresql+asyncpg://x:x@x/x", "retention_days": 4000} - ) - - def test_retention_check_interval_validation(self) -> None: - """Retention check interval should be validated within range.""" - from pydantic import ValidationError - - # ge=1, le=168 - with pytest.raises(ValidationError): - Settings.model_validate( - { - "database_url": "postgresql+asyncpg://x:x@x/x", - "retention_check_interval_hours": 0, - } - ) - with pytest.raises(ValidationError): - Settings.model_validate( - { - "database_url": "postgresql+asyncpg://x:x@x/x", - "retention_check_interval_hours": 200, - } - ) -```` - -## File: src/noteflow/application/services/__init__.py -````python -"""Application services for NoteFlow use cases.""" - -from noteflow.application.services.export_service import ExportFormat, ExportService -from noteflow.application.services.meeting_service import MeetingService -from noteflow.application.services.recovery_service import RecoveryService -from noteflow.application.services.retention_service import RetentionReport, RetentionService -from noteflow.application.services.summarization_service import ( - SummarizationMode, - SummarizationService, - SummarizationServiceResult, - SummarizationServiceSettings, -) -from noteflow.application.services.trigger_service import TriggerService, TriggerServiceSettings - -__all__ = [ - "ExportFormat", - "ExportService", - "MeetingService", - "RecoveryService", - "RetentionReport", - "RetentionService", - "SummarizationMode", - "SummarizationService", - "SummarizationServiceResult", - "SummarizationServiceSettings", - "TriggerService", - "TriggerServiceSettings", -] -```` - -## File: src/noteflow/client/state.py -````python -"""Centralized application state for NoteFlow client. - -Composes existing types from grpc.client and infrastructure.audio. -Does not recreate any dataclasses - imports and uses existing ones. -""" - -from __future__ import annotations - -import logging -from collections.abc import Callable -from dataclasses import dataclass, field - -import flet as ft - -# REUSE existing types - do not recreate -from noteflow.domain.entities import Summary -from noteflow.domain.triggers import TriggerDecision -from noteflow.grpc.client import AnnotationInfo, MeetingInfo, ServerInfo, TranscriptSegment -from noteflow.infrastructure.audio import ( - RmsLevelProvider, - SoundDevicePlayback, - TimestampedAudio, -) - -logger = logging.getLogger(__name__) - -# Callback type aliases (follow NoteFlowClient pattern from grpc/client.py) -OnTranscriptCallback = Callable[[TranscriptSegment], None] -OnConnectionCallback = Callable[[bool, str], None] - - -@dataclass -class AppState: - """Centralized application state for NoteFlow client. - - Composes existing types from grpc.client and infrastructure.audio. - All state is centralized here for component access. 
- """ - - # Connection state - server_address: str = "localhost:50051" - connected: bool = False - server_info: ServerInfo | None = None # REUSE existing type - - # Recording state - recording: bool = False - current_meeting: MeetingInfo | None = None # REUSE existing type - recording_start_time: float | None = None - elapsed_seconds: int = 0 - - # Audio state (REUSE existing RmsLevelProvider) - level_provider: RmsLevelProvider = field(default_factory=RmsLevelProvider) - current_db_level: float = -60.0 - - # Transcript state (REUSE existing TranscriptSegment) - transcript_segments: list[TranscriptSegment] = field(default_factory=list) - current_partial_text: str = "" # Live partial transcript (not yet final) - - # Playback state (REUSE existing SoundDevicePlayback) - playback: SoundDevicePlayback = field(default_factory=SoundDevicePlayback) - playback_position: float = 0.0 - session_audio_buffer: list[TimestampedAudio] = field(default_factory=list) - - # Transcript sync state - highlighted_segment_index: int | None = None - - # Annotations state (REUSE existing AnnotationInfo) - annotations: list[AnnotationInfo] = field(default_factory=list) - - # Meeting library state (REUSE existing MeetingInfo) - meetings: list[MeetingInfo] = field(default_factory=list) - selected_meeting: MeetingInfo | None = None - - # Trigger state (REUSE existing TriggerDecision) - trigger_enabled: bool = True - trigger_pending: bool = False # True when prompt is shown - trigger_decision: TriggerDecision | None = None # Last trigger decision - - # Summary state (REUSE existing Summary entity) - current_summary: Summary | None = None - summary_loading: bool = False - summary_error: str | None = None - - # UI page reference (private) - _page: ft.Page | None = field(default=None, repr=False) - - def set_page(self, page: ft.Page) -> None: - """Set page reference for thread-safe updates. - - Args: - page: Flet page instance. - """ - self._page = page - - def request_update(self) -> None: - """Request UI update from any thread. - - Safe to call from background threads. - """ - if self._page: - self._page.update() - - def run_on_ui_thread(self, callback: Callable[[], None]) -> None: - """Schedule callback on the UI event loop safely. - - Follows NoteFlowClient callback pattern with error handling. - - Args: - callback: Function to execute on the UI event loop. - """ - if not self._page: - return - - try: - if hasattr(self._page, "run_task"): - - async def _run() -> None: - callback() - - self._page.run_task(_run) - else: - self._page.run_thread(callback) - except Exception as e: - logger.error("UI thread callback error: %s", e) - - def clear_transcript(self) -> None: - """Clear all transcript segments and partial text.""" - self.transcript_segments.clear() - self.current_partial_text = "" - - def reset_recording_state(self) -> None: - """Reset recording-related state.""" - self.recording = False - self.current_meeting = None - self.recording_start_time = None - self.elapsed_seconds = 0 - - def clear_session_audio(self) -> None: - """Clear session audio buffer and reset playback state.""" - self.session_audio_buffer.clear() - self.playback_position = 0.0 - - def find_segment_at_position(self, position: float) -> int | None: - """Find segment index containing the given position using binary search. - - Args: - position: Time in seconds. - - Returns: - Index of segment containing position, or None if not found. 
- """ - segments = self.transcript_segments - if not segments: - return None - - left, right = 0, len(segments) - 1 - - while left <= right: - mid = (left + right) // 2 - segment = segments[mid] - - if segment.start_time <= position <= segment.end_time: - return mid - if position < segment.start_time: - right = mid - 1 - else: - left = mid + 1 - - return None -```` - ## File: src/noteflow/grpc/proto/noteflow_pb2_grpc.py ````python -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" - import grpc import warnings - import noteflow_pb2 as noteflow__pb2 - GRPC_VERSION = grpc.__version__ _version_not_supported = False - GRPC_GENERATED_VERSION = '1.76.0' try: from grpc._utilities import first_version_is_lower _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) except ImportError: _version_not_supported = True - if _version_not_supported: raise RuntimeError( f'The grpc package installed is at version {GRPC_VERSION}, but the generated code in noteflow_pb2_grpc.py depends on' @@ -19960,21 +10862,8 @@ if _version_not_supported: + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' ) - - class NoteFlowServiceStub(object): - """============================================================================= - Core Service - ============================================================================= - - """ - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ self.StreamTranscription = channel.stream_stream( '/noteflow.NoteFlowService/StreamTranscription', request_serializer=noteflow__pb2.AudioChunk.SerializeToString, @@ -20060,125 +10949,75 @@ class NoteFlowServiceStub(object): request_serializer=noteflow__pb2.ServerInfoRequest.SerializeToString, response_deserializer=noteflow__pb2.ServerInfo.FromString, _registered_method=True) - - class NoteFlowServiceServicer(object): - """============================================================================= - Core Service - ============================================================================= - - """ - def StreamTranscription(self, request_iterator, context): - """Bidirectional streaming: client sends audio chunks, server returns transcripts - """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def CreateMeeting(self, request, context): - """Meeting lifecycle management - """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def StopMeeting(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def ListMeetings(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def GetMeeting(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def 
DeleteMeeting(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def GenerateSummary(self, request, context): - """Summary generation - """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def AddAnnotation(self, request, context): - """Annotation management - """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def GetAnnotation(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def ListAnnotations(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def UpdateAnnotation(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def DeleteAnnotation(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def ExportTranscript(self, request, context): - """Export functionality - """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def RefineSpeakerDiarization(self, request, context): - """Speaker diarization - """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def RenameSpeaker(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def GetDiarizationJobStatus(self, request, context): - """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def GetServerInfo(self, request, context): - """Server health and capabilities - """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - - def add_NoteFlowServiceServicer_to_server(servicer, server): rpc_method_handlers = { 'StreamTranscription': grpc.stream_stream_rpc_method_handler( @@ -20271,16 +11110,7 @@ def add_NoteFlowServiceServicer_to_server(servicer, server): 'noteflow.NoteFlowService', rpc_method_handlers) server.add_generic_rpc_handlers((generic_handler,)) server.add_registered_method_handlers('noteflow.NoteFlowService', rpc_method_handlers) - - - # This class is part of an EXPERIMENTAL API. 
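# Typical client-side use of the generated stub above (a sketch; the address
# mirrors the client default used elsewhere in this repo, and the message and
# field names come from noteflow.proto):
#
#     import grpc
#     import noteflow_pb2
#     import noteflow_pb2_grpc
#
#     channel = grpc.insecure_channel("localhost:50051")
#     stub = noteflow_pb2_grpc.NoteFlowServiceStub(channel)
#     info = stub.GetServerInfo(noteflow_pb2.ServerInfoRequest())
#     print(info.version, info.asr_ready)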
class NoteFlowService(object): - """============================================================================= - Core Service - ============================================================================= - - """ - @staticmethod def StreamTranscription(request_iterator, target, @@ -20307,7 +11137,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def CreateMeeting(request, target, @@ -20334,7 +11163,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def StopMeeting(request, target, @@ -20361,7 +11189,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def ListMeetings(request, target, @@ -20388,7 +11215,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def GetMeeting(request, target, @@ -20415,7 +11241,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def DeleteMeeting(request, target, @@ -20442,7 +11267,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def GenerateSummary(request, target, @@ -20469,7 +11293,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def AddAnnotation(request, target, @@ -20496,7 +11319,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def GetAnnotation(request, target, @@ -20523,7 +11345,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def ListAnnotations(request, target, @@ -20550,7 +11371,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def UpdateAnnotation(request, target, @@ -20577,7 +11397,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def DeleteAnnotation(request, target, @@ -20604,7 +11423,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def ExportTranscript(request, target, @@ -20631,7 +11449,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def RefineSpeakerDiarization(request, target, @@ -20658,7 +11475,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def RenameSpeaker(request, target, @@ -20685,7 +11501,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def GetDiarizationJobStatus(request, target, @@ -20712,7 +11527,6 @@ class NoteFlowService(object): timeout, metadata, _registered_method=True) - @staticmethod def GetServerInfo(request, target, @@ -20743,12 +11557,6 @@ class NoteFlowService(object): ## File: src/noteflow/grpc/proto/noteflow_pb2.py ````python -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
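# Regenerating this module uses grpcio-tools; a typical invocation looks like
# the following (the -I path and output directories are assumptions, not taken
# from this repo's build scripts):
#
#     python -m grpc_tools.protoc -I src/noteflow/grpc/proto \
#         --python_out=src/noteflow/grpc/proto \
#         --grpc_python_out=src/noteflow/grpc/proto \
#         noteflow.proto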
-# NO CHECKED-IN PROTOBUF GENCODE -# source: noteflow.proto -# Protobuf Python Version: 6.31.1 -"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import runtime_version as _runtime_version @@ -20762,15 +11570,8 @@ _runtime_version.ValidateProtobufRuntimeVersion( '', 'noteflow.proto' ) -# @@protoc_insertion_point(imports) - _sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0enoteflow.proto\x12\x08noteflow\"n\n\nAudioChunk\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\naudio_data\x18\x02 \x01(\x0c\x12\x11\n\ttimestamp\x18\x03 \x01(\x01\x12\x13\n\x0bsample_rate\x18\x04 \x01(\x05\x12\x10\n\x08\x63hannels\x18\x05 \x01(\x05\"\xaa\x01\n\x10TranscriptUpdate\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12)\n\x0bupdate_type\x18\x02 \x01(\x0e\x32\x14.noteflow.UpdateType\x12\x14\n\x0cpartial_text\x18\x03 \x01(\t\x12\'\n\x07segment\x18\x04 \x01(\x0b\x32\x16.noteflow.FinalSegment\x12\x18\n\x10server_timestamp\x18\x05 \x01(\x01\"\x87\x02\n\x0c\x46inalSegment\x12\x12\n\nsegment_id\x18\x01 \x01(\x05\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\x12#\n\x05words\x18\x05 \x03(\x0b\x32\x14.noteflow.WordTiming\x12\x10\n\x08language\x18\x06 \x01(\t\x12\x1b\n\x13language_confidence\x18\x07 \x01(\x02\x12\x13\n\x0b\x61vg_logprob\x18\x08 \x01(\x02\x12\x16\n\x0eno_speech_prob\x18\t \x01(\x02\x12\x12\n\nspeaker_id\x18\n \x01(\t\x12\x1a\n\x12speaker_confidence\x18\x0b \x01(\x02\"U\n\nWordTiming\x12\x0c\n\x04word\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\x12\x13\n\x0bprobability\x18\x04 \x01(\x02\"\xd1\x02\n\x07Meeting\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12%\n\x05state\x18\x03 \x01(\x0e\x32\x16.noteflow.MeetingState\x12\x12\n\ncreated_at\x18\x04 \x01(\x01\x12\x12\n\nstarted_at\x18\x05 \x01(\x01\x12\x10\n\x08\x65nded_at\x18\x06 \x01(\x01\x12\x18\n\x10\x64uration_seconds\x18\x07 \x01(\x01\x12(\n\x08segments\x18\x08 \x03(\x0b\x32\x16.noteflow.FinalSegment\x12\"\n\x07summary\x18\t \x01(\x0b\x32\x11.noteflow.Summary\x12\x31\n\x08metadata\x18\n \x03(\x0b\x32\x1f.noteflow.Meeting.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x96\x01\n\x14\x43reateMeetingRequest\x12\r\n\x05title\x18\x01 \x01(\t\x12>\n\x08metadata\x18\x02 \x03(\x0b\x32,.noteflow.CreateMeetingRequest.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"(\n\x12StopMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"\x85\x01\n\x13ListMeetingsRequest\x12&\n\x06states\x18\x01 \x03(\x0e\x32\x16.noteflow.MeetingState\x12\r\n\x05limit\x18\x02 \x01(\x05\x12\x0e\n\x06offset\x18\x03 \x01(\x05\x12\'\n\nsort_order\x18\x04 \x01(\x0e\x32\x13.noteflow.SortOrder\"P\n\x14ListMeetingsResponse\x12#\n\x08meetings\x18\x01 \x03(\x0b\x32\x11.noteflow.Meeting\x12\x13\n\x0btotal_count\x18\x02 \x01(\x05\"Z\n\x11GetMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10include_segments\x18\x02 \x01(\x08\x12\x17\n\x0finclude_summary\x18\x03 \x01(\x08\"*\n\x14\x44\x65leteMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"(\n\x15\x44\x65leteMeetingResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\xb9\x01\n\x07Summary\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x19\n\x11\x65xecutive_summary\x18\x02 
\x01(\t\x12&\n\nkey_points\x18\x03 \x03(\x0b\x32\x12.noteflow.KeyPoint\x12*\n\x0c\x61\x63tion_items\x18\x04 \x03(\x0b\x32\x14.noteflow.ActionItem\x12\x14\n\x0cgenerated_at\x18\x05 \x01(\x01\x12\x15\n\rmodel_version\x18\x06 \x01(\t\"S\n\x08KeyPoint\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x13\n\x0bsegment_ids\x18\x02 \x03(\x05\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\"y\n\nActionItem\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x10\n\x08\x61ssignee\x18\x02 \x01(\t\x12\x10\n\x08\x64ue_date\x18\x03 \x01(\x01\x12$\n\x08priority\x18\x04 \x01(\x0e\x32\x12.noteflow.Priority\x12\x13\n\x0bsegment_ids\x18\x05 \x03(\x05\"F\n\x16GenerateSummaryRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10\x66orce_regenerate\x18\x02 \x01(\x08\"\x13\n\x11ServerInfoRequest\"\xe4\x01\n\nServerInfo\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\x11\n\tasr_model\x18\x02 \x01(\t\x12\x11\n\tasr_ready\x18\x03 \x01(\x08\x12\x1e\n\x16supported_sample_rates\x18\x04 \x03(\x05\x12\x16\n\x0emax_chunk_size\x18\x05 \x01(\x05\x12\x16\n\x0euptime_seconds\x18\x06 \x01(\x01\x12\x17\n\x0f\x61\x63tive_meetings\x18\x07 \x01(\x05\x12\x1b\n\x13\x64iarization_enabled\x18\x08 \x01(\x08\x12\x19\n\x11\x64iarization_ready\x18\t \x01(\x08\"\xbc\x01\n\nAnnotation\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\nmeeting_id\x18\x02 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x03 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x12\n\nstart_time\x18\x05 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x06 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x07 \x03(\x05\x12\x12\n\ncreated_at\x18\x08 \x01(\x01\"\xa6\x01\n\x14\x41\x64\x64\x41nnotationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"-\n\x14GetAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"R\n\x16ListAnnotationsRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\"D\n\x17ListAnnotationsResponse\x12)\n\x0b\x61nnotations\x18\x01 \x03(\x0b\x32\x14.noteflow.Annotation\"\xac\x01\n\x17UpdateAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"0\n\x17\x44\x65leteAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"+\n\x18\x44\x65leteAnnotationResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"U\n\x17\x45xportTranscriptRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12&\n\x06\x66ormat\x18\x02 \x01(\x0e\x32\x16.noteflow.ExportFormat\"X\n\x18\x45xportTranscriptResponse\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x13\n\x0b\x66ormat_name\x18\x02 \x01(\t\x12\x16\n\x0e\x66ile_extension\x18\x03 \x01(\t\"K\n\x1fRefineSpeakerDiarizationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x14\n\x0cnum_speakers\x18\x02 \x01(\x05\"\x9d\x01\n RefineSpeakerDiarizationResponse\x12\x18\n\x10segments_updated\x18\x01 \x01(\x05\x12\x13\n\x0bspeaker_ids\x18\x02 \x03(\t\x12\x15\n\rerror_message\x18\x03 \x01(\t\x12\x0e\n\x06job_id\x18\x04 \x01(\t\x12#\n\x06status\x18\x05 \x01(\x0e\x32\x13.noteflow.JobStatus\"\\\n\x14RenameSpeakerRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x16\n\x0eold_speaker_id\x18\x02 
\x01(\t\x12\x18\n\x10new_speaker_name\x18\x03 \x01(\t\"B\n\x15RenameSpeakerResponse\x12\x18\n\x10segments_updated\x18\x01 \x01(\x05\x12\x0f\n\x07success\x18\x02 \x01(\x08\"0\n\x1eGetDiarizationJobStatusRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x91\x01\n\x14\x44iarizationJobStatus\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12#\n\x06status\x18\x02 \x01(\x0e\x32\x13.noteflow.JobStatus\x12\x18\n\x10segments_updated\x18\x03 \x01(\x05\x12\x13\n\x0bspeaker_ids\x18\x04 \x03(\t\x12\x15\n\rerror_message\x18\x05 \x01(\t*\x8d\x01\n\nUpdateType\x12\x1b\n\x17UPDATE_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13UPDATE_TYPE_PARTIAL\x10\x01\x12\x15\n\x11UPDATE_TYPE_FINAL\x10\x02\x12\x19\n\x15UPDATE_TYPE_VAD_START\x10\x03\x12\x17\n\x13UPDATE_TYPE_VAD_END\x10\x04*\xb6\x01\n\x0cMeetingState\x12\x1d\n\x19MEETING_STATE_UNSPECIFIED\x10\x00\x12\x19\n\x15MEETING_STATE_CREATED\x10\x01\x12\x1b\n\x17MEETING_STATE_RECORDING\x10\x02\x12\x19\n\x15MEETING_STATE_STOPPED\x10\x03\x12\x1b\n\x17MEETING_STATE_COMPLETED\x10\x04\x12\x17\n\x13MEETING_STATE_ERROR\x10\x05*`\n\tSortOrder\x12\x1a\n\x16SORT_ORDER_UNSPECIFIED\x10\x00\x12\x1b\n\x17SORT_ORDER_CREATED_DESC\x10\x01\x12\x1a\n\x16SORT_ORDER_CREATED_ASC\x10\x02*^\n\x08Priority\x12\x18\n\x14PRIORITY_UNSPECIFIED\x10\x00\x12\x10\n\x0cPRIORITY_LOW\x10\x01\x12\x13\n\x0fPRIORITY_MEDIUM\x10\x02\x12\x11\n\rPRIORITY_HIGH\x10\x03*\xa4\x01\n\x0e\x41nnotationType\x12\x1f\n\x1b\x41NNOTATION_TYPE_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x41NNOTATION_TYPE_ACTION_ITEM\x10\x01\x12\x1c\n\x18\x41NNOTATION_TYPE_DECISION\x10\x02\x12\x18\n\x14\x41NNOTATION_TYPE_NOTE\x10\x03\x12\x18\n\x14\x41NNOTATION_TYPE_RISK\x10\x04*a\n\x0c\x45xportFormat\x12\x1d\n\x19\x45XPORT_FORMAT_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x45XPORT_FORMAT_MARKDOWN\x10\x01\x12\x16\n\x12\x45XPORT_FORMAT_HTML\x10\x02*\x87\x01\n\tJobStatus\x12\x1a\n\x16JOB_STATUS_UNSPECIFIED\x10\x00\x12\x15\n\x11JOB_STATUS_QUEUED\x10\x01\x12\x16\n\x12JOB_STATUS_RUNNING\x10\x02\x12\x18\n\x14JOB_STATUS_COMPLETED\x10\x03\x12\x15\n\x11JOB_STATUS_FAILED\x10\x04\x32\xe0\n\n\x0fNoteFlowService\x12K\n\x13StreamTranscription\x12\x14.noteflow.AudioChunk\x1a\x1a.noteflow.TranscriptUpdate(\x01\x30\x01\x12\x42\n\rCreateMeeting\x12\x1e.noteflow.CreateMeetingRequest\x1a\x11.noteflow.Meeting\x12>\n\x0bStopMeeting\x12\x1c.noteflow.StopMeetingRequest\x1a\x11.noteflow.Meeting\x12M\n\x0cListMeetings\x12\x1d.noteflow.ListMeetingsRequest\x1a\x1e.noteflow.ListMeetingsResponse\x12<\n\nGetMeeting\x12\x1b.noteflow.GetMeetingRequest\x1a\x11.noteflow.Meeting\x12P\n\rDeleteMeeting\x12\x1e.noteflow.DeleteMeetingRequest\x1a\x1f.noteflow.DeleteMeetingResponse\x12\x46\n\x0fGenerateSummary\x12 .noteflow.GenerateSummaryRequest\x1a\x11.noteflow.Summary\x12\x45\n\rAddAnnotation\x12\x1e.noteflow.AddAnnotationRequest\x1a\x14.noteflow.Annotation\x12\x45\n\rGetAnnotation\x12\x1e.noteflow.GetAnnotationRequest\x1a\x14.noteflow.Annotation\x12V\n\x0fListAnnotations\x12 
.noteflow.ListAnnotationsRequest\x1a!.noteflow.ListAnnotationsResponse\x12K\n\x10UpdateAnnotation\x12!.noteflow.UpdateAnnotationRequest\x1a\x14.noteflow.Annotation\x12Y\n\x10\x44\x65leteAnnotation\x12!.noteflow.DeleteAnnotationRequest\x1a\".noteflow.DeleteAnnotationResponse\x12Y\n\x10\x45xportTranscript\x12!.noteflow.ExportTranscriptRequest\x1a\".noteflow.ExportTranscriptResponse\x12q\n\x18RefineSpeakerDiarization\x12).noteflow.RefineSpeakerDiarizationRequest\x1a*.noteflow.RefineSpeakerDiarizationResponse\x12P\n\rRenameSpeaker\x12\x1e.noteflow.RenameSpeakerRequest\x1a\x1f.noteflow.RenameSpeakerResponse\x12\x63\n\x17GetDiarizationJobStatus\x12(.noteflow.GetDiarizationJobStatusRequest\x1a\x1e.noteflow.DiarizationJobStatus\x12\x42\n\rGetServerInfo\x12\x1b.noteflow.ServerInfoRequest\x1a\x14.noteflow.ServerInfob\x06proto3') - +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0enoteflow.proto\x12\x08noteflow\"n\n\nAudioChunk\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\naudio_data\x18\x02 \x01(\x0c\x12\x11\n\ttimestamp\x18\x03 \x01(\x01\x12\x13\n\x0bsample_rate\x18\x04 \x01(\x05\x12\x10\n\x08\x63hannels\x18\x05 \x01(\x05\"\xaa\x01\n\x10TranscriptUpdate\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12)\n\x0bupdate_type\x18\x02 \x01(\x0e\x32\x14.noteflow.UpdateType\x12\x14\n\x0cpartial_text\x18\x03 \x01(\t\x12\'\n\x07segment\x18\x04 \x01(\x0b\x32\x16.noteflow.FinalSegment\x12\x18\n\x10server_timestamp\x18\x05 \x01(\x01\"\x87\x02\n\x0c\x46inalSegment\x12\x12\n\nsegment_id\x18\x01 \x01(\x05\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\x12 _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'noteflow_pb2', _globals) @@ -20868,7 +11669,6 @@ if not _descriptor._USE_C_DESCRIPTORS: _globals['_DIARIZATIONJOBSTATUS']._serialized_end=3920 _globals['_NOTEFLOWSERVICE']._serialized_start=4850 _globals['_NOTEFLOWSERVICE']._serialized_end=6226 -# @@protoc_insertion_point(module_scope) ```` ## File: src/noteflow/grpc/proto/noteflow.proto @@ -21416,4476 +12216,61 @@ message DiarizationJobStatus { } ```` -## File: src/noteflow/infrastructure/audio/__init__.py +## File: src/noteflow/client/components/__init__.py ````python -"""Audio infrastructure module. - -Provide audio capture, level metering, buffering, playback, and encrypted storage. 
-""" - -from noteflow.infrastructure.audio.capture import SoundDeviceCapture -from noteflow.infrastructure.audio.dto import ( - AudioDeviceInfo, - AudioFrameCallback, - TimestampedAudio, -) -from noteflow.infrastructure.audio.levels import RmsLevelProvider, compute_rms -from noteflow.infrastructure.audio.playback import PlaybackState, SoundDevicePlayback -from noteflow.infrastructure.audio.protocols import ( - AudioCapture, - AudioLevelProvider, - AudioPlayback, - RingBuffer, -) -from noteflow.infrastructure.audio.reader import MeetingAudioReader -from noteflow.infrastructure.audio.ring_buffer import TimestampedRingBuffer -from noteflow.infrastructure.audio.writer import MeetingAudioWriter - -__all__ = [ - "AudioCapture", - "AudioDeviceInfo", - "AudioFrameCallback", - "AudioLevelProvider", - "AudioPlayback", - "MeetingAudioReader", - "MeetingAudioWriter", - "PlaybackState", - "RingBuffer", - "RmsLevelProvider", - "SoundDeviceCapture", - "SoundDevicePlayback", - "TimestampedAudio", - "TimestampedRingBuffer", - "compute_rms", -] -```` - -## File: src/noteflow/infrastructure/converters/orm_converters.py -````python -"""Convert between ORM models and domain entities.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from noteflow.domain.entities import ( - ActionItem, - Annotation, - KeyPoint, - Meeting, - Segment, - Summary, -) -from noteflow.domain.entities import ( - WordTiming as DomainWordTiming, -) -from noteflow.domain.value_objects import ( - AnnotationId, - AnnotationType, - MeetingId, - MeetingState, -) - -if TYPE_CHECKING: - from noteflow.infrastructure.persistence.models import ( - ActionItemModel, - AnnotationModel, - KeyPointModel, - MeetingModel, - SegmentModel, - SummaryModel, - WordTimingModel, - ) - - -class OrmConverter: - """Convert between ORM models and domain entities.""" - - # --- WordTiming --- - - @staticmethod - def word_timing_to_domain(model: WordTimingModel) -> DomainWordTiming: - """Convert ORM WordTiming model to domain entity. - - Args: - model: SQLAlchemy WordTimingModel instance. - - Returns: - Domain WordTiming entity. - - Raises: - ValueError: If timing validation fails during entity construction. - """ - return DomainWordTiming( - word=model.word, - start_time=model.start_time, - end_time=model.end_time, - probability=model.probability, - ) - - @staticmethod - def word_timing_to_orm_kwargs(word: DomainWordTiming) -> dict[str, str | float]: - """Convert domain WordTiming to ORM model kwargs. - - Return a dict of kwargs rather than instantiating WordTimingModel directly - to avoid circular imports and allow the repository to handle ORM construction. - - Args: - word: Domain WordTiming entity. - - Returns: - Dict with word, start_time, end_time, probability for ORM construction. - """ - return { - "word": word.word, - "start_time": word.start_time, - "end_time": word.end_time, - "probability": word.probability, - } - - # --- Meeting --- - - @staticmethod - def meeting_to_domain(model: MeetingModel) -> Meeting: - """Convert ORM Meeting model to domain entity. - - Args: - model: SQLAlchemy MeetingModel instance. - - Returns: - Domain Meeting entity. 
- """ - return Meeting( - id=MeetingId(model.id), - title=model.title, - state=MeetingState(model.state), - created_at=model.created_at, - started_at=model.started_at, - ended_at=model.ended_at, - metadata=model.metadata_, - wrapped_dek=model.wrapped_dek, - ) - - # --- Segment --- - - @staticmethod - def segment_to_domain(model: SegmentModel, include_words: bool = True) -> Segment: - """Convert ORM Segment model to domain entity. - - Args: - model: SQLAlchemy SegmentModel instance. - include_words: Whether to include word-level timing. - - Returns: - Domain Segment entity. - """ - words: list[DomainWordTiming] = [] - if include_words: - words = [OrmConverter.word_timing_to_domain(w) for w in model.words] - - embedding = list(model.embedding) if model.embedding is not None else None - - return Segment( - segment_id=model.segment_id, - text=model.text, - start_time=model.start_time, - end_time=model.end_time, - meeting_id=MeetingId(model.meeting_id), - words=words, - language=model.language, - language_confidence=model.language_confidence, - avg_logprob=model.avg_logprob, - no_speech_prob=model.no_speech_prob, - embedding=embedding, - speaker_id=model.speaker_id, - speaker_confidence=model.speaker_confidence, - db_id=model.id, - ) - - # --- Annotation --- - - @staticmethod - def annotation_to_domain(model: AnnotationModel) -> Annotation: - """Convert ORM Annotation model to domain entity. - - Args: - model: SQLAlchemy AnnotationModel instance. - - Returns: - Domain Annotation entity. - """ - return Annotation( - id=AnnotationId(model.annotation_id), - meeting_id=MeetingId(model.meeting_id), - annotation_type=AnnotationType(model.annotation_type), - text=model.text, - start_time=model.start_time, - end_time=model.end_time, - segment_ids=model.segment_ids, - created_at=model.created_at, - db_id=model.id, - ) - - # --- Summary --- - - @staticmethod - def key_point_to_domain(model: KeyPointModel) -> KeyPoint: - """Convert ORM KeyPoint model to domain entity. - - Args: - model: SQLAlchemy KeyPointModel instance. - - Returns: - Domain KeyPoint entity. - """ - return KeyPoint( - text=model.text, - segment_ids=model.segment_ids, - start_time=model.start_time, - end_time=model.end_time, - db_id=model.id, - ) - - @staticmethod - def action_item_to_domain(model: ActionItemModel) -> ActionItem: - """Convert ORM ActionItem model to domain entity. - - Args: - model: SQLAlchemy ActionItemModel instance. - - Returns: - Domain ActionItem entity. - """ - return ActionItem( - text=model.text, - assignee=model.assignee, - due_date=model.due_date, - priority=model.priority, - segment_ids=model.segment_ids, - db_id=model.id, - ) - - @staticmethod - def summary_to_domain(model: SummaryModel, meeting_id: MeetingId) -> Summary: - """Convert ORM Summary model to domain entity. - - Args: - model: SQLAlchemy SummaryModel instance. - meeting_id: Meeting identifier (passed for type safety). - - Returns: - Domain Summary entity. - """ - return Summary( - meeting_id=meeting_id, - executive_summary=model.executive_summary or "", - key_points=[OrmConverter.key_point_to_domain(kp) for kp in model.key_points], - action_items=[OrmConverter.action_item_to_domain(ai) for ai in model.action_items], - generated_at=model.generated_at, - model_version=model.model_version or "", - db_id=model.id, - ) -```` - -## File: src/noteflow/infrastructure/export/html.py -````python -"""HTML exporter implementation. - -Export meeting transcripts to HTML format. 
-""" - -from __future__ import annotations - -import html -from datetime import datetime -from typing import TYPE_CHECKING - -from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp - -if TYPE_CHECKING: - from collections.abc import Sequence - - from noteflow.domain.entities.meeting import Meeting - from noteflow.domain.entities.segment import Segment - - -def _escape(text: str) -> str: - """Escape HTML special characters. - - Args: - text: Raw text to escape. - - Returns: - HTML-safe text. - """ - return html.escape(text) - - -# HTML template with embedded CSS for print-friendly output -_HTML_TEMPLATE = """ - - - - - {title} - - - -{content} - -""" - - -class HtmlExporter: - """Export meeting transcripts to HTML format. - - Produces clean, print-friendly HTML with embedded CSS styling, - meeting metadata, transcript with timestamps, and optional summary. - """ - - @property - def format_name(self) -> str: - """Human-readable format name.""" - return "HTML" - - @property - def file_extension(self) -> str: - """File extension for HTML.""" - return ".html" - - def export( - self, - meeting: Meeting, - segments: Sequence[Segment], - ) -> str: - """Export meeting transcript to HTML. - - Args: - meeting: Meeting entity with metadata. - segments: Ordered list of transcript segments. - - Returns: - HTML-formatted transcript string. - """ - content_parts: list[str] = [ - f"

{_escape(meeting.title)}

", - '", - "

Transcript

", - '
', - ) - ) - for segment in segments: - timestamp = format_timestamp(segment.start_time) - content_parts.append('
') - content_parts.append(f'[{timestamp}]') - content_parts.extend((f"{_escape(segment.text)}", "
")) - content_parts.append("
") - - # Summary section (if available) - if meeting.summary: - content_parts.extend(('
', "

Summary

")) - if meeting.summary.executive_summary: - content_parts.append(f"

{_escape(meeting.summary.executive_summary)}

") - - if meeting.summary.key_points: - content_parts.extend(("

Key Points

", '
    ')) - content_parts.extend( - f"
  • {_escape(point.text)}
  • " for point in meeting.summary.key_points - ) - content_parts.append("
") - - if meeting.summary.action_items: - content_parts.extend(("

Action Items

", '
    ')) - for item in meeting.summary.action_items: - assignee = ( - f' @{_escape(item.assignee)}' - if item.assignee - else "" - ) - content_parts.append(f"
  • {_escape(item.text)}{assignee}
  • ") - content_parts.append("
") - - content_parts.append("
") - - # Footer - content_parts.append("
") - content_parts.extend( - ( - f"Exported from NoteFlow on {_escape(format_datetime(datetime.now()))}", - "
", - ) - ) - content = "\n".join(content_parts) - return _HTML_TEMPLATE.format(title=_escape(meeting.title), content=content) -```` - -## File: src/noteflow/infrastructure/persistence/repositories/meeting_repo.py -````python -"""SQLAlchemy implementation of MeetingRepository.""" - -from __future__ import annotations - -from collections.abc import Sequence -from datetime import datetime -from uuid import UUID - -from sqlalchemy import func, select - -from noteflow.domain.entities import Meeting -from noteflow.domain.value_objects import MeetingId, MeetingState -from noteflow.infrastructure.converters import OrmConverter -from noteflow.infrastructure.persistence.models import MeetingModel -from noteflow.infrastructure.persistence.repositories._base import BaseRepository - - -class SqlAlchemyMeetingRepository(BaseRepository): - """SQLAlchemy implementation of MeetingRepository.""" - - async def create(self, meeting: Meeting) -> Meeting: - """Persist a new meeting. - - Args: - meeting: Meeting to create. - - Returns: - Created meeting. - """ - model = MeetingModel( - id=UUID(str(meeting.id)), - title=meeting.title, - state=int(meeting.state), - created_at=meeting.created_at, - started_at=meeting.started_at, - ended_at=meeting.ended_at, - metadata_=meeting.metadata, - wrapped_dek=meeting.wrapped_dek, - ) - self._session.add(model) - await self._session.flush() - return meeting - - async def get(self, meeting_id: MeetingId) -> Meeting | None: - """Retrieve a meeting by ID. - - Args: - meeting_id: Meeting identifier. - - Returns: - Meeting if found, None otherwise. - """ - stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting_id))) - model = await self._execute_scalar(stmt) - - return None if model is None else OrmConverter.meeting_to_domain(model) - - async def update(self, meeting: Meeting) -> Meeting: - """Update an existing meeting. - - Args: - meeting: Meeting with updated fields. - - Returns: - Updated meeting. - - Raises: - ValueError: If meeting does not exist. - """ - stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting.id))) - model = await self._execute_scalar(stmt) - - if model is None: - raise ValueError(f"Meeting {meeting.id} not found") - - model.title = meeting.title - model.state = int(meeting.state) - model.started_at = meeting.started_at - model.ended_at = meeting.ended_at - model.metadata_ = meeting.metadata - model.wrapped_dek = meeting.wrapped_dek - - await self._session.flush() - return meeting - - async def delete(self, meeting_id: MeetingId) -> bool: - """Delete a meeting and all associated data. - - Args: - meeting_id: Meeting identifier. - - Returns: - True if deleted, False if not found. - """ - stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting_id))) - model = await self._execute_scalar(stmt) - - if model is None: - return False - - await self._delete_and_flush(model) - return True - - async def list_all( - self, - states: list[MeetingState] | None = None, - limit: int = 100, - offset: int = 0, - sort_desc: bool = True, - ) -> tuple[Sequence[Meeting], int]: - """List meetings with optional filtering. - - Args: - states: Optional list of states to filter by. - limit: Maximum number of meetings to return. - offset: Number of meetings to skip. - sort_desc: Sort by created_at descending if True. - - Returns: - Tuple of (meetings list, total count matching filter). 
- """ - # Build base query - stmt = select(MeetingModel) - - # Filter by states - if states: - state_values = [int(s) for s in states] - stmt = stmt.where(MeetingModel.state.in_(state_values)) - - # Count total - count_stmt = select(func.count()).select_from(stmt.subquery()) - total_result = await self._session.execute(count_stmt) - total = total_result.scalar() or 0 - - # Sort and paginate - order_col = MeetingModel.created_at.desc() if sort_desc else MeetingModel.created_at.asc() - stmt = stmt.order_by(order_col).offset(offset).limit(limit) - - result = await self._session.execute(stmt) - models = result.scalars().all() - - meetings = [OrmConverter.meeting_to_domain(m) for m in models] - return meetings, total - - async def count_by_state(self, state: MeetingState) -> int: - """Count meetings in a specific state. - - Args: - state: Meeting state to count. - - Returns: - Number of meetings in the specified state. - """ - stmt = ( - select(func.count()).select_from(MeetingModel).where(MeetingModel.state == int(state)) - ) - result = await self._session.execute(stmt) - return result.scalar() or 0 - - async def find_older_than(self, cutoff: datetime) -> Sequence[Meeting]: - """Find completed meetings older than cutoff date. - - Args: - cutoff: Cutoff datetime; meetings ended before this are returned. - - Returns: - Sequence of meetings with ended_at before cutoff. - """ - # Only consider completed meetings (have ended_at set) - stmt = ( - select(MeetingModel) - .where(MeetingModel.ended_at.isnot(None)) - .where(MeetingModel.ended_at < cutoff) - .order_by(MeetingModel.ended_at.asc()) - ) - result = await self._session.execute(stmt) - models = result.scalars().all() - return [OrmConverter.meeting_to_domain(m) for m in models] -```` - -## File: src/noteflow/infrastructure/persistence/repositories/segment_repo.py -````python -"""SQLAlchemy implementation of SegmentRepository.""" - -from __future__ import annotations - -from collections.abc import Sequence -from uuid import UUID - -from sqlalchemy import func, select - -from noteflow.domain.entities import Segment -from noteflow.domain.value_objects import MeetingId -from noteflow.infrastructure.converters import OrmConverter -from noteflow.infrastructure.persistence.models import SegmentModel, WordTimingModel -from noteflow.infrastructure.persistence.repositories._base import BaseRepository - - -class SqlAlchemySegmentRepository(BaseRepository): - """SQLAlchemy implementation of SegmentRepository.""" - - async def add(self, meeting_id: MeetingId, segment: Segment) -> Segment: - """Add a segment to a meeting. - - Args: - meeting_id: Meeting identifier. - segment: Segment to add. - - Returns: - Added segment with db_id populated. 
- """ - model = SegmentModel( - meeting_id=UUID(str(meeting_id)), - segment_id=segment.segment_id, - text=segment.text, - start_time=segment.start_time, - end_time=segment.end_time, - language=segment.language, - language_confidence=segment.language_confidence, - avg_logprob=segment.avg_logprob, - no_speech_prob=segment.no_speech_prob, - embedding=segment.embedding, - speaker_id=segment.speaker_id, - speaker_confidence=segment.speaker_confidence, - ) - - # Add word timings - for word in segment.words: - word_kwargs = OrmConverter.word_timing_to_orm_kwargs(word) - word_model = WordTimingModel(**word_kwargs) - model.words.append(word_model) - - self._session.add(model) - await self._session.flush() - - # Update segment with db_id - segment.db_id = model.id - segment.meeting_id = meeting_id - return segment - - async def add_batch( - self, - meeting_id: MeetingId, - segments: Sequence[Segment], - ) -> Sequence[Segment]: - """Add multiple segments to a meeting in batch. - - Args: - meeting_id: Meeting identifier. - segments: Segments to add. - - Returns: - Added segments with db_ids populated. - """ - result_segments: list[Segment] = [] - - for segment in segments: - added = await self.add(meeting_id, segment) - result_segments.append(added) - - return result_segments - - async def get_by_meeting( - self, - meeting_id: MeetingId, - include_words: bool = True, - ) -> Sequence[Segment]: - """Get all segments for a meeting. - - Args: - meeting_id: Meeting identifier. - include_words: Include word-level timing. - - Returns: - List of segments ordered by segment_id. - """ - stmt = ( - select(SegmentModel) - .where(SegmentModel.meeting_id == UUID(str(meeting_id))) - .order_by(SegmentModel.segment_id) - ) - - models = await self._execute_scalars(stmt) - - return [OrmConverter.segment_to_domain(m, include_words) for m in models] - - async def search_semantic( - self, - query_embedding: list[float], - limit: int = 10, - meeting_id: MeetingId | None = None, - ) -> Sequence[tuple[Segment, float]]: - """Search segments by semantic similarity. - - Args: - query_embedding: Query embedding vector. - limit: Maximum number of results. - meeting_id: Optional meeting to restrict search to. - - Returns: - List of (segment, similarity_score) tuples. - """ - # Build query with cosine similarity - similarity = SegmentModel.embedding.cosine_distance(query_embedding) - - stmt = select(SegmentModel, similarity.label("distance")).where( - SegmentModel.embedding.is_not(None) - ) - - if meeting_id: - stmt = stmt.where(SegmentModel.meeting_id == UUID(str(meeting_id))) - - stmt = stmt.order_by(similarity).limit(limit) - - result = await self._session.execute(stmt) - rows = result.all() - - results: list[tuple[Segment, float]] = [] - for row in rows: - model = row[0] - distance = row[1] - # Convert distance to similarity (1 - distance for cosine) - similarity_score = 1.0 - float(distance) - segment = OrmConverter.segment_to_domain(model, include_words=False) - results.append((segment, similarity_score)) - - return results - - async def update_embedding( - self, - segment_db_id: int, - embedding: list[float], - ) -> None: - """Update the embedding for a segment. - - Args: - segment_db_id: Segment database primary key. - embedding: New embedding vector. 
- """ - stmt = select(SegmentModel).where(SegmentModel.id == segment_db_id) - result = await self._session.execute(stmt) - if model := result.scalar_one_or_none(): - model.embedding = embedding - await self._session.flush() - - async def update_speaker( - self, - segment_db_id: int, - speaker_id: str | None, - speaker_confidence: float, - ) -> None: - """Update speaker diarization fields for a segment. - - Args: - segment_db_id: Segment database primary key. - speaker_id: Speaker identifier from diarization. - speaker_confidence: Confidence of speaker assignment (0.0-1.0). - """ - stmt = select(SegmentModel).where(SegmentModel.id == segment_db_id) - result = await self._session.execute(stmt) - if model := result.scalar_one_or_none(): - model.speaker_id = speaker_id - model.speaker_confidence = speaker_confidence - await self._session.flush() - - async def get_next_segment_id(self, meeting_id: MeetingId) -> int: - """Get the next segment_id for a meeting. - - Args: - meeting_id: Meeting identifier. - - Returns: - Next segment_id (max + 1), or 0 if no segments exist. - """ - stmt = select(func.max(SegmentModel.segment_id)).where( - SegmentModel.meeting_id == UUID(str(meeting_id)) - ) - result = await self._session.execute(stmt) - max_segment_id = result.scalar_one_or_none() - return 0 if max_segment_id is None else int(max_segment_id) + 1 -```` - -## File: src/noteflow/infrastructure/persistence/unit_of_work.py -````python -"""SQLAlchemy implementation of Unit of Work pattern.""" - -from __future__ import annotations - -from collections.abc import Callable -from typing import Self - -from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker - -from noteflow.config.settings import Settings -from noteflow.infrastructure.persistence.database import ( - create_async_engine, - get_async_session_factory, -) - -from .repositories import ( - SqlAlchemyAnnotationRepository, - SqlAlchemyMeetingRepository, - SqlAlchemySegmentRepository, - SqlAlchemySummaryRepository, -) - - -class SqlAlchemyUnitOfWork: - """SQLAlchemy implementation of Unit of Work. - - Provides transactional consistency across repositories. - Use as an async context manager for automatic commit/rollback. - - Example: - async with SqlAlchemyUnitOfWork(session_factory) as uow: - meeting = await uow.meetings.get(meeting_id) - await uow.segments.add(meeting_id, segment) - await uow.commit() - """ - - def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None: - """Initialize unit of work with session factory. - - Args: - session_factory: Factory for creating async sessions. - """ - self._session_factory = session_factory - self._session: AsyncSession | None = None - self._annotations_repo: SqlAlchemyAnnotationRepository | None = None - self._meetings_repo: SqlAlchemyMeetingRepository | None = None - self._segments_repo: SqlAlchemySegmentRepository | None = None - self._summaries_repo: SqlAlchemySummaryRepository | None = None - - # --- Constructors ------------------------------------------------- - - @classmethod - def from_settings(cls, settings: Settings) -> SqlAlchemyUnitOfWork: - """Create a unit of work from application settings. - - Builds an async engine and session factory using configured database - settings (URL, pool size, echo), then returns a new unit of work - instance bound to that factory. 
- """ - - engine = create_async_engine(settings) - session_factory = get_async_session_factory(engine) - return cls(session_factory) - - @classmethod - def factory_from_settings(cls, settings: Settings) -> Callable[[], SqlAlchemyUnitOfWork]: - """Create a reusable factory that yields fresh UoW instances. - - The factory reuses a shared async session factory (and engine) while - returning a new `SqlAlchemyUnitOfWork` object each time. Useful when - callers need independent UoW instances for sequential operations - (e.g., retention cleanup) to avoid re-entrancy issues. - """ - - engine = create_async_engine(settings) - session_factory = get_async_session_factory(engine) - - def _factory() -> SqlAlchemyUnitOfWork: - return cls(session_factory) - - return _factory - - @property - def annotations(self) -> SqlAlchemyAnnotationRepository: - """Get annotations repository.""" - if self._annotations_repo is None: - raise RuntimeError("UnitOfWork not in context") - return self._annotations_repo - - @property - def meetings(self) -> SqlAlchemyMeetingRepository: - """Get meetings repository.""" - if self._meetings_repo is None: - raise RuntimeError("UnitOfWork not in context") - return self._meetings_repo - - @property - def segments(self) -> SqlAlchemySegmentRepository: - """Get segments repository.""" - if self._segments_repo is None: - raise RuntimeError("UnitOfWork not in context") - return self._segments_repo - - @property - def summaries(self) -> SqlAlchemySummaryRepository: - """Get summaries repository.""" - if self._summaries_repo is None: - raise RuntimeError("UnitOfWork not in context") - return self._summaries_repo - - async def __aenter__(self) -> Self: - """Enter the unit of work context. - - Creates session and caches repository instances. - - Returns: - Self for use in async with statement. - """ - self._session = self._session_factory() - self._annotations_repo = SqlAlchemyAnnotationRepository(self._session) - self._meetings_repo = SqlAlchemyMeetingRepository(self._session) - self._segments_repo = SqlAlchemySegmentRepository(self._session) - self._summaries_repo = SqlAlchemySummaryRepository(self._session) - return self - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc_val: BaseException | None, - exc_tb: object, - ) -> None: - """Exit the unit of work context. - - Rolls back on exception, otherwise does nothing (explicit commit required). - - Args: - exc_type: Exception type if raised. - exc_val: Exception value if raised. - exc_tb: Exception traceback if raised. 
- """ - if self._session is None: - return - - if exc_type is not None: - await self.rollback() - - await self._session.close() - self._session = None - self._annotations_repo = None - self._meetings_repo = None - self._segments_repo = None - self._summaries_repo = None - - async def commit(self) -> None: - """Commit the current transaction.""" - if self._session is None: - raise RuntimeError("UnitOfWork not in context") - await self._session.commit() - - async def rollback(self) -> None: - """Rollback the current transaction.""" - if self._session is None: - raise RuntimeError("UnitOfWork not in context") - await self._session.rollback() -```` - -## File: tests/application/test_meeting_service.py -````python -"""Tests for MeetingService application service.""" - -from __future__ import annotations - -from datetime import UTC, datetime -from pathlib import Path -from typing import TYPE_CHECKING -from unittest.mock import AsyncMock, MagicMock -from uuid import uuid4 - -import pytest - -from noteflow.application.services.meeting_service import MeetingService -from noteflow.domain.entities import Annotation, Meeting, Segment, Summary -from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState - -if TYPE_CHECKING: - from collections.abc import Sequence - - -class TestMeetingServiceCreation: - """Tests for meeting creation operations.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.rollback = AsyncMock() - uow.meetings = MagicMock() - uow.segments = MagicMock() - uow.summaries = MagicMock() - return uow - - async def test_create_meeting_success(self, mock_uow: MagicMock) -> None: - """Test successful meeting creation.""" - created_meeting = Meeting.create(title="Test Meeting") - mock_uow.meetings.create = AsyncMock(return_value=created_meeting) - - service = MeetingService(mock_uow) - result = await service.create_meeting(title="Test Meeting") - - assert result.title == "Test Meeting" - mock_uow.meetings.create.assert_called_once() - mock_uow.commit.assert_called_once() - - async def test_create_meeting_with_metadata(self, mock_uow: MagicMock) -> None: - """Test meeting creation with metadata.""" - metadata = {"project": "NoteFlow"} - created_meeting = Meeting.create(title="Test", metadata=metadata) - mock_uow.meetings.create = AsyncMock(return_value=created_meeting) - - service = MeetingService(mock_uow) - result = await service.create_meeting(title="Test", metadata=metadata) - - assert result.metadata == metadata - - -class TestMeetingServiceRetrieval: - """Tests for meeting retrieval operations.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - uow.segments = MagicMock() - uow.summaries = MagicMock() - return uow - - async def test_get_meeting_found(self, mock_uow: MagicMock) -> None: - """Test retrieving existing meeting.""" - meeting_id = MeetingId(uuid4()) - expected_meeting = Meeting.create(title="Found") - mock_uow.meetings.get = AsyncMock(return_value=expected_meeting) - - service = MeetingService(mock_uow) - result = await service.get_meeting(meeting_id) - - assert result is not None - assert result.title == "Found" - - async def 
test_get_meeting_not_found(self, mock_uow: MagicMock) -> None: - """Test retrieving non-existent meeting.""" - meeting_id = MeetingId(uuid4()) - mock_uow.meetings.get = AsyncMock(return_value=None) - - service = MeetingService(mock_uow) - result = await service.get_meeting(meeting_id) - - assert result is None - - async def test_list_meetings(self, mock_uow: MagicMock) -> None: - """Test listing meetings with pagination.""" - meetings: Sequence[Meeting] = [ - Meeting.create(title="Meeting 1"), - Meeting.create(title="Meeting 2"), - ] - mock_uow.meetings.list_all = AsyncMock(return_value=(meetings, 10)) - - service = MeetingService(mock_uow) - result, total = await service.list_meetings(limit=2, offset=0) - - assert len(result) == 2 - assert total == 10 - mock_uow.meetings.list_all.assert_called_once_with( - states=None, limit=2, offset=0, sort_desc=True - ) - - -class TestMeetingServiceStateTransitions: - """Tests for meeting state transition operations.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - return uow - - async def test_start_recording_success(self, mock_uow: MagicMock) -> None: - """Test starting recording on existing meeting.""" - meeting = Meeting.create(title="Test") - meeting_id = meeting.id - mock_uow.meetings.get = AsyncMock(return_value=meeting) - mock_uow.meetings.update = AsyncMock(return_value=meeting) - - service = MeetingService(mock_uow) - result = await service.start_recording(meeting_id) - - assert result is not None - assert result.state == MeetingState.RECORDING - mock_uow.commit.assert_called_once() - - async def test_start_recording_invalid_state_raises(self, mock_uow: MagicMock) -> None: - """Test start_recording propagates invalid transition errors.""" - meeting = Meeting.create(title="Test") - meeting.start_recording() - mock_uow.meetings.get = AsyncMock(return_value=meeting) - - service = MeetingService(mock_uow) - - with pytest.raises(ValueError, match="Cannot start recording"): - await service.start_recording(meeting.id) - - mock_uow.commit.assert_not_called() - - async def test_start_recording_not_found(self, mock_uow: MagicMock) -> None: - """Test starting recording on non-existent meeting.""" - meeting_id = MeetingId(uuid4()) - mock_uow.meetings.get = AsyncMock(return_value=None) - - service = MeetingService(mock_uow) - result = await service.start_recording(meeting_id) - - assert result is None - mock_uow.commit.assert_not_called() - - async def test_stop_meeting_success(self, mock_uow: MagicMock) -> None: - """Test stopping recording on meeting.""" - meeting = Meeting.create(title="Test") - meeting.start_recording() # Move to RECORDING state - meeting_id = meeting.id - mock_uow.meetings.get = AsyncMock(return_value=meeting) - mock_uow.meetings.update = AsyncMock(return_value=meeting) - - service = MeetingService(mock_uow) - result = await service.stop_meeting(meeting_id) - - assert result is not None - assert result.state == MeetingState.STOPPED - mock_uow.commit.assert_called_once() - - async def test_stop_meeting_invalid_state_raises(self, mock_uow: MagicMock) -> None: - """Test stop_meeting raises when not in RECORDING state.""" - meeting = Meeting.create(title="Test") - mock_uow.meetings.get = AsyncMock(return_value=meeting) - - service = MeetingService(mock_uow) - - with pytest.raises(ValueError, match="Cannot begin stopping"): - 
await service.stop_meeting(meeting.id) - - mock_uow.commit.assert_not_called() - - async def test_complete_meeting_success(self, mock_uow: MagicMock) -> None: - """Test completing a stopped meeting.""" - meeting = Meeting.create(title="Test") - meeting.start_recording() - meeting.begin_stopping() - meeting.stop_recording() # Move to STOPPED state (via STOPPING) - meeting_id = meeting.id - mock_uow.meetings.get = AsyncMock(return_value=meeting) - mock_uow.meetings.update = AsyncMock(return_value=meeting) - - service = MeetingService(mock_uow) - result = await service.complete_meeting(meeting_id) - - assert result is not None - assert result.state == MeetingState.COMPLETED - mock_uow.commit.assert_called_once() - - async def test_complete_meeting_invalid_state_raises(self, mock_uow: MagicMock) -> None: - """Test complete_meeting raises from invalid state.""" - meeting = Meeting.create(title="Test") - mock_uow.meetings.get = AsyncMock(return_value=meeting) - - service = MeetingService(mock_uow) - - with pytest.raises(ValueError, match="Cannot complete"): - await service.complete_meeting(meeting.id) - - mock_uow.commit.assert_not_called() - - -class TestMeetingServiceDeletion: - """Tests for meeting deletion operations.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - return uow - - async def test_delete_meeting_success(self, mock_uow: MagicMock) -> None: - """Test successful meeting deletion.""" - meeting_id = MeetingId(uuid4()) - mock_meeting = Meeting.create(title="Test Meeting") - mock_uow.meetings.get = AsyncMock(return_value=mock_meeting) - mock_uow.meetings.delete = AsyncMock(return_value=True) - - service = MeetingService(mock_uow) - result = await service.delete_meeting(meeting_id) - - assert result is True - mock_uow.commit.assert_called_once() - - async def test_delete_meeting_not_found(self, mock_uow: MagicMock) -> None: - """Test deleting non-existent meeting returns False.""" - meeting_id = MeetingId(uuid4()) - mock_uow.meetings.get = AsyncMock(return_value=None) - mock_uow.meetings.delete = AsyncMock(return_value=False) - - service = MeetingService(mock_uow) - result = await service.delete_meeting(meeting_id) - - assert result is False - mock_uow.meetings.delete.assert_not_called() - mock_uow.commit.assert_not_called() - - async def test_delete_meeting_removes_filesystem_assets( - self, mock_uow: MagicMock, tmp_path: Path - ) -> None: - """Test deletion removes filesystem assets when directory provided.""" - meeting_id = MeetingId(uuid4()) - mock_meeting = Meeting.create(title="Test Meeting") - mock_uow.meetings.get = AsyncMock(return_value=mock_meeting) - mock_uow.meetings.delete = AsyncMock(return_value=True) - - # Create meeting directory with test files - meeting_dir = tmp_path / str(meeting_id) - meeting_dir.mkdir() - (meeting_dir / "audio.wav").touch() - (meeting_dir / "manifest.json").touch() - - service = MeetingService(mock_uow) - result = await service.delete_meeting(meeting_id, meetings_dir=tmp_path) - - assert result is True - assert not meeting_dir.exists() - - async def test_delete_meeting_handles_missing_assets( - self, mock_uow: MagicMock, tmp_path: Path - ) -> None: - """Test deletion succeeds even when assets directory doesn't exist.""" - meeting_id = MeetingId(uuid4()) - mock_meeting = Meeting.create(title="Test Meeting") - mock_uow.meetings.get = 
AsyncMock(return_value=mock_meeting) - mock_uow.meetings.delete = AsyncMock(return_value=True) - - # Don't create the meeting directory - service = MeetingService(mock_uow) - result = await service.delete_meeting(meeting_id, meetings_dir=tmp_path) - - assert result is True - mock_uow.commit.assert_called_once() - - async def test_delete_meeting_without_dir_only_deletes_db(self, mock_uow: MagicMock) -> None: - """Test deletion without meetings_dir only deletes database records.""" - meeting_id = MeetingId(uuid4()) - mock_meeting = Meeting.create(title="Test Meeting") - mock_uow.meetings.get = AsyncMock(return_value=mock_meeting) - mock_uow.meetings.delete = AsyncMock(return_value=True) - - service = MeetingService(mock_uow) - result = await service.delete_meeting(meeting_id) - - assert result is True - mock_uow.meetings.delete.assert_called_once_with(meeting_id) - mock_uow.commit.assert_called_once() - - -class TestMeetingServiceSegments: - """Tests for segment operations.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.segments = MagicMock() - return uow - - async def test_add_segment_success(self, mock_uow: MagicMock) -> None: - """Test adding a segment to meeting.""" - meeting_id = MeetingId(uuid4()) - segment = Segment( - segment_id=0, text="Hello", start_time=0.0, end_time=1.0, meeting_id=meeting_id - ) - mock_uow.segments.add = AsyncMock(return_value=segment) - - service = MeetingService(mock_uow) - result = await service.add_segment( - meeting_id=meeting_id, - segment_id=0, - text="Hello", - start_time=0.0, - end_time=1.0, - ) - - assert result.text == "Hello" - mock_uow.segments.add.assert_called_once() - mock_uow.commit.assert_called_once() - - async def test_get_segments(self, mock_uow: MagicMock) -> None: - """Test retrieving segments for meeting.""" - meeting_id = MeetingId(uuid4()) - segments: Sequence[Segment] = [ - Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0), - Segment(segment_id=1, text="Second", start_time=1.0, end_time=2.0), - ] - mock_uow.segments.get_by_meeting = AsyncMock(return_value=segments) - - service = MeetingService(mock_uow) - result = await service.get_segments(meeting_id) - - assert len(result) == 2 - mock_uow.segments.get_by_meeting.assert_called_once_with(meeting_id, include_words=True) - - async def test_add_segments_batch(self, mock_uow: MagicMock) -> None: - """Test batch adding segments commits once.""" - meeting_id = MeetingId(uuid4()) - segments = [ - Segment(segment_id=0, text="A", start_time=0.0, end_time=1.0), - Segment(segment_id=1, text="B", start_time=1.0, end_time=2.0), - ] - mock_uow.segments.add_batch = AsyncMock(return_value=segments) - - service = MeetingService(mock_uow) - result = await service.add_segments_batch(meeting_id=meeting_id, segments=segments) - - assert len(result) == 2 - mock_uow.segments.add_batch.assert_called_once_with(meeting_id, segments) - mock_uow.commit.assert_called_once() - - -class TestMeetingServiceSummaries: - """Tests for summary operations.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.summaries = MagicMock() - return uow - - async def test_save_summary_success(self, mock_uow: MagicMock) -> None: - """Test saving 
a meeting summary.""" - meeting_id = MeetingId(uuid4()) - summary = Summary( - meeting_id=meeting_id, - executive_summary="Test summary", - generated_at=datetime.now(UTC), - model_version="test-v1", - ) - mock_uow.summaries.save = AsyncMock(return_value=summary) - - service = MeetingService(mock_uow) - result = await service.save_summary( - meeting_id=meeting_id, - executive_summary="Test summary", - model_version="test-v1", - ) - - assert result.executive_summary == "Test summary" - mock_uow.summaries.save.assert_called_once() - mock_uow.commit.assert_called_once() - - async def test_get_summary_found(self, mock_uow: MagicMock) -> None: - """Test retrieving existing summary.""" - meeting_id = MeetingId(uuid4()) - summary = Summary(meeting_id=meeting_id, executive_summary="Found") - mock_uow.summaries.get_by_meeting = AsyncMock(return_value=summary) - - service = MeetingService(mock_uow) - result = await service.get_summary(meeting_id) - - assert result is not None - assert result.executive_summary == "Found" - - async def test_get_summary_not_found(self, mock_uow: MagicMock) -> None: - """Test retrieving non-existent summary.""" - meeting_id = MeetingId(uuid4()) - mock_uow.summaries.get_by_meeting = AsyncMock(return_value=None) - - service = MeetingService(mock_uow) - result = await service.get_summary(meeting_id) - - assert result is None - - -class TestMeetingServiceSearch: - """Tests for semantic search operations.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.segments = MagicMock() - return uow - - async def test_search_segments_delegates(self, mock_uow: MagicMock) -> None: - """Test search_segments delegates to repository.""" - meeting_id = MeetingId(uuid4()) - segment = Segment(segment_id=0, text="A", start_time=0.0, end_time=1.0) - mock_uow.segments.search_semantic = AsyncMock(return_value=[(segment, 0.9)]) - - service = MeetingService(mock_uow) - result = await service.search_segments(query_embedding=[0.1], meeting_id=meeting_id) - - assert len(result) == 1 - mock_uow.segments.search_semantic.assert_called_once_with( - query_embedding=[0.1], limit=10, meeting_id=meeting_id - ) - - -class TestMeetingServiceAnnotations: - """Tests for annotation operations.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.annotations = MagicMock() - return uow - - async def test_add_annotation_success(self, mock_uow: MagicMock) -> None: - """Test adding an annotation commits and returns saved entity.""" - meeting_id = MeetingId(uuid4()) - mock_uow.annotations.add = AsyncMock() - - service = MeetingService(mock_uow) - await service.add_annotation( - meeting_id=meeting_id, - annotation_type=AnnotationType.NOTE, - text="Note", - start_time=0.0, - end_time=1.0, - ) - - mock_uow.annotations.add.assert_called_once() - mock_uow.commit.assert_called_once() - - async def test_get_annotations_in_range(self, mock_uow: MagicMock) -> None: - """Test get_annotations_in_range delegates to repository.""" - meeting_id = MeetingId(uuid4()) - mock_uow.annotations.get_by_time_range = AsyncMock(return_value=[]) - - service = MeetingService(mock_uow) - await service.get_annotations_in_range(meeting_id, start_time=1.0, end_time=2.0) - - 
mock_uow.annotations.get_by_time_range.assert_called_once_with(meeting_id, 1.0, 2.0) - - async def test_update_annotation_not_found_raises(self, mock_uow: MagicMock) -> None: - """Test update_annotation propagates repository errors.""" - meeting_id = MeetingId(uuid4()) - annotation = Annotation( - id=AnnotationId(uuid4()), - meeting_id=meeting_id, - annotation_type=AnnotationType.NOTE, - text="Note", - start_time=0.0, - end_time=1.0, - ) - mock_uow.annotations.update = AsyncMock(side_effect=ValueError("Annotation not found")) - - service = MeetingService(mock_uow) - with pytest.raises(ValueError, match="Annotation not found"): - await service.update_annotation(annotation) - - mock_uow.commit.assert_not_called() - - async def test_delete_annotation_not_found(self, mock_uow: MagicMock) -> None: - """Test delete_annotation returns False when missing.""" - annotation_id = AnnotationId(uuid4()) - mock_uow.annotations.delete = AsyncMock(return_value=False) - - service = MeetingService(mock_uow) - result = await service.delete_annotation(annotation_id) - - assert result is False - mock_uow.commit.assert_not_called() - - -class TestMeetingServiceAdditionalBranches: - """Additional branch coverage for MeetingService.""" - - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork with all repos.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - uow.segments = MagicMock() - uow.summaries = MagicMock() - uow.annotations = MagicMock() - return uow - - async def test_stop_meeting_not_found(self, mock_uow: MagicMock) -> None: - """stop_meeting should return None when meeting is missing.""" - mock_uow.meetings.get = AsyncMock(return_value=None) - service = MeetingService(mock_uow) - - result = await service.stop_meeting(MeetingId(uuid4())) - - assert result is None - mock_uow.commit.assert_not_called() - - async def test_complete_meeting_not_found(self, mock_uow: MagicMock) -> None: - """complete_meeting should return None when meeting is missing.""" - mock_uow.meetings.get = AsyncMock(return_value=None) - service = MeetingService(mock_uow) - - result = await service.complete_meeting(MeetingId(uuid4())) - - assert result is None - mock_uow.commit.assert_not_called() - - async def test_get_annotation_delegates_repository(self, mock_uow: MagicMock) -> None: - """get_annotation should delegate to repository.""" - annotation = Annotation( - id=AnnotationId(uuid4()), - meeting_id=MeetingId(uuid4()), - annotation_type=AnnotationType.NOTE, - text="note", - start_time=0.0, - end_time=1.0, - ) - mock_uow.annotations.get = AsyncMock(return_value=annotation) - service = MeetingService(mock_uow) - - result = await service.get_annotation(annotation.id) - - assert result is annotation - mock_uow.annotations.get.assert_called_once_with(annotation.id) - - async def test_get_annotations_delegates_repository(self, mock_uow: MagicMock) -> None: - """get_annotations should delegate to repository.""" - meeting_id = MeetingId(uuid4()) - mock_uow.annotations.get_by_meeting = AsyncMock(return_value=[]) - service = MeetingService(mock_uow) - - await service.get_annotations(meeting_id) - - mock_uow.annotations.get_by_meeting.assert_called_once_with(meeting_id) - - async def test_delete_annotation_success_commits(self, mock_uow: MagicMock) -> None: - """delete_annotation should commit on success.""" - annotation_id = AnnotationId(uuid4()) - mock_uow.annotations.delete = 
AsyncMock(return_value=True) - service = MeetingService(mock_uow) - - result = await service.delete_annotation(annotation_id) - - assert result is True - mock_uow.commit.assert_called_once() -```` - -## File: tests/application/test_summarization_service.py -````python -"""Tests for summarization service.""" - -from __future__ import annotations - -from datetime import UTC, datetime -from uuid import uuid4 - -import pytest - -from noteflow.application.services import ( - SummarizationMode, - SummarizationService, - SummarizationServiceSettings, -) -from noteflow.domain.entities import KeyPoint, Segment, Summary -from noteflow.domain.summarization import ( - CitationVerificationResult, - ProviderUnavailableError, - SummarizationRequest, - SummarizationResult, -) -from noteflow.domain.value_objects import MeetingId - - -def _segment(segment_id: int, text: str = "Test") -> Segment: - """Create a test segment.""" - return Segment( - segment_id=segment_id, - text=text, - start_time=segment_id * 5.0, - end_time=(segment_id + 1) * 5.0, - ) - - -class MockProvider: - """Mock summarizer provider for testing.""" - - def __init__( - self, - name: str = "mock", - available: bool = True, - requires_consent: bool = False, - ) -> None: - self._name = name - self._available = available - self._requires_consent = requires_consent - self.call_count = 0 - - @property - def provider_name(self) -> str: - return self._name - - @property - def is_available(self) -> bool: - return self._available - - @property - def requires_cloud_consent(self) -> bool: - return self._requires_consent - - async def summarize(self, request: SummarizationRequest) -> SummarizationResult: - self.call_count += 1 - summary = Summary( - meeting_id=request.meeting_id, - executive_summary=f"Summary from {self._name}", - key_points=[KeyPoint(text=f"Point from {self._name}", segment_ids=[0])], - generated_at=datetime.now(UTC), - ) - return SummarizationResult( - summary=summary, - model_name=f"{self._name}-model", - provider_name=self._name, - ) - - -class MockVerifier: - """Mock citation verifier for testing.""" - - def __init__(self, is_valid: bool = True) -> None: - self._is_valid = is_valid - self.verify_call_count = 0 - self.filter_call_count = 0 - - def verify_citations( - self, summary: Summary, segments: list[Segment] - ) -> CitationVerificationResult: - self.verify_call_count += 1 - if self._is_valid: - return CitationVerificationResult(is_valid=True) - return CitationVerificationResult( - is_valid=False, - invalid_key_point_indices=(0,), - missing_segment_ids=(99,), - ) - - def filter_invalid_citations(self, summary: Summary, segments: list[Segment]) -> Summary: - self.filter_call_count += 1 - # Return summary with empty segment_ids for key points - return Summary( - meeting_id=summary.meeting_id, - executive_summary=summary.executive_summary, - key_points=[KeyPoint(text=kp.text, segment_ids=[]) for kp in summary.key_points], - action_items=[], - generated_at=summary.generated_at, - ) - - -class TestSummarizationServiceConfiguration: - """Tests for SummarizationService configuration.""" - - def test_register_provider(self) -> None: - """Provider should be registered for mode.""" - service = SummarizationService() - provider = MockProvider() - - service.register_provider(SummarizationMode.LOCAL, provider) - - assert SummarizationMode.LOCAL in service.providers - - def test_set_verifier(self) -> None: - """Verifier should be set.""" - service = SummarizationService() - verifier = MockVerifier() - - service.set_verifier(verifier) - - 
assert service.verifier is verifier - - def test_get_available_modes_with_local(self) -> None: - """Available modes should include local when provider is available.""" - service = SummarizationService() - service.register_provider(SummarizationMode.LOCAL, MockProvider()) - - available = service.get_available_modes() - - assert SummarizationMode.LOCAL in available - - def test_get_available_modes_excludes_unavailable(self) -> None: - """Unavailable providers should not be in available modes.""" - service = SummarizationService() - service.register_provider(SummarizationMode.LOCAL, MockProvider(available=False)) - - available = service.get_available_modes() - - assert SummarizationMode.LOCAL not in available - - def test_cloud_requires_consent(self) -> None: - """Cloud mode should require consent to be available.""" - service = SummarizationService() - service.register_provider( - SummarizationMode.CLOUD, - MockProvider(name="cloud", requires_consent=True), - ) - - available_without_consent = service.get_available_modes() - service.grant_cloud_consent() - available_with_consent = service.get_available_modes() - - assert SummarizationMode.CLOUD not in available_without_consent - assert SummarizationMode.CLOUD in available_with_consent - - def test_revoke_cloud_consent(self) -> None: - """Revoking consent should remove cloud from available modes.""" - service = SummarizationService() - service.register_provider( - SummarizationMode.CLOUD, - MockProvider(name="cloud", requires_consent=True), - ) - service.grant_cloud_consent() - - service.revoke_cloud_consent() - available = service.get_available_modes() - - assert SummarizationMode.CLOUD not in available - - -class TestSummarizationServiceSummarize: - """Tests for SummarizationService.summarize method.""" - - @pytest.fixture - def meeting_id(self) -> MeetingId: - """Create test meeting ID.""" - return MeetingId(uuid4()) - - @pytest.mark.asyncio - async def test_summarize_uses_default_mode(self, meeting_id: MeetingId) -> None: - """Summarize should use default mode when not specified.""" - provider = MockProvider() - service = SummarizationService( - settings=SummarizationServiceSettings(default_mode=SummarizationMode.LOCAL) - ) - service.register_provider(SummarizationMode.LOCAL, provider) - - segments = [_segment(0)] - result = await service.summarize(meeting_id, segments) - - assert result.provider_used == "mock" - assert provider.call_count == 1 - - @pytest.mark.asyncio - async def test_summarize_uses_specified_mode(self, meeting_id: MeetingId) -> None: - """Summarize should use specified mode.""" - local_provider = MockProvider(name="local") - mock_provider = MockProvider(name="mock") - service = SummarizationService() - service.register_provider(SummarizationMode.LOCAL, local_provider) - service.register_provider(SummarizationMode.MOCK, mock_provider) - - segments = [_segment(0)] - result = await service.summarize(meeting_id, segments, mode=SummarizationMode.MOCK) - - assert result.provider_used == "mock" - assert mock_provider.call_count == 1 - assert local_provider.call_count == 0 - - @pytest.mark.asyncio - async def test_summarize_falls_back_on_unavailable(self, meeting_id: MeetingId) -> None: - """Should fall back to available provider when primary unavailable.""" - unavailable = MockProvider(name="cloud", available=False) - fallback = MockProvider(name="local") - service = SummarizationService( - settings=SummarizationServiceSettings( - fallback_to_local=True, - cloud_consent_granted=True, - ) - ) - 
service.register_provider(SummarizationMode.CLOUD, unavailable) - service.register_provider(SummarizationMode.LOCAL, fallback) - - segments = [_segment(0)] - result = await service.summarize(meeting_id, segments, mode=SummarizationMode.CLOUD) - - assert result.provider_used == "local" - assert result.fallback_used is True - - @pytest.mark.asyncio - async def test_summarize_raises_when_no_fallback(self, meeting_id: MeetingId) -> None: - """Should raise error when no fallback available.""" - unavailable = MockProvider(name="local", available=False) - service = SummarizationService( - settings=SummarizationServiceSettings(fallback_to_local=False) - ) - service.register_provider(SummarizationMode.LOCAL, unavailable) - - segments = [_segment(0)] - with pytest.raises(ProviderUnavailableError): - await service.summarize(meeting_id, segments, mode=SummarizationMode.LOCAL) - - @pytest.mark.asyncio - async def test_summarize_verifies_citations(self, meeting_id: MeetingId) -> None: - """Citations should be verified when enabled.""" - provider = MockProvider() - verifier = MockVerifier(is_valid=True) - service = SummarizationService(settings=SummarizationServiceSettings(verify_citations=True)) - service.register_provider(SummarizationMode.LOCAL, provider) - service.set_verifier(verifier) - - segments = [_segment(0)] - result = await service.summarize(meeting_id, segments) - - assert verifier.verify_call_count == 1 - assert result.verification is not None - assert result.verification.is_valid is True - - @pytest.mark.asyncio - async def test_summarize_filters_invalid_citations(self, meeting_id: MeetingId) -> None: - """Invalid citations should be filtered when enabled.""" - provider = MockProvider() - verifier = MockVerifier(is_valid=False) - service = SummarizationService( - settings=SummarizationServiceSettings( - verify_citations=True, - filter_invalid_citations=True, - ) - ) - service.register_provider(SummarizationMode.LOCAL, provider) - service.set_verifier(verifier) - - segments = [_segment(0)] - result = await service.summarize(meeting_id, segments) - - assert verifier.filter_call_count == 1 - assert result.filtered_summary is not None - assert result.has_invalid_citations is True - - @pytest.mark.asyncio - async def test_summarize_passes_max_limits(self, meeting_id: MeetingId) -> None: - """Max limits should be passed to provider.""" - captured_request: SummarizationRequest | None = None - - class CapturingProvider(MockProvider): - async def summarize(self, request: SummarizationRequest) -> SummarizationResult: - nonlocal captured_request - captured_request = request - return await super().summarize(request) - - provider = CapturingProvider() - service = SummarizationService() - service.register_provider(SummarizationMode.LOCAL, provider) - - segments = [_segment(0)] - await service.summarize(meeting_id, segments, max_key_points=3, max_action_items=5) - - assert captured_request is not None - assert captured_request.max_key_points == 3 - assert captured_request.max_action_items == 5 - - @pytest.mark.asyncio - async def test_summarize_requires_cloud_consent(self, meeting_id: MeetingId) -> None: - """Cloud mode should require consent.""" - cloud = MockProvider(name="cloud", requires_consent=True) - fallback = MockProvider(name="local") - service = SummarizationService( - settings=SummarizationServiceSettings( - cloud_consent_granted=False, fallback_to_local=True - ) - ) - service.register_provider(SummarizationMode.CLOUD, cloud) - service.register_provider(SummarizationMode.LOCAL, fallback) - - 
segments = [_segment(0)] - result = await service.summarize(meeting_id, segments, mode=SummarizationMode.CLOUD) - - assert result.provider_used == "local" - assert result.fallback_used is True - assert cloud.call_count == 0 - - @pytest.mark.asyncio - async def test_summarize_calls_persist_callback(self, meeting_id: MeetingId) -> None: - """Persist callback should be called with final summary.""" - persisted: list[Summary] = [] - - async def mock_persist(summary: Summary) -> None: - persisted.append(summary) - - provider = MockProvider() - service = SummarizationService(on_persist=mock_persist) - service.register_provider(SummarizationMode.LOCAL, provider) - - segments = [_segment(0)] - await service.summarize(meeting_id, segments) - - assert len(persisted) == 1 - assert persisted[0].meeting_id == meeting_id - - @pytest.mark.asyncio - async def test_summarize_persist_callback_receives_filtered_summary( - self, meeting_id: MeetingId - ) -> None: - """Persist callback should receive filtered summary when available.""" - persisted: list[Summary] = [] - - async def mock_persist(summary: Summary) -> None: - persisted.append(summary) - - provider = MockProvider() - verifier = MockVerifier(is_valid=False) - service = SummarizationService( - settings=SummarizationServiceSettings( - verify_citations=True, - filter_invalid_citations=True, - ), - on_persist=mock_persist, - ) - service.register_provider(SummarizationMode.LOCAL, provider) - service.set_verifier(verifier) - - segments = [_segment(0)] - result = await service.summarize(meeting_id, segments) - - assert len(persisted) == 1 - # Should persist the filtered summary, not original - assert persisted[0] is result.filtered_summary - - -class TestSummarizationServiceResult: - """Tests for SummarizationServiceResult.""" - - def test_summary_returns_filtered_when_available(self) -> None: - """summary property should return filtered_summary if available.""" - from noteflow.application.services import SummarizationServiceResult - - original = Summary( - meeting_id=MeetingId(uuid4()), - executive_summary="Original", - key_points=[KeyPoint(text="Point", segment_ids=[99])], - ) - filtered = Summary( - meeting_id=original.meeting_id, - executive_summary="Original", - key_points=[KeyPoint(text="Point", segment_ids=[])], - ) - result = SummarizationServiceResult( - result=SummarizationResult( - summary=original, - model_name="test", - provider_name="test", - ), - filtered_summary=filtered, - ) - - assert result.summary is filtered - - def test_summary_returns_original_when_no_filter(self) -> None: - """summary property should return original when no filter applied.""" - from noteflow.application.services import SummarizationServiceResult - - original = Summary( - meeting_id=MeetingId(uuid4()), - executive_summary="Original", - key_points=[], - ) - result = SummarizationServiceResult( - result=SummarizationResult( - summary=original, - model_name="test", - provider_name="test", - ), - ) - - assert result.summary is original - - def test_has_invalid_citations_true(self) -> None: - """has_invalid_citations should be True when verification fails.""" - from noteflow.application.services import SummarizationServiceResult - - result = SummarizationServiceResult( - result=SummarizationResult( - summary=Summary( - meeting_id=MeetingId(uuid4()), - executive_summary="Test", - key_points=[], - ), - model_name="test", - provider_name="test", - ), - verification=CitationVerificationResult(is_valid=False, invalid_key_point_indices=(0,)), - ) - - assert 
result.has_invalid_citations is True - - def test_has_invalid_citations_false_when_valid(self) -> None: - """has_invalid_citations should be False when verification passes.""" - from noteflow.application.services import SummarizationServiceResult - - result = SummarizationServiceResult( - result=SummarizationResult( - summary=Summary( - meeting_id=MeetingId(uuid4()), - executive_summary="Test", - key_points=[], - ), - model_name="test", - provider_name="test", - ), - verification=CitationVerificationResult(is_valid=True), - ) - - assert result.has_invalid_citations is False - - -class TestSummarizationServiceAdditionalBranches: - """Additional branch and utility coverage.""" - - @pytest.fixture - def meeting_id(self) -> MeetingId: - """Create test meeting ID.""" - return MeetingId(uuid4()) - - def test_is_mode_available_false_when_not_registered(self) -> None: - """is_mode_available should respect registered providers.""" - service = SummarizationService() - assert service.is_mode_available(SummarizationMode.LOCAL) is False - - @pytest.mark.asyncio - async def test_cloud_without_consent_and_no_fallback_raises( - self, meeting_id: MeetingId - ) -> None: - """Requesting cloud without consent should raise when fallback disabled.""" - provider = MockProvider(name="cloud", available=True) - service = SummarizationService( - providers={SummarizationMode.CLOUD: provider}, - settings=SummarizationServiceSettings( - default_mode=SummarizationMode.CLOUD, - cloud_consent_granted=False, - fallback_to_local=False, - ), - ) - - with pytest.raises(ProviderUnavailableError): - await service.summarize(meeting_id, [_segment(0)], mode=SummarizationMode.CLOUD) - - @pytest.mark.asyncio - async def test_no_fallback_provider_available_raises(self, meeting_id: MeetingId) -> None: - """When no fallback provider exists, provider selection should fail.""" - unavailable = MockProvider(name="cloud", available=False) - service = SummarizationService( - providers={SummarizationMode.CLOUD: unavailable}, - settings=SummarizationServiceSettings(fallback_to_local=True), - ) - - with pytest.raises(ProviderUnavailableError): - await service.summarize(meeting_id, [_segment(0)], mode=SummarizationMode.CLOUD) - - def test_filter_citations_returns_summary_when_no_verifier(self) -> None: - """_filter_citations should return original summary when verifier is absent.""" - summary = Summary( - meeting_id=MeetingId(uuid4()), - executive_summary="Exec", - generated_at=datetime.now(UTC), - ) - service = SummarizationService() - - result = service._filter_citations(summary, []) - - assert result is summary - - def test_set_default_mode_updates_settings(self) -> None: - """set_default_mode should update default mode.""" - service = SummarizationService() - service.set_default_mode(SummarizationMode.MOCK) - assert service.settings.default_mode == SummarizationMode.MOCK - - def test_set_persist_callback_updates_callback(self) -> None: - """set_persist_callback should update on_persist field.""" - - async def callback(summary: Summary) -> None: - pass - - service = SummarizationService() - assert service.on_persist is None - - service.set_persist_callback(callback) - assert service.on_persist is callback - - service.set_persist_callback(None) - assert service.on_persist is None -```` - -## File: tests/application/test_trigger_service.py -````python -"""Tests for TriggerService application logic.""" - -from __future__ import annotations - -import time -from dataclasses import dataclass - -import pytest - -from 
noteflow.application.services.trigger_service import ( - TriggerService, - TriggerServiceSettings, -) -from noteflow.domain.triggers import TriggerAction, TriggerSignal, TriggerSource - - -@dataclass -class FakeProvider: - """Simple signal provider for testing.""" - - signal: TriggerSignal | None - enabled: bool = True - calls: int = 0 - - @property - def source(self) -> TriggerSource: - return TriggerSource.AUDIO_ACTIVITY - - @property - def max_weight(self) -> float: - return 1.0 - - def is_enabled(self) -> bool: - return self.enabled - - def get_signal(self) -> TriggerSignal | None: - self.calls += 1 - return self.signal - - -def _settings( - *, - enabled: bool = True, - auto_start: bool = False, - rate_limit_seconds: int = 60, - snooze_seconds: int = 30, - threshold_ignore: float = 0.2, - threshold_auto: float = 0.8, -) -> TriggerServiceSettings: - return TriggerServiceSettings( - enabled=enabled, - auto_start_enabled=auto_start, - rate_limit_seconds=rate_limit_seconds, - snooze_seconds=snooze_seconds, - threshold_ignore=threshold_ignore, - threshold_auto_start=threshold_auto, - ) - - -def test_trigger_service_disabled_skips_providers() -> None: - """Disabled trigger service should ignore without evaluating providers.""" - provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5)) - service = TriggerService([provider], settings=_settings(enabled=False)) - - decision = service.evaluate() - - assert decision.action == TriggerAction.IGNORE - assert decision.confidence == 0.0 - assert decision.signals == () - assert provider.calls == 0 - - -def test_trigger_service_snooze_ignores_signals(monkeypatch: pytest.MonkeyPatch) -> None: - """Snoozed trigger service ignores signals until snooze expires.""" - provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5)) - service = TriggerService([provider], settings=_settings()) - - monkeypatch.setattr(time, "monotonic", lambda: 100.0) - service.snooze(seconds=20) - - monkeypatch.setattr(time, "monotonic", lambda: 110.0) - decision = service.evaluate() - assert decision.action == TriggerAction.IGNORE - - monkeypatch.setattr(time, "monotonic", lambda: 130.0) - decision = service.evaluate() - assert decision.action == TriggerAction.NOTIFY - - -def test_trigger_service_rate_limit(monkeypatch: pytest.MonkeyPatch) -> None: - """TriggerService enforces rate limit between prompts.""" - provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5)) - service = TriggerService([provider], settings=_settings(rate_limit_seconds=60)) - - monkeypatch.setattr(time, "monotonic", lambda: 100.0) - first = service.evaluate() - assert first.action == TriggerAction.NOTIFY - - monkeypatch.setattr(time, "monotonic", lambda: 120.0) - second = service.evaluate() - assert second.action == TriggerAction.IGNORE - - monkeypatch.setattr(time, "monotonic", lambda: 200.0) - third = service.evaluate() - assert third.action == TriggerAction.NOTIFY - - -def test_trigger_service_auto_start(monkeypatch: pytest.MonkeyPatch) -> None: - """Auto-start fires when confidence passes threshold and auto-start is enabled.""" - provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9)) - service = TriggerService([provider], settings=_settings(auto_start=True, threshold_auto=0.8)) - - monkeypatch.setattr(time, "monotonic", lambda: 100.0) - decision = service.evaluate() - - assert decision.action == TriggerAction.AUTO_START - - -def 
test_trigger_service_auto_start_disabled_notifies(monkeypatch: pytest.MonkeyPatch) -> None: - """High confidence should still notify when auto-start is disabled.""" - provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9)) - service = TriggerService([provider], settings=_settings(auto_start=False, threshold_auto=0.8)) - - monkeypatch.setattr(time, "monotonic", lambda: 100.0) - decision = service.evaluate() - - assert decision.action == TriggerAction.NOTIFY - - -def test_trigger_service_below_ignore_threshold(monkeypatch: pytest.MonkeyPatch) -> None: - """Signals below ignore threshold should be ignored.""" - provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.1)) - service = TriggerService([provider], settings=_settings(threshold_ignore=0.2)) - - monkeypatch.setattr(time, "monotonic", lambda: 100.0) - decision = service.evaluate() - - assert decision.action == TriggerAction.IGNORE - - -def test_trigger_service_threshold_validation() -> None: - """Invalid threshold ordering should raise.""" - with pytest.raises(ValueError, match="threshold_auto_start"): - TriggerServiceSettings( - enabled=True, - auto_start_enabled=False, - rate_limit_seconds=10, - snooze_seconds=5, - threshold_ignore=0.9, - threshold_auto_start=0.2, - ) - - -def test_trigger_service_skips_disabled_providers() -> None: - """Disabled providers should be skipped when evaluating.""" - enabled_signal = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.3)) - disabled_signal = FakeProvider( - signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.7), enabled=False - ) - service = TriggerService([enabled_signal, disabled_signal], settings=_settings()) - - decision = service.evaluate() - - assert decision.confidence == pytest.approx(0.3) - assert enabled_signal.calls == 1 - assert disabled_signal.calls == 0 - - -def test_trigger_service_snooze_state_properties(monkeypatch: pytest.MonkeyPatch) -> None: - """is_snoozed and remaining seconds should reflect snooze window.""" - service = TriggerService([], settings=_settings()) - monkeypatch.setattr(time, "monotonic", lambda: 50.0) - service.snooze(seconds=10) - - monkeypatch.setattr(time, "monotonic", lambda: 55.0) - assert service.is_snoozed is True - assert service.snooze_remaining_seconds == pytest.approx(5.0) - - service.clear_snooze() - assert service.is_snoozed is False - assert service.snooze_remaining_seconds == 0.0 - - -def test_trigger_service_rate_limit_with_existing_prompt(monkeypatch: pytest.MonkeyPatch) -> None: - """Existing prompt time inside rate limit should short-circuit to IGNORE.""" - provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9)) - service = TriggerService([provider], settings=_settings(rate_limit_seconds=30)) - - monkeypatch.setattr(time, "monotonic", lambda: 100.0) - service._last_prompt = 90.0 # Pretend we prompted 10s ago - decision = service.evaluate() - - assert decision.action == TriggerAction.IGNORE - assert service.is_enabled is True - - -def test_trigger_service_enable_toggles() -> None: - """set_enabled and set_auto_start should update settings.""" - service = TriggerService([], settings=_settings(enabled=True, auto_start=False)) - - service.set_enabled(False) - assert service.is_enabled is False - - service.set_auto_start(True) - assert service._settings.auto_start_enabled is True -```` - -## File: tests/client/test_async_mixin.py -````python -"""Tests for AsyncOperationMixin.""" - -from __future__ import annotations - 
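# The tests below pin down the run_async_operation contract rather than any
# particular implementation. A minimal conforming sketch, inferred from the
# assertions (the method body and helper names here are assumptions, not the
# mixin's actual source):
#
#     async def run_async_operation(self, *, operation, on_success, on_error, set_loading):
#         set_loading(True)
#         try:
#             result = await operation()
#             on_success(result)
#             return result
#         except Exception as exc:
#             on_error(str(exc))    # tests assert the message string is forwarded
#             return None
#         finally:
#             set_loading(False)    # always cleared, even when the operation raises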
-import asyncio
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-
-from noteflow.client.components._async_mixin import AsyncOperationMixin
-
-
-class ConcreteComponent(AsyncOperationMixin[str]):
-    """Concrete implementation for testing."""
-
-    def __init__(self, page: MagicMock | None = None) -> None:
-        self._page = page
-
-
-class TestAsyncOperationMixin:
-    """Tests for AsyncOperationMixin."""
-
-    @pytest.fixture
-    def mock_page(self) -> MagicMock:
-        """Create mock Flet page."""
-        page = MagicMock()
-
-        def _run_task(fn):
-            try:
-                loop = asyncio.get_running_loop()
-                return loop.create_task(fn())
-            except RuntimeError:
-                # No running loop (sync tests); run immediately
-                return asyncio.run(fn())
-
-        page.run_task = MagicMock(side_effect=_run_task)
-        return page
-
-    @pytest.fixture
-    def component(self, mock_page: MagicMock) -> ConcreteComponent:
-        """Create component with mock page."""
-        return ConcreteComponent(page=mock_page)
-
-    @pytest.mark.asyncio
-    async def test_run_async_operation_success_calls_callbacks(
-        self, component: ConcreteComponent
-    ) -> None:
-        """Successful operation calls on_success and set_loading."""
-        operation = AsyncMock(return_value="result")
-        on_success = MagicMock()
-        on_error = MagicMock()
-        set_loading = MagicMock()
-
-        result = await component.run_async_operation(
-            operation=operation,
-            on_success=on_success,
-            on_error=on_error,
-            set_loading=set_loading,
-        )
-
-        await asyncio.sleep(0)
-
-        assert result == "result"
-        operation.assert_awaited_once()
-        on_success.assert_called_once_with("result")
-        on_error.assert_not_called()
-        # Loading: True then False
-        assert set_loading.call_count == 2
-        set_loading.assert_any_call(True)
-        set_loading.assert_any_call(False)
-
-    @pytest.mark.asyncio
-    async def test_run_async_operation_error_calls_on_error(
-        self, component: ConcreteComponent
-    ) -> None:
-        """Failed operation calls on_error and returns None."""
-        operation = AsyncMock(side_effect=ValueError("test error"))
-        on_success = MagicMock()
-        on_error = MagicMock()
-        set_loading = MagicMock()
-
-        result = await component.run_async_operation(
-            operation=operation,
-            on_success=on_success,
-            on_error=on_error,
-            set_loading=set_loading,
-        )
-
-        await asyncio.sleep(0)
-
-        assert result is None
-        on_success.assert_not_called()
-        on_error.assert_called_once_with("test error")
-        # Loading: True then False (finally block)
-        assert set_loading.call_count == 2
-
-    @pytest.mark.asyncio
-    async def test_run_async_operation_always_clears_loading(
-        self, component: ConcreteComponent
-    ) -> None:
-        """Loading state always cleared in finally block."""
-        operation = AsyncMock(side_effect=RuntimeError("boom"))
-        set_loading = MagicMock()
-
-        await component.run_async_operation(
-            operation=operation,
-            on_success=MagicMock(),
-            on_error=MagicMock(),
-            set_loading=set_loading,
-        )
-
-        await asyncio.sleep(0)
-
-        # Final call should be set_loading(False)
-        assert set_loading.call_args_list[-1][0][0] is False
-
-    def test_dispatch_ui_no_page_is_noop(self) -> None:
-        """Dispatch with no page does nothing."""
-        component = ConcreteComponent(page=None)
-        callback = MagicMock()
-
-        # Should not raise
-        component._dispatch_ui(callback)
-
-        callback.assert_not_called()
-
-    def test_dispatch_ui_with_page_calls_run_task(
-        self, component: ConcreteComponent, mock_page: MagicMock
-    ) -> None:
-        """Dispatch with page calls page.run_task."""
-        callback = MagicMock()
-
-        component._dispatch_ui(callback)
-
-        mock_page.run_task.assert_called_once()
-
callback.assert_called_once() -```` - -## File: tests/client/test_summary_panel.py -````python -"""Tests for SummaryPanelComponent.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from unittest.mock import Mock -from uuid import uuid4 - -import flet as ft -import pytest - -from noteflow.client.components.summary_panel import ( - PRIORITY_COLORS, - PRIORITY_LABELS, - SummaryPanelComponent, -) -from noteflow.domain.entities import ActionItem, KeyPoint, Summary -from noteflow.domain.value_objects import MeetingId - - -@dataclass -class MockAppState: - """Minimal mock AppState for testing.""" - - transcript_segments: list = field(default_factory=list) - current_meeting: Mock | None = None - current_summary: Summary | None = None - summary_loading: bool = False - summary_error: str | None = None - _page: Mock | None = None - - def request_update(self) -> None: - """No-op for tests.""" - - def run_on_ui_thread(self, callback) -> None: - """Execute callback immediately for tests.""" - callback() if callable(callback) else None - - -def _create_mock_state() -> MockAppState: - """Create mock AppState with meeting.""" - state = MockAppState() - state.current_meeting = Mock() - state.current_meeting.id = str(uuid4()) - return state - - -def _create_summary( - key_points: list[KeyPoint] | None = None, - action_items: list[ActionItem] | None = None, -) -> Summary: - """Create test Summary.""" - return Summary( - meeting_id=MeetingId(uuid4()), - executive_summary="Test executive summary.", - key_points=key_points or [], - action_items=action_items or [], - ) - - -class TestSummaryPanelBuild: - """Tests for SummaryPanelComponent.build().""" - - def test_build_returns_container(self) -> None: - """build() should return ft.Container.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - result = panel.build() - - assert isinstance(result, ft.Container) - - def test_build_initially_hidden(self) -> None: - """Panel should be hidden by default.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - container = panel.build() - - assert container.visible is False - - def test_build_creates_ui_elements(self) -> None: - """build() should create all UI elements.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - panel.build() - - assert panel._summary_text is not None - assert panel._key_points_list is not None - assert panel._action_items_list is not None - assert panel._generate_btn is not None - assert panel._loading_indicator is not None - assert panel._error_text is not None - - -class TestSummaryPanelVisibility: - """Tests for visibility control.""" - - def test_set_visible_shows_panel(self) -> None: - """set_visible(True) should show panel.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - - panel.set_visible(True) - - assert panel._container is not None - assert panel._container.visible is True - - def test_set_visible_hides_panel(self) -> None: - """set_visible(False) should hide panel.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - panel.set_visible(True) - - panel.set_visible(False) - - assert panel._container is not None - assert panel._container.visible is False - - -class TestSummaryPanelEnabled: - """Tests for enabled state control.""" - - def test_set_enabled_enables_button(self) 
-> None: - """set_enabled(True) should enable generate button.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - - panel.set_enabled(True) - - assert panel._generate_btn is not None - assert panel._generate_btn.disabled is False - - def test_set_enabled_disables_button(self) -> None: - """set_enabled(False) should disable generate button.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - panel.set_enabled(True) - - panel.set_enabled(False) - - assert panel._generate_btn is not None - assert panel._generate_btn.disabled is True - - -class TestSummaryPanelRender: - """Tests for rendering summary content.""" - - def test_render_summary_shows_executive_summary(self) -> None: - """_render_summary should display executive summary text.""" - state = _create_mock_state() - state.current_summary = _create_summary() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - - panel._render_summary() - - assert panel._summary_text is not None - assert panel._summary_text.value == "Test executive summary." - - def test_render_summary_populates_key_points(self) -> None: - """_render_summary should populate key points list.""" - state = _create_mock_state() - state.current_summary = _create_summary( - key_points=[ - KeyPoint(text="Point 1", segment_ids=[0]), - KeyPoint(text="Point 2", segment_ids=[1]), - ] - ) - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - - panel._render_summary() - - assert panel._key_points_list is not None - assert len(panel._key_points_list.controls) == 2 - - def test_render_summary_populates_action_items(self) -> None: - """_render_summary should populate action items list.""" - state = _create_mock_state() - state.current_summary = _create_summary( - action_items=[ - ActionItem(text="Action 1", segment_ids=[0], priority=1), - ActionItem(text="Action 2", segment_ids=[1], priority=2, assignee="Alice"), - ] - ) - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - - panel._render_summary() - - assert panel._action_items_list is not None - assert len(panel._action_items_list.controls) == 2 - - -class TestCitationChips: - """Tests for citation chip functionality.""" - - def test_create_citation_chip_returns_container(self) -> None: - """_create_citation_chip should return Container.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - chip = panel._create_citation_chip(5) - - assert isinstance(chip, ft.Container) - - def test_citation_chip_has_correct_label(self) -> None: - """Citation chip should display [#N] format.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - chip = panel._create_citation_chip(42) - text = chip.content - - assert isinstance(text, ft.Text) - assert text.value == "[#42]" - - def test_citation_chip_click_calls_callback(self) -> None: - """Clicking citation chip should call on_citation_click.""" - clicked_ids: list[int] = [] - state = _create_mock_state() - panel = SummaryPanelComponent( - state, - get_service=lambda: None, - on_citation_click=lambda sid: clicked_ids.append(sid), - ) - - panel._handle_citation_click(7) - - assert clicked_ids == [7] - - def test_citation_click_no_callback_is_noop(self) -> None: - """Citation click with no callback should not raise.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, 
get_service=lambda: None, on_citation_click=None) - - panel._handle_citation_click(5) # Should not raise - - -class TestPriorityBadge: - """Tests for priority badge functionality.""" - - @pytest.mark.parametrize( - ("priority", "expected_label"), - [ - (0, "—"), - (1, "Low"), - (2, "Med"), - (3, "High"), - ], - ) - def test_priority_badge_labels(self, priority: int, expected_label: str) -> None: - """Priority badge should show correct label.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - badge = panel._create_priority_badge(priority) - text = badge.content - - assert isinstance(text, ft.Text) - assert text.value == expected_label - - @pytest.mark.parametrize( - ("priority", "expected_color"), - [ - (0, ft.Colors.GREY_400), - (1, ft.Colors.BLUE_400), - (2, ft.Colors.ORANGE_400), - (3, ft.Colors.RED_400), - ], - ) - def test_priority_badge_colors(self, priority: int, expected_color: str) -> None: - """Priority badge should have correct background color.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - badge = panel._create_priority_badge(priority) - - assert badge.bgcolor == expected_color - - -class TestLoadingAndError: - """Tests for loading and error states.""" - - def test_update_loading_state_shows_indicator(self) -> None: - """Loading indicator should be visible when loading.""" - state = _create_mock_state() - state.summary_loading = True - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - - panel._update_loading_state() - - assert panel._loading_indicator is not None - assert panel._generate_btn is not None - assert panel._loading_indicator.visible is True - assert panel._generate_btn.disabled is True - - def test_update_loading_state_hides_indicator(self) -> None: - """Loading indicator should be hidden when not loading.""" - state = _create_mock_state() - state.summary_loading = False - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - assert panel._loading_indicator is not None - panel._loading_indicator.visible = True - - panel._update_loading_state() - - assert not panel._loading_indicator.visible - - def test_show_error_displays_message(self) -> None: - """_show_error should display error message.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - - panel._show_error("Test error message") - - assert panel._error_text is not None - assert panel._error_text.value == "Test error message" - assert panel._error_text.visible is True - - def test_clear_error_hides_message(self) -> None: - """_clear_error should hide error message.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - panel._show_error("Error") - - panel._clear_error() - - assert panel._error_text is not None - assert panel._error_text.value == "" - assert panel._error_text.visible is False - - -class TestPriorityConstants: - """Tests for priority constant values.""" - - def test_priority_colors_has_all_levels(self) -> None: - """PRIORITY_COLORS should have entries for all priority levels.""" - assert 0 in PRIORITY_COLORS - assert 1 in PRIORITY_COLORS - assert 2 in PRIORITY_COLORS - assert 3 in PRIORITY_COLORS - - def test_priority_labels_has_all_levels(self) -> None: - """PRIORITY_LABELS should have entries for all priority levels.""" - assert 0 in PRIORITY_LABELS - assert 1 in PRIORITY_LABELS - assert 2 in PRIORITY_LABELS 
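        # Taken together with the parametrized badge tests above, the expected
        # mapping is: 0 -> "—" / GREY_400, 1 -> "Low" / BLUE_400,
        # 2 -> "Med" / ORANGE_400, 3 -> "High" / RED_400.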
- assert 3 in PRIORITY_LABELS - - -class TestUncitedDraftsToggle: - """Tests for uncited drafts toggle functionality.""" - - def test_build_creates_toggle_ui(self) -> None: - """build() should create uncited toggle and count text.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - panel.build() - - assert panel._uncited_toggle is not None - assert panel._uncited_count_text is not None - - def test_toggle_initially_hidden(self) -> None: - """Uncited toggle should be hidden by default.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - - assert panel._uncited_toggle is not None - assert panel._uncited_toggle.visible is False - - def test_calculate_uncited_counts_with_no_summaries(self) -> None: - """Uncited counts should be zero when no summaries.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - panel._calculate_uncited_counts() - - assert panel._uncited_key_points == 0 - assert panel._uncited_action_items == 0 - - def test_calculate_uncited_counts_with_filtered_items(self) -> None: - """Uncited counts should reflect difference between original and filtered.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - - # Original has 3 key points - panel._original_summary = _create_summary( - key_points=[ - KeyPoint(text="Point 1", segment_ids=[0]), - KeyPoint(text="Point 2", segment_ids=[1]), - KeyPoint(text="Point 3", segment_ids=[]), # uncited - ], - action_items=[ - ActionItem(text="Action 1", segment_ids=[0]), - ActionItem(text="Action 2", segment_ids=[]), # uncited - ], - ) - # Filtered has 2 key points (1 filtered out) - panel._filtered_summary = _create_summary( - key_points=[ - KeyPoint(text="Point 1", segment_ids=[0]), - KeyPoint(text="Point 2", segment_ids=[1]), - ], - action_items=[ - ActionItem(text="Action 1", segment_ids=[0]), - ], - ) - - panel._calculate_uncited_counts() - - assert panel._uncited_key_points == 1 - assert panel._uncited_action_items == 1 - - def test_has_uncited_items_true_when_filtered(self) -> None: - """_has_uncited_items should return True when items filtered.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel._uncited_key_points = 2 - panel._uncited_action_items = 0 - - assert panel._has_uncited_items() is True - - def test_has_uncited_items_false_when_none_filtered(self) -> None: - """_has_uncited_items should return False when nothing filtered.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel._uncited_key_points = 0 - panel._uncited_action_items = 0 - - assert panel._has_uncited_items() is False - - def test_update_uncited_ui_shows_toggle_when_uncited(self) -> None: - """Toggle should be visible when uncited items exist.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - panel._uncited_key_points = 2 - panel._uncited_action_items = 1 - - panel._update_uncited_ui() - - assert panel._uncited_toggle is not None - assert panel._uncited_toggle.visible is True - - def test_update_uncited_ui_hides_toggle_when_no_uncited(self) -> None: - """Toggle should be hidden when no uncited items.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - panel._uncited_key_points = 0 - panel._uncited_action_items = 0 - - 
panel._update_uncited_ui() - - assert panel._uncited_toggle is not None - assert panel._uncited_toggle.visible is False - - def test_update_uncited_ui_shows_count_text(self) -> None: - """Count text should show total uncited when toggle is off.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - panel._uncited_key_points = 2 - panel._uncited_action_items = 3 - panel._show_uncited = False - - panel._update_uncited_ui() - - assert panel._uncited_count_text is not None - assert panel._uncited_count_text.visible is True - assert panel._uncited_count_text.value == "(5 hidden)" - - def test_update_uncited_ui_hides_count_when_showing_uncited(self) -> None: - """Count text should be hidden when showing uncited items.""" - state = _create_mock_state() - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - panel._uncited_key_points = 2 - panel._uncited_action_items = 0 - panel._show_uncited = True - - panel._update_uncited_ui() - - assert panel._uncited_count_text is not None - assert panel._uncited_count_text.visible is False - - def test_get_display_summary_returns_original_when_toggled(self) -> None: - """_get_display_summary should return original when showing uncited.""" - state = _create_mock_state() - original = _create_summary(key_points=[KeyPoint(text="Original", segment_ids=[])]) - filtered = _create_summary(key_points=[]) - state.current_summary = filtered - - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel._original_summary = original - panel._filtered_summary = filtered - panel._show_uncited = True - - result = panel._get_display_summary() - - assert result is original - - def test_get_display_summary_returns_current_when_not_toggled(self) -> None: - """_get_display_summary should return current_summary when toggle off.""" - state = _create_mock_state() - original = _create_summary(key_points=[KeyPoint(text="Original", segment_ids=[])]) - filtered = _create_summary(key_points=[]) - state.current_summary = filtered - - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel._original_summary = original - panel._filtered_summary = filtered - panel._show_uncited = False - - result = panel._get_display_summary() - - assert result is filtered - - def test_render_summary_switches_on_toggle(self) -> None: - """Rendering should switch content based on toggle state.""" - state = _create_mock_state() - original = _create_summary( - key_points=[ - KeyPoint(text="Point 1", segment_ids=[0]), - KeyPoint(text="Uncited", segment_ids=[]), - ] - ) - filtered = _create_summary(key_points=[KeyPoint(text="Point 1", segment_ids=[0])]) - state.current_summary = filtered - - panel = SummaryPanelComponent(state, get_service=lambda: None) - panel.build() - panel._original_summary = original - panel._filtered_summary = filtered - panel._uncited_key_points = 1 - - # First render with toggle off - panel._show_uncited = False - panel._render_summary() - assert panel._key_points_list is not None - assert len(panel._key_points_list.controls) == 1 - - # Toggle on and re-render - panel._show_uncited = True - panel._render_summary() - assert len(panel._key_points_list.controls) == 2 -```` - -## File: tests/client/test_transcript_component.py -````python -"""Tests for TranscriptComponent including partial rendering.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import TYPE_CHECKING -from unittest.mock import MagicMock - -import flet as ft - 
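# The mocks below are plain dataclasses rather than real domain objects:
# TranscriptComponent only reads duck-typed attributes (text, start_time,
# end_time, is_final, speaker fields), so lightweight stand-ins are enough
# and keep these tests import-light.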
-if TYPE_CHECKING: - from collections.abc import Callable - +from noteflow.client.components._thread_mixin import BackgroundWorkerMixin +from noteflow.client.components.annotation_display import AnnotationDisplayComponent +from noteflow.client.components.annotation_toolbar import AnnotationToolbarComponent +from noteflow.client.components.connection_panel import ConnectionPanelComponent +from noteflow.client.components.meeting_library import MeetingLibraryComponent +from noteflow.client.components.playback_controls import PlaybackControlsComponent +from noteflow.client.components.playback_sync import PlaybackSyncController +from noteflow.client.components.recording_timer import RecordingTimerComponent +from noteflow.client.components.summary_panel import SummaryPanelComponent from noteflow.client.components.transcript import TranscriptComponent - - -@dataclass -class MockTranscriptSegment: - """Mock TranscriptSegment for testing.""" - - text: str - start_time: float - end_time: float - is_final: bool = True - speaker_id: str = "" - speaker_confidence: float = 0.0 - - -@dataclass -class MockServerInfo: - """Mock ServerInfo for testing.""" - - version: str = "1.0.0" - asr_model: str = "base" - asr_ready: bool = True - active_meetings: int = 0 - - -@dataclass -class MockAppState: - """Minimal mock AppState for testing transcript component.""" - - transcript_segments: list[MockTranscriptSegment] = field(default_factory=list) - current_partial_text: str = "" - _page: MagicMock | None = None - - def request_update(self) -> None: - """No-op for tests.""" - - def run_on_ui_thread(self, callback: Callable[[], None]) -> None: - """Execute callback immediately for tests.""" - callback() - - def clear_transcript(self) -> None: - """Clear transcript segments and partial text.""" - self.transcript_segments.clear() - self.current_partial_text = "" - - -class TestTranscriptComponentBuild: - """Tests for TranscriptComponent.build().""" - - def test_build_returns_column(self) -> None: - """build() should return ft.Column.""" - state = MockAppState() - component = TranscriptComponent(state) - - result = component.build() - - assert isinstance(result, ft.Column) - - def test_build_creates_search_field(self) -> None: - """build() should create search field.""" - state = MockAppState() - component = TranscriptComponent(state) - - component.build() - - assert component._search_field is not None - assert isinstance(component._search_field, ft.TextField) - - def test_build_creates_list_view(self) -> None: - """build() should create ListView.""" - state = MockAppState() - component = TranscriptComponent(state) - - component.build() - - assert component._list_view is not None - assert isinstance(component._list_view, ft.ListView) - - -class TestTranscriptPartialRendering: - """Tests for partial transcript rendering.""" - - def test_add_partial_segment_updates_state(self) -> None: - """Adding partial segment should update state partial text.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - partial = MockTranscriptSegment( - text="Hello, I am speaking...", - start_time=0.0, - end_time=1.0, - is_final=False, - ) - component.add_segment(partial) - - assert state.current_partial_text == "Hello, I am speaking..." 
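    # The partial-rendering tests that follow assume this add_segment lifecycle,
    # sketched here from the assertions (helper names are hypothetical):
    #
    #     def add_segment(self, segment):
    #         if segment.is_final:
    #             self._state.transcript_segments.append(segment)
    #             self._state.current_partial_text = ""
    #             self._remove_partial_row()           # drops the [LIVE] row
    #         else:
    #             self._state.current_partial_text = segment.text
    #             self._upsert_partial_row(segment)    # one italic [LIVE] row, updated in place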
- - def test_add_partial_creates_partial_row(self) -> None: - """Adding partial segment should create partial row in ListView.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - partial = MockTranscriptSegment( - text="Speaking now...", - start_time=0.0, - end_time=1.0, - is_final=False, - ) - component.add_segment(partial) - - assert component._partial_row is not None - assert component._list_view is not None - assert component._partial_row in component._list_view.controls - - def test_partial_row_has_live_indicator(self) -> None: - """Partial row should contain [LIVE] indicator.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - partial = MockTranscriptSegment( - text="Testing...", - start_time=0.0, - end_time=1.0, - is_final=False, - ) - component.add_segment(partial) - - # Check that partial row content contains LIVE indicator - assert component._partial_row is not None - partial_content = component._partial_row.content - assert isinstance(partial_content, ft.Row) - # First element should be the LIVE text - live_text = partial_content.controls[0] - assert isinstance(live_text, ft.Text) - assert live_text.value is not None - assert "[LIVE]" in live_text.value - - def test_partial_row_has_italic_styling(self) -> None: - """Partial row text should be italicized.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - partial = MockTranscriptSegment( - text="Testing...", - start_time=0.0, - end_time=1.0, - is_final=False, - ) - component.add_segment(partial) - - assert component._partial_row is not None - partial_content = component._partial_row.content - assert isinstance(partial_content, ft.Row) - text_element = partial_content.controls[1] - assert isinstance(text_element, ft.Text) - assert text_element.italic is True - - def test_partial_row_updated_on_new_partial(self) -> None: - """Subsequent partials should update existing row, not create new.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - # First partial - component.add_segment( - MockTranscriptSegment(text="First", start_time=0.0, end_time=1.0, is_final=False) - ) - first_row = component._partial_row - assert component._list_view is not None - initial_count = len(component._list_view.controls) - - # Second partial - component.add_segment( - MockTranscriptSegment(text="Second", start_time=1.0, end_time=2.0, is_final=False) - ) - - # Should update same row, not add new - assert component._partial_row is first_row - assert component._list_view is not None - assert len(component._list_view.controls) == initial_count - - -class TestTranscriptFinalSegment: - """Tests for final segment handling.""" - - def test_add_final_segment_clears_partial_text(self) -> None: - """Adding final segment should clear partial text state.""" - state = MockAppState() - state.current_partial_text = "Partial text..." 
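        # Pre-seed stale partial text so the final segment must clear it.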
- component = TranscriptComponent(state) - component.build() - - final = MockTranscriptSegment( - text="Final transcript.", - start_time=0.0, - end_time=2.0, - is_final=True, - ) - component.add_segment(final) - - assert not state.current_partial_text - - def test_add_final_removes_partial_row(self) -> None: - """Adding final segment should remove partial row.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - # Add partial first - partial = MockTranscriptSegment( - text="Speaking...", - start_time=0.0, - end_time=1.0, - is_final=False, - ) - component.add_segment(partial) - assert component._partial_row is not None - - # Add final - final = MockTranscriptSegment( - text="Final text.", - start_time=0.0, - end_time=2.0, - is_final=True, - ) - component.add_segment(final) - - # Partial row should be removed - assert component._partial_row is None - - def test_add_final_appends_to_segments(self) -> None: - """Adding final segment should append to state transcript_segments.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - final = MockTranscriptSegment( - text="Final text.", - start_time=0.0, - end_time=2.0, - is_final=True, - ) - component.add_segment(final) - - assert len(state.transcript_segments) == 1 - assert state.transcript_segments[0].text == "Final text." - - -class TestTranscriptClear: - """Tests for transcript clearing.""" - - def test_clear_removes_partial_row(self) -> None: - """clear() should remove partial row.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - # Add partial - partial = MockTranscriptSegment( - text="Partial...", - start_time=0.0, - end_time=1.0, - is_final=False, - ) - component.add_segment(partial) - - component.clear() - - assert component._partial_row is None - - def test_clear_empties_list_view(self) -> None: - """clear() should empty ListView controls.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - # Add some segments - component.add_segment( - MockTranscriptSegment(text="First", start_time=0.0, end_time=1.0, is_final=True) - ) - component.add_segment( - MockTranscriptSegment(text="Second", start_time=1.0, end_time=2.0, is_final=True) - ) - - component.clear() - - assert component._list_view is not None - assert len(component._list_view.controls) == 0 - - def test_clear_clears_search_field(self) -> None: - """clear() should clear search field.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - assert component._search_field is not None - component._search_field.value = "test query" - - component.clear() - - assert component._search_field is not None - assert not component._search_field.value - - -class TestTranscriptSearch: - """Tests for transcript search functionality.""" - - def test_search_filters_segments(self) -> None: - """Search should filter visible segments.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - # Add segments to state - state.transcript_segments = [ - MockTranscriptSegment(text="Hello world", start_time=0.0, end_time=1.0), - MockTranscriptSegment(text="Goodbye world", start_time=1.0, end_time=2.0), - MockTranscriptSegment(text="Something else", start_time=2.0, end_time=3.0), - ] - - # Simulate search - component._search_query = "world" - component._rerender_all_segments() - - # Should only show segments containing "world" - visible_count = sum(row is not None for row in 
component._segment_rows) - assert visible_count == 2 - - def test_search_is_case_insensitive(self) -> None: - """Search should be case-insensitive.""" - state = MockAppState() - component = TranscriptComponent(state) - component.build() - - state.transcript_segments = [ - MockTranscriptSegment(text="Hello WORLD", start_time=0.0, end_time=1.0), - MockTranscriptSegment(text="something else", start_time=1.0, end_time=2.0), - ] - - component._search_query = "world" - component._rerender_all_segments() - - visible_count = sum(row is not None for row in component._segment_rows) - assert visible_count == 1 - - -class TestTranscriptSegmentClick: - """Tests for segment click handling.""" - - def test_click_callback_receives_segment_index(self) -> None: - """Clicking segment should call callback with segment index.""" - clicked_indices: list[int] = [] - state = MockAppState() - component = TranscriptComponent( - state, - on_segment_click=lambda idx: clicked_indices.append(idx), - ) - component.build() - - component._handle_click(5) - - assert clicked_indices == [5] - - def test_click_without_callback_is_noop(self) -> None: - """Click without callback should not raise.""" - state = MockAppState() - component = TranscriptComponent(state, on_segment_click=None) - component.build() - - component._handle_click(3) # Should not raise -```` - -## File: tests/infrastructure/asr/test_engine.py -````python -"""Tests for FasterWhisperEngine behavior without loading models.""" - -from __future__ import annotations - -import sys -import types - -import numpy as np -import pytest - -from noteflow.infrastructure.asr.engine import FasterWhisperEngine - - -class TestFasterWhisperEngine: - """Tests for FasterWhisperEngine.""" - - def test_transcribe_without_load_raises(self) -> None: - """Calling transcribe before load_model raises RuntimeError.""" - engine = FasterWhisperEngine() - audio = np.zeros(1600, dtype=np.float32) - with pytest.raises(RuntimeError, match="Model not loaded"): - list(engine.transcribe(audio)) - - def test_load_invalid_model_size_raises(self) -> None: - """Invalid model size raises ValueError when faster-whisper is available.""" - pytest.importorskip("faster_whisper") - engine = FasterWhisperEngine() - with pytest.raises(ValueError, match="Invalid model size"): - engine.load_model(model_size="not-a-model") - - def test_load_model_with_stub_sets_state(self, monkeypatch: pytest.MonkeyPatch) -> None: - """load_model should set model and size when stubbed module is present.""" - - class DummyModel: - def __init__( - self, model_size: str, device: str, compute_type: str, num_workers: int - ) -> None: - self.args = (model_size, device, compute_type, num_workers) - - fake_module = types.SimpleNamespace(WhisperModel=DummyModel) - monkeypatch.setitem(sys.modules, "faster_whisper", fake_module) - - engine = FasterWhisperEngine(compute_type="float32", device="cpu", num_workers=2) - engine.load_model(model_size="base") - - assert engine.is_loaded is True - assert engine.model_size == "base" - assert engine._model.args == ("base", "cpu", "float32", 2) # type: ignore[attr-defined] - - def test_load_model_wraps_errors(self, monkeypatch: pytest.MonkeyPatch) -> None: - """load_model should surface model construction errors as RuntimeError.""" - - class FailingModel: - def __init__(self, *_: object, **__: object) -> None: - raise ValueError("boom") - - fake_module = types.SimpleNamespace(WhisperModel=FailingModel) - monkeypatch.setitem(sys.modules, "faster_whisper", fake_module) - - engine = FasterWhisperEngine() 
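        # FailingModel raises ValueError in __init__; load_model is expected to
        # surface that as RuntimeError, as asserted below.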
- with pytest.raises(RuntimeError, match="Failed to load model"): - engine.load_model(model_size="base") - - def test_transcribe_with_stubbed_model(self) -> None: - """transcribe should yield AsrResult objects when model is preset.""" - engine = FasterWhisperEngine() - - class DummyWord: - def __init__(self) -> None: - self.word = "hi" - self.start = 0.0 - self.end = 0.5 - self.probability = 0.9 - - class DummySegment: - def __init__(self) -> None: - self.text = " hi " - self.start = 0.0 - self.end = 1.0 - self.words = [DummyWord()] - self.avg_logprob = -0.1 - self.no_speech_prob = 0.01 - - class DummyInfo: - language = "en" - language_probability = 0.95 - - class DummyModel: - def transcribe(self, audio: np.ndarray, **_: object): - return [DummySegment()], DummyInfo() - - engine._model = DummyModel() - engine._model_size = "base" - - audio = np.zeros(1600, dtype=np.float32) - results = list(engine.transcribe(audio)) - - assert len(results) == 1 - first = results[0] - assert first.text == "hi" - assert first.words[0].word == "hi" - assert engine.is_loaded is True - - engine.unload() - assert engine.is_loaded is False -```` - -## File: tests/infrastructure/audio/test_capture.py -````python -"""Tests for SoundDeviceCapture.""" - -from __future__ import annotations - -from types import SimpleNamespace -from typing import TYPE_CHECKING - -import numpy as np -import pytest - -from noteflow.infrastructure.audio import SoundDeviceCapture - -if TYPE_CHECKING: - import numpy as np - from numpy.typing import NDArray - - -class TestSoundDeviceCapture: - """Tests for SoundDeviceCapture class.""" - - @pytest.fixture - def capture(self) -> SoundDeviceCapture: - """Create SoundDeviceCapture instance.""" - return SoundDeviceCapture() - - def test_init_defaults(self, capture: SoundDeviceCapture) -> None: - """Test capture initializes with correct defaults.""" - assert capture.sample_rate == 16000 - assert capture.channels == 1 - assert capture.current_device_id is None - - def test_is_capturing_initially_false(self, capture: SoundDeviceCapture) -> None: - """Test is_capturing returns False when not started.""" - assert capture.is_capturing() is False - - def test_list_devices_returns_list(self, capture: SoundDeviceCapture) -> None: - """Test list_devices returns a list (may be empty in CI).""" - devices = capture.list_devices() - assert isinstance(devices, list) - - def test_get_default_device_returns_device_or_none(self, capture: SoundDeviceCapture) -> None: - """Test get_default_device returns device info or None.""" - device = capture.get_default_device() - # May be None in CI environments without audio - if device is not None: - assert device.device_id >= 0 - assert isinstance(device.name, str) - assert device.channels > 0 - - def test_stop_when_not_capturing_is_safe(self, capture: SoundDeviceCapture) -> None: - """Test stop() is safe to call when not capturing.""" - # Should not raise - capture.stop() - assert capture.is_capturing() is False - - def test_start_when_already_capturing_raises(self, capture: SoundDeviceCapture) -> None: - """Test start() raises if already capturing. - - Note: This test may be skipped in CI without audio devices. 
- """ - devices = capture.list_devices() - if not devices: - pytest.skip("No audio devices available") - - def dummy_callback(frames: NDArray[np.float32], timestamp: float) -> None: - pass - - try: - capture.start( - device_id=None, - on_frames=dummy_callback, - sample_rate=16000, - channels=1, - ) - - # Second start should raise - with pytest.raises(RuntimeError, match="Already capturing"): - capture.start( - device_id=None, - on_frames=dummy_callback, - ) - finally: - capture.stop() - - def test_properties_after_start(self, capture: SoundDeviceCapture) -> None: - """Test properties reflect configured values after start. - - Note: This test may be skipped in CI without audio devices. - """ - devices = capture.list_devices() - if not devices: - pytest.skip("No audio devices available") - - def dummy_callback(frames: NDArray[np.float32], timestamp: float) -> None: - pass - - try: - capture.start( - device_id=None, - on_frames=dummy_callback, - sample_rate=44100, - channels=1, - ) - - assert capture.sample_rate == 44100 - assert capture.channels == 1 - assert capture.is_capturing() is True - finally: - capture.stop() - - def test_start_with_stubbed_stream_invokes_callback( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: - """start should configure and invoke callback when stream is stubbed.""" - captured: list[np.ndarray] = [] - - class DummyStream: - def __init__(self, *, callback, **_: object) -> None: - self.callback = callback - self.active = False - - def start(self) -> None: - self.active = True - data = np.zeros((4, 1), dtype=np.float32) - self.callback(data, len(data), None, 0) - - def stop(self) -> None: - self.active = False - - def close(self) -> None: - self.active = False - - monkeypatch.setattr( - "noteflow.infrastructure.audio.capture.sd.InputStream", - DummyStream, - ) - monkeypatch.setattr( - "noteflow.infrastructure.audio.capture.sd.PortAudioError", - RuntimeError, - ) - monkeypatch.setattr( - "noteflow.infrastructure.audio.capture.sd.CallbackFlags", - int, - ) - monkeypatch.setattr( - "noteflow.infrastructure.audio.capture.sd.query_devices", - lambda: [{"name": "Mic", "max_input_channels": 1, "default_samplerate": 16000}], - ) - monkeypatch.setattr( - "noteflow.infrastructure.audio.capture.sd.default", - SimpleNamespace(device=(0, 1)), - ) - - def on_frames(frames: NDArray[np.float32], timestamp: float) -> None: # type: ignore[name-defined] - captured.append(frames) - assert isinstance(timestamp, float) - - capture = SoundDeviceCapture() - capture.start(device_id=None, on_frames=on_frames, sample_rate=16000, channels=1) - - assert captured, "callback should have been invoked" - assert capture.is_capturing() is True - capture.stop() - assert capture.is_capturing() is False - - def test_start_wraps_portaudio_error(self, monkeypatch: pytest.MonkeyPatch) -> None: - """PortAudio errors should be converted to RuntimeError.""" - - class DummyError(Exception): ... 
- - def failing_stream(**_: object) -> None: - raise DummyError("boom") - - monkeypatch.setattr("noteflow.infrastructure.audio.capture.sd.InputStream", failing_stream) - monkeypatch.setattr("noteflow.infrastructure.audio.capture.sd.PortAudioError", DummyError) - - capture = SoundDeviceCapture() - with pytest.raises(RuntimeError, match="Failed to start audio capture"): - capture.start(device_id=None, on_frames=lambda *_: None) -```` - -## File: tests/infrastructure/summarization/test_cloud_provider.py -````python -"""Tests for cloud summarization provider.""" - -from __future__ import annotations - -import json -import sys -import types -from typing import Any -from uuid import uuid4 - -import pytest - -from noteflow.domain.entities import Segment -from noteflow.domain.summarization import ( - InvalidResponseError, - ProviderUnavailableError, - SummarizationRequest, -) -from noteflow.domain.value_objects import MeetingId -from noteflow.infrastructure.summarization import CloudBackend - - -def _segment( - segment_id: int, - text: str, - start: float = 0.0, - end: float = 5.0, -) -> Segment: - """Create a test segment.""" - return Segment( - segment_id=segment_id, - text=text, - start_time=start, - end_time=end, - ) - - -def _valid_json_response( - summary: str = "Test summary.", - key_points: list[dict[str, Any]] | None = None, - action_items: list[dict[str, Any]] | None = None, -) -> str: - """Build a valid JSON response string.""" - return json.dumps( - { - "executive_summary": summary, - "key_points": key_points or [], - "action_items": action_items or [], - } - ) - - -class TestCloudSummarizerProperties: - """Tests for CloudSummarizer properties.""" - - def test_provider_name_openai(self) -> None: - """Provider name should be 'openai' for OpenAI backend.""" - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(backend=CloudBackend.OPENAI) - assert summarizer.provider_name == "openai" - - def test_provider_name_anthropic(self) -> None: - """Provider name should be 'anthropic' for Anthropic backend.""" - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC) - assert summarizer.provider_name == "anthropic" - - def test_requires_cloud_consent_true(self) -> None: - """Cloud providers should require consent.""" - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer() - assert summarizer.requires_cloud_consent is True - - def test_is_available_with_api_key(self) -> None: - """is_available should be True when API key is provided.""" - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(api_key="test-key") - assert summarizer.is_available is True - - def test_is_available_without_api_key(self, monkeypatch: pytest.MonkeyPatch) -> None: - """is_available should be False without API key or env var.""" - monkeypatch.delenv("OPENAI_API_KEY", raising=False) - monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer() - assert summarizer.is_available is False - - def test_is_available_with_openai_env_var(self, monkeypatch: pytest.MonkeyPatch) -> None: - """is_available should be True with OPENAI_API_KEY env var.""" - monkeypatch.setenv("OPENAI_API_KEY", "sk-test") - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = 
CloudSummarizer(backend=CloudBackend.OPENAI) - assert summarizer.is_available is True - - def test_is_available_with_anthropic_env_var(self, monkeypatch: pytest.MonkeyPatch) -> None: - """is_available should be True with ANTHROPIC_API_KEY env var.""" - monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test") - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC) - assert summarizer.is_available is True - - def test_default_model_openai(self) -> None: - """Default model for OpenAI should be gpt-4o-mini.""" - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(backend=CloudBackend.OPENAI) - assert summarizer._model == "gpt-4o-mini" - - def test_default_model_anthropic(self) -> None: - """Default model for Anthropic should be claude-3-haiku.""" - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC) - assert summarizer._model == "claude-3-haiku-20240307" - - def test_custom_model(self) -> None: - """Custom model should override default.""" - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(model="gpt-4-turbo") - assert summarizer._model == "gpt-4-turbo" - - def test_openai_base_url_is_passed(self, monkeypatch: pytest.MonkeyPatch) -> None: - """OPENAI_BASE_URL should be forwarded to the client when provided.""" - captured = {} - - def fake_openai_client(**kwargs: Any) -> types.SimpleNamespace: - captured.update(kwargs) - return types.SimpleNamespace( - chat=types.SimpleNamespace( - completions=types.SimpleNamespace( - create=lambda **_: types.SimpleNamespace( - choices=[ - types.SimpleNamespace( - message=types.SimpleNamespace(content=_valid_json_response()) - ) - ], - usage=None, - ) - ) - ) - ) - - mock_module = types.ModuleType("openai") - mock_module.OpenAI = fake_openai_client - monkeypatch.setitem(sys.modules, "openai", mock_module) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer( - api_key="key", backend=CloudBackend.OPENAI, base_url="https://custom" - ) - # Trigger client creation - _ = summarizer._get_openai_client() - assert captured.get("base_url") == "https://custom" - - -class TestCloudSummarizerOpenAI: - """Tests for CloudSummarizer with OpenAI backend.""" - - @pytest.fixture - def meeting_id(self) -> MeetingId: - """Create test meeting ID.""" - return MeetingId(uuid4()) - - @pytest.fixture - def mock_openai(self, monkeypatch: pytest.MonkeyPatch) -> types.ModuleType: - """Mock openai module.""" - - def create_response(content: str, tokens: int = 100) -> types.SimpleNamespace: - """Create mock OpenAI response.""" - return types.SimpleNamespace( - choices=[types.SimpleNamespace(message=types.SimpleNamespace(content=content))], - usage=types.SimpleNamespace(total_tokens=tokens), - ) - - mock_client = types.SimpleNamespace( - chat=types.SimpleNamespace( - completions=types.SimpleNamespace( - create=lambda **_: create_response(_valid_json_response()) - ) - ) - ) - mock_module = types.ModuleType("openai") - mock_module.OpenAI = lambda **_: mock_client - monkeypatch.setitem(sys.modules, "openai", mock_module) - return mock_module - - @pytest.mark.asyncio - async def test_summarize_empty_segments( - self, meeting_id: MeetingId, mock_openai: types.ModuleType - ) -> None: - """Empty segments should return empty summary.""" - from noteflow.infrastructure.summarization import 
CloudSummarizer - - summarizer = CloudSummarizer(api_key="test-key") - request = SummarizationRequest(meeting_id=meeting_id, segments=[]) - - result = await summarizer.summarize(request) - - assert result.summary.key_points == [] - assert result.summary.action_items == [] - - @pytest.mark.asyncio - async def test_summarize_returns_result( - self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Summarize should return SummarizationResult.""" - response_content = _valid_json_response( - summary="Project meeting summary.", - key_points=[{"text": "Key point", "segment_ids": [0]}], - action_items=[{"text": "Action", "assignee": "Bob", "priority": 1, "segment_ids": [1]}], - ) - - def create_response(**_: Any) -> types.SimpleNamespace: - return types.SimpleNamespace( - choices=[ - types.SimpleNamespace(message=types.SimpleNamespace(content=response_content)) - ], - usage=types.SimpleNamespace(total_tokens=150), - ) - - mock_client = types.SimpleNamespace( - chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response)) - ) - mock_module = types.ModuleType("openai") - mock_module.OpenAI = lambda **_: mock_client - monkeypatch.setitem(sys.modules, "openai", mock_module) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.OPENAI) - segments = [_segment(0, "Key point"), _segment(1, "Action item")] - request = SummarizationRequest(meeting_id=meeting_id, segments=segments) - - result = await summarizer.summarize(request) - - assert result.provider_name == "openai" - assert result.summary.executive_summary == "Project meeting summary." - assert result.tokens_used == 150 - - @pytest.mark.asyncio - async def test_raises_unavailable_on_auth_error( - self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Should raise ProviderUnavailableError on auth failure.""" - - def raise_auth_error(**_: Any) -> None: - raise ValueError("Invalid API key provided") - - mock_client = types.SimpleNamespace( - chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=raise_auth_error)) - ) - mock_module = types.ModuleType("openai") - mock_module.OpenAI = lambda **_: mock_client - monkeypatch.setitem(sys.modules, "openai", mock_module) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(api_key="bad-key") - segments = [_segment(0, "Test")] - request = SummarizationRequest(meeting_id=meeting_id, segments=segments) - - with pytest.raises(ProviderUnavailableError, match="authentication failed"): - await summarizer.summarize(request) - - @pytest.mark.asyncio - async def test_raises_invalid_response_on_empty_content( - self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Should raise InvalidResponseError on empty response.""" - - def create_empty_response(**_: Any) -> types.SimpleNamespace: - return types.SimpleNamespace( - choices=[types.SimpleNamespace(message=types.SimpleNamespace(content=""))], - usage=None, - ) - - mock_client = types.SimpleNamespace( - chat=types.SimpleNamespace( - completions=types.SimpleNamespace(create=create_empty_response) - ) - ) - mock_module = types.ModuleType("openai") - mock_module.OpenAI = lambda **_: mock_client - monkeypatch.setitem(sys.modules, "openai", mock_module) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(api_key="test-key") - segments = [_segment(0, "Test")] - request = 
SummarizationRequest(meeting_id=meeting_id, segments=segments) - - with pytest.raises(InvalidResponseError, match="Empty response"): - await summarizer.summarize(request) - - -class TestCloudSummarizerAnthropic: - """Tests for CloudSummarizer with Anthropic backend.""" - - @pytest.fixture - def meeting_id(self) -> MeetingId: - """Create test meeting ID.""" - return MeetingId(uuid4()) - - @pytest.mark.asyncio - async def test_summarize_returns_result( - self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Summarize should return SummarizationResult.""" - response_content = _valid_json_response( - summary="Anthropic summary.", - key_points=[{"text": "Point", "segment_ids": [0]}], - ) - - def create_response(**_: Any) -> types.SimpleNamespace: - return types.SimpleNamespace( - content=[types.SimpleNamespace(text=response_content)], - usage=types.SimpleNamespace(input_tokens=50, output_tokens=100), - ) - - mock_client = types.SimpleNamespace(messages=types.SimpleNamespace(create=create_response)) - mock_module = types.ModuleType("anthropic") - mock_module.Anthropic = lambda **_: mock_client - monkeypatch.setitem(sys.modules, "anthropic", mock_module) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.ANTHROPIC) - segments = [_segment(0, "Test point")] - request = SummarizationRequest(meeting_id=meeting_id, segments=segments) - - result = await summarizer.summarize(request) - - assert result.provider_name == "anthropic" - assert result.summary.executive_summary == "Anthropic summary." - assert result.tokens_used == 150 - - @pytest.mark.asyncio - async def test_raises_unavailable_when_package_missing( - self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Should raise ProviderUnavailableError when package not installed.""" - monkeypatch.delitem(sys.modules, "anthropic", raising=False) - - import builtins - - original_import = builtins.__import__ - - def mock_import(name: str, *args: Any, **kwargs: Any) -> Any: - if name == "anthropic": - raise ImportError("No module named 'anthropic'") - return original_import(name, *args, **kwargs) - - monkeypatch.setattr(builtins, "__import__", mock_import) - - from noteflow.infrastructure.summarization import cloud_provider - - summarizer = cloud_provider.CloudSummarizer( - api_key="test-key", backend=CloudBackend.ANTHROPIC - ) - summarizer._client = None - - segments = [_segment(0, "Test")] - request = SummarizationRequest(meeting_id=meeting_id, segments=segments) - - with pytest.raises(ProviderUnavailableError, match="anthropic package"): - await summarizer.summarize(request) - - @pytest.mark.asyncio - async def test_raises_invalid_response_on_empty_content( - self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Should raise InvalidResponseError on empty response.""" - - def create_empty_response(**_: Any) -> types.SimpleNamespace: - return types.SimpleNamespace( - content=[], - usage=types.SimpleNamespace(input_tokens=10, output_tokens=0), - ) - - mock_client = types.SimpleNamespace( - messages=types.SimpleNamespace(create=create_empty_response) - ) - mock_module = types.ModuleType("anthropic") - mock_module.Anthropic = lambda **_: mock_client - monkeypatch.setitem(sys.modules, "anthropic", mock_module) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.ANTHROPIC) - segments = [_segment(0, 
"Test")] - request = SummarizationRequest(meeting_id=meeting_id, segments=segments) - - with pytest.raises(InvalidResponseError, match="Empty response"): - await summarizer.summarize(request) - - -class TestCloudSummarizerFiltering: - """Tests for response filtering in CloudSummarizer.""" - - @pytest.fixture - def meeting_id(self) -> MeetingId: - """Create test meeting ID.""" - return MeetingId(uuid4()) - - @pytest.mark.asyncio - async def test_filters_invalid_segment_ids( - self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Invalid segment_ids should be filtered from response.""" - response_content = _valid_json_response( - summary="Test", - key_points=[{"text": "Point", "segment_ids": [0, 99, 100]}], - ) - - def create_response(**_: Any) -> types.SimpleNamespace: - return types.SimpleNamespace( - choices=[ - types.SimpleNamespace(message=types.SimpleNamespace(content=response_content)) - ], - usage=None, - ) - - mock_client = types.SimpleNamespace( - chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response)) - ) - mock_module = types.ModuleType("openai") - mock_module.OpenAI = lambda **_: mock_client - monkeypatch.setitem(sys.modules, "openai", mock_module) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(api_key="test-key") - segments = [_segment(0, "Only valid segment")] - request = SummarizationRequest(meeting_id=meeting_id, segments=segments) - - result = await summarizer.summarize(request) - - assert result.summary.key_points[0].segment_ids == [0] - - @pytest.mark.asyncio - async def test_respects_max_limits( - self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Max limits should truncate response items.""" - response_content = _valid_json_response( - summary="Test", - key_points=[{"text": f"Point {i}", "segment_ids": [0]} for i in range(10)], - action_items=[{"text": f"Action {i}", "segment_ids": [0]} for i in range(10)], - ) - - def create_response(**_: Any) -> types.SimpleNamespace: - return types.SimpleNamespace( - choices=[ - types.SimpleNamespace(message=types.SimpleNamespace(content=response_content)) - ], - usage=None, - ) - - mock_client = types.SimpleNamespace( - chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response)) - ) - mock_module = types.ModuleType("openai") - mock_module.OpenAI = lambda **_: mock_client - monkeypatch.setitem(sys.modules, "openai", mock_module) - - from noteflow.infrastructure.summarization import CloudSummarizer - - summarizer = CloudSummarizer(api_key="test-key") - segments = [_segment(0, "Test")] - request = SummarizationRequest( - meeting_id=meeting_id, - segments=segments, - max_key_points=2, - max_action_items=3, - ) - - result = await summarizer.summarize(request) - - assert len(result.summary.key_points) == 2 - assert len(result.summary.action_items) == 3 -```` - -## File: tests/integration/conftest.py -````python -"""Pytest fixtures for integration tests.""" - -from __future__ import annotations - -import time -from collections.abc import AsyncGenerator -from importlib import import_module -from typing import TYPE_CHECKING -from urllib.parse import quote - -import pytest -from sqlalchemy import text -from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine - -if TYPE_CHECKING: - from collections.abc import Self - -from noteflow.infrastructure.persistence.models import Base - - -# Store container reference at module level to reuse -class 
PgTestContainer: - """Minimal Postgres testcontainer wrapper with custom readiness wait.""" - - def __init__( - self, - image: str = "pgvector/pgvector:pg16", - username: str = "test", - password: str = "test", - dbname: str = "noteflow_test", - port: int = 5432, - ) -> None: - self.username = username - self.password = password - self.dbname = dbname - self.port = port - - container_module = import_module("testcontainers.core.container") - docker_container_cls = container_module.DockerContainer - self._container = ( - docker_container_cls(image) - .with_env("POSTGRES_USER", username) - .with_env("POSTGRES_PASSWORD", password) - .with_env("POSTGRES_DB", dbname) - .with_exposed_ports(port) - ) - - def start(self) -> Self: - """Start the container.""" - self._container.start() - self._wait_until_ready() - return self - - def stop(self) -> None: - """Stop the container.""" - self._container.stop() - - def get_connection_url(self) -> str: - """Return a SQLAlchemy-style connection URL.""" - host = self._container.get_container_host_ip() - port = self._container._get_exposed_port(self.port) - quoted_password = quote(self.password, safe=" +") - return f"postgresql+psycopg2://{self.username}:{quoted_password}@{host}:{port}/{self.dbname}" - - def _wait_until_ready(self, timeout: float = 30.0, interval: float = 0.5) -> None: - """Wait for Postgres to accept connections by running a simple query.""" - start_time = time.time() - escaped_password = self.password.replace("'", "'\"'\"'") - cmd = [ - "sh", - "-c", - ( - f"PGPASSWORD='{escaped_password}' " - f"psql --username {self.username} --dbname {self.dbname} --host 127.0.0.1 " - "-c 'select 1;'" - ), - ] - last_error: str | None = None - - while True: - result = self._container.exec(cmd) - if result.exit_code == 0: - return - if result.output: - last_error = result.output.decode(errors="ignore") - if time.time() - start_time > timeout: - raise TimeoutError( - "Postgres container did not become ready in time" - + (f": {last_error}" if last_error else "") - ) - time.sleep(interval) - - -_container: PgTestContainer | None = None -_database_url: str | None = None - - -def get_or_create_container() -> tuple[PgTestContainer, str]: - """Get or create the PostgreSQL container.""" - global _container, _database_url - - if _container is None: - container = PgTestContainer().start() - _container = container - url = container.get_connection_url() - _database_url = url.replace("postgresql+psycopg2://", "postgresql+asyncpg://") - - assert _container is not None, "Container should be initialized" - assert _database_url is not None, "Database URL should be initialized" - return _container, _database_url - - -@pytest.fixture -async def session_factory() -> AsyncGenerator[async_sessionmaker[AsyncSession], None]: - """Create a session factory and initialize the database schema.""" - _, database_url = get_or_create_container() - - engine = create_async_engine(database_url, echo=False) - - async with engine.begin() as conn: - # Create pgvector extension and schema - await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) - await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE")) - await conn.execute(text("CREATE SCHEMA noteflow")) - # Create all tables - await conn.run_sync(Base.metadata.create_all) - - yield async_sessionmaker( - engine, - class_=AsyncSession, - expire_on_commit=False, - autocommit=False, - autoflush=False, - ) - # Cleanup - drop schema to reset for next test - async with engine.begin() as conn: - await conn.execute(text("DROP SCHEMA 
IF EXISTS noteflow CASCADE")) - - await engine.dispose() - - -@pytest.fixture -async def session( - session_factory: async_sessionmaker[AsyncSession], -) -> AsyncGenerator[AsyncSession, None]: - """Provide a database session for each test.""" - async with session_factory() as session: - yield session - # Rollback any uncommitted changes - await session.rollback() - - -def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: - """Cleanup container after all tests complete.""" - global _container - if _container is not None: - _container.stop() - _container = None +from noteflow.client.components.vu_meter import VuMeterComponent +__all__ = [ + "AnnotationDisplayComponent", + "AnnotationToolbarComponent", + "AsyncOperationMixin", + "BackgroundWorkerMixin", + "ConnectionPanelComponent", + "MeetingLibraryComponent", + "PlaybackControlsComponent", + "PlaybackSyncController", + "RecordingTimerComponent", + "SummaryPanelComponent", + "TranscriptComponent", + "VuMeterComponent", +] ```` ## File: src/noteflow/client/components/transcript.py ````python -"""Transcript display component with click-to-seek and highlighting. - -Uses TranscriptSegment from grpc.client and format_timestamp from _formatting. -Does not recreate any types - imports and uses existing ones. -""" - from __future__ import annotations - import hashlib from collections.abc import Callable from typing import TYPE_CHECKING - import flet as ft - -# REUSE existing formatting - do not recreate from noteflow.infrastructure.export._formatting import format_timestamp - if TYPE_CHECKING: from noteflow.client.state import AppState - - # REUSE existing types - do not recreate from noteflow.grpc.client import ServerInfo, TranscriptSegment - - class TranscriptComponent: - """Transcript segment display with click-to-seek, highlighting, and search. - - Uses TranscriptSegment from grpc.client and format_timestamp from _formatting. - """ - def __init__( self, state: AppState, on_segment_click: Callable[[int], None] | None = None, ) -> None: - """Initialize transcript component. - - Args: - state: Centralized application state. - on_segment_click: Callback when segment clicked (receives segment index). - """ self._state = state self._on_segment_click = on_segment_click self._list_view: ft.ListView | None = None - self._segment_rows: list[ft.Container | None] = [] # Track rows for highlighting + self._segment_rows: list[ft.Container | None] = [] self._search_field: ft.TextField | None = None self._search_query: str = "" - self._partial_row: ft.Container | None = None # Live partial at bottom - + self._partial_row: ft.Container | None = None def build(self) -> ft.Column: - """Build transcript list view with search. - - Returns: - Column with search field and bordered ListView. - """ self._search_field = ft.TextField( label="Search transcript", prefix_icon=ft.Icons.SEARCH, @@ -25893,15 +12278,13 @@ class TranscriptComponent: dense=True, height=40, ) - self._list_view = ft.ListView( spacing=10, padding=10, - auto_scroll=False, # We control scrolling for sync + auto_scroll=False, height=260, ) self._segment_rows.clear() - return ft.Column( [ self._search_field, @@ -25913,36 +12296,17 @@ class TranscriptComponent: ], spacing=5, ) - def add_segment(self, segment: TranscriptSegment) -> None: - """Add transcript segment to display. - - For final segments, adds to transcript list. - For partials, updates the live partial row at bottom. - - Args: - segment: Transcript segment from server. 
- """ if segment.is_final: - # Clear partial text when we get a final self._state.current_partial_text = "" self._state.transcript_segments.append(segment) self._state.run_on_ui_thread(lambda: self._render_final_segment(segment)) else: - # Update partial text self._state.current_partial_text = segment.text self._state.run_on_ui_thread(lambda: self._render_partial(segment.text)) - def display_server_info(self, info: ServerInfo) -> None: - """Display server info in transcript area. - - Args: - info: Server info from connection. - """ self._state.run_on_ui_thread(lambda: self._render_server_info(info)) - def clear(self) -> None: - """Clear all transcript segments and partials.""" self._state.clear_transcript() self._segment_rows.clear() self._partial_row = None @@ -25952,76 +12316,39 @@ class TranscriptComponent: if self._list_view: self._list_view.controls.clear() self._state.request_update() - def _on_search_change(self, e: ft.ControlEvent) -> None: - """Handle search field change. - - Args: - e: Control event with new search value. - """ self._search_query = (e.control.value or "").lower() self._rerender_all_segments() - def _rerender_all_segments(self) -> None: - """Re-render all segments with current search filter.""" if not self._list_view: return - self._list_view.controls.clear() self._segment_rows.clear() - for idx, segment in enumerate(self._state.transcript_segments): - # Filter by search query if self._search_query and self._search_query not in segment.text.lower(): - # Add placeholder to maintain index alignment self._segment_rows.append(None) continue - - # Use original index for click handling container = self._create_segment_row(segment, idx) self._segment_rows.append(container) self._list_view.controls.append(container) - self._state.request_update() - def _render_final_segment(self, segment: TranscriptSegment) -> None: - """Render final segment with click handler (UI thread only). - - Args: - segment: Transcript segment to render. - """ if not self._list_view: return - - # Remove partial row if present (final replaces partial) if self._partial_row and self._partial_row in self._list_view.controls: self._list_view.controls.remove(self._partial_row) self._partial_row = None - - # Use the actual index from state (segments are appended before rendering) segment_index = len(self._state.transcript_segments) - 1 - - # Filter by search query during live rendering if self._search_query and self._search_query not in segment.text.lower(): self._segment_rows.append(None) return - container = self._create_segment_row(segment, segment_index) - self._segment_rows.append(container) self._list_view.controls.append(container) self._state.request_update() - def _render_partial(self, text: str) -> None: - """Render or update the partial text row at the bottom (UI thread only). - - Args: - text: Partial transcript text. 
- """ if not self._list_view or not text: return - - # Create or update partial row partial_content = ft.Row( [ ft.Text("[LIVE]", size=11, color=ft.Colors.BLUE_400, width=120, italic=True), @@ -26035,12 +12362,9 @@ class TranscriptComponent: ), ] ) - if self._partial_row: - # Update existing row self._partial_row.content = partial_content else: - # Create new row self._partial_row = ft.Container( content=partial_content, padding=5, @@ -26048,35 +12372,16 @@ class TranscriptComponent: bgcolor=ft.Colors.BLUE_50, ) self._list_view.controls.append(self._partial_row) - self._state.request_update() - def _create_segment_row(self, segment: TranscriptSegment, segment_index: int) -> ft.Container: - """Create a segment row container. - - Args: - segment: Transcript segment to render. - segment_index: Index for click handling. - - Returns: - Container with segment content. - """ - # REUSE existing format_timestamp from _formatting.py - # Format as time range for transcript display time_str = ( f"[{format_timestamp(segment.start_time)} - {format_timestamp(segment.end_time)}]" ) - - # Style based on finality color = ft.Colors.BLACK if segment.is_final else ft.Colors.GREY_600 weight = ft.FontWeight.NORMAL if segment.is_final else ft.FontWeight.W_300 - - # Build row content with optional speaker label row_controls: list[ft.Control] = [ ft.Text(time_str, size=11, color=ft.Colors.GREY_500, width=120), ] - - # Add speaker label if present if segment.speaker_id: speaker_color = self._get_speaker_color(segment.speaker_id) row_controls.append( @@ -26093,7 +12398,6 @@ class TranscriptComponent: margin=ft.margin.only(right=8), ) ) - row_controls.append( ft.Text( segment.text, @@ -26103,10 +12407,7 @@ class TranscriptComponent: expand=True, ) ) - row = ft.Row(row_controls) - - # Wrap in container for click handling and highlighting return ft.Container( content=row, padding=5, @@ -26114,17 +12415,7 @@ class TranscriptComponent: on_click=lambda e, idx=segment_index: self._handle_click(idx), ink=True, ) - def _get_speaker_color(self, speaker_id: str) -> str: - """Get consistent color for a speaker. - - Args: - speaker_id: Speaker identifier. - - Returns: - Color string for the speaker label. - """ - # Use hash to get consistent color index colors = [ ft.Colors.BLUE_400, ft.Colors.GREEN_400, @@ -26137,32 +12428,18 @@ class TranscriptComponent: ] digest = hashlib.md5(speaker_id.encode("utf-8")).hexdigest() return colors[int(digest, 16) % len(colors)] - def _handle_click(self, segment_index: int) -> None: - """Handle segment row click. - - Args: - segment_index: Index of clicked segment. - """ if self._on_segment_click: self._on_segment_click(segment_index) - def _render_server_info(self, info: ServerInfo) -> None: - """Render server info (UI thread only). - - Args: - info: Server info to display. - """ if not self._list_view: return - asr_status = "ready" if info.asr_ready else "not ready" info_text = ( f"Connected to server v{info.version} | " f"ASR: {info.asr_model} ({asr_status}) | " f"Active meetings: {info.active_meetings}" ) - self._list_view.controls.append( ft.Text( info_text, @@ -26172,13 +12449,7 @@ class TranscriptComponent: ) ) self._state.request_update() - def update_highlight(self, highlighted_index: int | None) -> None: - """Update visual highlight on segments. - - Args: - highlighted_index: Index of segment to highlight, or None to clear. 
- """ for idx, container in enumerate(self._segment_rows): if container is None: continue @@ -26188,27 +12459,15 @@ class TranscriptComponent: else: container.bgcolor = None container.border = None - - # Scroll to highlighted segment if highlighted_index is not None: self._scroll_to_segment(highlighted_index) - self._state.request_update() - def _scroll_to_segment(self, segment_index: int) -> None: - """Scroll ListView to show specified segment. - - Args: - segment_index: Index of segment to scroll to. - """ if not self._list_view or segment_index >= len(self._segment_rows): return - container = self._segment_rows[segment_index] if container is None: return - - # Estimate row height for scroll calculation estimated_row_height = 50 offset = segment_index * estimated_row_height self._list_view.scroll_to(offset=offset, duration=200) @@ -26216,27 +12475,16 @@ class TranscriptComponent: ## File: src/noteflow/config/settings.py ````python -"""NoteFlow application settings using Pydantic settings.""" - from __future__ import annotations - import json from functools import lru_cache from pathlib import Path from typing import Annotated, cast - from pydantic import Field, PostgresDsn, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict - - def _default_meetings_dir() -> Path: - """Return default meetings directory path.""" return Path.home() / ".noteflow" / "meetings" - - class TriggerSettings(BaseSettings): - """Client trigger settings loaded from environment variables.""" - model_config = SettingsConfigDict( env_prefix="NOTEFLOW_", env_file=".env", @@ -26244,8 +12492,6 @@ class TriggerSettings(BaseSettings): enable_decoding=False, extra="ignore", ) - - # Trigger settings (client-side) trigger_enabled: Annotated[ bool, Field(default=False, description="Enable smart recording triggers (opt-in)"), @@ -26274,8 +12520,6 @@ class TriggerSettings(BaseSettings): float, Field(default=0.80, ge=0.0, le=1.0, description="Confidence to auto-start recording"), ] - - # App audio trigger tuning (system output from whitelisted apps) trigger_audio_enabled: Annotated[ bool, Field(default=True, description="Enable app audio activity detection"), @@ -26300,8 +12544,6 @@ class TriggerSettings(BaseSettings): int, Field(default=50, ge=10, le=1000, description="Max audio activity samples to retain"), ] - - # Calendar trigger tuning (optional integration) trigger_calendar_enabled: Annotated[ bool, Field(default=False, description="Enable calendar-based trigger detection"), @@ -26321,8 +12563,6 @@ class TriggerSettings(BaseSettings): description="Calendar events as JSON list of {start, end, title}", ), ] - - # Foreground app trigger tuning trigger_foreground_enabled: Annotated[ bool, Field(default=True, description="Enable foreground app detection"), @@ -26352,8 +12592,6 @@ class TriggerSettings(BaseSettings): list[str], Field(default_factory=list, description="Meeting app substrings to ignore"), ] - - # Signal weights trigger_weight_audio: Annotated[ float, Field(default=0.30, ge=0.0, le=1.0, description="Audio signal confidence weight"), @@ -26371,7 +12609,6 @@ class TriggerSettings(BaseSettings): float, Field(default=0.30, ge=0.0, le=1.0, description="Calendar signal confidence weight"), ] - @field_validator("trigger_meeting_apps", "trigger_suppressed_apps", mode="before") @classmethod def _parse_csv_list(cls, value: object) -> list[str]: @@ -26386,7 +12623,6 @@ class TriggerSettings(BaseSettings): if isinstance(parsed, list): return [str(item).strip() for item in parsed if str(item).strip()] 
return [item.strip() for item in value.split(",") if item.strip()] - @field_validator("trigger_calendar_events", mode="before") @classmethod def _parse_calendar_events(cls, value: object) -> list[dict[str, object]]: @@ -26408,27 +12644,7 @@ class TriggerSettings(BaseSettings): if isinstance(value, list): return [item for item in value if isinstance(item, dict)] return [] - - class Settings(TriggerSettings): - """Application settings loaded from environment variables. - - Environment variables: - NOTEFLOW_DATABASE_URL: PostgreSQL connection URL - Example: postgresql+asyncpg://user:pass@host:5432/dbname? - options=-csearch_path%3Dnoteflow - NOTEFLOW_DB_POOL_SIZE: Connection pool size (default: 5) - NOTEFLOW_DB_ECHO: Echo SQL statements (default: False) - NOTEFLOW_ASR_MODEL_SIZE: Whisper model size (default: base) - NOTEFLOW_ASR_DEVICE: ASR device (default: cpu) - NOTEFLOW_ASR_COMPUTE_TYPE: ASR compute type (default: int8) - NOTEFLOW_MEETINGS_DIR: Directory for meeting audio storage (default: ~/.noteflow/meetings) - NOTEFLOW_RETENTION_ENABLED: Enable automatic retention policy (default: False) - NOTEFLOW_RETENTION_DAYS: Days to retain completed meetings (default: 90) - NOTEFLOW_RETENTION_CHECK_INTERVAL_HOURS: Hours between retention checks (default: 24) - """ - - # Database settings database_url: Annotated[ PostgresDsn, Field( @@ -26444,8 +12660,6 @@ class Settings(TriggerSettings): bool, Field(default=False, description="Echo SQL statements to log"), ] - - # ASR settings asr_model_size: Annotated[ str, Field(default="base", description="Whisper model size"), @@ -26458,14 +12672,10 @@ class Settings(TriggerSettings): str, Field(default="int8", description="ASR compute type"), ] - - # Server settings grpc_port: Annotated[ int, Field(default=50051, ge=1, le=65535, description="gRPC server port"), ] - - # Storage settings meetings_dir: Annotated[ Path, Field( @@ -26473,8 +12683,6 @@ class Settings(TriggerSettings): description="Directory for meeting audio and metadata storage", ), ] - - # Retention settings retention_enabled: Annotated[ bool, Field(default=False, description="Enable automatic retention policy"), @@ -26487,8 +12695,6 @@ class Settings(TriggerSettings): int, Field(default=24, ge=1, le=168, description="Hours between retention checks"), ] - - # Diarization settings diarization_enabled: Annotated[ bool, Field(default=False, description="Enable speaker diarization"), @@ -26517,56 +12723,24 @@ class Settings(TriggerSettings): bool, Field(default=True, description="Enable post-meeting diarization refinement"), ] - @property def database_url_str(self) -> str: - """Return database URL as string.""" return str(self.database_url) - - def _load_settings() -> Settings: - """Load settings from environment. - - Returns: - Settings instance. - - Raises: - ValidationError: If required environment variables are not set. - """ - # pydantic-settings reads from environment; model_validate handles this return cast("Settings", Settings.model_validate({})) - - def _load_trigger_settings() -> TriggerSettings: - """Load trigger settings from environment.""" return cast("TriggerSettings", TriggerSettings.model_validate({})) - - @lru_cache def get_settings() -> Settings: - """Get cached settings instance. - - Returns: - Cached Settings instance loaded from environment. - - Raises: - ValidationError: If required environment variables are not set. 
- """ return _load_settings() - - @lru_cache def get_trigger_settings() -> TriggerSettings: - """Get cached trigger settings instance.""" return _load_trigger_settings() ```` ## File: src/noteflow/grpc/client.py ````python -"""NoteFlow gRPC client for Flet app integration.""" - from __future__ import annotations - import logging import queue import threading @@ -26574,41 +12748,27 @@ import time from collections.abc import Callable, Iterator from dataclasses import dataclass from typing import TYPE_CHECKING, Final - import grpc - from noteflow.config.constants import DEFAULT_SAMPLE_RATE - from .proto import noteflow_pb2, noteflow_pb2_grpc - if TYPE_CHECKING: import numpy as np from numpy.typing import NDArray - logger = logging.getLogger(__name__) - DEFAULT_SERVER: Final[str] = "localhost:50051" -CHUNK_TIMEOUT: Final[float] = 0.1 # Timeout for getting chunks from queue - - +CHUNK_TIMEOUT: Final[float] = 0.1 @dataclass class TranscriptSegment: - """Transcript segment from server.""" - segment_id: int text: str start_time: float end_time: float language: str is_final: bool - speaker_id: str = "" # Speaker identifier from diarization - speaker_confidence: float = 0.0 # Speaker assignment confidence - - + speaker_id: str = "" + speaker_confidence: float = 0.0 @dataclass class ServerInfo: - """Server information.""" - version: str asr_model: str asr_ready: bool @@ -26616,12 +12776,8 @@ class ServerInfo: active_meetings: int diarization_enabled: bool = False diarization_ready: bool = False - - @dataclass class MeetingInfo: - """Meeting information.""" - id: str title: str state: str @@ -26630,12 +12786,8 @@ class MeetingInfo: ended_at: float duration_seconds: float segment_count: int - - @dataclass class AnnotationInfo: - """Annotation information.""" - id: str meeting_id: str annotation_type: str @@ -26644,123 +12796,56 @@ class AnnotationInfo: end_time: float segment_ids: list[int] created_at: float - - @dataclass class ExportResult: - """Export result.""" - content: str format_name: str file_extension: str - - @dataclass class DiarizationResult: - """Result of speaker diarization refinement.""" - job_id: str status: str segments_updated: int speaker_ids: list[str] error_message: str = "" - @property def success(self) -> bool: - """Check if diarization succeeded.""" return self.status == "completed" and not self.error_message - @property def is_terminal(self) -> bool: - """Check if job reached a terminal state.""" return self.status in {"completed", "failed"} - - @dataclass class RenameSpeakerResult: - """Result of speaker rename operation.""" - segments_updated: int success: bool - - -# Callback types TranscriptCallback = Callable[[TranscriptSegment], None] ConnectionCallback = Callable[[bool, str], None] - - class NoteFlowClient: - """gRPC client for NoteFlow server. - - Provides async-safe methods for Flet app integration. - """ - def __init__( self, server_address: str = DEFAULT_SERVER, on_transcript: TranscriptCallback | None = None, on_connection_change: ConnectionCallback | None = None, ) -> None: - """Initialize the client. - - Args: - server_address: Server address (host:port). - on_transcript: Callback for transcript updates. - on_connection_change: Callback for connection state changes. 
- """ self._server_address = server_address self._on_transcript = on_transcript self._on_connection_change = on_connection_change - self._channel: grpc.Channel | None = None self._stub: noteflow_pb2_grpc.NoteFlowServiceStub | None = None self._connected = False - - # Streaming state self._stream_thread: threading.Thread | None = None self._audio_queue: queue.Queue[tuple[str, NDArray[np.float32], float]] = queue.Queue() self._stop_streaming = threading.Event() self._current_meeting_id: str | None = None - @property def connected(self) -> bool: - """Check if connected to server.""" return self._connected - @property def server_address(self) -> str: - """Get server address.""" return self._server_address - def connect(self, timeout: float = 5.0) -> bool: - """Connect to the server. - - Args: - timeout: Connection timeout in seconds. - - Returns: - True if connected successfully. - """ try: - self._channel = grpc.insecure_channel( - self._server_address, - options=[ - ("grpc.max_send_message_length", 100 * 1024 * 1024), - ("grpc.max_receive_message_length", 100 * 1024 * 1024), - ], - ) - - # Wait for channel to be ready - grpc.channel_ready_future(self._channel).result(timeout=timeout) - - self._stub = noteflow_pb2_grpc.NoteFlowServiceStub(self._channel) - self._connected = True - - logger.info("Connected to server at %s", self._server_address) - self._notify_connection(True, "Connected") - - return True - + return self._extracted_from_connect_11(timeout) except grpc.FutureTimeoutError: logger.error("Connection timeout: %s", self._server_address) self._notify_connection(False, "Connection timeout") @@ -26769,29 +12854,32 @@ class NoteFlowClient: logger.error("Connection failed: %s", e) self._notify_connection(False, str(e)) return False - + def _extracted_from_connect_11(self, timeout): + self._channel = grpc.insecure_channel( + self._server_address, + options=[ + ("grpc.max_send_message_length", 100 * 1024 * 1024), + ("grpc.max_receive_message_length", 100 * 1024 * 1024), + ], + ) + grpc.channel_ready_future(self._channel).result(timeout=timeout) + self._stub = noteflow_pb2_grpc.NoteFlowServiceStub(self._channel) + self._connected = True + logger.info("Connected to server at %s", self._server_address) + self._notify_connection(True, "Connected") + return True def disconnect(self) -> None: - """Disconnect from the server.""" self.stop_streaming() - if self._channel: self._channel.close() self._channel = None self._stub = None - self._connected = False logger.info("Disconnected from server") self._notify_connection(False, "Disconnected") - def get_server_info(self) -> ServerInfo | None: - """Get server information. - - Returns: - ServerInfo or None if request fails. - """ if not self._stub: return None - try: response = self._stub.GetServerInfo(noteflow_pb2.ServerInfoRequest()) return ServerInfo( @@ -26806,19 +12894,9 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to get server info: %s", e) return None - def create_meeting(self, title: str = "") -> MeetingInfo | None: - """Create a new meeting. - - Args: - title: Optional meeting title. - - Returns: - MeetingInfo or None if request fails. - """ if not self._stub: return None - try: request = noteflow_pb2.CreateMeetingRequest(title=title) response = self._stub.CreateMeeting(request) @@ -26826,19 +12904,9 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to create meeting: %s", e) return None - def stop_meeting(self, meeting_id: str) -> MeetingInfo | None: - """Stop a meeting. 
- - Args: - meeting_id: Meeting ID. - - Returns: - Updated MeetingInfo or None if request fails. - """ if not self._stub: return None - try: request = noteflow_pb2.StopMeetingRequest(meeting_id=meeting_id) response = self._stub.StopMeeting(request) @@ -26846,19 +12914,9 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to stop meeting: %s", e) return None - def get_meeting(self, meeting_id: str) -> MeetingInfo | None: - """Get meeting details. - - Args: - meeting_id: Meeting ID. - - Returns: - MeetingInfo or None if not found. - """ if not self._stub: return None - try: request = noteflow_pb2.GetMeetingRequest( meeting_id=meeting_id, @@ -26870,21 +12928,9 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to get meeting: %s", e) return None - def get_meeting_segments(self, meeting_id: str) -> list[TranscriptSegment]: - """Retrieve transcript segments for a meeting. - - Uses existing GetMeetingRequest with include_segments=True. - - Args: - meeting_id: Meeting ID. - - Returns: - List of TranscriptSegment or empty list if not found. - """ if not self._stub: return [] - try: request = noteflow_pb2.GetMeetingRequest( meeting_id=meeting_id, @@ -26908,19 +12954,9 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to get meeting segments: %s", e) return [] - def list_meetings(self, limit: int = 20) -> list[MeetingInfo]: - """List recent meetings. - - Args: - limit: Maximum number to return. - - Returns: - List of MeetingInfo. - """ if not self._stub: return [] - try: request = noteflow_pb2.ListMeetingsRequest( limit=limit, @@ -26931,74 +12967,43 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to list meetings: %s", e) return [] - def start_streaming(self, meeting_id: str) -> bool: - """Start streaming audio for a meeting. - - Args: - meeting_id: Meeting ID to stream to. - - Returns: - True if streaming started. - """ if not self._stub: logger.error("Not connected") return False - if self._stream_thread and self._stream_thread.is_alive(): logger.warning("Already streaming") return False - self._current_meeting_id = meeting_id self._stop_streaming.clear() - - # Clear any pending audio while not self._audio_queue.empty(): try: self._audio_queue.get_nowait() except queue.Empty: break - - # Start streaming thread self._stream_thread = threading.Thread( target=self._stream_worker, daemon=True, ) self._stream_thread.start() - logger.info("Started streaming for meeting %s", meeting_id) return True - def stop_streaming(self) -> None: - """Stop streaming audio.""" self._stop_streaming.set() - if self._stream_thread: self._stream_thread.join(timeout=2.0) self._stream_thread = None - self._current_meeting_id = None logger.info("Stopped streaming") - def send_audio( self, audio: NDArray[np.float32], timestamp: float | None = None, ) -> None: - """Send audio chunk to server. - - Non-blocking - queues audio for streaming thread. - - Args: - audio: Audio samples (float32, mono, 16kHz). - timestamp: Optional capture timestamp. 
- """ if not self._current_meeting_id: return - if timestamp is None: timestamp = time.time() - self._audio_queue.put( ( self._current_meeting_id, @@ -27006,14 +13011,10 @@ class NoteFlowClient: timestamp, ) ) - def _stream_worker(self) -> None: - """Background thread for audio streaming.""" if not self._stub: return - def audio_generator() -> Iterator[noteflow_pb2.AudioChunk]: - """Generate audio chunks from queue.""" while not self._stop_streaming.is_set(): try: meeting_id, audio, timestamp = self._audio_queue.get( @@ -27028,16 +13029,11 @@ class NoteFlowClient: ) except queue.Empty: continue - try: - # Start bidirectional stream responses = self._stub.StreamTranscription(audio_generator()) - - # Process responses for response in responses: if self._stop_streaming.is_set(): break - if response.update_type == noteflow_pb2.UPDATE_TYPE_FINAL: segment = TranscriptSegment( segment_id=response.segment.segment_id, @@ -27050,7 +13046,6 @@ class NoteFlowClient: speaker_confidence=response.segment.speaker_confidence, ) self._notify_transcript(segment) - elif response.update_type == noteflow_pb2.UPDATE_TYPE_PARTIAL: segment = TranscriptSegment( segment_id=0, @@ -27061,46 +13056,23 @@ class NoteFlowClient: is_final=False, ) self._notify_transcript(segment) - except grpc.RpcError as e: logger.error("Stream error: %s", e) self._notify_connection(False, f"Stream error: {e}") - def _notify_transcript(self, segment: TranscriptSegment) -> None: - """Notify transcript callback. - - Args: - segment: Transcript segment. - """ if self._on_transcript: try: self._on_transcript(segment) except Exception as e: logger.error("Transcript callback error: %s", e) - def _notify_connection(self, connected: bool, message: str) -> None: - """Notify connection callback. - - Args: - connected: Connection state. - message: Status message. - """ if self._on_connection_change: try: self._on_connection_change(connected, message) except Exception as e: logger.error("Connection callback error: %s", e) - @staticmethod def _proto_to_meeting_info(meeting: noteflow_pb2.Meeting) -> MeetingInfo: - """Convert proto Meeting to MeetingInfo. - - Args: - meeting: Proto meeting. - - Returns: - MeetingInfo dataclass. - """ state_map = { noteflow_pb2.MEETING_STATE_UNSPECIFIED: "unknown", noteflow_pb2.MEETING_STATE_CREATED: "created", @@ -27109,7 +13081,6 @@ class NoteFlowClient: noteflow_pb2.MEETING_STATE_COMPLETED: "completed", noteflow_pb2.MEETING_STATE_ERROR: "error", } - return MeetingInfo( id=meeting.id, title=meeting.title, @@ -27120,11 +13091,6 @@ class NoteFlowClient: duration_seconds=meeting.duration_seconds, segment_count=len(meeting.segments), ) - - # ========================================================================= - # Annotation Methods - # ========================================================================= - def add_annotation( self, meeting_id: str, @@ -27134,22 +13100,8 @@ class NoteFlowClient: end_time: float, segment_ids: list[int] | None = None, ) -> AnnotationInfo | None: - """Add an annotation to a meeting. - - Args: - meeting_id: Meeting ID. - annotation_type: Type of annotation (action_item, decision, note). - text: Annotation text. - start_time: Start time in seconds. - end_time: End time in seconds. - segment_ids: Optional list of linked segment IDs. - - Returns: - AnnotationInfo or None if request fails. 
- """ if not self._stub: return None - try: proto_type = self._annotation_type_to_proto(annotation_type) request = noteflow_pb2.AddAnnotationRequest( @@ -27165,19 +13117,9 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to add annotation: %s", e) return None - def get_annotation(self, annotation_id: str) -> AnnotationInfo | None: - """Get an annotation by ID. - - Args: - annotation_id: Annotation ID. - - Returns: - AnnotationInfo or None if not found. - """ if not self._stub: return None - try: request = noteflow_pb2.GetAnnotationRequest(annotation_id=annotation_id) response = self._stub.GetAnnotation(request) @@ -27185,26 +13127,14 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to get annotation: %s", e) return None - def list_annotations( self, meeting_id: str, start_time: float = 0, end_time: float = 0, ) -> list[AnnotationInfo]: - """List annotations for a meeting. - - Args: - meeting_id: Meeting ID. - start_time: Optional start time filter. - end_time: Optional end time filter. - - Returns: - List of AnnotationInfo. - """ if not self._stub: return [] - try: request = noteflow_pb2.ListAnnotationsRequest( meeting_id=meeting_id, @@ -27216,7 +13146,6 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to list annotations: %s", e) return [] - def update_annotation( self, annotation_id: str, @@ -27226,22 +13155,8 @@ class NoteFlowClient: end_time: float | None = None, segment_ids: list[int] | None = None, ) -> AnnotationInfo | None: - """Update an existing annotation. - - Args: - annotation_id: Annotation ID. - annotation_type: Optional new type. - text: Optional new text. - start_time: Optional new start time. - end_time: Optional new end time. - segment_ids: Optional new segment IDs. - - Returns: - Updated AnnotationInfo or None if request fails. - """ if not self._stub: return None - try: proto_type = ( self._annotation_type_to_proto(annotation_type) @@ -27261,19 +13176,9 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to update annotation: %s", e) return None - def delete_annotation(self, annotation_id: str) -> bool: - """Delete an annotation. - - Args: - annotation_id: Annotation ID. - - Returns: - True if deleted successfully. - """ if not self._stub: return False - try: request = noteflow_pb2.DeleteAnnotationRequest(annotation_id=annotation_id) response = self._stub.DeleteAnnotation(request) @@ -27281,19 +13186,10 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to delete annotation: %s", e) return False - @staticmethod def _proto_to_annotation_info( annotation: noteflow_pb2.Annotation, ) -> AnnotationInfo: - """Convert proto Annotation to AnnotationInfo. - - Args: - annotation: Proto annotation. - - Returns: - AnnotationInfo dataclass. - """ type_map = { noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED: "note", noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM: "action_item", @@ -27301,7 +13197,6 @@ class NoteFlowClient: noteflow_pb2.ANNOTATION_TYPE_NOTE: "note", noteflow_pb2.ANNOTATION_TYPE_RISK: "risk", } - return AnnotationInfo( id=annotation.id, meeting_id=annotation.meeting_id, @@ -27312,17 +13207,8 @@ class NoteFlowClient: segment_ids=list(annotation.segment_ids), created_at=annotation.created_at, ) - @staticmethod def _annotation_type_to_proto(annotation_type: str) -> int: - """Convert annotation type string to proto enum. - - Args: - annotation_type: Type string. - - Returns: - Proto enum value. 
- """ type_map = { "action_item": noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM, "decision": noteflow_pb2.ANNOTATION_TYPE_DECISION, @@ -27330,28 +13216,13 @@ class NoteFlowClient: "risk": noteflow_pb2.ANNOTATION_TYPE_RISK, } return type_map.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_NOTE) - - # ========================================================================= - # Export Methods - # ========================================================================= - def export_transcript( self, meeting_id: str, format_name: str = "markdown", ) -> ExportResult | None: - """Export meeting transcript. - - Args: - meeting_id: Meeting ID. - format_name: Export format (markdown, html). - - Returns: - ExportResult or None if request fails. - """ if not self._stub: return None - try: proto_format = self._export_format_to_proto(format_name) request = noteflow_pb2.ExportTranscriptRequest( @@ -27367,27 +13238,16 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to export transcript: %s", e) return None - @staticmethod def _export_format_to_proto(format_name: str) -> int: - """Convert export format string to proto enum. - - Args: - format_name: Format string. - - Returns: - Proto enum value. - """ format_map = { "markdown": noteflow_pb2.EXPORT_FORMAT_MARKDOWN, "md": noteflow_pb2.EXPORT_FORMAT_MARKDOWN, "html": noteflow_pb2.EXPORT_FORMAT_HTML, } return format_map.get(format_name.lower(), noteflow_pb2.EXPORT_FORMAT_MARKDOWN) - @staticmethod def _job_status_to_str(status: int) -> str: - """Convert job status enum to string.""" status_map = { noteflow_pb2.JOB_STATUS_UNSPECIFIED: "unspecified", noteflow_pb2.JOB_STATUS_QUEUED: "queued", @@ -27396,31 +13256,13 @@ class NoteFlowClient: noteflow_pb2.JOB_STATUS_FAILED: "failed", } return status_map.get(status, "unspecified") - - # ========================================================================= - # Speaker Diarization Methods - # ========================================================================= - def refine_speaker_diarization( self, meeting_id: str, num_speakers: int | None = None, ) -> DiarizationResult | None: - """Run post-meeting speaker diarization refinement. - - Requests the server to run offline diarization on the meeting audio - as a background job and update segment speaker assignments. - - Args: - meeting_id: Meeting ID. - num_speakers: Optional known number of speakers (auto-detect if None). - - Returns: - DiarizationResult with job status or None if request fails. - """ if not self._stub: return None - try: request = noteflow_pb2.RefineSpeakerDiarizationRequest( meeting_id=meeting_id, @@ -27437,12 +13279,9 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to refine speaker diarization: %s", e) return None - def get_diarization_job_status(self, job_id: str) -> DiarizationResult | None: - """Get status for a diarization background job.""" if not self._stub: return None - try: request = noteflow_pb2.GetDiarizationJobStatusRequest(job_id=job_id) response = self._stub.GetDiarizationJobStatus(request) @@ -27456,26 +13295,14 @@ class NoteFlowClient: except grpc.RpcError as e: logger.error("Failed to get diarization job status: %s", e) return None - def rename_speaker( self, meeting_id: str, old_speaker_id: str, new_speaker_name: str, ) -> RenameSpeakerResult | None: - """Rename a speaker in all segments of a meeting. - - Args: - meeting_id: Meeting ID. - old_speaker_id: Current speaker ID (e.g., "SPEAKER_00"). - new_speaker_name: New speaker name (e.g., "Alice"). 
- - Returns: - RenameSpeakerResult or None if request fails. - """ if not self._stub: return None - try: request = noteflow_pb2.RenameSpeakerRequest( meeting_id=meeting_id, @@ -27492,620 +13319,9 @@ class NoteFlowClient: return None ```` -## File: tests/integration/test_repositories.py -````python -"""Integration tests for SQLAlchemy repositories.""" - -from __future__ import annotations - -from datetime import UTC, datetime -from typing import TYPE_CHECKING -from uuid import uuid4 - -import pytest - -from noteflow.domain.entities import Annotation, Meeting, Segment, Summary, WordTiming -from noteflow.domain.entities.summary import ActionItem, KeyPoint -from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState -from noteflow.infrastructure.persistence.repositories import ( - SqlAlchemyAnnotationRepository, - SqlAlchemyMeetingRepository, - SqlAlchemySegmentRepository, - SqlAlchemySummaryRepository, -) - -if TYPE_CHECKING: - from sqlalchemy.ext.asyncio import AsyncSession - - -@pytest.mark.integration -class TestMeetingRepository: - """Integration tests for SqlAlchemyMeetingRepository.""" - - async def test_create_and_get_meeting(self, session: AsyncSession) -> None: - """Test creating and retrieving a meeting.""" - repo = SqlAlchemyMeetingRepository(session) - meeting = Meeting.create(title="Test Meeting", metadata={"key": "value"}) - - # Create - await repo.create(meeting) - await session.commit() - - # Get - retrieved = await repo.get(meeting.id) - - assert retrieved is not None - assert retrieved.id == meeting.id - assert retrieved.title == "Test Meeting" - assert retrieved.state == MeetingState.CREATED - assert retrieved.metadata == {"key": "value"} - - async def test_get_meeting_not_found(self, session: AsyncSession) -> None: - """Test retrieving non-existent meeting returns None.""" - repo = SqlAlchemyMeetingRepository(session) - meeting_id = MeetingId(Meeting.create().id) - - result = await repo.get(meeting_id) - - assert result is None - - async def test_update_meeting(self, session: AsyncSession) -> None: - """Test updating a meeting.""" - repo = SqlAlchemyMeetingRepository(session) - meeting = Meeting.create(title="Original") - await repo.create(meeting) - await session.commit() - - # Update state and title - meeting.start_recording() - await repo.update(meeting) - await session.commit() - - # Verify - retrieved = await repo.get(meeting.id) - assert retrieved is not None - assert retrieved.state == MeetingState.RECORDING - assert retrieved.started_at is not None - - async def test_delete_meeting(self, session: AsyncSession) -> None: - """Test deleting a meeting.""" - repo = SqlAlchemyMeetingRepository(session) - meeting = Meeting.create(title="To Delete") - await repo.create(meeting) - await session.commit() - - # Delete - result = await repo.delete(meeting.id) - await session.commit() - - assert result is True - - # Verify deleted - retrieved = await repo.get(meeting.id) - assert retrieved is None - - async def test_delete_meeting_not_found(self, session: AsyncSession) -> None: - """Test deleting non-existent meeting returns False.""" - repo = SqlAlchemyMeetingRepository(session) - meeting_id = MeetingId(Meeting.create().id) - - result = await repo.delete(meeting_id) - - assert result is False - - async def test_list_all_meetings(self, session: AsyncSession) -> None: - """Test listing all meetings with pagination.""" - repo = SqlAlchemyMeetingRepository(session) - - # Create multiple meetings - meetings = [Meeting.create(title=f"Meeting {i}") 
for i in range(5)] - for m in meetings: - await repo.create(m) - await session.commit() - - # List with pagination - result, total = await repo.list_all(limit=3, offset=0) - - assert len(result) == 3 - assert total == 5 - - async def test_list_meetings_filter_by_state(self, session: AsyncSession) -> None: - """Test filtering meetings by state.""" - repo = SqlAlchemyMeetingRepository(session) - - # Create meetings in different states - created = Meeting.create(title="Created") - await repo.create(created) - - recording = Meeting.create(title="Recording") - recording.start_recording() - await repo.create(recording) - await session.commit() - - # Filter by RECORDING state - result, _ = await repo.list_all(states=[MeetingState.RECORDING]) - - assert len(result) == 1 - assert result[0].title == "Recording" - - async def test_count_by_state(self, session: AsyncSession) -> None: - """Test counting meetings by state.""" - repo = SqlAlchemyMeetingRepository(session) - - # Create meetings - for _ in range(3): - await repo.create(Meeting.create()) - await session.commit() - - count = await repo.count_by_state(MeetingState.CREATED) - - assert count == 3 - - -@pytest.mark.integration -class TestSegmentRepository: - """Integration tests for SqlAlchemySegmentRepository.""" - - async def test_add_and_get_segments(self, session: AsyncSession) -> None: - """Test adding and retrieving segments.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - segment_repo = SqlAlchemySegmentRepository(session) - - # Create meeting first - meeting = Meeting.create(title="Test") - await meeting_repo.create(meeting) - await session.commit() - - # Add segments - segment = Segment( - segment_id=0, - text="Hello world", - start_time=0.0, - end_time=2.5, - meeting_id=meeting.id, - language="en", - ) - await segment_repo.add(meeting.id, segment) - await session.commit() - - # Get segments - result = await segment_repo.get_by_meeting(meeting.id) - - assert len(result) == 1 - assert result[0].text == "Hello world" - assert result[0].db_id is not None - - async def test_add_segment_with_words(self, session: AsyncSession) -> None: - """Test adding segment with word-level timing.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - segment_repo = SqlAlchemySegmentRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - words = [ - WordTiming(word="Hello", start_time=0.0, end_time=0.5, probability=0.95), - WordTiming(word="world", start_time=0.5, end_time=1.0, probability=0.98), - ] - segment = Segment( - segment_id=0, - text="Hello world", - start_time=0.0, - end_time=1.0, - meeting_id=meeting.id, - words=words, - ) - await segment_repo.add(meeting.id, segment) - await session.commit() - - result = await segment_repo.get_by_meeting(meeting.id, include_words=True) - - assert len(result[0].words) == 2 - assert result[0].words[0].word == "Hello" - - async def test_add_batch_segments(self, session: AsyncSession) -> None: - """Test batch adding segments.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - segment_repo = SqlAlchemySegmentRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - segments = [ - Segment(segment_id=i, text=f"Segment {i}", start_time=float(i), end_time=float(i + 1)) - for i in range(3) - ] - await segment_repo.add_batch(meeting.id, segments) - await session.commit() - - result = await segment_repo.get_by_meeting(meeting.id) - - assert len(result) == 3 - - async def 
test_get_next_segment_id(self, session: AsyncSession) -> None: - """Test get_next_segment_id returns max + 1 or 0 when empty.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - segment_repo = SqlAlchemySegmentRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - assert await segment_repo.get_next_segment_id(meeting.id) == 0 - - segments = [ - Segment(segment_id=0, text="Segment 0", start_time=0.0, end_time=1.0), - Segment(segment_id=5, text="Segment 5", start_time=1.0, end_time=2.0), - ] - await segment_repo.add_batch(meeting.id, segments) - await session.commit() - - assert await segment_repo.get_next_segment_id(meeting.id) == 6 - - async def test_update_embedding_and_retrieve(self, session: AsyncSession) -> None: - """Test updating a segment embedding persists to the database.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - segment_repo = SqlAlchemySegmentRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - segment = Segment(segment_id=0, text="Hello", start_time=0.0, end_time=1.0) - await segment_repo.add(meeting.id, segment) - await session.commit() - - assert segment.db_id is not None - embedding = [0.1] * 1536 - await segment_repo.update_embedding(segment.db_id, embedding) - await session.commit() - - result = await segment_repo.get_by_meeting(meeting.id) - assert result[0].embedding == pytest.approx(embedding) - - async def test_search_semantic_orders_by_similarity(self, session: AsyncSession) -> None: - """Test semantic search returns closest matches first.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - segment_repo = SqlAlchemySegmentRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - emb1 = [1.0] + [0.0] * 1535 - emb2 = [0.0, 1.0] + [0.0] * 1534 - - segment1 = Segment( - segment_id=0, - text="First", - start_time=0.0, - end_time=1.0, - embedding=emb1, - ) - segment2 = Segment( - segment_id=1, - text="Second", - start_time=1.0, - end_time=2.0, - embedding=emb2, - ) - await segment_repo.add_batch(meeting.id, [segment1, segment2]) - await session.commit() - - results = await segment_repo.search_semantic(query_embedding=emb1, limit=2) - assert len(results) == 2 - assert results[0][0].segment_id == 0 - assert results[0][1] >= results[1][1] - - -@pytest.mark.integration -class TestSummaryRepository: - """Integration tests for SqlAlchemySummaryRepository.""" - - async def test_save_and_get_summary(self, session: AsyncSession) -> None: - """Test saving and retrieving summary.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - summary_repo = SqlAlchemySummaryRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - summary = Summary( - meeting_id=meeting.id, - executive_summary="This was a productive meeting.", - generated_at=datetime.now(UTC), - model_version="test-v1", - ) - await summary_repo.save(summary) - await session.commit() - - result = await summary_repo.get_by_meeting(meeting.id) - - assert result is not None - assert result.executive_summary == "This was a productive meeting." 
- assert result.model_version == "test-v1" - - async def test_save_summary_with_key_points(self, session: AsyncSession) -> None: - """Test saving summary with key points.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - summary_repo = SqlAlchemySummaryRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - key_points = [ - KeyPoint(text="Point 1", segment_ids=[0, 1]), - KeyPoint(text="Point 2", segment_ids=[2]), - ] - summary = Summary( - meeting_id=meeting.id, - executive_summary="Summary", - key_points=key_points, - ) - await summary_repo.save(summary) - await session.commit() - - result = await summary_repo.get_by_meeting(meeting.id) - - assert result is not None - assert len(result.key_points) == 2 - assert result.key_points[0].text == "Point 1" - - async def test_save_summary_with_action_items(self, session: AsyncSession) -> None: - """Test saving summary with action items.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - summary_repo = SqlAlchemySummaryRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - action_items = [ - ActionItem(text="Review PR", assignee="Alice", priority=2), - ] - summary = Summary( - meeting_id=meeting.id, - executive_summary="Summary", - action_items=action_items, - ) - await summary_repo.save(summary) - await session.commit() - - result = await summary_repo.get_by_meeting(meeting.id) - - assert result is not None - assert len(result.action_items) == 1 - assert result.action_items[0].text == "Review PR" - assert result.action_items[0].assignee == "Alice" - - async def test_delete_summary(self, session: AsyncSession) -> None: - """Test deleting summary.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - summary_repo = SqlAlchemySummaryRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - summary = Summary(meeting_id=meeting.id, executive_summary="To delete") - await summary_repo.save(summary) - await session.commit() - - result = await summary_repo.delete_by_meeting(meeting.id) - await session.commit() - - assert result is True - - retrieved = await summary_repo.get_by_meeting(meeting.id) - assert retrieved is None - - async def test_update_summary_replaces_items(self, session: AsyncSession) -> None: - """Test saving a summary twice replaces key points and action items.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - summary_repo = SqlAlchemySummaryRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - summary_v1 = Summary( - meeting_id=meeting.id, - executive_summary="v1", - key_points=[KeyPoint(text="Old KP")], - action_items=[ActionItem(text="Old AI")], - ) - await summary_repo.save(summary_v1) - await session.commit() - - summary_v2 = Summary( - meeting_id=meeting.id, - executive_summary="v2", - key_points=[KeyPoint(text="New KP")], - action_items=[ActionItem(text="New AI")], - ) - await summary_repo.save(summary_v2) - await session.commit() - - result = await summary_repo.get_by_meeting(meeting.id) - - assert result is not None - assert result.executive_summary == "v2" - assert [kp.text for kp in result.key_points] == ["New KP"] - assert [ai.text for ai in result.action_items] == ["New AI"] - - -@pytest.mark.integration -class TestAnnotationRepository: - """Integration tests for SqlAlchemyAnnotationRepository.""" - - async def test_add_and_get_annotation(self, session: 
AsyncSession) -> None: - """Test adding and retrieving annotation.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - annotation_repo = SqlAlchemyAnnotationRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - annotation = Annotation( - id=AnnotationId(uuid4()), - meeting_id=meeting.id, - annotation_type=AnnotationType.NOTE, - text="Decision made", - start_time=1.0, - end_time=2.0, - segment_ids=[0], - ) - await annotation_repo.add(annotation) - await session.commit() - - retrieved = await annotation_repo.get(annotation.id) - - assert retrieved is not None - assert retrieved.text == "Decision made" - assert retrieved.segment_ids == [0] - - async def test_get_by_meeting_ordered(self, session: AsyncSession) -> None: - """Test annotations returned in start_time order.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - annotation_repo = SqlAlchemyAnnotationRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - a1 = Annotation( - id=AnnotationId(uuid4()), - meeting_id=meeting.id, - annotation_type=AnnotationType.NOTE, - text="Second", - start_time=2.0, - end_time=3.0, - ) - a2 = Annotation( - id=AnnotationId(uuid4()), - meeting_id=meeting.id, - annotation_type=AnnotationType.NOTE, - text="First", - start_time=1.0, - end_time=2.0, - ) - await annotation_repo.add(a1) - await annotation_repo.add(a2) - await session.commit() - - result = await annotation_repo.get_by_meeting(meeting.id) - - assert [a.text for a in result] == ["First", "Second"] - - async def test_get_by_time_range_inclusive(self, session: AsyncSession) -> None: - """Test time range query includes boundary overlaps.""" - meeting_repo = SqlAlchemyMeetingRepository(session) - annotation_repo = SqlAlchemyAnnotationRepository(session) - - meeting = Meeting.create() - await meeting_repo.create(meeting) - await session.commit() - - a1 = Annotation( - id=AnnotationId(uuid4()), - meeting_id=meeting.id, - annotation_type=AnnotationType.NOTE, - text="Ends at boundary", - start_time=0.0, - end_time=1.0, - ) - a2 = Annotation( - id=AnnotationId(uuid4()), - meeting_id=meeting.id, - annotation_type=AnnotationType.NOTE, - text="Starts at boundary", - start_time=1.0, - end_time=2.0, - ) - await annotation_repo.add(a1) - await annotation_repo.add(a2) - await session.commit() - - result = await annotation_repo.get_by_time_range(meeting.id, start_time=1.0, end_time=1.0) - - assert {a.text for a in result} == {"Ends at boundary", "Starts at boundary"} - - async def test_update_annotation_not_found_raises(self, session: AsyncSession) -> None: - """Test update raises when annotation does not exist.""" - annotation_repo = SqlAlchemyAnnotationRepository(session) - - annotation = Annotation( - id=AnnotationId(uuid4()), - meeting_id=MeetingId(uuid4()), - annotation_type=AnnotationType.NOTE, - text="Missing", - start_time=0.0, - end_time=1.0, - ) - - with pytest.raises(ValueError, match=r"Annotation .* not found"): - await annotation_repo.update(annotation) - - async def test_delete_annotation_not_found(self, session: AsyncSession) -> None: - """Test deleting unknown annotation returns False.""" - annotation_repo = SqlAlchemyAnnotationRepository(session) - - result = await annotation_repo.delete(AnnotationId(uuid4())) - - assert result is False -```` - -## File: src/noteflow/client/components/__init__.py -````python -"""UI components for NoteFlow client. - -All components use existing types and utilities - no recreation. 
-""" - -from noteflow.client.components._async_mixin import AsyncOperationMixin -from noteflow.client.components._thread_mixin import BackgroundWorkerMixin -from noteflow.client.components.annotation_display import AnnotationDisplayComponent -from noteflow.client.components.annotation_toolbar import AnnotationToolbarComponent -from noteflow.client.components.connection_panel import ConnectionPanelComponent -from noteflow.client.components.meeting_library import MeetingLibraryComponent -from noteflow.client.components.playback_controls import PlaybackControlsComponent -from noteflow.client.components.playback_sync import PlaybackSyncController -from noteflow.client.components.recording_timer import RecordingTimerComponent -from noteflow.client.components.summary_panel import SummaryPanelComponent -from noteflow.client.components.transcript import TranscriptComponent -from noteflow.client.components.vu_meter import VuMeterComponent - -__all__ = [ - "AnnotationDisplayComponent", - "AnnotationToolbarComponent", - "AsyncOperationMixin", - "BackgroundWorkerMixin", - "ConnectionPanelComponent", - "MeetingLibraryComponent", - "PlaybackControlsComponent", - "PlaybackSyncController", - "RecordingTimerComponent", - "SummaryPanelComponent", - "TranscriptComponent", - "VuMeterComponent", -] -```` - ## File: src/noteflow/client/app.py ````python -"""NoteFlow Flet client application. - -Captures audio locally and streams to NoteFlow gRPC server for transcription. -Orchestrates UI components - does not contain component logic. -""" - from __future__ import annotations - import argparse import asyncio import logging @@ -28113,9 +13329,7 @@ import queue import threading import time from typing import TYPE_CHECKING, Final - import flet as ft - from noteflow.application.services import TriggerService from noteflow.client._trigger_mixin import TriggerMixin from noteflow.client.components import ( @@ -28141,11 +13355,9 @@ from noteflow.infrastructure.audio import ( ) from noteflow.infrastructure.security import AesGcmCryptoBox, KeyringKeyStore from noteflow.infrastructure.summarization import create_summarization_service - if TYPE_CHECKING: import numpy as np from numpy.typing import NDArray - from noteflow.application.services.summarization_service import SummarizationService from noteflow.grpc.client import ( AnnotationInfo, @@ -28155,35 +13367,13 @@ if TYPE_CHECKING: TranscriptSegment, ) from noteflow.infrastructure.triggers import AppAudioProvider, CalendarProvider - logger = logging.getLogger(__name__) - DEFAULT_SERVER: Final[str] = "localhost:50051" - - class NoteFlowClientApp(TriggerMixin): - """Flet client application for NoteFlow. - - Orchestrates UI components and recording logic. - Inherits trigger detection from TriggerMixin. - """ - def __init__(self, server_address: str = DEFAULT_SERVER) -> None: - """Initialize the app. - - Args: - server_address: NoteFlow server address. 
- """ - # Centralized state self._state = AppState(server_address=server_address) - - # Audio capture (REUSE existing SoundDeviceCapture) self._audio_capture: SoundDeviceCapture | None = None - - # Client reference (managed by ConnectionPanelComponent) self._client: NoteFlowClient | None = None - - # UI components (initialized in _build_ui) self._connection_panel: ConnectionPanelComponent | None = None self._vu_meter: VuMeterComponent | None = None self._timer: RecordingTimerComponent | None = None @@ -28191,73 +13381,37 @@ class NoteFlowClientApp(TriggerMixin): self._playback_controls: PlaybackControlsComponent | None = None self._sync_controller: PlaybackSyncController | None = None self._annotation_toolbar: AnnotationToolbarComponent | None = None - - # Meeting library (M4) self._meeting_library: MeetingLibraryComponent | None = None - - # Summarization (M6) self._summarization_service: SummarizationService | None = None self._summary_panel: SummaryPanelComponent | None = None - - # Annotation display for review mode (M4) self._annotation_display: AnnotationDisplayComponent | None = None - - # Audio reader for archived meetings (M4) self._audio_reader: MeetingAudioReader | None = None - - # Trigger detection (M5) self._trigger_settings: TriggerSettings | None = None self._trigger_service: TriggerService | None = None self._app_audio: AppAudioProvider | None = None self._calendar_provider: CalendarProvider | None = None self._trigger_poll_interval: float = 0.0 self._trigger_task: asyncio.Task | None = None - - # Recording buttons self._record_btn: ft.ElevatedButton | None = None self._stop_btn: ft.ElevatedButton | None = None - - # Audio frame consumer thread (process frames from audio callback thread) self._audio_frame_queue: queue.Queue[tuple[NDArray[np.float32], float]] = queue.Queue() self._audio_consumer_stop = threading.Event() self._audio_consumer_thread: threading.Thread | None = None - def run(self) -> None: - """Run the Flet application.""" ft.app(target=self._main) - def _main(self, page: ft.Page) -> None: - """Flet app entry point. - - Args: - page: Flet page. - """ self._state.set_page(page) page.title = "NoteFlow Client" page.window.width = 800 page.window.height = 600 page.padding = 20 - page.add(self._build_ui()) page.update() - - # Initialize trigger detection (M5) self._initialize_triggers() - - # Start trigger check loop if enabled (opt-in via settings) if self._state.trigger_enabled: self._trigger_task = page.run_task(self._trigger_check_loop) - - # Ensure background tasks are cancelled when the UI closes page.on_disconnect = lambda _e: self._shutdown() - def _build_ui(self) -> ft.Column: - """Build the main UI by composing components. - - Returns: - Main UI column. 
- """ - # Create components with state self._connection_panel = ConnectionPanelComponent( state=self._state, on_connected=self._on_connected, @@ -28267,14 +13421,10 @@ class NoteFlowClientApp(TriggerMixin): ) self._vu_meter = VuMeterComponent(state=self._state) self._timer = RecordingTimerComponent(state=self._state) - - # Transcript with click handler for playback sync self._transcript = TranscriptComponent( state=self._state, on_segment_click=self._on_segment_click, ) - - # Playback controls and sync self._playback_controls = PlaybackControlsComponent( state=self._state, on_position_change=self._on_playback_position_change, @@ -28283,37 +13433,25 @@ class NoteFlowClientApp(TriggerMixin): state=self._state, on_highlight_change=self._on_highlight_change, ) - - # Annotation toolbar self._annotation_toolbar = AnnotationToolbarComponent( state=self._state, get_client=lambda: self._client, ) - - # Annotation display for review mode self._annotation_display = AnnotationDisplayComponent( state=self._state, on_annotation_seek=self._on_annotation_seek, ) - - # Meeting library (M4) self._meeting_library = MeetingLibraryComponent( state=self._state, get_client=lambda: self._client, on_meeting_selected=self._on_meeting_selected, ) - - # Initialize summarization service - auto-detects LOCAL/MOCK providers self._summarization_service = create_summarization_service() - - # Summary panel self._summary_panel = SummaryPanelComponent( state=self._state, get_service=lambda: self._summarization_service, on_citation_click=self._on_citation_click, ) - - # Recording controls (still in app.py - orchestration) self._record_btn = ft.ElevatedButton( "Start Recording", on_click=self._on_record_click, @@ -28326,10 +13464,7 @@ class NoteFlowClientApp(TriggerMixin): icon=ft.Icons.STOP, disabled=True, ) - recording_row = ft.Row([self._record_btn, self._stop_btn]) - - # Main layout - compose component builds return ft.Column( [ ft.Text("NoteFlow Client", size=24, weight=ft.FontWeight.BOLD), @@ -28353,12 +13488,9 @@ class NoteFlowClientApp(TriggerMixin): ], spacing=10, ) - def _ensure_audio_reader(self) -> MeetingAudioReader | None: - """Lazily initialize MeetingAudioReader (for review playback).""" if self._audio_reader: return self._audio_reader - try: settings = get_settings() keystore = KeyringKeyStore() @@ -28367,15 +13499,11 @@ class NoteFlowClientApp(TriggerMixin): except (OSError, ValueError, KeyError, RuntimeError) as exc: logger.exception("Failed to initialize meeting audio reader: %s", exc) self._audio_reader = None - return self._audio_reader - def _load_meeting_audio(self, meeting: MeetingInfo) -> list[TimestampedAudio]: - """Load archived audio for a meeting, if available.""" reader = self._ensure_audio_reader() if not reader: return [] - try: if not reader.audio_exists(meeting.id): logger.info("No archived audio for meeting %s", meeting.id) @@ -28387,16 +13515,9 @@ class NoteFlowClientApp(TriggerMixin): except (OSError, ValueError, RuntimeError) as exc: logger.exception("Failed to load audio for meeting %s: %s", meeting.id, exc) return [] - def _ensure_audio_capture(self) -> bool: - """Start audio capture if needed. - - Returns: - True if audio capture is running, False if start failed. 
- """ if self._audio_capture: return True - try: self._audio_capture = SoundDeviceCapture() self._audio_capture.start( @@ -28410,16 +13531,8 @@ class NoteFlowClientApp(TriggerMixin): logger.exception("Failed to start audio capture: %s", exc) self._audio_capture = None return False - return True - def _on_connected(self, client: NoteFlowClient, info: ServerInfo) -> None: - """Handle successful connection. - - Args: - client: Connected NoteFlowClient. - info: Server info. - """ self._client = client if self._transcript: self._transcript.display_server_info(info) @@ -28431,169 +13544,83 @@ class NoteFlowClientApp(TriggerMixin): logger.error("Failed to resume streaming after reconnect") self._stop_recording() self._update_recording_buttons() - - # Refresh meeting library on connection if self._meeting_library: self._meeting_library.refresh_meetings() - def _on_disconnected(self) -> None: - """Handle disconnection.""" self._shutdown() if self._state.recording: self._stop_recording() self._client = None self._update_recording_buttons() - def _on_connection_change(self, _connected: bool, _message: str) -> None: - """Handle connection state change from client. - - Args: - connected: Connection state. - message: Status message. - """ self._update_recording_buttons() - def _on_transcript(self, segment: TranscriptSegment) -> None: - """Handle transcript update callback. - - Args: - segment: Transcript segment from server. - """ if self._transcript: self._transcript.add_segment(segment) self._ensure_summary_panel_ready() - def _on_record_click(self, e: ft.ControlEvent) -> None: - """Handle record button click. - - Args: - e: Control event. - """ self._start_recording() - def _on_stop_click(self, e: ft.ControlEvent) -> None: - """Handle stop button click. - - Args: - e: Control event. 
- """ self._stop_recording() - def _start_recording(self) -> None: - """Start recording audio.""" if not self._client or not self._state.connected: return - - # Create meeting meeting = self._client.create_meeting(title=f"Recording {time.strftime('%Y-%m-%d %H:%M')}") if not meeting: logger.error("Failed to create meeting") return - self._state.current_meeting = meeting - - # Make summary panel visible once we have meeting context self._ensure_summary_panel_ready() - - # Start streaming if not self._client.start_streaming(meeting.id): logger.error("Failed to start streaming") self._client.stop_meeting(meeting.id) self._state.current_meeting = None return - - # Start audio capture (reuse existing capture if already running) if not self._ensure_audio_capture(): self._client.stop_streaming() self._client.stop_meeting(meeting.id) self._state.reset_recording_state() self._update_recording_buttons() return - self._state.recording = True - - # Start audio frame consumer thread self._start_audio_consumer() - - # Clear audio buffer for new recording self._state.session_audio_buffer.clear() - - # Start timer if self._timer: self._timer.start() - - # Clear transcript if self._transcript: self._transcript.clear() - - # Enable annotation toolbar if self._annotation_toolbar: self._annotation_toolbar.set_visible(True) self._annotation_toolbar.set_enabled(True) - self._update_recording_buttons() - def _stop_recording(self) -> None: - """Stop recording audio.""" - # Stop audio frame consumer thread self._stop_audio_consumer() - - # Stop audio capture if self._audio_capture and not self._should_keep_capture_running(): self._audio_capture.stop() self._audio_capture = None - - # Stop streaming if self._client: self._client.stop_streaming() - - # Stop meeting if self._state.current_meeting: self._client.stop_meeting(self._state.current_meeting.id) - - # Load buffered audio for playback if self._state.session_audio_buffer and self._playback_controls: self._playback_controls.load_audio() self._playback_controls.set_visible(True) - - # Start sync controller for playback if self._sync_controller: self._sync_controller.start() - - # Keep annotation toolbar visible for playback annotations if self._annotation_toolbar: self._annotation_toolbar.set_enabled(True) - - # Ensure summary panel reflects current data after recording ends self._ensure_summary_panel_ready() - - # Reset recording state (but keep meeting/transcript for playback) self._state.recording = False - - # Stop timer if self._timer: self._timer.stop() - self._update_recording_buttons() - def _on_audio_frames( self, frames: NDArray[np.float32], timestamp: float, ) -> None: - """Handle audio frames from capture (called from audio thread). - - Enqueues frames for processing by consumer thread to avoid blocking - the real-time audio callback. - - Args: - frames: Audio samples. - timestamp: Capture timestamp. 
- """ self._audio_frame_queue.put_nowait((frames.copy(), timestamp)) - def _start_audio_consumer(self) -> None: - """Start the audio frame consumer thread.""" if self._audio_consumer_thread is not None and self._audio_consumer_thread.is_alive(): return self._audio_consumer_stop.clear() @@ -28603,107 +13630,56 @@ class NoteFlowClientApp(TriggerMixin): name="audio-consumer", ) self._audio_consumer_thread.start() - def _stop_audio_consumer(self) -> None: - """Stop the audio frame consumer thread.""" self._audio_consumer_stop.set() if self._audio_consumer_thread is not None: self._audio_consumer_thread.join(timeout=1.0) self._audio_consumer_thread = None - # Drain remaining frames while not self._audio_frame_queue.empty(): try: self._audio_frame_queue.get_nowait() except queue.Empty: break - def _audio_consumer_loop(self) -> None: - """Consumer loop that processes audio frames from the queue.""" while not self._audio_consumer_stop.is_set(): try: frames, timestamp = self._audio_frame_queue.get(timeout=0.1) self._process_audio_frames(frames, timestamp) except queue.Empty: continue - def _process_audio_frames( self, frames: NDArray[np.float32], timestamp: float, ) -> None: - """Process audio frames from consumer thread. - - Args: - frames: Audio samples. - timestamp: Capture timestamp. - """ - # Send to server if self._client and self._state.recording: self._client.send_audio(frames, timestamp) - - # Buffer for playback if self._state.recording: duration = len(frames) / DEFAULT_SAMPLE_RATE self._state.session_audio_buffer.append( TimestampedAudio(frames=frames, timestamp=timestamp, duration=duration) ) - - # Update VU meter if self._vu_meter: self._vu_meter.on_audio_frames(frames) - - # Trigger detection uses system output + calendar; no mic-derived updates here. - def _on_segment_click(self, segment_index: int) -> None: - """Handle transcript segment click - seek playback to segment. - - Args: - segment_index: Index of clicked segment. - """ if self._sync_controller: self._sync_controller.seek_to_segment(segment_index) - def _on_citation_click(self, segment_id: int) -> None: - """Handle citation chip click - seek to segment by segment_id. - - Args: - segment_id: Segment ID from citation. - """ - # Find segment index by segment_id for idx, seg in enumerate(self._state.transcript_segments): if seg.segment_id == segment_id: self._on_segment_click(idx) break - def _on_annotation_seek(self, timestamp: float) -> None: - """Handle annotation click - seek to timestamp. - - Args: - timestamp: Timestamp in seconds to seek to. - """ if self._playback_controls: self._playback_controls.seek(timestamp) - def _on_meeting_selected(self, meeting: MeetingInfo) -> None: - """Handle meeting selection from library. - - Loads transcript segments, annotations, and prepares for playback review. - - Args: - meeting: Selected meeting info. - """ if not self._client: return - - # 1. 
Stop any existing playback if self._state.playback.state != PlaybackState.STOPPED: self._state.playback.stop() if self._sync_controller: self._sync_controller.stop() - - # Capture client reference for closure (may run in background thread) client = self._client - def load_and_apply() -> None: if not client: return @@ -28714,18 +13690,14 @@ class NoteFlowClientApp(TriggerMixin): except (ConnectionError, ValueError, OSError, RuntimeError) as exc: logger.exception("Failed to load meeting %s: %s", meeting.id, exc) return - - # Apply results on UI thread to avoid race conditions self._state.run_on_ui_thread( lambda: self._apply_meeting_data(meeting, segments, annotations, audio_chunks) ) - page = self._state._page if page and hasattr(page, "run_thread"): page.run_thread(load_and_apply) else: load_and_apply() - def _apply_meeting_data( self, meeting: MeetingInfo, @@ -28733,58 +13705,39 @@ class NoteFlowClientApp(TriggerMixin): annotations: list[AnnotationInfo], audio_chunks: list[TimestampedAudio], ) -> None: - """Apply loaded meeting data to state and UI (UI thread only).""" - # Clear state and UI before populating with fresh data self._state.clear_transcript() self._state.annotations.clear() self._state.current_summary = None self._state.highlighted_segment_index = None self._state.clear_session_audio() - if self._transcript: self._transcript.clear() if self._annotation_display: self._annotation_display.clear() - - # Populate transcript if self._transcript: for segment in segments: self._transcript.add_segment(segment) - - # Populate annotations self._state.annotations = annotations if self._annotation_display: self._annotation_display.load_annotations(annotations) - - # Update meeting state self._state.current_meeting = meeting self._state.selected_meeting = meeting - - # Enable annotation toolbar for adding new annotations if self._annotation_toolbar: self._annotation_toolbar.set_visible(True) self._annotation_toolbar.set_enabled(True) - - # Load audio for playback if available if audio_chunks: self._state.session_audio_buffer = audio_chunks if self._playback_controls: self._playback_controls.load_audio() self._playback_controls.set_visible(True) else: - # Hide controls when no audio is available if self._playback_controls: self._playback_controls.set_visible(False) self._state.playback.stop() self._state.playback_position = 0.0 - - # Update summary panel visibility/enabled state self._ensure_summary_panel_ready() - - # Start sync controller for playback highlighting if self._sync_controller: self._sync_controller.start() - logger.info( "Loaded meeting: %s (%d segments, %d annotations, %d audio chunks)", meeting.title, @@ -28792,69 +13745,38 @@ class NoteFlowClientApp(TriggerMixin): len(annotations), len(audio_chunks), ) - def _ensure_summary_panel_ready(self) -> None: - """Update summary panel visibility/enabled state based on data availability.""" if not self._summary_panel: return - has_meeting = self._state.current_meeting is not None has_segments = bool(self._state.transcript_segments) - - # Visible once there is a meeting context; enabled when segments exist. self._summary_panel.set_visible(has_meeting or has_segments) self._summary_panel.set_enabled(has_segments and not self._state.summary_loading) - def _on_highlight_change(self, index: int | None) -> None: - """Handle highlight change from sync controller. - - Args: - index: Segment index to highlight, or None to clear. 
- """ if self._transcript: self._transcript.update_highlight(index) - def _on_playback_position_change(self, position: float) -> None: - """Handle playback position change. - - Args: - position: Current playback position in seconds. - """ - # Sync controller handles segment matching internally - _ = position # Position tracked in state - + _ = position def _shutdown(self) -> None: - """Stop background tasks and capture started for triggers.""" if self._trigger_task: self._trigger_task.cancel() self._trigger_task = None - - # Stop audio consumer if running self._stop_audio_consumer() - if self._app_audio: self._app_audio.close() - if self._audio_capture and not self._state.recording: try: self._audio_capture.stop() except RuntimeError: logger.debug("Error stopping audio capture during shutdown", exc_info=True) self._audio_capture = None - def _update_recording_buttons(self) -> None: - """Update recording button states.""" if self._record_btn: self._record_btn.disabled = not self._state.connected or self._state.recording - if self._stop_btn: self._stop_btn.disabled = not self._state.recording - self._state.request_update() - - def main() -> None: - """Run the NoteFlow client application.""" parser = argparse.ArgumentParser(description="NoteFlow Client") parser.add_argument( "-s", @@ -28870,119 +13792,66 @@ def main() -> None: help="Enable verbose logging", ) args = parser.parse_args() - - # Configure logging log_level = logging.DEBUG if args.verbose else logging.INFO logging.basicConfig( level=log_level, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", ) - - # Run app app = NoteFlowClientApp(server_address=args.server) app.run() - - if __name__ == "__main__": main() ```` ## File: src/noteflow/grpc/service.py ````python -"""NoteFlow gRPC service implementation (async with UoW).""" - from __future__ import annotations - -import asyncio import logging -import struct import time -from collections.abc import AsyncIterator -from dataclasses import dataclass, field from pathlib import Path from typing import TYPE_CHECKING, ClassVar, Final -from uuid import UUID, uuid4 - import grpc.aio import numpy as np -from numpy.typing import NDArray - -from noteflow.application.services.export_service import ExportFormat, ExportService -from noteflow.application.services.summarization_service import SummarizationService from noteflow.config.constants import DEFAULT_SAMPLE_RATE as _DEFAULT_SAMPLE_RATE -from noteflow.domain.entities import Annotation, Meeting, Segment, Summary -from noteflow.domain.summarization import ProviderUnavailableError -from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState +from noteflow.domain.entities import Meeting +from noteflow.domain.value_objects import MeetingState from noteflow.infrastructure.asr import Segmenter, SegmenterConfig, StreamingVad -from noteflow.infrastructure.audio.reader import MeetingAudioReader from noteflow.infrastructure.audio.writer import MeetingAudioWriter -from noteflow.infrastructure.converters import AsrConverter -from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork from noteflow.infrastructure.security.crypto import AesGcmCryptoBox from noteflow.infrastructure.security.keystore import KeyringKeyStore - +from ._mixins import ( + AnnotationMixin, + DiarizationMixin, + ExportMixin, + MeetingMixin, + StreamingMixin, + SummarizationMixin, +) from .meeting_store import MeetingStore from .proto import 
noteflow_pb2, noteflow_pb2_grpc - if TYPE_CHECKING: + from numpy.typing import NDArray from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker - + from noteflow.application.services.summarization_service import SummarizationService from noteflow.infrastructure.asr import FasterWhisperEngine - from noteflow.infrastructure.asr.dto import AsrResult - from noteflow.infrastructure.diarization import DiarizationEngine - + from noteflow.infrastructure.diarization import DiarizationEngine, SpeakerTurn logger = logging.getLogger(__name__) - - -@dataclass -class _StreamSessionInit: - """Result of stream session initialization.""" - - next_segment_id: int - error_code: int | None = None - error_message: str | None = None - - @property - def success(self) -> bool: - """Check if initialization succeeded.""" - return self.error_code is None - - -@dataclass -class _DiarizationJob: - """Track background diarization job state.""" - - job_id: str - meeting_id: str - status: int - segments_updated: int = 0 - speaker_ids: list[str] = field(default_factory=list) - error_message: str = "" - created_at: float = field(default_factory=time.time) - updated_at: float = field(default_factory=time.time) - task: asyncio.Task[None] | None = None - - def to_proto(self) -> noteflow_pb2.DiarizationJobStatus: - return noteflow_pb2.DiarizationJobStatus( - job_id=self.job_id, - status=self.status, - segments_updated=self.segments_updated, - speaker_ids=self.speaker_ids, - error_message=self.error_message, - ) - - -class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): - """Async gRPC service implementation for NoteFlow with PostgreSQL persistence.""" - +class NoteFlowServicer( + StreamingMixin, + DiarizationMixin, + MeetingMixin, + SummarizationMixin, + AnnotationMixin, + ExportMixin, + noteflow_pb2_grpc.NoteFlowServiceServicer, +): VERSION: Final[str] = "0.2.0" - MAX_CHUNK_SIZE: Final[int] = 1024 * 1024 # 1MB + MAX_CHUNK_SIZE: Final[int] = 1024 * 1024 DEFAULT_SAMPLE_RATE: Final[int] = _DEFAULT_SAMPLE_RATE SUPPORTED_SAMPLE_RATES: ClassVar[list[int]] = [16000, 44100, 48000] - PARTIAL_CADENCE_SECONDS: Final[float] = 2.0 # Emit partials every 2 seconds - MIN_PARTIAL_AUDIO_SECONDS: Final[float] = 0.5 # Minimum audio for partial inference - + PARTIAL_CADENCE_SECONDS: Final[float] = 2.0 + MIN_PARTIAL_AUDIO_SECONDS: Final[float] = 0.5 def __init__( self, asr_engine: FasterWhisperEngine | None = None, @@ -28990,94 +13859,56 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): meetings_dir: Path | None = None, summarization_service: SummarizationService | None = None, diarization_engine: DiarizationEngine | None = None, + diarization_refinement_enabled: bool = True, ) -> None: - """Initialize the service. - - Args: - asr_engine: Optional ASR engine. - session_factory: Optional async session factory for database persistence. - If not provided, falls back to in-memory MeetingStore. - meetings_dir: Optional directory for meeting audio storage. - Defaults to ~/.noteflow/meetings. - summarization_service: Optional summarization service for generating summaries. - diarization_engine: Optional diarization engine for speaker identification. 
- """ self._asr_engine = asr_engine self._session_factory = session_factory self._summarization_service = summarization_service self._diarization_engine = diarization_engine + self._diarization_refinement_enabled = diarization_refinement_enabled self._start_time = time.time() - # Fallback to in-memory store if no database configured self._memory_store: MeetingStore | None = ( MeetingStore() if session_factory is None else None ) - - # Audio writing infrastructure self._meetings_dir = meetings_dir or (Path.home() / ".noteflow" / "meetings") self._keystore = KeyringKeyStore() self._crypto = AesGcmCryptoBox(self._keystore) self._audio_writers: dict[str, MeetingAudioWriter] = {} - - # VAD and segmentation state per meeting self._vad_instances: dict[str, StreamingVad] = {} self._segmenters: dict[str, Segmenter] = {} self._was_speaking: dict[str, bool] = {} self._segment_counters: dict[str, int] = {} self._stream_formats: dict[str, tuple[int, int]] = {} self._active_streams: set[str] = set() - - # Partial transcription state per meeting self._partial_buffers: dict[str, list[NDArray[np.float32]]] = {} self._last_partial_time: dict[str, float] = {} self._last_partial_text: dict[str, str] = {} - - # Streaming diarization state per meeting self._diarization_turns: dict[str, list[SpeakerTurn]] = {} self._diarization_stream_time: dict[str, float] = {} self._diarization_streaming_failed: set[str] = set() - - # Track audio write failures to avoid log spam self._audio_write_failed: set[str] = set() - - # Background diarization jobs - self._diarization_jobs: dict[str, _DiarizationJob] = {} - + self._diarization_jobs: dict[str, object] = {} @property def asr_engine(self) -> FasterWhisperEngine | None: - """Get the ASR engine.""" return self._asr_engine - def set_asr_engine(self, engine: FasterWhisperEngine) -> None: - """Set the ASR engine.""" self._asr_engine = engine - @property def diarization_engine(self) -> DiarizationEngine | None: - """Get the diarization engine.""" return self._diarization_engine - def set_diarization_engine(self, engine: DiarizationEngine) -> None: - """Set the diarization engine.""" self._diarization_engine = engine - def _use_database(self) -> bool: - """Check if database persistence is configured.""" return self._session_factory is not None - def _get_memory_store(self) -> MeetingStore: - """Get the in-memory store, raising if not configured.""" if self._memory_store is None: raise RuntimeError("Memory store not configured") return self._memory_store - def _create_uow(self) -> SqlAlchemyUnitOfWork: - """Create a new Unit of Work.""" if self._session_factory is None: raise RuntimeError("Database not configured") return SqlAlchemyUnitOfWork(self._session_factory) - def _init_streaming_state(self, meeting_id: str, next_segment_id: int) -> None: - """Initialize VAD, Segmenter, speaking state, and partial buffers for a meeting.""" self._vad_instances[meeting_id] = StreamingVad() self._segmenters[meeting_id] = Segmenter( config=SegmenterConfig(sample_rate=self.DEFAULT_SAMPLE_RATE) @@ -29092,9 +13923,7 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): self._diarization_streaming_failed.discard(meeting_id) if self._diarization_engine is not None: self._diarization_engine.reset_streaming() - def _cleanup_streaming_state(self, meeting_id: str) -> None: - """Clean up VAD, Segmenter, speaking state, and partial buffers for a meeting.""" self._vad_instances.pop(meeting_id, None) self._segmenters.pop(meeting_id, None) self._was_speaking.pop(meeting_id, None) @@ -29106,16 
+13935,7 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): self._diarization_turns.pop(meeting_id, None) self._diarization_stream_time.pop(meeting_id, None) self._diarization_streaming_failed.discard(meeting_id) - def _ensure_meeting_dek(self, meeting: Meeting) -> tuple[bytes, bytes, bool]: - """Ensure meeting has a DEK, generating one if needed. - - Args: - meeting: Meeting entity. - - Returns: - Tuple of (dek, wrapped_dek, needs_update). - """ if meeting.wrapped_dek is None: dek = self._crypto.generate_dek() wrapped_dek = self._crypto.wrap_dek(dek) @@ -29124,16 +13944,7 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): wrapped_dek = meeting.wrapped_dek dek = self._crypto.unwrap_dek(wrapped_dek) return dek, wrapped_dek, False - def _start_meeting_if_needed(self, meeting: Meeting) -> tuple[bool, str | None]: - """Start recording on meeting if not already recording. - - Args: - meeting: Meeting entity. - - Returns: - Tuple of (needs_update, error_message). - """ if meeting.state == MeetingState.RECORDING: return False, None try: @@ -29141,20 +13952,12 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): return True, None except ValueError as e: return False, str(e) - def _open_meeting_audio_writer( self, meeting_id: str, dek: bytes, wrapped_dek: bytes, ) -> None: - """Open audio writer for a meeting. - - Args: - meeting_id: Meeting ID string. - dek: Data encryption key. - wrapped_dek: Wrapped DEK. - """ writer = MeetingAudioWriter(self._crypto, self._meetings_dir) writer.open( meeting_id=meeting_id, @@ -29164,159 +13967,10 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): ) self._audio_writers[meeting_id] = writer logger.info("Audio writer opened for meeting %s", meeting_id) - - async def _init_stream_session_db(self, meeting_id: str) -> _StreamSessionInit: - """Initialize stream session using database persistence. - - Args: - meeting_id: Meeting ID string. - - Returns: - Stream session initialization result. - """ - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) - if meeting is None: - return _StreamSessionInit( - next_segment_id=0, - error_code=grpc.StatusCode.NOT_FOUND, - error_message=f"Meeting {meeting_id} not found", - ) - - dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) - recording_updated, error_msg = self._start_meeting_if_needed(meeting) - - if error_msg: - return _StreamSessionInit( - next_segment_id=0, - error_code=grpc.StatusCode.INVALID_ARGUMENT, - error_message=error_msg, - ) - - if dek_updated or recording_updated: - await uow.meetings.update(meeting) - await uow.commit() - - next_segment_id = await uow.segments.get_next_segment_id(meeting.id) - self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) - self._init_streaming_state(meeting_id, next_segment_id) - - return _StreamSessionInit(next_segment_id=next_segment_id) - - def _init_stream_session_memory(self, meeting_id: str) -> _StreamSessionInit: - """Initialize stream session using in-memory store. - - Args: - meeting_id: Meeting ID string. - - Returns: - Stream session initialization result. 
- """ - store = self._get_memory_store() - meeting = store.get(meeting_id) - if meeting is None: - return _StreamSessionInit( - next_segment_id=0, - error_code=grpc.StatusCode.NOT_FOUND, - error_message=f"Meeting {meeting_id} not found", - ) - - dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting) - recording_updated, error_msg = self._start_meeting_if_needed(meeting) - - if error_msg: - return _StreamSessionInit( - next_segment_id=0, - error_code=grpc.StatusCode.INVALID_ARGUMENT, - error_message=error_msg, - ) - - if dek_updated or recording_updated: - store.update(meeting) - - next_segment_id = meeting.next_segment_id - self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) - self._init_streaming_state(meeting_id, next_segment_id) - - return _StreamSessionInit(next_segment_id=next_segment_id) - - def _next_segment_id(self, meeting_id: str, fallback: int = 0) -> int: - """Get and increment the next segment id for a meeting.""" - next_id = self._segment_counters.get(meeting_id) - if next_id is None: - next_id = fallback - self._segment_counters[meeting_id] = next_id + 1 - return next_id - - def _normalize_stream_format( - self, - meeting_id: str, - sample_rate: int, - channels: int, - ) -> tuple[int, int]: - """Validate and persist stream audio format for a meeting.""" - normalized_rate = sample_rate or self.DEFAULT_SAMPLE_RATE - normalized_channels = channels or 1 - - if normalized_rate not in self.SUPPORTED_SAMPLE_RATES: - raise ValueError( - "Unsupported sample_rate " - f"{normalized_rate}; supported: {self.SUPPORTED_SAMPLE_RATES}" - ) - if normalized_channels < 1: - raise ValueError("channels must be >= 1") - - existing = self._stream_formats.get(meeting_id) - if existing and existing != (normalized_rate, normalized_channels): - raise ValueError("Stream audio format cannot change mid-stream") - - self._stream_formats.setdefault(meeting_id, (normalized_rate, normalized_channels)) - return normalized_rate, normalized_channels - - def _convert_audio_format( - self, - audio: NDArray[np.float32], - sample_rate: int, - channels: int, - ) -> NDArray[np.float32]: - """Downmix/resample audio to the server's expected format.""" - if channels > 1: - if audio.size % channels != 0: - raise ValueError("Audio buffer size is not divisible by channel count") - audio = audio.reshape(-1, channels).mean(axis=1) - - if sample_rate != self.DEFAULT_SAMPLE_RATE: - audio = self._resample_audio(audio, sample_rate, self.DEFAULT_SAMPLE_RATE) - - return audio - - @staticmethod - def _resample_audio( - audio: NDArray[np.float32], - src_rate: int, - dst_rate: int, - ) -> NDArray[np.float32]: - """Resample audio using linear interpolation.""" - if src_rate == dst_rate or audio.size == 0: - return audio - - ratio = dst_rate / src_rate - new_length = round(audio.shape[0] * ratio) - if new_length <= 0: - return np.array([], dtype=np.float32) - - old_indices = np.arange(audio.shape[0]) - new_indices = np.arange(new_length) / ratio - return np.interp(new_indices, old_indices, audio).astype(np.float32) - def _close_audio_writer(self, meeting_id: str) -> None: - """Close and remove the audio writer for a meeting.""" - # Clean up write failure tracking self._audio_write_failed.discard(meeting_id) - if meeting_id not in self._audio_writers: return - try: writer = self._audio_writers.pop(meeting_id) writer.close() @@ -29331,1003 +13985,37 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): meeting_id, e, ) - + def _next_segment_id(self, meeting_id: str, fallback: int = 0) -> int: + next_id 
= self._segment_counters.get(meeting_id) + if next_id is None: + next_id = fallback + self._segment_counters[meeting_id] = next_id + 1 + return next_id async def _count_active_meetings_db(self) -> int: - """Count active meetings using database state.""" async with self._create_uow() as uow: total = 0 for state in (MeetingState.RECORDING, MeetingState.STOPPING): total += await uow.meetings.count_by_state(state) return total - - async def StreamTranscription( - self, - request_iterator: AsyncIterator[noteflow_pb2.AudioChunk], - context: grpc.aio.ServicerContext, - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Handle bidirectional audio streaming with persistence. - - Receives audio chunks from client, processes through ASR, - persists segments, and yields transcript updates. - """ - if self._asr_engine is None or not self._asr_engine.is_loaded: - await context.abort( - grpc.StatusCode.FAILED_PRECONDITION, - "ASR engine not loaded", - ) - - current_meeting_id: str | None = None - - try: - async for chunk in request_iterator: - meeting_id = chunk.meeting_id - if not meeting_id: - await context.abort( - grpc.StatusCode.INVALID_ARGUMENT, - "meeting_id required", - ) - - # Initialize stream on first chunk - if current_meeting_id is None: - init_result = await self._init_stream_for_meeting(meeting_id, context) - if init_result is None: - return # Error already sent via context.abort - current_meeting_id = meeting_id - elif meeting_id != current_meeting_id: - await context.abort( - grpc.StatusCode.INVALID_ARGUMENT, - "Stream may only contain a single meeting_id", - ) - - # Process audio chunk - async for update in self._process_stream_chunk(current_meeting_id, chunk, context): - yield update - - # Flush any remaining audio from segmenter - if current_meeting_id and current_meeting_id in self._segmenters: - async for update in self._flush_segmenter(current_meeting_id): - yield update - finally: - if current_meeting_id: - self._cleanup_streaming_state(current_meeting_id) - self._close_audio_writer(current_meeting_id) - self._active_streams.discard(current_meeting_id) - - async def _init_stream_for_meeting( - self, - meeting_id: str, - context: grpc.aio.ServicerContext, - ) -> _StreamSessionInit | None: - """Initialize streaming for a meeting. - - Args: - meeting_id: Meeting ID string. - context: gRPC context for error handling. - - Returns: - Initialization result, or None if error was sent. - """ - if meeting_id in self._active_streams: - await context.abort( - grpc.StatusCode.FAILED_PRECONDITION, - f"Meeting {meeting_id} already streaming", - ) - - self._active_streams.add(meeting_id) - - if self._use_database(): - init_result = await self._init_stream_session_db(meeting_id) - else: - init_result = self._init_stream_session_memory(meeting_id) - - if not init_result.success: - self._active_streams.discard(meeting_id) - await context.abort(init_result.error_code, init_result.error_message or "") - - return init_result - - async def _process_stream_chunk( - self, - meeting_id: str, - chunk: noteflow_pb2.AudioChunk, - context: grpc.aio.ServicerContext, - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Process a single audio chunk from the stream. - - Args: - meeting_id: Meeting ID string. - chunk: Audio chunk from client. - context: gRPC context for error handling. - - Yields: - Transcript updates from processing. 
- """ - try: - sample_rate, channels = self._normalize_stream_format( - meeting_id, - chunk.sample_rate, - chunk.channels, - ) - except ValueError as e: - await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) - - audio = self._decode_audio_chunk(chunk) - if audio is None: - return - - try: - audio = self._convert_audio_format(audio, sample_rate, channels) - except ValueError as e: - await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) - - # Write to encrypted audio file - self._write_audio_chunk_safe(meeting_id, audio) - - # VAD-driven segmentation - async for update in self._process_audio_with_vad(meeting_id, audio): - yield update - - def _write_audio_chunk_safe( - self, - meeting_id: str, - audio: NDArray[np.float32], - ) -> None: - """Write audio chunk to encrypted file, logging errors without raising. - - Args: - meeting_id: Meeting ID string. - audio: Audio samples to write. - """ - if meeting_id not in self._audio_writers: - return - if meeting_id in self._audio_write_failed: - return # Already failed, skip to avoid log spam - try: - self._audio_writers[meeting_id].write_chunk(audio) - except Exception as e: - logger.error( - "Audio write failed for meeting %s: %s. Recording may be incomplete.", - meeting_id, - e, - ) - self._audio_write_failed.add(meeting_id) - - def _decode_audio_chunk( - self, - chunk: noteflow_pb2.AudioChunk, - ) -> NDArray[np.float32] | None: - """Decode audio chunk from protobuf to numpy array.""" - if not chunk.audio_data: - return None - try: - return np.frombuffer(chunk.audio_data, dtype=np.float32) - except (ValueError, struct.error) as e: - logger.warning("Failed to decode audio chunk: %s", e) - return None - - async def _process_audio_with_vad( - self, - meeting_id: str, - audio: NDArray[np.float32], - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Process audio chunk through VAD and Segmenter. - - Args: - meeting_id: Meeting identifier. - audio: Audio samples (float32, mono). - - Yields: - TranscriptUpdates for VAD events, partials, and finals. 
- """ - vad = self._vad_instances.get(meeting_id) - segmenter = self._segmenters.get(meeting_id) - - if vad is None or segmenter is None: - return - - # Get VAD decision - is_speech = vad.process_chunk(audio) - - # Streaming diarization (optional) - self._process_streaming_diarization(meeting_id, audio) - - # Emit VAD state change events - was_speaking = self._was_speaking.get(meeting_id, False) - if is_speech and not was_speaking: - # Speech started - yield self._create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_START) - self._was_speaking[meeting_id] = True - elif not is_speech and was_speaking: - # Speech ended - yield self._create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_END) - self._was_speaking[meeting_id] = False - - # Buffer audio for partial transcription - if is_speech: - if meeting_id in self._partial_buffers: - self._partial_buffers[meeting_id].append(audio.copy()) - - # Check if we should emit a partial - partial_update = await self._maybe_emit_partial(meeting_id) - if partial_update is not None: - yield partial_update - - # Process through segmenter - for audio_segment in segmenter.process_audio(audio, is_speech): - # Clear partial buffer when we get a final segment - self._clear_partial_buffer(meeting_id) - async for update in self._process_audio_segment( - meeting_id, - audio_segment.audio, - audio_segment.start_time, - ): - yield update - - async def _maybe_emit_partial( - self, - meeting_id: str, - ) -> noteflow_pb2.TranscriptUpdate | None: - """Check if it's time to emit a partial and generate if so. - - Args: - meeting_id: Meeting identifier. - - Returns: - TranscriptUpdate with partial text, or None if not time yet. - """ - if self._asr_engine is None or not self._asr_engine.is_loaded: - return None - - last_time = self._last_partial_time.get(meeting_id, 0) - now = time.time() - - # Check if enough time has passed since last partial - if now - last_time < self.PARTIAL_CADENCE_SECONDS: - return None - - # Check if we have enough audio - buffer = self._partial_buffers.get(meeting_id, []) - if not buffer: - return None - - # Concatenate buffered audio - combined = np.concatenate(buffer) - audio_seconds = len(combined) / self.DEFAULT_SAMPLE_RATE - - if audio_seconds < self.MIN_PARTIAL_AUDIO_SECONDS: - return None - - # Run inference on buffered audio (async to avoid blocking event loop) - results = await self._asr_engine.transcribe_async(combined) - partial_text = " ".join(result.text for result in results) - - # Clear buffer after inference to keep partials incremental and bounded - self._partial_buffers[meeting_id] = [] - - # Only emit if text changed (debounce) - last_text = self._last_partial_text.get(meeting_id, "") - if partial_text and partial_text != last_text: - self._last_partial_time[meeting_id] = now - self._last_partial_text[meeting_id] = partial_text - return noteflow_pb2.TranscriptUpdate( - meeting_id=meeting_id, - update_type=noteflow_pb2.UPDATE_TYPE_PARTIAL, - partial_text=partial_text, - server_timestamp=now, - ) - - self._last_partial_time[meeting_id] = now - return None - - def _clear_partial_buffer(self, meeting_id: str) -> None: - """Clear the partial buffer and reset state after a final is emitted. - - Args: - meeting_id: Meeting identifier. 
- """ - if meeting_id in self._partial_buffers: - self._partial_buffers[meeting_id] = [] - if meeting_id in self._last_partial_text: - self._last_partial_text[meeting_id] = "" - if meeting_id in self._last_partial_time: - self._last_partial_time[meeting_id] = time.time() - - def _process_streaming_diarization( - self, - meeting_id: str, - audio: NDArray[np.float32], - ) -> None: - """Process an audio chunk for streaming diarization (best-effort).""" - if self._diarization_engine is None: - return - if meeting_id in self._diarization_streaming_failed: - return - if audio.size == 0: - return - - if not self._diarization_engine.is_streaming_loaded: - try: - self._diarization_engine.load_streaming_model() - except (RuntimeError, ValueError) as exc: - logger.warning( - "Streaming diarization disabled for meeting %s: %s", - meeting_id, - exc, - ) - self._diarization_streaming_failed.add(meeting_id) - return - - stream_time = self._diarization_stream_time.get(meeting_id, 0.0) - duration = len(audio) / self.DEFAULT_SAMPLE_RATE - - try: - turns = self._diarization_engine.process_chunk( - audio, - sample_rate=self.DEFAULT_SAMPLE_RATE, - ) - except Exception as exc: - logger.warning( - "Streaming diarization failed for meeting %s: %s", - meeting_id, - exc, - ) - self._diarization_streaming_failed.add(meeting_id) - return - - diarization_turns = self._diarization_turns.setdefault(meeting_id, []) - for turn in turns: - diarization_turns.append( - SpeakerTurn( - speaker=turn.speaker, - start=turn.start + stream_time, - end=turn.end + stream_time, - confidence=turn.confidence, - ) - ) - - self._diarization_stream_time[meeting_id] = stream_time + duration - - async def _flush_segmenter( - self, - meeting_id: str, - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Flush remaining audio from segmenter at stream end. - - Args: - meeting_id: Meeting identifier. - - Yields: - TranscriptUpdates for final segment. - """ - segmenter = self._segmenters.get(meeting_id) - if segmenter is None: - return - - # Clear partial buffer since we're flushing to final - self._clear_partial_buffer(meeting_id) - - final_segment = segmenter.flush() - if final_segment is not None: - async for update in self._process_audio_segment( - meeting_id, - final_segment.audio, - final_segment.start_time, - ): - yield update - - async def _process_audio_segment( - self, - meeting_id: str, - audio: NDArray[np.float32], - segment_start_time: float, - ) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]: - """Process a complete audio segment through ASR. - - Args: - meeting_id: Meeting identifier. - audio: Complete audio segment. - segment_start_time: Segment start time in stream seconds. - - Yields: - TranscriptUpdates for transcribed segments. 
- """ - if len(audio) == 0 or self._asr_engine is None: - return - - if self._use_database(): - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) - if meeting is None: - return - - results = await self._asr_engine.transcribe_async(audio) - for result in results: - segment_id = self._next_segment_id( - meeting_id, - fallback=meeting.next_segment_id, - ) - segment = self._create_segment_from_asr( - meeting.id, - segment_id, - result, - segment_start_time, - ) - self._maybe_assign_speaker(meeting_id, segment) - meeting.add_segment(segment) - await uow.segments.add(meeting.id, segment) - await uow.commit() - yield self._segment_to_proto_update(meeting_id, segment) - else: - store = self._get_memory_store() - meeting = store.get(meeting_id) - if meeting is None: - return - results = await self._asr_engine.transcribe_async(audio) - for result in results: - segment_id = self._next_segment_id( - meeting_id, - fallback=meeting.next_segment_id, - ) - segment = self._create_segment_from_asr( - meeting.id, - segment_id, - result, - segment_start_time, - ) - self._maybe_assign_speaker(meeting_id, segment) - store.add_segment(meeting_id, segment) - yield self._segment_to_proto_update(meeting_id, segment) - - def _create_vad_update( - self, - meeting_id: str, - update_type: int, - ) -> noteflow_pb2.TranscriptUpdate: - """Create a VAD event update. - - Args: - meeting_id: Meeting identifier. - update_type: VAD_START or VAD_END. - - Returns: - TranscriptUpdate with VAD event. - """ - return noteflow_pb2.TranscriptUpdate( - meeting_id=meeting_id, - update_type=update_type, - server_timestamp=time.time(), - ) - - def _create_segment_from_asr( - self, - meeting_id: MeetingId, - segment_id: int, - result: AsrResult, - segment_start_time: float, - ) -> Segment: - """Create a Segment from ASR result. - - Use converters to transform ASR DTO to domain entities. 
- """ - words = AsrConverter.result_to_domain_words(result) - if segment_start_time: - for word in words: - word.start_time += segment_start_time - word.end_time += segment_start_time - - return Segment( - segment_id=segment_id, - text=result.text, - start_time=result.start + segment_start_time, - end_time=result.end + segment_start_time, - meeting_id=meeting_id, - words=words, - language=result.language, - language_confidence=result.language_probability, - avg_logprob=result.avg_logprob, - no_speech_prob=result.no_speech_prob, - ) - - def _maybe_assign_speaker(self, meeting_id: str, segment: Segment) -> None: - """Assign speaker to a segment using streaming diarization turns (best-effort).""" - if self._diarization_engine is None: - return - if meeting_id in self._diarization_streaming_failed: - return - turns = self._diarization_turns.get(meeting_id) - if not turns: - return - - speaker_id, confidence = assign_speaker( - segment.start_time, - segment.end_time, - turns, - ) - if speaker_id is None: - return - - segment.speaker_id = speaker_id - segment.speaker_confidence = confidence - - def _segment_to_proto_update( - self, - meeting_id: str, - segment: Segment, - ) -> noteflow_pb2.TranscriptUpdate: - """Convert domain Segment to protobuf TranscriptUpdate.""" - words = [ - noteflow_pb2.WordTiming( - word=w.word, - start_time=w.start_time, - end_time=w.end_time, - probability=w.probability, - ) - for w in segment.words - ] - final_segment = noteflow_pb2.FinalSegment( - segment_id=segment.segment_id, - text=segment.text, - start_time=segment.start_time, - end_time=segment.end_time, - words=words, - language=segment.language, - language_confidence=segment.language_confidence, - avg_logprob=segment.avg_logprob, - no_speech_prob=segment.no_speech_prob, - speaker_id=segment.speaker_id or "", - speaker_confidence=segment.speaker_confidence, - ) - return noteflow_pb2.TranscriptUpdate( - meeting_id=meeting_id, - update_type=noteflow_pb2.UPDATE_TYPE_FINAL, - segment=final_segment, - server_timestamp=time.time(), - ) - - async def CreateMeeting( - self, - request: noteflow_pb2.CreateMeetingRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Meeting: - """Create a new meeting.""" - metadata = dict(request.metadata) if request.metadata else {} - - if self._use_database(): - async with self._create_uow() as uow: - meeting = Meeting.create(title=request.title, metadata=metadata) - saved = await uow.meetings.create(meeting) - await uow.commit() - return self._meeting_to_proto(saved) - else: - store = self._get_memory_store() - meeting = store.create(title=request.title, metadata=metadata) - return self._meeting_to_proto(meeting) - - async def StopMeeting( - self, - request: noteflow_pb2.StopMeetingRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Meeting: - """Stop a meeting using graceful STOPPING -> STOPPED transition.""" - meeting_id = request.meeting_id - - # Close audio writer if open - if meeting_id in self._audio_writers: - self._close_audio_writer(meeting_id) - - if self._use_database(): - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(UUID(meeting_id))) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {meeting_id} not found", - ) - try: - # Graceful shutdown: RECORDING -> STOPPING -> STOPPED - meeting.begin_stopping() - meeting.stop_recording() - except ValueError as e: - await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) - await uow.meetings.update(meeting) - await uow.commit() - 
return self._meeting_to_proto(meeting) - store = self._get_memory_store() - meeting = store.get(meeting_id) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {meeting_id} not found", - ) - try: - # Graceful shutdown: RECORDING -> STOPPING -> STOPPED - meeting.begin_stopping() - meeting.stop_recording() - except ValueError as e: - await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) - store.update(meeting) - return self._meeting_to_proto(meeting) - - async def refine_speaker_diarization( - self, - meeting_id: str, - num_speakers: int | None = None, - ) -> int: - """Run post-meeting speaker diarization refinement. - - Loads the full meeting audio, runs offline diarization, and updates - segment speaker assignments. This provides higher quality speaker - labels than streaming diarization. - - Args: - meeting_id: Meeting UUID string. - num_speakers: Known number of speakers (None for auto-detect). - - Returns: - Number of segments updated with speaker labels. - - Raises: - RuntimeError: If diarization engine not available or meeting not found. - """ - turns = await asyncio.to_thread( - self._run_diarization_inference, - meeting_id, - num_speakers, - ) - - updated_count = await self._apply_diarization_turns(meeting_id, turns) - - logger.info( - "Updated %d segments with speaker labels for meeting %s", - updated_count, - meeting_id, - ) - - return updated_count - - def _run_diarization_inference( - self, - meeting_id: str, - num_speakers: int | None, - ) -> list[SpeakerTurn]: - """Run offline diarization and return speaker turns (blocking).""" - if self._diarization_engine is None: - raise RuntimeError("Diarization engine not configured") - - if not self._diarization_engine.is_offline_loaded: - logger.info("Loading offline diarization model for refinement...") - self._diarization_engine.load_offline_model() - - audio_reader = MeetingAudioReader(self._crypto, self._meetings_dir) - if not audio_reader.audio_exists(meeting_id): - raise RuntimeError("No audio file found for meeting") - - logger.info("Loading audio for meeting %s", meeting_id) - try: - audio_chunks = audio_reader.load_meeting_audio(meeting_id) - except (FileNotFoundError, ValueError) as exc: - raise RuntimeError(f"Failed to load audio: {exc}") from exc - - if not audio_chunks: - raise RuntimeError("No audio chunks loaded for meeting") - - sample_rate = audio_reader.sample_rate - all_audio = np.concatenate([chunk.frames for chunk in audio_chunks]) - - logger.info( - "Running offline diarization on %.2f seconds of audio", - len(all_audio) / sample_rate, - ) - - turns = self._diarization_engine.diarize_full( - all_audio, - sample_rate=sample_rate, - num_speakers=num_speakers, - ) - - logger.info("Diarization found %d speaker turns", len(turns)) - return list(turns) - - async def _apply_diarization_turns( - self, - meeting_id: str, - turns: list[SpeakerTurn], - ) -> int: - """Apply diarization turns to segments and return updated count.""" - updated_count = 0 - - if self._use_database(): - async with self._create_uow() as uow: - segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) - for segment in segments: - if segment.db_id is None: - continue - speaker_id, confidence = assign_speaker( - segment.start_time, - segment.end_time, - turns, - ) - if speaker_id is None: - continue - await uow.segments.update_speaker( - segment.db_id, - speaker_id, - confidence, - ) - updated_count += 1 - await uow.commit() - else: - store = self._get_memory_store() - if meeting := 
store.get(meeting_id): - for segment in meeting.segments: - speaker_id, confidence = assign_speaker( - segment.start_time, - segment.end_time, - turns, - ) - if speaker_id is None: - continue - segment.speaker_id = speaker_id - segment.speaker_confidence = confidence - updated_count += 1 - - return updated_count - - async def _collect_speaker_ids(self, meeting_id: str) -> list[str]: - """Collect distinct speaker IDs for a meeting.""" - if self._use_database(): - async with self._create_uow() as uow: - segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id))) - return sorted({s.speaker_id for s in segments if s.speaker_id}) - store = self._get_memory_store() - if meeting := store.get(meeting_id): - return sorted({s.speaker_id for s in meeting.segments if s.speaker_id}) - return [] - - async def ListMeetings( - self, - request: noteflow_pb2.ListMeetingsRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.ListMeetingsResponse: - """List meetings.""" - limit = request.limit or 100 - offset = request.offset or 0 - sort_desc = request.sort_order != noteflow_pb2.SORT_ORDER_CREATED_ASC - - if self._use_database(): - states = [MeetingState(s) for s in request.states] if request.states else None - async with self._create_uow() as uow: - meetings, total = await uow.meetings.list_all( - states=states, - limit=limit, - offset=offset, - sort_desc=sort_desc, - ) - return noteflow_pb2.ListMeetingsResponse( - meetings=[self._meeting_to_proto(m, include_segments=False) for m in meetings], - total_count=total, - ) - else: - store = self._get_memory_store() - states = [MeetingState(s) for s in request.states] if request.states else None - meetings, total = store.list_all( - states=states, - limit=limit, - offset=offset, - sort_desc=sort_desc, - ) - return noteflow_pb2.ListMeetingsResponse( - meetings=[self._meeting_to_proto(m, include_segments=False) for m in meetings], - total_count=total, - ) - - async def GetMeeting( - self, - request: noteflow_pb2.GetMeetingRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Meeting: - """Get meeting details.""" - if self._use_database(): - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(UUID(request.meeting_id))) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - # Load segments if requested - if request.include_segments: - segments = await uow.segments.get_by_meeting(meeting.id) - meeting.segments = list(segments) - # Load summary if requested - if request.include_summary: - summary = await uow.summaries.get_by_meeting(meeting.id) - meeting.summary = summary - return self._meeting_to_proto( - meeting, - include_segments=request.include_segments, - include_summary=request.include_summary, - ) - store = self._get_memory_store() - meeting = store.get(request.meeting_id) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - return self._meeting_to_proto( - meeting, - include_segments=request.include_segments, - include_summary=request.include_summary, - ) - - async def DeleteMeeting( - self, - request: noteflow_pb2.DeleteMeetingRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.DeleteMeetingResponse: - """Delete a meeting.""" - if self._use_database(): - async with self._create_uow() as uow: - success = await uow.meetings.delete(MeetingId(UUID(request.meeting_id))) - if success: - await uow.commit() - return 
noteflow_pb2.DeleteMeetingResponse(success=True) - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - store = self._get_memory_store() - success = store.delete(request.meeting_id) - if not success: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - return noteflow_pb2.DeleteMeetingResponse(success=True) - - async def GenerateSummary( - self, - request: noteflow_pb2.GenerateSummaryRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Summary: - """Generate meeting summary using SummarizationService with fallback.""" - if self._use_database(): - return await self._generate_summary_db(request, context) - - return await self._generate_summary_memory(request, context) - - async def _generate_summary_db( - self, - request: noteflow_pb2.GenerateSummaryRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Summary: - """Generate summary for a meeting stored in the database. - - The potentially slow summarization step is executed outside the UoW to - avoid holding database connections while waiting on LLMs. - """ - - meeting_id = MeetingId(UUID(request.meeting_id)) - - # 1) Load meeting, existing summary, and segments inside a short UoW - async with self._create_uow() as uow: - meeting = await uow.meetings.get(meeting_id) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - - existing = await uow.summaries.get_by_meeting(meeting.id) - if existing and not request.force_regenerate: - return self._summary_to_proto(existing) - - segments = list(await uow.segments.get_by_meeting(meeting.id)) - - # 2) Run summarization outside DB transaction - summary = await self._summarize_or_placeholder(meeting_id, segments) - - # 3) Persist in a fresh UoW - async with self._create_uow() as uow: - saved = await uow.summaries.save(summary) - await uow.commit() - - return self._summary_to_proto(saved) - - async def _generate_summary_memory( - self, - request: noteflow_pb2.GenerateSummaryRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Summary: - """Generate summary for meetings held in the in-memory store.""" - - store = self._get_memory_store() - meeting = store.get(request.meeting_id) - if meeting is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Meeting {request.meeting_id} not found", - ) - - if meeting.summary and not request.force_regenerate: - return self._summary_to_proto(meeting.summary) - - summary = await self._summarize_or_placeholder(meeting.id, meeting.segments) - store.set_summary(request.meeting_id, summary) - return self._summary_to_proto(summary) - - async def _summarize_or_placeholder( - self, - meeting_id: MeetingId, - segments: list[Segment], - ) -> Summary: - """Try to summarize via service, fallback to placeholder on failure.""" - if self._summarization_service is None: - logger.warning("SummarizationService not configured; using placeholder summary") - return self._generate_placeholder_summary(meeting_id, segments) - - try: - result = await self._summarization_service.summarize( - meeting_id=meeting_id, - segments=segments, - ) - logger.info( - "Generated summary using %s (fallback=%s)", - result.provider_used, - result.fallback_used, - ) - return result.summary - except ProviderUnavailableError as exc: - logger.warning("Summarization provider unavailable; using placeholder: %s", exc) - except (TimeoutError, RuntimeError, ValueError) as exc: - logger.exception( - 
"Summarization failed (%s); using placeholder summary", type(exc).__name__ - ) - - return self._generate_placeholder_summary(meeting_id, segments) - - def _generate_placeholder_summary( - self, - meeting_id: MeetingId, - segments: list[Segment], - ) -> Summary: - """Generate a lightweight placeholder summary when summarization fails.""" - full_text = " ".join(s.text for s in segments) - executive = f"{full_text[:200]}..." if len(full_text) > 200 else full_text - executive = executive or "No transcript available." - - return Summary( - meeting_id=meeting_id, - executive_summary=executive, - key_points=[], - action_items=[], - model_version="placeholder-v0", - ) - async def GetServerInfo( self, request: noteflow_pb2.ServerInfoRequest, context: grpc.aio.ServicerContext, ) -> noteflow_pb2.ServerInfo: - """Get server information.""" asr_model = "" asr_ready = False if self._asr_engine: asr_ready = self._asr_engine.is_loaded asr_model = self._asr_engine.model_size or "" - diarization_enabled = self._diarization_engine is not None diarization_ready = self._diarization_engine is not None and ( self._diarization_engine.is_streaming_loaded or self._diarization_engine.is_offline_loaded ) - if self._use_database(): active = await self._count_active_meetings_db() else: active = self._get_memory_store().active_count - return noteflow_pb2.ServerInfo( version=self.VERSION, asr_model=asr_model, @@ -30339,503 +14027,4 @@ class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer): diarization_enabled=diarization_enabled, diarization_ready=diarization_ready, ) - - def _meeting_to_proto( - self, - meeting: Meeting, - include_segments: bool = True, - include_summary: bool = True, - ) -> noteflow_pb2.Meeting: - """Convert domain Meeting to protobuf.""" - segments = [] - if include_segments: - for seg in meeting.segments: - words = [ - noteflow_pb2.WordTiming( - word=w.word, - start_time=w.start_time, - end_time=w.end_time, - probability=w.probability, - ) - for w in seg.words - ] - segments.append( - noteflow_pb2.FinalSegment( - segment_id=seg.segment_id, - text=seg.text, - start_time=seg.start_time, - end_time=seg.end_time, - words=words, - language=seg.language, - language_confidence=seg.language_confidence, - avg_logprob=seg.avg_logprob, - no_speech_prob=seg.no_speech_prob, - speaker_id=seg.speaker_id or "", - speaker_confidence=seg.speaker_confidence, - ) - ) - - summary = None - if include_summary and meeting.summary: - summary = self._summary_to_proto(meeting.summary) - - return noteflow_pb2.Meeting( - id=str(meeting.id), - title=meeting.title, - state=meeting.state.value, - created_at=meeting.created_at.timestamp(), - started_at=meeting.started_at.timestamp() if meeting.started_at else 0, - ended_at=meeting.ended_at.timestamp() if meeting.ended_at else 0, - duration_seconds=meeting.duration_seconds, - segments=segments, - summary=summary, - metadata=meeting.metadata, - ) - - def _summary_to_proto(self, summary: Summary) -> noteflow_pb2.Summary: - """Convert domain Summary to protobuf.""" - key_points = [ - noteflow_pb2.KeyPoint( - text=kp.text, - segment_ids=kp.segment_ids, - start_time=kp.start_time, - end_time=kp.end_time, - ) - for kp in summary.key_points - ] - action_items = [ - noteflow_pb2.ActionItem( - text=ai.text, - assignee=ai.assignee, - due_date=ai.due_date.timestamp() if ai.due_date is not None else 0, - priority=ai.priority, - segment_ids=ai.segment_ids, - ) - for ai in summary.action_items - ] - return noteflow_pb2.Summary( - meeting_id=str(summary.meeting_id), - 
executive_summary=summary.executive_summary, - key_points=key_points, - action_items=action_items, - generated_at=( - summary.generated_at.timestamp() if summary.generated_at is not None else 0 - ), - model_version=summary.model_version, - ) - - # ========================================================================= - # Annotation Methods - # ========================================================================= - - async def AddAnnotation( - self, - request: noteflow_pb2.AddAnnotationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Annotation: - """Add an annotation to a meeting.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - annotation_type = self._proto_to_annotation_type(request.annotation_type) - from uuid import uuid4 - - annotation = Annotation( - id=AnnotationId(uuid4()), - meeting_id=MeetingId(UUID(request.meeting_id)), - annotation_type=annotation_type, - text=request.text, - start_time=request.start_time, - end_time=request.end_time, - segment_ids=list(request.segment_ids), - ) - - async with self._create_uow() as uow: - saved = await uow.annotations.add(annotation) - await uow.commit() - return self._annotation_to_proto(saved) - - async def GetAnnotation( - self, - request: noteflow_pb2.GetAnnotationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Annotation: - """Get an annotation by ID.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - async with self._create_uow() as uow: - annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) - if annotation is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Annotation {request.annotation_id} not found", - ) - return self._annotation_to_proto(annotation) - - async def ListAnnotations( - self, - request: noteflow_pb2.ListAnnotationsRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.ListAnnotationsResponse: - """List annotations for a meeting.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - async with self._create_uow() as uow: - meeting_id = MeetingId(UUID(request.meeting_id)) - # Check if time range filter is specified - if request.start_time > 0 or request.end_time > 0: - annotations = await uow.annotations.get_by_time_range( - meeting_id, - request.start_time, - request.end_time, - ) - else: - annotations = await uow.annotations.get_by_meeting(meeting_id) - - return noteflow_pb2.ListAnnotationsResponse( - annotations=[self._annotation_to_proto(a) for a in annotations] - ) - - async def UpdateAnnotation( - self, - request: noteflow_pb2.UpdateAnnotationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.Annotation: - """Update an existing annotation.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - async with self._create_uow() as uow: - annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id))) - if annotation is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Annotation {request.annotation_id} not found", - ) - - # Update fields if provided - if request.annotation_type != noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED: - annotation.annotation_type = self._proto_to_annotation_type(request.annotation_type) - if request.text: 
- annotation.text = request.text - if request.start_time > 0: - annotation.start_time = request.start_time - if request.end_time > 0: - annotation.end_time = request.end_time - if request.segment_ids: - annotation.segment_ids = list(request.segment_ids) - - updated = await uow.annotations.update(annotation) - await uow.commit() - return self._annotation_to_proto(updated) - - async def DeleteAnnotation( - self, - request: noteflow_pb2.DeleteAnnotationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.DeleteAnnotationResponse: - """Delete an annotation.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Annotations require database persistence", - ) - - async with self._create_uow() as uow: - success = await uow.annotations.delete(AnnotationId(UUID(request.annotation_id))) - if success: - await uow.commit() - return noteflow_pb2.DeleteAnnotationResponse(success=True) - await context.abort( - grpc.StatusCode.NOT_FOUND, - f"Annotation {request.annotation_id} not found", - ) - - def _annotation_to_proto( - self, - annotation: Annotation, - ) -> noteflow_pb2.Annotation: - """Convert domain Annotation to protobuf.""" - return noteflow_pb2.Annotation( - id=str(annotation.id), - meeting_id=str(annotation.meeting_id), - annotation_type=self._annotation_type_to_proto(annotation.annotation_type), - text=annotation.text, - start_time=annotation.start_time, - end_time=annotation.end_time, - segment_ids=annotation.segment_ids, - created_at=annotation.created_at.timestamp(), - ) - - def _annotation_type_to_proto( - self, - annotation_type: AnnotationType, - ) -> int: - """Convert domain AnnotationType to protobuf enum.""" - mapping = { - AnnotationType.ACTION_ITEM: noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM, - AnnotationType.DECISION: noteflow_pb2.ANNOTATION_TYPE_DECISION, - AnnotationType.NOTE: noteflow_pb2.ANNOTATION_TYPE_NOTE, - AnnotationType.RISK: noteflow_pb2.ANNOTATION_TYPE_RISK, - } - return mapping.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED) - - def _proto_to_annotation_type( - self, - proto_type: int, - ) -> AnnotationType: - """Convert protobuf enum to domain AnnotationType.""" - mapping: dict[int, AnnotationType] = { - int(noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM): AnnotationType.ACTION_ITEM, - int(noteflow_pb2.ANNOTATION_TYPE_DECISION): AnnotationType.DECISION, - int(noteflow_pb2.ANNOTATION_TYPE_NOTE): AnnotationType.NOTE, - int(noteflow_pb2.ANNOTATION_TYPE_RISK): AnnotationType.RISK, - } - return mapping.get(proto_type, AnnotationType.NOTE) - - # ========================================================================= - # Export Methods - # ========================================================================= - - async def ExportTranscript( - self, - request: noteflow_pb2.ExportTranscriptRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.ExportTranscriptResponse: - """Export meeting transcript to specified format.""" - if not self._use_database(): - await context.abort( - grpc.StatusCode.UNIMPLEMENTED, - "Export requires database persistence", - ) - - # Map proto format to ExportFormat - fmt = self._proto_to_export_format(request.format) - - export_service = ExportService(self._create_uow()) - try: - content = await export_service.export_transcript( - MeetingId(UUID(request.meeting_id)), - fmt, - ) - exporter_info = export_service.get_supported_formats() - fmt_name = "" - fmt_ext = "" - for name, ext in exporter_info: - if fmt == ExportFormat.MARKDOWN and ext == ".md": - fmt_name, fmt_ext = name, 
ext - break - if fmt == ExportFormat.HTML and ext == ".html": - fmt_name, fmt_ext = name, ext - break - - return noteflow_pb2.ExportTranscriptResponse( - content=content, - format_name=fmt_name, - file_extension=fmt_ext, - ) - except ValueError as e: - await context.abort( - grpc.StatusCode.NOT_FOUND, - str(e), - ) - - def _proto_to_export_format(self, proto_format: int) -> ExportFormat: - """Convert protobuf ExportFormat to domain ExportFormat.""" - if proto_format == noteflow_pb2.EXPORT_FORMAT_HTML: - return ExportFormat.HTML - return ExportFormat.MARKDOWN # Default to Markdown - - # ========================================================================= - # Speaker Diarization Methods - # ========================================================================= - - async def RefineSpeakerDiarization( - self, - request: noteflow_pb2.RefineSpeakerDiarizationRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.RefineSpeakerDiarizationResponse: - """Run post-meeting speaker diarization refinement. - - Loads the full meeting audio, runs offline diarization, and updates - segment speaker assignments. - """ - if self._diarization_engine is None: - response = noteflow_pb2.RefineSpeakerDiarizationResponse() - response.segments_updated = 0 - response.speaker_ids[:] = [] - response.error_message = "Diarization not enabled on server" - response.job_id = "" - response.status = noteflow_pb2.JOB_STATUS_FAILED - return response - - try: - meeting_uuid = UUID(request.meeting_id) - except ValueError: - response = noteflow_pb2.RefineSpeakerDiarizationResponse() - response.segments_updated = 0 - response.speaker_ids[:] = [] - response.error_message = "Invalid meeting_id" - response.job_id = "" - response.status = noteflow_pb2.JOB_STATUS_FAILED - return response - - if self._use_database(): - async with self._create_uow() as uow: - meeting = await uow.meetings.get(MeetingId(meeting_uuid)) - else: - store = self._get_memory_store() - meeting = store.get(request.meeting_id) - if meeting is None: - response = noteflow_pb2.RefineSpeakerDiarizationResponse() - response.segments_updated = 0 - response.speaker_ids[:] = [] - response.error_message = "Meeting not found" - response.job_id = "" - response.status = noteflow_pb2.JOB_STATUS_FAILED - return response - meeting_state = meeting.state - if meeting_state in ( - MeetingState.UNSPECIFIED, - MeetingState.CREATED, - MeetingState.RECORDING, - MeetingState.STOPPING, - ): - response = noteflow_pb2.RefineSpeakerDiarizationResponse() - response.segments_updated = 0 - response.speaker_ids[:] = [] - response.error_message = ( - "Meeting must be stopped before refinement " - f"(state: {meeting_state.name.lower()})" - ) - response.job_id = "" - response.status = noteflow_pb2.JOB_STATUS_FAILED - return response - - num_speakers = request.num_speakers if request.num_speakers > 0 else None - - job_id = str(uuid4()) - job = _DiarizationJob( - job_id=job_id, - meeting_id=request.meeting_id, - status=noteflow_pb2.JOB_STATUS_QUEUED, - ) - self._diarization_jobs[job_id] = job - - # Task runs in background, no need to await - task = asyncio.create_task(self._run_diarization_job(job_id, num_speakers)) - job.task = task - - response = noteflow_pb2.RefineSpeakerDiarizationResponse() - response.segments_updated = 0 - response.speaker_ids[:] = [] - response.error_message = "" - response.job_id = job_id - response.status = noteflow_pb2.JOB_STATUS_QUEUED - return response - - async def _run_diarization_job(self, job_id: str, num_speakers: int | None) -> None: - job = 
self._diarization_jobs.get(job_id) - if job is None: - return - - job.status = noteflow_pb2.JOB_STATUS_RUNNING - job.updated_at = time.time() - - try: - updated_count = await self.refine_speaker_diarization( - meeting_id=job.meeting_id, - num_speakers=num_speakers, - ) - speaker_ids = await self._collect_speaker_ids(job.meeting_id) - job.segments_updated = updated_count - job.speaker_ids = speaker_ids - job.status = noteflow_pb2.JOB_STATUS_COMPLETED - except Exception as exc: - logger.exception("Diarization failed for meeting %s", job.meeting_id) - job.error_message = str(exc) - job.status = noteflow_pb2.JOB_STATUS_FAILED - finally: - job.updated_at = time.time() - - async def RenameSpeaker( - self, - request: noteflow_pb2.RenameSpeakerRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.RenameSpeakerResponse: - """Rename a speaker ID in all segments of a meeting. - - Updates all segments where speaker_id matches old_speaker_id - to use new_speaker_name instead. - """ - if not request.old_speaker_id or not request.new_speaker_name: - await context.abort( - grpc.StatusCode.INVALID_ARGUMENT, - "old_speaker_id and new_speaker_name are required", - ) - - try: - meeting_uuid = UUID(request.meeting_id) - except ValueError: - await context.abort( - grpc.StatusCode.INVALID_ARGUMENT, - "Invalid meeting_id", - ) - - updated_count = 0 - - if self._use_database(): - async with self._create_uow() as uow: - segments = await uow.segments.get_by_meeting(MeetingId(meeting_uuid)) - - for segment in segments: - if segment.speaker_id == request.old_speaker_id and segment.db_id: - await uow.segments.update_speaker( - segment.db_id, - request.new_speaker_name, - segment.speaker_confidence, - ) - updated_count += 1 - - await uow.commit() - else: - store = self._get_memory_store() - if meeting := store.get(request.meeting_id): - for segment in meeting.segments: - if segment.speaker_id == request.old_speaker_id: - segment.speaker_id = request.new_speaker_name - updated_count += 1 - - return noteflow_pb2.RenameSpeakerResponse( - segments_updated=updated_count, - success=updated_count > 0, - ) - - async def GetDiarizationJobStatus( - self, - request: noteflow_pb2.GetDiarizationJobStatusRequest, - context: grpc.aio.ServicerContext, - ) -> noteflow_pb2.DiarizationJobStatus: - """Return current status for a diarization job.""" - job = self._diarization_jobs.get(request.job_id) - if job is None: - await context.abort( - grpc.StatusCode.NOT_FOUND, - "Diarization job not found", - ) - return job.to_proto() ```` diff --git a/repomix.config.json b/repomix.config.json index 949807f..edc7cd8 100644 --- a/repomix.config.json +++ b/repomix.config.json @@ -6,12 +6,12 @@ "output": { "filePath": "repomix-output.md", "style": "markdown", - "parsableStyle": false, + "parsableStyle": true, "fileSummary": true, "directoryStructure": true, "files": true, - "removeComments": false, - "removeEmptyLines": false, + "removeComments": true, + "removeEmptyLines": true, "compress": false, "topFilesLength": 5, "showLineNumbers": false, diff --git a/scripts/dev_watch_server.py b/scripts/dev_watch_server.py index 396e19d..fd918d5 100644 --- a/scripts/dev_watch_server.py +++ b/scripts/dev_watch_server.py @@ -21,7 +21,9 @@ def run_server() -> None: def main() -> None: root = Path(__file__).resolve().parents[1] watch_paths = [root / "src" / "noteflow", root / "alembic.ini"] - existing_paths = [str(path) for path in watch_paths if path.exists()] or [str(root / "src" / "noteflow")] + existing_paths = [str(path) for path in watch_paths if 
path.exists()] or [
+        str(root / "src" / "noteflow")
+    ]
 
     run_process(
         *existing_paths,
diff --git a/src/noteflow/application/services/recovery_service.py b/src/noteflow/application/services/recovery_service.py
index 0dbb450..f36097e 100644
--- a/src/noteflow/application/services/recovery_service.py
+++ b/src/noteflow/application/services/recovery_service.py
@@ -1,12 +1,15 @@
 """Recovery service for crash recovery on startup.
 
 Detect and recover meetings left in active states after server restart.
+Optionally validate audio file integrity for crashed meetings.
 """
 
 from __future__ import annotations
 
 import logging
+from dataclasses import dataclass
 from datetime import UTC, datetime
+from pathlib import Path
 from typing import TYPE_CHECKING, ClassVar
 
 from noteflow.domain.value_objects import MeetingState
@@ -18,11 +21,37 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
 
+@dataclass(frozen=True)
+class AudioValidationResult:
+    """Result of audio file validation for a meeting."""
+
+    is_valid: bool
+    manifest_exists: bool
+    audio_exists: bool
+    error_message: str | None = None
+
+
+@dataclass(frozen=True)
+class RecoveryResult:
+    """Result of crash recovery operations."""
+
+    meetings_recovered: int
+    diarization_jobs_failed: int
+    audio_validation_failures: int = 0
+
+    @property
+    def total_recovered(self) -> int:
+        """Total items recovered across all types."""
+        return self.meetings_recovered + self.diarization_jobs_failed
+
+
 class RecoveryService:
     """Recover meetings from crash states on server startup.
 
     Find meetings left in RECORDING or STOPPING state and mark them as ERROR.
     This handles the case where the server crashed during an active meeting.
+
+    Optionally validates audio file integrity when a meetings directory is provided.
     """
 
     ACTIVE_STATES: ClassVar[list[MeetingState]] = [
@@ -30,22 +59,92 @@ class RecoveryService:
         MeetingState.STOPPING,
     ]
 
-    def __init__(self, uow: UnitOfWork) -> None:
+    def __init__(
+        self,
+        uow: UnitOfWork,
+        meetings_dir: Path | None = None,
+    ) -> None:
         """Initialize recovery service.
 
         Args:
             uow: Unit of work for persistence.
+            meetings_dir: Optional meetings directory for audio validation.
+                If provided, validates that audio files exist for crashed meetings.
         """
         self._uow = uow
+        self._meetings_dir = meetings_dir
 
-    async def recover_crashed_meetings(self) -> list[Meeting]:
+    def _validate_meeting_audio(self, meeting: Meeting) -> AudioValidationResult:
+        """Validate audio files for a crashed meeting.
+
+        Check that manifest.json and audio.enc exist in the meeting directory.
+
+        Args:
+            meeting: Meeting to validate.
+
+        Returns:
+            AudioValidationResult with validation status.
+        """
+        if self._meetings_dir is None:
+            return AudioValidationResult(
+                is_valid=True,
+                manifest_exists=True,
+                audio_exists=True,
+                error_message="Audio validation skipped (no meetings_dir configured)",
+            )
+
+        # Prefer explicit asset_path; fall back to metadata for backward compatibility
+        default_path = str(meeting.id)
+        asset_path = meeting.asset_path or default_path
+        if asset_path == default_path:
+            asset_path = meeting.metadata.get("asset_path") or asset_path
+        meeting_dir = self._meetings_dir / asset_path
+
+        manifest_path = meeting_dir / "manifest.json"
+        audio_path = meeting_dir / "audio.enc"
+
+        manifest_exists = manifest_path.exists()
+        audio_exists = audio_path.exists()
+
+        if not manifest_exists and not audio_exists:
+            return AudioValidationResult(
+                is_valid=False,
+                manifest_exists=False,
+                audio_exists=False,
+                error_message="Meeting directory missing or empty",
+            )
+
+        if not manifest_exists:
+            return AudioValidationResult(
+                is_valid=False,
+                manifest_exists=False,
+                audio_exists=audio_exists,
+                error_message="manifest.json not found",
+            )
+
+        if not audio_exists:
+            return AudioValidationResult(
+                is_valid=False,
+                manifest_exists=True,
+                audio_exists=False,
+                error_message="audio.enc not found",
+            )
+
+        return AudioValidationResult(
+            is_valid=True,
+            manifest_exists=True,
+            audio_exists=True,
+        )
+
+    async def recover_crashed_meetings(self) -> tuple[list[Meeting], int]:
         """Find and recover meetings left in active states.
 
         Mark all meetings in RECORDING or STOPPING state as ERROR
-        with metadata explaining the crash recovery.
+        with metadata explaining the crash recovery. Also validate
+        audio file integrity when a meetings directory is configured.
 
         Returns:
-            List of recovered meetings.
+            Tuple of (recovered meetings, audio validation failure count).
         """
         async with self._uow:
             # Find all meetings in active states
@@ -56,7 +155,7 @@ class RecoveryService:
 
             if total == 0:
                 logger.info("No crashed meetings found during recovery")
-                return []
+                return [], 0
 
             logger.warning(
                 "Found %d meetings in active state during startup, marking as ERROR",
@@ -64,6 +163,7 @@ class RecoveryService:
             )
 
             recovered: list[Meeting] = []
+            audio_failures = 0
             recovery_time = datetime.now(UTC).isoformat()
 
             for meeting in meetings:
@@ -75,18 +175,35 @@ class RecoveryService:
                 meeting.metadata["crash_recovery_time"] = recovery_time
                 meeting.metadata["crash_previous_state"] = previous_state
 
+                # Validate audio files if configured
+                validation = self._validate_meeting_audio(meeting)
+                meeting.metadata["audio_valid"] = str(validation.is_valid).lower()
+                if not validation.is_valid:
+                    audio_failures += 1
+                    meeting.metadata["audio_error"] = validation.error_message or "unknown"
+                    logger.warning(
+                        "Audio validation failed for meeting %s: %s",
+                        meeting.id,
+                        validation.error_message,
+                    )
+
                 await self._uow.meetings.update(meeting)
                 recovered.append(meeting)
 
                 logger.info(
-                    "Recovered crashed meeting: id=%s, previous_state=%s",
+                    "Recovered crashed meeting: id=%s, previous_state=%s, audio_valid=%s",
                     meeting.id,
                     previous_state,
+                    validation.is_valid,
                 )
 
             await self._uow.commit()
 
-        logger.info("Crash recovery complete: %d meetings recovered", len(recovered))
-        return recovered
+        logger.info(
+            "Crash recovery complete: %d meetings recovered, %d audio failures",
+            len(recovered),
+            audio_failures,
+        )
+        return recovered, audio_failures
 
     async def count_crashed_meetings(self) -> int:
         """Count meetings currently in crash states.
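Note for integrators: the hunks above change the startup contract, since `recover_crashed_meetings()` now returns a tuple instead of a list. A minimal wiring sketch under stated assumptions; the `run_startup_recovery` helper name and the `SqlAlchemyUnitOfWork` import path are illustrative, not part of this patch:

```python
import logging
from pathlib import Path

from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

from noteflow.application.services.recovery_service import RecoveryService
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork


async def run_startup_recovery(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    uow = SqlAlchemyUnitOfWork(session_factory)
    # Mirrors the servicer default of ~/.noteflow/meetings; passing None
    # makes audio validation a no-op that always reports valid.
    service = RecoveryService(uow, meetings_dir=Path.home() / ".noteflow" / "meetings")

    # Return type changed from list[Meeting] to tuple[list[Meeting], int],
    # so existing call sites must unpack the result.
    recovered, audio_failures = await service.recover_crashed_meetings()
    if audio_failures:
        # Affected meetings stay in ERROR state; their metadata carries
        # audio_valid="false" plus an audio_error reason.
        logging.getLogger(__name__).warning(
            "%d of %d crashed meetings failed audio validation",
            audio_failures,
            len(recovered),
        )
```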
@@ -99,3 +216,53 @@ class RecoveryService:
             for state in self.ACTIVE_STATES:
                 total += await self._uow.meetings.count_by_state(state)
             return total
+
+    async def recover_crashed_diarization_jobs(self) -> int:
+        """Mark diarization jobs left in running states as failed.
+
+        Find all diarization jobs in QUEUED or RUNNING state and mark them
+        as FAILED with an error message explaining the crash recovery.
+
+        Returns:
+            Number of jobs marked as failed.
+        """
+        async with self._uow:
+            failed_count = await self._uow.diarization_jobs.mark_running_as_failed()
+            await self._uow.commit()
+
+        if failed_count > 0:
+            logger.warning(
+                "Marked %d diarization jobs as failed during crash recovery",
+                failed_count,
+            )
+        else:
+            logger.info("No crashed diarization jobs found during recovery")
+
+        return failed_count
+
+    async def recover_all(self) -> RecoveryResult:
+        """Run all crash recovery operations.
+
+        Recover crashed meetings and mark stale diarization jobs as failed
+        in a single coordinated operation.
+
+        Returns:
+            RecoveryResult with counts of recovered items.
+        """
+        meetings, audio_failures = await self.recover_crashed_meetings()
+        jobs = await self.recover_crashed_diarization_jobs()
+
+        result = RecoveryResult(
+            meetings_recovered=len(meetings),
+            diarization_jobs_failed=jobs,
+            audio_validation_failures=audio_failures,
+        )
+
+        if result.total_recovered > 0:
+            logger.warning(
+                "Crash recovery complete: %d meetings, %d diarization jobs",
+                result.meetings_recovered,
+                result.diarization_jobs_failed,
+            )
+
+        return result
diff --git a/src/noteflow/application/services/summarization_service.py b/src/noteflow/application/services/summarization_service.py
index b28bddd..b126c7e 100644
--- a/src/noteflow/application/services/summarization_service.py
+++ b/src/noteflow/application/services/summarization_service.py
@@ -27,6 +27,9 @@ if TYPE_CHECKING:
     # Type alias for persistence callback
     PersistCallback = Callable[[Summary], Awaitable[None]]
 
+    # Type alias for consent persistence callback
+    ConsentPersistCallback = Callable[[bool], Awaitable[None]]
+
 
 logger = logging.getLogger(__name__)
 
@@ -102,6 +105,7 @@ class SummarizationService:
     verifier: CitationVerifier | None = None
     settings: SummarizationServiceSettings = field(default_factory=SummarizationServiceSettings)
     on_persist: PersistCallback | None = None
+    on_consent_change: ConsentPersistCallback | None = None
 
     def register_provider(self, mode: SummarizationMode, provider: SummarizerProvider) -> None:
         """Register a provider for a specific mode.
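Callers of the previously synchronous `grant_cloud_consent()`/`revoke_cloud_consent()` must now await them (see the hunk that follows). A short sketch of attaching a persistence callback; the `persist_consent` sink and `wire_consent` helper are hypothetical, since the patch defines only the `ConsentPersistCallback` contract:

```python
import logging

from noteflow.application.services.summarization_service import SummarizationService

logger = logging.getLogger(__name__)


async def persist_consent(granted: bool) -> None:
    # Hypothetical sink: the patch fixes only the callback contract
    # (bool in, awaitable out), not where the flag is stored.
    logger.info("Persisting cloud consent: %s", granted)


async def wire_consent(service: SummarizationService) -> None:
    # on_consent_change is a plain dataclass field, so it can be
    # assigned after construction.
    service.on_consent_change = persist_consent
    await service.grant_cloud_consent()   # sets the flag, then awaits persist_consent(True)
    await service.revoke_cloud_consent()  # awaits persist_consent(False)
```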
@@ -147,15 +151,19 @@ class SummarizationService: """ return mode in self.get_available_modes() - def grant_cloud_consent(self) -> None: + async def grant_cloud_consent(self) -> None: """Grant consent for cloud processing.""" self.settings.cloud_consent_granted = True logger.info("Cloud consent granted") + if self.on_consent_change: + await self.on_consent_change(True) - def revoke_cloud_consent(self) -> None: + async def revoke_cloud_consent(self) -> None: """Revoke consent for cloud processing.""" self.settings.cloud_consent_granted = False logger.info("Cloud consent revoked") + if self.on_consent_change: + await self.on_consent_change(False) async def summarize( self, diff --git a/src/noteflow/cli/retention.py b/src/noteflow/cli/retention.py index 46c1148..322ea85 100644 --- a/src/noteflow/cli/retention.py +++ b/src/noteflow/cli/retention.py @@ -5,8 +5,6 @@ Usage: python -m noteflow.cli.retention status """ -from __future__ import annotations - import argparse import asyncio import logging diff --git a/src/noteflow/client/components/_thread_mixin.py b/src/noteflow/client/components/_thread_mixin.py index daaf305..20a11e2 100644 --- a/src/noteflow/client/components/_thread_mixin.py +++ b/src/noteflow/client/components/_thread_mixin.py @@ -4,8 +4,6 @@ Provides standardized thread start/stop patterns for UI components that need background polling or timer threads. """ -from __future__ import annotations - import threading from collections.abc import Callable diff --git a/src/noteflow/client/components/meeting_library.py b/src/noteflow/client/components/meeting_library.py index dfe6564..5fce8b1 100644 --- a/src/noteflow/client/components/meeting_library.py +++ b/src/noteflow/client/components/meeting_library.py @@ -408,7 +408,10 @@ class MeetingLibraryComponent: if num_speakers < 1: num_speakers = None except ValueError: - logger.debug("Invalid speaker count input '%s', using auto-detection", self._num_speakers_field.value) + logger.debug( + "Invalid speaker count input '%s', using auto-detection", + self._num_speakers_field.value, + ) meeting_id = self._state.selected_meeting.id self._close_analyze_dialog() diff --git a/src/noteflow/client/components/vu_meter.py b/src/noteflow/client/components/vu_meter.py index 56125b9..bd244ee 100644 --- a/src/noteflow/client/components/vu_meter.py +++ b/src/noteflow/client/components/vu_meter.py @@ -5,7 +5,8 @@ Uses RmsLevelProvider from AppState (not a new instance). from __future__ import annotations -from typing import TYPE_CHECKING +import time +from typing import TYPE_CHECKING, Final import flet as ft import numpy as np @@ -14,6 +15,9 @@ from numpy.typing import NDArray if TYPE_CHECKING: from noteflow.client.state import AppState +# Throttle UI updates to 20 fps (50ms interval) +VU_UPDATE_INTERVAL: Final[float] = 0.05 + class VuMeterComponent: """Audio level visualization component. @@ -31,6 +35,7 @@ class VuMeterComponent: # REUSE level_provider from state - do not create new instance self._progress_bar: ft.ProgressBar | None = None self._label: ft.Text | None = None + self._last_update_time: float = 0.0 def build(self) -> ft.Row: """Build VU meter UI elements. @@ -59,10 +64,17 @@ class VuMeterComponent: """Process incoming audio frames for level metering. Uses state.level_provider.get_db() - existing RmsLevelProvider method. + Throttled to VU_UPDATE_INTERVAL to avoid excessive UI updates. Args: frames: Audio samples as float32 array. 
""" + now = time.time() + if now - self._last_update_time < VU_UPDATE_INTERVAL: + return # Throttle: skip update if within interval + + self._last_update_time = now + # REUSE existing RmsLevelProvider from state db_level = self._state.level_provider.get_db(frames) self._state.current_db_level = db_level diff --git a/src/noteflow/client/state.py b/src/noteflow/client/state.py index 4ab07e5..7f8f052 100644 --- a/src/noteflow/client/state.py +++ b/src/noteflow/client/state.py @@ -4,8 +4,6 @@ Composes existing types from grpc.client and infrastructure.audio. Does not recreate any dataclasses - imports and uses existing ones. """ -from __future__ import annotations - import logging from collections.abc import Callable from dataclasses import dataclass, field diff --git a/src/noteflow/config/constants.py b/src/noteflow/config/constants.py index 3d0f4a5..a8e0532 100644 --- a/src/noteflow/config/constants.py +++ b/src/noteflow/config/constants.py @@ -4,8 +4,6 @@ This module provides shared constants used across the codebase to avoid magic numbers and ensure consistency. """ -from __future__ import annotations - from typing import Final # Audio constants @@ -21,3 +19,10 @@ DEFAULT_GRPC_PORT: Final[int] = 50051 MAX_GRPC_MESSAGE_SIZE: Final[int] = 100 * 1024 * 1024 """Maximum gRPC message size in bytes (100 MB).""" + +# Audio encryption buffering constants +AUDIO_BUFFER_SIZE_BYTES: Final[int] = 320_000 +"""Target audio buffer size before encryption (320 KB = ~10 seconds at 16kHz PCM16).""" + +PERIODIC_FLUSH_INTERVAL_SECONDS: Final[float] = 2.0 +"""Interval for periodic audio buffer flush to disk (crash resilience).""" diff --git a/src/noteflow/config/settings.py b/src/noteflow/config/settings.py index 02c7e53..59e8179 100644 --- a/src/noteflow/config/settings.py +++ b/src/noteflow/config/settings.py @@ -1,7 +1,5 @@ """NoteFlow application settings using Pydantic settings.""" -from __future__ import annotations - import json from functools import lru_cache from pathlib import Path diff --git a/src/noteflow/domain/entities/meeting.py b/src/noteflow/domain/entities/meeting.py index 2a94146..8405a66 100644 --- a/src/noteflow/domain/entities/meeting.py +++ b/src/noteflow/domain/entities/meeting.py @@ -32,6 +32,7 @@ class Meeting: summary: Summary | None = None metadata: dict[str, str] = field(default_factory=dict) wrapped_dek: bytes | None = None # Encrypted data encryption key + asset_path: str | None = None # Relative path for audio assets @classmethod def create( @@ -60,6 +61,7 @@ class Meeting: state=MeetingState.CREATED, created_at=now, metadata=metadata or {}, + asset_path=str(meeting_id), # Default to meeting ID ) @classmethod @@ -73,6 +75,7 @@ class Meeting: ended_at: datetime | None = None, metadata: dict[str, str] | None = None, wrapped_dek: bytes | None = None, + asset_path: str | None = None, ) -> Meeting: """Create meeting with existing UUID string. @@ -85,6 +88,7 @@ class Meeting: ended_at: End timestamp. metadata: Meeting metadata. wrapped_dek: Encrypted data encryption key. + asset_path: Relative path for audio assets. Returns: Meeting instance with specified ID. 
@@ -99,6 +103,7 @@ class Meeting: ended_at=ended_at, metadata=metadata or {}, wrapped_dek=wrapped_dek, + asset_path=asset_path or uuid_str, ) def start_recording(self) -> None: diff --git a/src/noteflow/domain/triggers/entities.py b/src/noteflow/domain/triggers/entities.py index f52770c..efef906 100644 --- a/src/noteflow/domain/triggers/entities.py +++ b/src/noteflow/domain/triggers/entities.py @@ -3,8 +3,6 @@ Define trigger signals, decisions, and actions for meeting detection. """ -from __future__ import annotations - import time from dataclasses import dataclass, field from enum import Enum diff --git a/src/noteflow/grpc/_mixins/diarization.py b/src/noteflow/grpc/_mixins/diarization.py index 32bca5c..f0909b3 100644 --- a/src/noteflow/grpc/_mixins/diarization.py +++ b/src/noteflow/grpc/_mixins/diarization.py @@ -4,8 +4,8 @@ from __future__ import annotations import asyncio import logging -import time -from dataclasses import dataclass, field +from datetime import datetime, timedelta +from functools import partial from typing import TYPE_CHECKING from uuid import UUID, uuid4 @@ -17,6 +17,10 @@ from noteflow.domain.entities import Segment from noteflow.domain.value_objects import MeetingId, MeetingState from noteflow.infrastructure.audio.reader import MeetingAudioReader from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker +from noteflow.infrastructure.persistence.repositories import ( + DiarizationJob, + StreamingTurn, +) from ..proto import noteflow_pb2 @@ -26,31 +30,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -@dataclass -class _DiarizationJob: - """Track background diarization job state.""" - - job_id: str - meeting_id: str - status: int - segments_updated: int = 0 - speaker_ids: list[str] = field(default_factory=list) - error_message: str = "" - created_at: float = field(default_factory=time.time) - updated_at: float = field(default_factory=time.time) - task: asyncio.Task[None] | None = None - - def to_proto(self) -> noteflow_pb2.DiarizationJobStatus: - """Convert to protobuf message.""" - return noteflow_pb2.DiarizationJobStatus( - job_id=self.job_id, - status=self.status, - segments_updated=self.segments_updated, - speaker_ids=self.speaker_ids, - error_message=self.error_message, - ) - - class DiarizationMixin: """Mixin providing speaker diarization functionality. @@ -60,12 +39,15 @@ class DiarizationMixin: # Job retention constant DIARIZATION_JOB_TTL_SECONDS: float = 60 * 60 # 1 hour - def _process_streaming_diarization( + async def _process_streaming_diarization( self: ServicerHost, meeting_id: str, audio: NDArray[np.float32], ) -> None: - """Process an audio chunk for streaming diarization (best-effort).""" + """Process an audio chunk for streaming diarization (best-effort). + + Offloads heavy ML inference to thread pool to avoid blocking the event loop. 
+ """ if self._diarization_engine is None: return if meeting_id in self._diarization_streaming_failed: @@ -73,48 +55,77 @@ class DiarizationMixin: if audio.size == 0: return - if not self._diarization_engine.is_streaming_loaded: + loop = asyncio.get_running_loop() + + async with self._diarization_lock: + if not self._diarization_engine.is_streaming_loaded: + try: + await loop.run_in_executor( + None, + self._diarization_engine.load_streaming_model, + ) + except (RuntimeError, ValueError) as exc: + logger.warning( + "Streaming diarization disabled for meeting %s: %s", + meeting_id, + exc, + ) + self._diarization_streaming_failed.add(meeting_id) + return + + stream_time = self._diarization_stream_time.get(meeting_id, 0.0) + duration = len(audio) / self.DEFAULT_SAMPLE_RATE + try: - self._diarization_engine.load_streaming_model() - except (RuntimeError, ValueError) as exc: + turns = await loop.run_in_executor( + None, + partial( + self._diarization_engine.process_chunk, + audio, + sample_rate=self.DEFAULT_SAMPLE_RATE, + ), + ) + except Exception as exc: logger.warning( - "Streaming diarization disabled for meeting %s: %s", + "Streaming diarization failed for meeting %s: %s", meeting_id, exc, ) self._diarization_streaming_failed.add(meeting_id) return - stream_time = self._diarization_stream_time.get(meeting_id, 0.0) - duration = len(audio) / self.DEFAULT_SAMPLE_RATE - - try: - turns = self._diarization_engine.process_chunk( - audio, - sample_rate=self.DEFAULT_SAMPLE_RATE, - ) - except Exception as exc: - logger.warning( - "Streaming diarization failed for meeting %s: %s", - meeting_id, - exc, - ) - self._diarization_streaming_failed.add(meeting_id) - return - diarization_turns = self._diarization_turns.setdefault(meeting_id, []) + adjusted_turns: list[SpeakerTurn] = [] for turn in turns: - diarization_turns.append( - SpeakerTurn( - speaker=turn.speaker, - start=turn.start + stream_time, - end=turn.end + stream_time, - confidence=turn.confidence, - ) + adjusted = SpeakerTurn( + speaker=turn.speaker, + start=turn.start + stream_time, + end=turn.end + stream_time, + confidence=turn.confidence, ) + diarization_turns.append(adjusted) + adjusted_turns.append(adjusted) self._diarization_stream_time[meeting_id] = stream_time + duration + # Persist turns immediately for crash resilience + if adjusted_turns and self._use_database(): + try: + async with self._create_uow() as uow: + repo_turns = [ + StreamingTurn( + speaker=t.speaker, + start_time=t.start, + end_time=t.end, + confidence=t.confidence, + ) + for t in adjusted_turns + ] + await uow.diarization_jobs.add_streaming_turns(meeting_id, repo_turns) + await uow.commit() + except Exception: + logger.exception("Failed to persist streaming turns for %s", meeting_id) + def _maybe_assign_speaker( self: ServicerHost, meeting_id: str, @@ -140,23 +151,41 @@ class DiarizationMixin: segment.speaker_id = speaker_id segment.speaker_confidence = confidence - def _prune_diarization_jobs(self: ServicerHost) -> None: - """Remove completed diarization jobs older than retention window.""" - if not self._diarization_jobs: - return - now = time.time() + async def _prune_diarization_jobs(self: ServicerHost) -> None: + """Remove completed diarization jobs older than retention window. + + Prunes both in-memory task references and database records. 
+ """ + # Clean up in-memory task references for completed tasks + completed_tasks = [ + job_id for job_id, task in self._diarization_tasks.items() if task.done() + ] + for job_id in completed_tasks: + self._diarization_tasks.pop(job_id, None) + terminal_statuses = { noteflow_pb2.JOB_STATUS_COMPLETED, noteflow_pb2.JOB_STATUS_FAILED, } - expired = [ - job_id - for job_id, job in self._diarization_jobs.items() - if job.status in terminal_statuses - and now - job.updated_at > self.DIARIZATION_JOB_TTL_SECONDS - ] - for job_id in expired: - self._diarization_jobs.pop(job_id, None) + + # Prune old completed jobs from database + if self._use_database(): + async with self._create_uow() as uow: + pruned = await uow.diarization_jobs.prune_completed( + self.DIARIZATION_JOB_TTL_SECONDS + ) + await uow.commit() + if pruned > 0: + logger.debug("Pruned %d completed diarization jobs", pruned) + else: + cutoff = datetime.now() - timedelta(seconds=self.DIARIZATION_JOB_TTL_SECONDS) + expired = [ + job_id + for job_id, job in self._diarization_jobs.items() + if job.status in terminal_statuses and job.updated_at < cutoff + ] + for job_id in expired: + self._diarization_jobs.pop(job_id, None) async def RefineSpeakerDiarization( self: ServicerHost, @@ -166,9 +195,9 @@ class DiarizationMixin: """Run post-meeting speaker diarization refinement. Load the full meeting audio, run offline diarization, and update - segment speaker assignments. + segment speaker assignments. Job state is persisted to database. """ - self._prune_diarization_jobs() + await self._prune_diarization_jobs() if not self._diarization_refinement_enabled: response = noteflow_pb2.RefineSpeakerDiarizationResponse() @@ -233,16 +262,23 @@ class DiarizationMixin: num_speakers = request.num_speakers if request.num_speakers > 0 else None job_id = str(uuid4()) - job = _DiarizationJob( + job = DiarizationJob( job_id=job_id, meeting_id=request.meeting_id, status=noteflow_pb2.JOB_STATUS_QUEUED, ) - self._diarization_jobs[job_id] = job - # Task runs in background, no need to await + # Persist job to database + if self._use_database(): + async with self._create_uow() as uow: + await uow.diarization_jobs.create(job) + await uow.commit() + else: + self._diarization_jobs[job_id] = job + + # Create background task and store reference for potential cancellation task = asyncio.create_task(self._run_diarization_job(job_id, num_speakers)) - job.task = task + self._diarization_tasks[job_id] = task response = noteflow_pb2.RefineSpeakerDiarizationResponse() response.segments_updated = 0 @@ -257,29 +293,75 @@ class DiarizationMixin: job_id: str, num_speakers: int | None, ) -> None: - """Run background diarization job.""" - job = self._diarization_jobs.get(job_id) - if job is None: - return + """Run background diarization job. - job.status = noteflow_pb2.JOB_STATUS_RUNNING - job.updated_at = time.time() + Updates job status in database as the job progresses. 
+ """ + # Get meeting_id from database + meeting_id: str | None = None + job: DiarizationJob | None = None + if self._use_database(): + async with self._create_uow() as uow: + job = await uow.diarization_jobs.get(job_id) + if job is None: + logger.warning("Diarization job %s not found in database", job_id) + return + meeting_id = job.meeting_id + # Update status to RUNNING + await uow.diarization_jobs.update_status( + job_id, + noteflow_pb2.JOB_STATUS_RUNNING, + ) + await uow.commit() + else: + job = self._diarization_jobs.get(job_id) + if job is None: + logger.warning("Diarization job %s not found in memory", job_id) + return + meeting_id = job.meeting_id + job.status = noteflow_pb2.JOB_STATUS_RUNNING + job.updated_at = datetime.now() try: updated_count = await self.refine_speaker_diarization( - meeting_id=job.meeting_id, + meeting_id=meeting_id, num_speakers=num_speakers, ) - speaker_ids = await self._collect_speaker_ids(job.meeting_id) - job.segments_updated = updated_count - job.speaker_ids = speaker_ids - job.status = noteflow_pb2.JOB_STATUS_COMPLETED + speaker_ids = await self._collect_speaker_ids(meeting_id) + + # Update status to COMPLETED + if self._use_database(): + async with self._create_uow() as uow: + await uow.diarization_jobs.update_status( + job_id, + noteflow_pb2.JOB_STATUS_COMPLETED, + segments_updated=updated_count, + speaker_ids=speaker_ids, + ) + await uow.commit() + else: + if job is not None: + job.status = noteflow_pb2.JOB_STATUS_COMPLETED + job.segments_updated = updated_count + job.speaker_ids = speaker_ids + job.updated_at = datetime.now() + except Exception as exc: - logger.exception("Diarization failed for meeting %s", job.meeting_id) - job.error_message = str(exc) - job.status = noteflow_pb2.JOB_STATUS_FAILED - finally: - job.updated_at = time.time() + logger.exception("Diarization failed for meeting %s", meeting_id) + # Update status to FAILED + if self._use_database(): + async with self._create_uow() as uow: + await uow.diarization_jobs.update_status( + job_id, + noteflow_pb2.JOB_STATUS_FAILED, + error_message=str(exc), + ) + await uow.commit() + else: + if job is not None: + job.status = noteflow_pb2.JOB_STATUS_FAILED + job.error_message = str(exc) + job.updated_at = datetime.now() async def refine_speaker_diarization( self: ServicerHost, @@ -302,11 +384,12 @@ class DiarizationMixin: Raises: RuntimeError: If diarization engine not available or meeting not found. """ - turns = await asyncio.to_thread( - self._run_diarization_inference, - meeting_id, - num_speakers, - ) + async with self._diarization_lock: + turns = await asyncio.to_thread( + self._run_diarization_inference, + meeting_id, + num_speakers, + ) updated_count = await self._apply_diarization_turns(meeting_id, turns) @@ -475,12 +558,37 @@ class DiarizationMixin: request: noteflow_pb2.GetDiarizationJobStatusRequest, context: grpc.aio.ServicerContext, ) -> noteflow_pb2.DiarizationJobStatus: - """Return current status for a diarization job.""" - self._prune_diarization_jobs() + """Return current status for a diarization job. + + Queries job state from database for persistence across restarts. 
+ """ + await self._prune_diarization_jobs() + + if self._use_database(): + async with self._create_uow() as uow: + job = await uow.diarization_jobs.get(request.job_id) + if job is None: + await context.abort( + grpc.StatusCode.NOT_FOUND, + "Diarization job not found", + ) + return noteflow_pb2.DiarizationJobStatus( + job_id=job.job_id, + status=job.status, + segments_updated=job.segments_updated, + speaker_ids=job.speaker_ids, + error_message=job.error_message, + ) job = self._diarization_jobs.get(request.job_id) if job is None: await context.abort( grpc.StatusCode.NOT_FOUND, "Diarization job not found", ) - return job.to_proto() + return noteflow_pb2.DiarizationJobStatus( + job_id=job.job_id, + status=job.status, + segments_updated=job.segments_updated, + speaker_ids=job.speaker_ids, + error_message=job.error_message, + ) diff --git a/src/noteflow/grpc/_mixins/meeting.py b/src/noteflow/grpc/_mixins/meeting.py index e2403b7..9af3bd6 100644 --- a/src/noteflow/grpc/_mixins/meeting.py +++ b/src/noteflow/grpc/_mixins/meeting.py @@ -69,6 +69,8 @@ class MeetingMixin: except ValueError as e: await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e)) await uow.meetings.update(meeting) + # Clean up streaming diarization turns (no longer needed) + await uow.diarization_jobs.clear_streaming_turns(meeting_id) await uow.commit() return meeting_to_proto(meeting) store = self._get_memory_store() diff --git a/src/noteflow/grpc/_mixins/protocols.py b/src/noteflow/grpc/_mixins/protocols.py index b1e4f2f..f9df266 100644 --- a/src/noteflow/grpc/_mixins/protocols.py +++ b/src/noteflow/grpc/_mixins/protocols.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio from pathlib import Path from typing import TYPE_CHECKING, Protocol @@ -15,6 +16,7 @@ if TYPE_CHECKING: from noteflow.infrastructure.asr import FasterWhisperEngine, Segmenter, StreamingVad from noteflow.infrastructure.audio.writer import MeetingAudioWriter from noteflow.infrastructure.diarization import DiarizationEngine, SpeakerTurn + from noteflow.infrastructure.persistence.repositories import DiarizationJob from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork from noteflow.infrastructure.security.crypto import AesGcmCryptoBox @@ -62,6 +64,11 @@ class ServicerHost(Protocol): _diarization_stream_time: dict[str, float] _diarization_streaming_failed: set[str] + # Background diarization task references (for cancellation) + _diarization_jobs: dict[str, DiarizationJob] + _diarization_tasks: dict[str, asyncio.Task[None]] + _diarization_lock: asyncio.Lock + # Constants DEFAULT_SAMPLE_RATE: int SUPPORTED_SAMPLE_RATES: list[int] @@ -105,6 +112,7 @@ class ServicerHost(Protocol): meeting_id: str, dek: bytes, wrapped_dek: bytes, + asset_path: str | None = None, ) -> None: """Open audio writer for a meeting.""" ... 
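The streaming path above keeps the gRPC event loop responsive by serialising engine access behind `_diarization_lock` and pushing blocking inference into the default executor via `functools.partial`. A distilled sketch of that pattern, where `slow_inference` stands in for `DiarizationEngine.process_chunk`:

```python
# Pattern used by _process_streaming_diarization: blocking inference
# runs in the default thread pool, serialised by an asyncio.Lock so
# only one model call is in flight at a time. slow_inference() is a
# stand-in for the real engine call.
import asyncio
import time
from functools import partial


def slow_inference(chunk: bytes, *, sample_rate: int) -> list[str]:
    time.sleep(0.1)  # simulate CPU-bound model work
    return [f"turn@{sample_rate}"]


_lock = asyncio.Lock()


async def process_chunk(chunk: bytes) -> list[str]:
    loop = asyncio.get_running_loop()
    async with _lock:  # mirrors _diarization_lock in the patch
        return await loop.run_in_executor(
            None,  # default executor, as the patch uses
            partial(slow_inference, chunk, sample_rate=16_000),
        )


async def main() -> None:
    results = await asyncio.gather(*(process_chunk(b"pcm") for _ in range(3)))
    print(results)  # three results; the event loop stayed responsive


asyncio.run(main())
```

Keyword arguments cannot be passed to `run_in_executor` directly, which is why the patch (and this sketch) binds `sample_rate` with `partial`.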
diff --git a/src/noteflow/grpc/_mixins/streaming.py b/src/noteflow/grpc/_mixins/streaming.py index 7238007..d9d8505 100644 --- a/src/noteflow/grpc/_mixins/streaming.py +++ b/src/noteflow/grpc/_mixins/streaming.py @@ -15,6 +15,7 @@ import numpy as np from numpy.typing import NDArray from noteflow.domain.value_objects import MeetingId +from noteflow.infrastructure.diarization import SpeakerTurn from ..proto import noteflow_pb2 from .converters import create_segment_from_asr, create_vad_update, segment_to_proto_update @@ -94,6 +95,12 @@ class StreamingMixin: yield update finally: if current_meeting_id: + # Flush audio buffer before cleanup to minimize data loss + if current_meeting_id in self._audio_writers: + try: + self._audio_writers[current_meeting_id].flush() + except Exception as e: + logger.warning("Failed to flush audio for %s: %s", current_meeting_id, e) self._cleanup_streaming_state(current_meeting_id) self._close_audio_writer(current_meeting_id) self._active_streams.discard(current_meeting_id) @@ -167,9 +174,36 @@ class StreamingMixin: await uow.commit() next_segment_id = await uow.segments.get_next_segment_id(meeting.id) - self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) + self._open_meeting_audio_writer( + meeting_id, dek, wrapped_dek, asset_path=meeting.asset_path + ) self._init_streaming_state(meeting_id, next_segment_id) + # Load any persisted streaming turns (crash recovery) + persisted_turns = await uow.diarization_jobs.get_streaming_turns(meeting_id) + if persisted_turns: + domain_turns = [ + SpeakerTurn( + speaker=t.speaker, + start=t.start_time, + end=t.end_time, + confidence=t.confidence, + ) + for t in persisted_turns + ] + self._diarization_turns[meeting_id] = domain_turns + # Advance stream time to avoid overlapping recovered turns + last_end = max(t.end_time for t in persisted_turns) + self._diarization_stream_time[meeting_id] = max( + self._diarization_stream_time.get(meeting_id, 0.0), + last_end, + ) + logger.info( + "Loaded %d streaming diarization turns for meeting %s", + len(domain_turns), + meeting_id, + ) + return _StreamSessionInit(next_segment_id=next_segment_id) def _init_stream_session_memory( @@ -207,7 +241,7 @@ class StreamingMixin: store.update(meeting) next_segment_id = meeting.next_segment_id - self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek) + self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek, asset_path=meeting.asset_path) self._init_streaming_state(meeting_id, next_segment_id) return _StreamSessionInit(next_segment_id=next_segment_id) @@ -377,7 +411,7 @@ class StreamingMixin: # Streaming diarization (optional) - call mixin method if available if hasattr(self, "_process_streaming_diarization"): - self._process_streaming_diarization(meeting_id, audio) + await self._process_streaming_diarization(meeting_id, audio) # Emit VAD state change events was_speaking = self._was_speaking.get(meeting_id, False) diff --git a/src/noteflow/grpc/client.py b/src/noteflow/grpc/client.py index 8489042..67287e0 100644 --- a/src/noteflow/grpc/client.py +++ b/src/noteflow/grpc/client.py @@ -177,7 +177,7 @@ class NoteFlowClient: True if connected successfully. 
""" try: - return self._extracted_from_connect_11(timeout) + return self._setup_grpc_channel(timeout) except grpc.FutureTimeoutError: logger.error("Connection timeout: %s", self._server_address) self._notify_connection(False, "Connection timeout") @@ -187,8 +187,15 @@ class NoteFlowClient: self._notify_connection(False, str(e)) return False - # TODO Rename this here and in `connect` - def _extracted_from_connect_11(self, timeout): + def _setup_grpc_channel(self, timeout: float) -> bool: + """Set up the gRPC channel and stub. + + Args: + timeout: Connection timeout in seconds. + + Returns: + True if connection succeeded. + """ self._channel = grpc.insecure_channel( self._server_address, options=[ diff --git a/src/noteflow/grpc/server.py b/src/noteflow/grpc/server.py index 14706cb..0ff8cfa 100644 --- a/src/noteflow/grpc/server.py +++ b/src/noteflow/grpc/server.py @@ -137,6 +137,9 @@ class NoteFlowServer: """ if self._server: logger.info("Stopping server (grace period: %.1fs)...", grace_period) + # Clean up servicer state before stopping + if self._servicer: + await self._servicer.shutdown() await self._server.stop(grace_period) logger.info("Server stopped") @@ -184,19 +187,48 @@ async def run_server( logger.info("Database connection pool ready") # Run crash recovery on startup - uow = SqlAlchemyUnitOfWork(session_factory) - recovery_service = RecoveryService(uow) - recovered = await recovery_service.recover_crashed_meetings() - if recovered: + settings = get_settings() + recovery_service = RecoveryService( + SqlAlchemyUnitOfWork(session_factory), + meetings_dir=settings.meetings_dir, + ) + recovery_result = await recovery_service.recover_all() + if recovery_result.meetings_recovered: logger.warning( "Recovered %d crashed meetings on startup", - len(recovered), + recovery_result.meetings_recovered, + ) + if recovery_result.diarization_jobs_failed: + logger.warning( + "Recovered %d crashed diarization jobs on startup", + recovery_result.diarization_jobs_failed, + ) + if recovery_result.audio_validation_failures: + logger.warning( + "Found %d meetings with missing/invalid audio files", + recovery_result.audio_validation_failures, ) # Create summarization service - auto-detects LOCAL/MOCK providers summarization_service = create_summarization_service() logger.info("Summarization service initialized") + # Load cloud consent from database and set up persistence callback + if session_factory: + async with SqlAlchemyUnitOfWork(session_factory) as uow: + cloud_consent = await uow.preferences.get_bool("cloud_consent_granted", False) + summarization_service.settings.cloud_consent_granted = cloud_consent + logger.info("Loaded cloud consent from database: %s", cloud_consent) + + # Create consent persistence callback + async def persist_consent(granted: bool) -> None: + async with SqlAlchemyUnitOfWork(session_factory) as uow: + await uow.preferences.set("cloud_consent_granted", granted) + await uow.commit() + logger.info("Persisted cloud consent: %s", granted) + + summarization_service.on_consent_change = persist_consent + # Create diarization engine if enabled diarization_engine: DiarizationEngine | None = None if diarization_enabled: diff --git a/src/noteflow/grpc/service.py b/src/noteflow/grpc/service.py index 4a54bcc..5aff180 100644 --- a/src/noteflow/grpc/service.py +++ b/src/noteflow/grpc/service.py @@ -2,6 +2,8 @@ from __future__ import annotations +import asyncio +import contextlib import logging import time from pathlib import Path @@ -15,6 +17,7 @@ from noteflow.domain.entities import Meeting 
from noteflow.domain.value_objects import MeetingState from noteflow.infrastructure.asr import Segmenter, SegmenterConfig, StreamingVad from noteflow.infrastructure.audio.writer import MeetingAudioWriter +from noteflow.infrastructure.persistence.repositories import DiarizationJob from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork from noteflow.infrastructure.security.crypto import AesGcmCryptoBox from noteflow.infrastructure.security.keystore import KeyringKeyStore @@ -118,8 +121,10 @@ class NoteFlowServicer( # Track audio write failures to avoid log spam self._audio_write_failed: set[str] = set() - # Background diarization jobs - self._diarization_jobs: dict[str, object] = {} + # Background diarization task references (for cancellation) + self._diarization_jobs: dict[str, DiarizationJob] = {} + self._diarization_tasks: dict[str, asyncio.Task[None]] = {} + self._diarization_lock = asyncio.Lock() @property def asr_engine(self) -> FasterWhisperEngine | None: @@ -226,6 +231,7 @@ class NoteFlowServicer( meeting_id: str, dek: bytes, wrapped_dek: bytes, + asset_path: str | None = None, ) -> None: """Open audio writer for a meeting. @@ -233,6 +239,7 @@ class NoteFlowServicer( meeting_id: Meeting ID string. dek: Data encryption key. wrapped_dek: Wrapped DEK. + asset_path: Relative path for audio storage (defaults to meeting_id). """ writer = MeetingAudioWriter(self._crypto, self._meetings_dir) writer.open( @@ -240,6 +247,7 @@ class NoteFlowServicer( dek=dek, wrapped_dek=wrapped_dek, sample_rate=self.DEFAULT_SAMPLE_RATE, + asset_path=asset_path, ) self._audio_writers[meeting_id] = writer logger.info("Audio writer opened for meeting %s", meeting_id) @@ -317,3 +325,39 @@ class NoteFlowServicer( diarization_enabled=diarization_enabled, diarization_ready=diarization_ready, ) + + async def shutdown(self) -> None: + """Clean up servicer state before server stops. + + Cancel in-flight diarization tasks, close audio writers, and mark + any running jobs as failed in the database. + """ + logger.info("Shutting down servicer...") + + # Cancel in-flight diarization tasks + for job_id, task in list(self._diarization_tasks.items()): + if not task.done(): + logger.debug("Cancelling diarization task %s", job_id) + task.cancel() + with contextlib.suppress(asyncio.CancelledError): + await task + + self._diarization_tasks.clear() + + # Close all audio writers + for meeting_id in list(self._audio_writers.keys()): + logger.debug("Closing audio writer for meeting %s", meeting_id) + self._close_audio_writer(meeting_id) + + # Mark running jobs as FAILED in database + if self._use_database(): + async with self._create_uow() as uow: + failed_count = await uow.diarization_jobs.mark_running_as_failed() + await uow.commit() + if failed_count > 0: + logger.warning( + "Marked %d running diarization jobs as failed on shutdown", + failed_count, + ) + + logger.info("Servicer shutdown complete") diff --git a/src/noteflow/infrastructure/asr/dto.py b/src/noteflow/infrastructure/asr/dto.py index de734b0..d33c8a8 100644 --- a/src/noteflow/infrastructure/asr/dto.py +++ b/src/noteflow/infrastructure/asr/dto.py @@ -3,8 +3,6 @@ These DTOs define the data structures used by ASR components. 
""" -from __future__ import annotations - from dataclasses import dataclass, field from enum import Enum diff --git a/src/noteflow/infrastructure/audio/dto.py b/src/noteflow/infrastructure/audio/dto.py index a7ae269..0798d7f 100644 --- a/src/noteflow/infrastructure/audio/dto.py +++ b/src/noteflow/infrastructure/audio/dto.py @@ -3,8 +3,6 @@ Define data structures used by audio capture components. """ -from __future__ import annotations - from collections.abc import Callable from dataclasses import dataclass diff --git a/src/noteflow/infrastructure/audio/levels.py b/src/noteflow/infrastructure/audio/levels.py index 100b5dc..dd62fe3 100644 --- a/src/noteflow/infrastructure/audio/levels.py +++ b/src/noteflow/infrastructure/audio/levels.py @@ -3,8 +3,6 @@ Provide RMS and dB level calculation for VU meter display. """ -from __future__ import annotations - import math from typing import Final diff --git a/src/noteflow/infrastructure/audio/reader.py b/src/noteflow/infrastructure/audio/reader.py index 5f5f7d8..b05e92f 100644 --- a/src/noteflow/infrastructure/audio/reader.py +++ b/src/noteflow/infrastructure/audio/reader.py @@ -54,6 +54,7 @@ class MeetingAudioReader: def load_meeting_audio( self, meeting_id: str, + asset_path: str | None = None, ) -> list[TimestampedAudio]: """Load all audio from an archived meeting. @@ -61,6 +62,8 @@ class MeetingAudioReader: Args: meeting_id: Meeting UUID string. + asset_path: Relative path for audio storage (defaults to meeting_id). + Use the stored asset_path from the database when available. Returns: List of TimestampedAudio chunks (or empty list if not found/failed). @@ -69,7 +72,8 @@ class MeetingAudioReader: FileNotFoundError: If meeting directory or audio file not found. ValueError: If manifest is invalid or audio format unsupported. """ - meeting_dir = self._meetings_dir / meeting_id + storage_path = asset_path or meeting_id + meeting_dir = self._meetings_dir / storage_path self._meeting_dir = meeting_dir # Load and parse manifest @@ -145,31 +149,43 @@ class MeetingAudioReader: return chunks - def get_manifest(self, meeting_id: str) -> dict[str, object] | None: + def get_manifest( + self, + meeting_id: str, + asset_path: str | None = None, + ) -> dict[str, object] | None: """Get manifest metadata for a meeting. Args: meeting_id: Meeting UUID string. + asset_path: Relative path for audio storage (defaults to meeting_id). Returns: Manifest dict or None if not found. """ - manifest_path = self._meetings_dir / meeting_id / "manifest.json" + storage_path = asset_path or meeting_id + manifest_path = self._meetings_dir / storage_path / "manifest.json" if not manifest_path.exists(): return None return dict(json.loads(manifest_path.read_text())) - def audio_exists(self, meeting_id: str) -> bool: + def audio_exists( + self, + meeting_id: str, + asset_path: str | None = None, + ) -> bool: """Check if audio file exists for a meeting. Args: meeting_id: Meeting UUID string. + asset_path: Relative path for audio storage (defaults to meeting_id). Returns: True if audio.enc exists. 
""" - meeting_dir = self._meetings_dir / meeting_id + storage_path = asset_path or meeting_id + meeting_dir = self._meetings_dir / storage_path audio_path = meeting_dir / "audio.enc" manifest_path = meeting_dir / "manifest.json" return audio_path.exists() and manifest_path.exists() diff --git a/src/noteflow/infrastructure/audio/writer.py b/src/noteflow/infrastructure/audio/writer.py index f1f151c..d193c4f 100644 --- a/src/noteflow/infrastructure/audio/writer.py +++ b/src/noteflow/infrastructure/audio/writer.py @@ -2,15 +2,21 @@ from __future__ import annotations +import io import json import logging +import threading from datetime import UTC, datetime from pathlib import Path from typing import TYPE_CHECKING import numpy as np -from noteflow.config.constants import DEFAULT_SAMPLE_RATE +from noteflow.config.constants import ( + AUDIO_BUFFER_SIZE_BYTES, + DEFAULT_SAMPLE_RATE, + PERIODIC_FLUSH_INTERVAL_SECONDS, +) from noteflow.infrastructure.security.crypto import ChunkedAssetWriter if TYPE_CHECKING: @@ -27,6 +33,14 @@ class MeetingAudioWriter: Manage meeting directory creation, manifest file, and encrypted audio storage. Uses ChunkedAssetWriter for the actual encryption. + Audio data is buffered internally to reduce encryption overhead. Each encrypted + chunk has 28 bytes overhead (12 byte nonce + 16 byte tag) plus 4 byte length + prefix. Buffering aggregates small writes into larger chunks (~320KB) before + encryption to minimize this overhead. + + A background thread periodically flushes the buffer every 2 seconds to minimize + data loss on crashes. All buffer access is protected by a lock. + Directory structure: ~/.noteflow/meetings// ├── manifest.json # Meeting metadata + wrapped DEK @@ -37,19 +51,30 @@ class MeetingAudioWriter: self, crypto: AesGcmCryptoBox, meetings_dir: Path, + buffer_size: int = AUDIO_BUFFER_SIZE_BYTES, ) -> None: """Initialize audio writer. Args: crypto: CryptoBox instance for encryption operations. meetings_dir: Root directory for all meetings (e.g., ~/.noteflow/meetings). + buffer_size: Buffer size threshold in bytes before flushing to disk. + Defaults to AUDIO_BUFFER_SIZE_BYTES (~320KB = 10 seconds at 16kHz). """ self._crypto = crypto self._meetings_dir = meetings_dir + self._buffer_size = buffer_size self._asset_writer: ChunkedAssetWriter | None = None self._meeting_dir: Path | None = None self._sample_rate: int = DEFAULT_SAMPLE_RATE self._chunk_count: int = 0 + self._write_count: int = 0 + self._buffer: io.BytesIO = io.BytesIO() + + # Thread-safety for periodic flush + self._buffer_lock = threading.Lock() + self._flush_thread: threading.Thread | None = None + self._stop_flush = threading.Event() def open( self, @@ -57,6 +82,7 @@ class MeetingAudioWriter: dek: bytes, wrapped_dek: bytes, sample_rate: int = DEFAULT_SAMPLE_RATE, + asset_path: str | None = None, ) -> None: """Open meeting for audio writing. @@ -67,6 +93,8 @@ class MeetingAudioWriter: dek: Unwrapped data encryption key (32 bytes). wrapped_dek: Encrypted DEK to store in manifest. sample_rate: Audio sample rate (default 16000 Hz). + asset_path: Relative path for audio storage (defaults to meeting_id). + This allows meetings_dir to change without orphaning files. Raises: RuntimeError: If already open. 
@@ -75,8 +103,11 @@ class MeetingAudioWriter: if self._asset_writer is not None: raise RuntimeError("Writer already open") + # Use asset_path if provided, otherwise default to meeting_id + storage_path = asset_path or meeting_id + # Create meeting directory - self._meeting_dir = self._meetings_dir / meeting_id + self._meeting_dir = self._meetings_dir / storage_path self._meeting_dir.mkdir(parents=True, exist_ok=True) # Write manifest.json @@ -98,15 +129,49 @@ class MeetingAudioWriter: self._sample_rate = sample_rate self._chunk_count = 0 + self._write_count = 0 + self._buffer = io.BytesIO() + + # Start periodic flush thread for crash resilience + self._stop_flush.clear() + self._flush_thread = threading.Thread( + target=self._periodic_flush_loop, + name=f"AudioFlush-{meeting_id[:8]}", + daemon=True, + ) + self._flush_thread.start() logger.info( - "Opened audio writer: meeting=%s, dir=%s", + "Opened audio writer: meeting=%s, dir=%s, buffer_size=%d", meeting_id, self._meeting_dir, + self._buffer_size, ) + def _periodic_flush_loop(self) -> None: + """Background thread: periodically flush buffer for crash resilience.""" + while not self._stop_flush.wait(timeout=PERIODIC_FLUSH_INTERVAL_SECONDS): + try: + self._flush_if_open() + except Exception: + logger.exception("Periodic flush failed") + + def _flush_if_open(self) -> None: + """Flush buffer if writer is open (thread-safe, no exception if closed).""" + with self._buffer_lock: + if ( + self._asset_writer is not None + and self._asset_writer.is_open + and self._buffer.tell() > 0 + ): + self._flush_buffer_unlocked() + def write_chunk(self, audio: NDArray[np.float32]) -> None: - """Write audio chunk (convert float32 → PCM16). + """Write audio chunk to internal buffer (convert float32 → PCM16). + + Audio is buffered internally and flushed to encrypted storage when the + buffer exceeds the configured threshold. Call flush() to force immediate + write, or close() to finalize. Args: audio: Audio samples as float32 array (-1.0 to 1.0). @@ -122,29 +187,85 @@ class MeetingAudioWriter: audio_clamped = np.clip(audio, -1.0, 1.0) pcm16 = (audio_clamped * 32767.0).astype(np.int16) - # Write as raw bytes (platform-native endianness, typically little-endian) - self._asset_writer.write_chunk(pcm16.tobytes()) - self._chunk_count += 1 + with self._buffer_lock: + # Append to buffer + self._buffer.write(pcm16.tobytes()) + self._write_count += 1 + + # Flush buffer if threshold exceeded + if self._buffer.tell() >= self._buffer_size: + self._flush_buffer_unlocked() + + def flush(self) -> None: + """Force flush buffered audio to encrypted storage. + + Call this to ensure all buffered audio is written immediately. + Normally only needed before a long pause or when precise timing matters. + + Raises: + RuntimeError: If not open. + """ + if self._asset_writer is None or not self._asset_writer.is_open: + raise RuntimeError("Writer not open") + + with self._buffer_lock: + if self._buffer.tell() > 0: + self._flush_buffer_unlocked() + + def _flush_buffer_unlocked(self) -> None: + """Flush internal buffer to encrypted storage. + + Must be called with _buffer_lock held. + """ + if self._asset_writer is None: + return + + if buffer_bytes := self._buffer.getvalue(): + self._asset_writer.write_chunk(buffer_bytes) + self._chunk_count += 1 + logger.debug( + "Flushed audio buffer: %d bytes, chunk #%d", + len(buffer_bytes), + self._chunk_count, + ) + + # Reset buffer + self._buffer = io.BytesIO() def close(self) -> None: """Close audio writer and finalize files. 
+ Stops the periodic flush thread, flushes remaining audio, and closes files. Safe to call if already closed or never opened. """ + # Stop periodic flush thread first + self._stop_flush.set() + if self._flush_thread is not None: + self._flush_thread.join(timeout=1.0) + self._flush_thread = None + if self._asset_writer is not None: + # Flush remaining buffer under lock + with self._buffer_lock: + self._flush_buffer_unlocked() + bytes_written = self._asset_writer.bytes_written self._asset_writer.close() self._asset_writer = None logger.info( - "Closed audio writer: dir=%s, chunks=%d, bytes=%d", + "Closed audio writer: dir=%s, writes=%d, encrypted_chunks=%d, bytes=%d", self._meeting_dir, + self._write_count, self._chunk_count, bytes_written, ) self._meeting_dir = None self._chunk_count = 0 + self._write_count = 0 + with self._buffer_lock: + self._buffer = io.BytesIO() @property def is_open(self) -> bool: @@ -158,9 +279,26 @@ class MeetingAudioWriter: @property def chunk_count(self) -> int: - """Number of audio chunks written.""" + """Number of encrypted chunks written to disk. + + Due to buffering, this may be less than write_count. + """ return self._chunk_count + @property + def write_count(self) -> int: + """Number of write_chunk() calls made. + + This counts incoming audio frames, not encrypted chunks written to disk. + """ + return self._write_count + + @property + def buffered_bytes(self) -> int: + """Current bytes pending in buffer, not yet written to disk.""" + with self._buffer_lock: + return self._buffer.tell() + @property def meeting_dir(self) -> Path | None: """Current meeting directory, or None if not open.""" diff --git a/src/noteflow/infrastructure/converters/orm_converters.py b/src/noteflow/infrastructure/converters/orm_converters.py index 0258517..48e5d43 100644 --- a/src/noteflow/infrastructure/converters/orm_converters.py +++ b/src/noteflow/infrastructure/converters/orm_converters.py @@ -100,6 +100,7 @@ class OrmConverter: ended_at=model.ended_at, metadata=model.metadata_, wrapped_dek=model.wrapped_dek, + asset_path=model.asset_path, ) # --- Segment --- diff --git a/src/noteflow/infrastructure/diarization/assigner.py b/src/noteflow/infrastructure/diarization/assigner.py index f4b9821..9ee3193 100644 --- a/src/noteflow/infrastructure/diarization/assigner.py +++ b/src/noteflow/infrastructure/diarization/assigner.py @@ -4,8 +4,6 @@ Provides functions to assign speaker labels to transcript segments based on diarization output using timestamp overlap matching. """ -from __future__ import annotations - from collections.abc import Sequence from noteflow.infrastructure.diarization.dto import SpeakerTurn diff --git a/src/noteflow/infrastructure/diarization/dto.py b/src/noteflow/infrastructure/diarization/dto.py index 37dc98a..1bfb386 100644 --- a/src/noteflow/infrastructure/diarization/dto.py +++ b/src/noteflow/infrastructure/diarization/dto.py @@ -3,8 +3,6 @@ These DTOs define the data structures used by diarization components. 
""" -from __future__ import annotations - from dataclasses import dataclass diff --git a/src/noteflow/infrastructure/export/_formatting.py b/src/noteflow/infrastructure/export/_formatting.py index 3357eec..f188b06 100644 --- a/src/noteflow/infrastructure/export/_formatting.py +++ b/src/noteflow/infrastructure/export/_formatting.py @@ -1,7 +1,5 @@ """Shared formatting utilities for export modules.""" -from __future__ import annotations - from datetime import datetime diff --git a/src/noteflow/infrastructure/persistence/database.py b/src/noteflow/infrastructure/persistence/database.py index e5b4727..52d88b7 100644 --- a/src/noteflow/infrastructure/persistence/database.py +++ b/src/noteflow/infrastructure/persistence/database.py @@ -2,7 +2,6 @@ from __future__ import annotations -from collections.abc import AsyncGenerator from typing import TYPE_CHECKING from sqlalchemy.ext.asyncio import ( @@ -55,21 +54,6 @@ def get_async_session_factory( ) -async def get_async_session( - session_factory: async_sessionmaker[AsyncSession], -) -> AsyncGenerator[AsyncSession, None]: - """Yield an async database session. - - Args: - session_factory: Factory for creating sessions. - - Yields: - Async database session that is closed after use. - """ - async with session_factory() as session: - yield session - - def create_async_session_factory( database_url: str, pool_size: int = 5, diff --git a/src/noteflow/infrastructure/persistence/migrations/env.py b/src/noteflow/infrastructure/persistence/migrations/env.py index 2e81fdd..526c5b6 100644 --- a/src/noteflow/infrastructure/persistence/migrations/env.py +++ b/src/noteflow/infrastructure/persistence/migrations/env.py @@ -1,7 +1,5 @@ """Alembic migration environment configuration.""" -from __future__ import annotations - import asyncio import os from logging.config import fileConfig diff --git a/src/noteflow/infrastructure/persistence/migrations/versions/d8e5f6a7b2c3_add_diarization_jobs_table.py b/src/noteflow/infrastructure/persistence/migrations/versions/d8e5f6a7b2c3_add_diarization_jobs_table.py new file mode 100644 index 0000000..4021e4e --- /dev/null +++ b/src/noteflow/infrastructure/persistence/migrations/versions/d8e5f6a7b2c3_add_diarization_jobs_table.py @@ -0,0 +1,80 @@ +"""add_diarization_jobs_table + +Revision ID: d8e5f6a7b2c3 +Revises: c7d4e9f3a2b1 +Create Date: 2025-12-19 10:00:00.000000 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision: str = "d8e5f6a7b2c3" +down_revision: str | Sequence[str] | None = "c7d4e9f3a2b1" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Create diarization_jobs table for tracking background jobs.""" + op.create_table( + "diarization_jobs", + sa.Column("id", sa.String(36), primary_key=True), + sa.Column( + "meeting_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("status", sa.Integer(), nullable=False, server_default="0"), + sa.Column("segments_updated", sa.Integer(), nullable=False, server_default="0"), + sa.Column( + "speaker_ids", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default="[]", + ), + sa.Column("error_message", sa.Text(), nullable=False, server_default=""), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + schema="noteflow", + ) + + # Create index for meeting_id lookups + op.create_index( + "ix_diarization_jobs_meeting_id", + "diarization_jobs", + ["meeting_id"], + schema="noteflow", + ) + + # Create index for status queries (e.g., finding running jobs) + op.create_index( + "ix_diarization_jobs_status", + "diarization_jobs", + ["status"], + schema="noteflow", + ) + + +def downgrade() -> None: + """Drop diarization_jobs table.""" + op.drop_index("ix_diarization_jobs_status", table_name="diarization_jobs", schema="noteflow") + op.drop_index( + "ix_diarization_jobs_meeting_id", table_name="diarization_jobs", schema="noteflow" + ) + op.drop_table("diarization_jobs", schema="noteflow") diff --git a/src/noteflow/infrastructure/persistence/migrations/versions/e9f0a1b2c3d4_add_asset_path_to_meetings.py b/src/noteflow/infrastructure/persistence/migrations/versions/e9f0a1b2c3d4_add_asset_path_to_meetings.py new file mode 100644 index 0000000..54c2697 --- /dev/null +++ b/src/noteflow/infrastructure/persistence/migrations/versions/e9f0a1b2c3d4_add_asset_path_to_meetings.py @@ -0,0 +1,45 @@ +"""add_asset_path_to_meetings + +Revision ID: e9f0a1b2c3d4 +Revises: d8e5f6a7b2c3 +Create Date: 2025-12-19 08:00:00.000000 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "e9f0a1b2c3d4" +down_revision: str | Sequence[str] | None = "d8e5f6a7b2c3" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add asset_path column to meetings table. + + Stores the relative path for audio files. This allows the meetings_dir + to change without orphaning existing recordings. 
+ """ + op.add_column( + "meetings", + sa.Column("asset_path", sa.Text(), nullable=True), + schema="noteflow", + ) + + # Backfill existing rows: asset_path = id (as string) + op.execute( + """ + UPDATE noteflow.meetings + SET asset_path = id::text + WHERE asset_path IS NULL + """ + ) + + +def downgrade() -> None: + """Remove asset_path column from meetings table.""" + op.drop_column("meetings", "asset_path", schema="noteflow") diff --git a/src/noteflow/infrastructure/persistence/migrations/versions/f0a1b2c3d4e5_add_user_preferences_table.py b/src/noteflow/infrastructure/persistence/migrations/versions/f0a1b2c3d4e5_add_user_preferences_table.py new file mode 100644 index 0000000..8245629 --- /dev/null +++ b/src/noteflow/infrastructure/persistence/migrations/versions/f0a1b2c3d4e5_add_user_preferences_table.py @@ -0,0 +1,54 @@ +"""add_user_preferences_table + +Revision ID: f0a1b2c3d4e5 +Revises: e9f0a1b2c3d4 +Create Date: 2025-12-19 09:00:00.000000 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects.postgresql import JSONB + +# revision identifiers, used by Alembic. +revision: str = "f0a1b2c3d4e5" +down_revision: str | Sequence[str] | None = "e9f0a1b2c3d4" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Create user_preferences table for persisting user settings.""" + op.create_table( + "user_preferences", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("key", sa.String(64), nullable=False), + sa.Column("value", JSONB(), nullable=False), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + nullable=False, + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("key"), + schema="noteflow", + ) + op.create_index( + "ix_noteflow_user_preferences_key", + "user_preferences", + ["key"], + schema="noteflow", + ) + + +def downgrade() -> None: + """Drop user_preferences table.""" + op.drop_index( + "ix_noteflow_user_preferences_key", + table_name="user_preferences", + schema="noteflow", + ) + op.drop_table("user_preferences", schema="noteflow") diff --git a/src/noteflow/infrastructure/persistence/migrations/versions/g1b2c3d4e5f6_add_streaming_diarization_turns.py b/src/noteflow/infrastructure/persistence/migrations/versions/g1b2c3d4e5f6_add_streaming_diarization_turns.py new file mode 100644 index 0000000..d8757cb --- /dev/null +++ b/src/noteflow/infrastructure/persistence/migrations/versions/g1b2c3d4e5f6_add_streaming_diarization_turns.py @@ -0,0 +1,62 @@ +"""add_streaming_diarization_turns + +Revision ID: g1b2c3d4e5f6 +Revises: f0a1b2c3d4e5 +Create Date: 2025-12-19 14:00:00.000000 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision: str = "g1b2c3d4e5f6" +down_revision: str | Sequence[str] | None = "f0a1b2c3d4e5" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Create streaming_diarization_turns table for crash-resilient speaker turns.""" + op.create_table( + "streaming_diarization_turns", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column( + "meeting_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("speaker", sa.String(50), nullable=False), + sa.Column("start_time", sa.Float(), nullable=False), + sa.Column("end_time", sa.Float(), nullable=False), + sa.Column("confidence", sa.Float(), nullable=False, server_default="0.0"), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + schema="noteflow", + ) + + # Create index for meeting_id lookups + op.create_index( + "ix_streaming_diarization_turns_meeting_id", + "streaming_diarization_turns", + ["meeting_id"], + schema="noteflow", + ) + + +def downgrade() -> None: + """Drop streaming_diarization_turns table.""" + op.drop_index( + "ix_streaming_diarization_turns_meeting_id", + table_name="streaming_diarization_turns", + schema="noteflow", + ) + op.drop_table("streaming_diarization_turns", schema="noteflow") diff --git a/src/noteflow/infrastructure/persistence/models.py b/src/noteflow/infrastructure/persistence/models.py index 4e8a92f..9c5aaac 100644 --- a/src/noteflow/infrastructure/persistence/models.py +++ b/src/noteflow/infrastructure/persistence/models.py @@ -65,6 +65,10 @@ class MeetingModel(Base): LargeBinary, nullable=True, ) + asset_path: Mapped[str | None] = mapped_column( + Text, + nullable=True, + ) # Relationships segments: Mapped[list[SegmentModel]] = relationship( @@ -300,3 +304,91 @@ class AnnotationModel(Base): "MeetingModel", back_populates="annotations", ) + + +class UserPreferencesModel(Base): + """SQLAlchemy model for user_preferences table. + + Stores key-value user preferences for persistence across server restarts. + Currently used for cloud consent and other settings. + """ + + __tablename__ = "user_preferences" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + key: Mapped[str] = mapped_column(String(64), unique=True, index=True, nullable=False) + value: Mapped[dict[str, object]] = mapped_column(JSONB, nullable=False, default=dict) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + onupdate=datetime.now, + ) + + +class DiarizationJobModel(Base): + """SQLAlchemy model for diarization_jobs table. + + Tracks background speaker diarization jobs. Persisting job state + allows recovery after server restart and provides client visibility. 
+ """ + + __tablename__ = "diarization_jobs" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[str] = mapped_column(String(36), primary_key=True) + meeting_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + status: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + segments_updated: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + speaker_ids: Mapped[list[str]] = mapped_column( + JSONB, + nullable=False, + default=list, + ) + error_message: Mapped[str] = mapped_column(Text, nullable=False, default="") + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + onupdate=datetime.now, + ) + + +class StreamingDiarizationTurnModel(Base): + """SQLAlchemy model for streaming_diarization_turns table. + + Stores speaker turns from real-time streaming diarization for crash + resilience. These turns are persisted as they arrive and can be reloaded + if the server restarts during a recording session. + """ + + __tablename__ = "streaming_diarization_turns" + __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"} + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + meeting_id: Mapped[UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("noteflow.meetings.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + speaker: Mapped[str] = mapped_column(String(50), nullable=False) + start_time: Mapped[float] = mapped_column(Float, nullable=False) + end_time: Mapped[float] = mapped_column(Float, nullable=False) + confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + default=datetime.now, + ) diff --git a/src/noteflow/infrastructure/persistence/repositories/__init__.py b/src/noteflow/infrastructure/persistence/repositories/__init__.py index 9d866e1..5283926 100644 --- a/src/noteflow/infrastructure/persistence/repositories/__init__.py +++ b/src/noteflow/infrastructure/persistence/repositories/__init__.py @@ -1,13 +1,23 @@ """Repository implementations for NoteFlow.""" from .annotation_repo import SqlAlchemyAnnotationRepository +from .diarization_job_repo import ( + DiarizationJob, + SqlAlchemyDiarizationJobRepository, + StreamingTurn, +) from .meeting_repo import SqlAlchemyMeetingRepository +from .preferences_repo import SqlAlchemyPreferencesRepository from .segment_repo import SqlAlchemySegmentRepository from .summary_repo import SqlAlchemySummaryRepository __all__ = [ + "DiarizationJob", "SqlAlchemyAnnotationRepository", + "SqlAlchemyDiarizationJobRepository", "SqlAlchemyMeetingRepository", + "SqlAlchemyPreferencesRepository", "SqlAlchemySegmentRepository", "SqlAlchemySummaryRepository", + "StreamingTurn", ] diff --git a/src/noteflow/infrastructure/persistence/repositories/diarization_job_repo.py b/src/noteflow/infrastructure/persistence/repositories/diarization_job_repo.py new file mode 100644 index 0000000..7a5c410 --- /dev/null +++ b/src/noteflow/infrastructure/persistence/repositories/diarization_job_repo.py @@ -0,0 +1,282 @@ +"""SQLAlchemy implementation of DiarizationJobRepository.""" + +from collections.abc import Sequence +from dataclasses import dataclass, field +from datetime import 
datetime +from typing import Final +from uuid import UUID + +from sqlalchemy import delete, select, update + +from noteflow.infrastructure.persistence.models import ( + DiarizationJobModel, + StreamingDiarizationTurnModel, +) +from noteflow.infrastructure.persistence.repositories._base import BaseRepository + +# Job status constants (mirrors proto enum) +JOB_STATUS_UNSPECIFIED: Final[int] = 0 +JOB_STATUS_QUEUED: Final[int] = 1 +JOB_STATUS_RUNNING: Final[int] = 2 +JOB_STATUS_COMPLETED: Final[int] = 3 +JOB_STATUS_FAILED: Final[int] = 4 + + +@dataclass +class DiarizationJob: + """Data transfer object for diarization job state. + + Separate from ORM model to allow easy passing between layers. + """ + + job_id: str + meeting_id: str + status: int + segments_updated: int = 0 + speaker_ids: list[str] = field(default_factory=list) + error_message: str = "" + created_at: datetime = field(default_factory=datetime.now) + updated_at: datetime = field(default_factory=datetime.now) + + +@dataclass +class StreamingTurn: + """Data transfer object for streaming diarization turn. + + Represents a speaker turn collected during real-time streaming diarization. + """ + + speaker: str + start_time: float + end_time: float + confidence: float = 0.0 + + +class SqlAlchemyDiarizationJobRepository(BaseRepository): + """SQLAlchemy implementation of DiarizationJobRepository.""" + + @staticmethod + def _to_domain(model: DiarizationJobModel) -> DiarizationJob: + """Convert ORM model to domain object.""" + return DiarizationJob( + job_id=model.id, + meeting_id=str(model.meeting_id), + status=model.status, + segments_updated=model.segments_updated, + speaker_ids=list(model.speaker_ids), + error_message=model.error_message, + created_at=model.created_at, + updated_at=model.updated_at, + ) + + async def create(self, job: DiarizationJob) -> DiarizationJob: + """Persist a new diarization job. + + Args: + job: Job to create. + + Returns: + Created job. + """ + model = DiarizationJobModel( + id=job.job_id, + meeting_id=UUID(job.meeting_id), + status=job.status, + segments_updated=job.segments_updated, + speaker_ids=job.speaker_ids, + error_message=job.error_message, + created_at=job.created_at, + updated_at=job.updated_at, + ) + self._session.add(model) + await self._session.flush() + return job + + async def get(self, job_id: str) -> DiarizationJob | None: + """Retrieve a job by ID. + + Args: + job_id: Job identifier. + + Returns: + Job if found, None otherwise. + """ + stmt = select(DiarizationJobModel).where(DiarizationJobModel.id == job_id) + model = await self._execute_scalar(stmt) + + return None if model is None else self._to_domain(model) + + async def update_status( + self, + job_id: str, + status: int, + *, + segments_updated: int | None = None, + speaker_ids: list[str] | None = None, + error_message: str | None = None, + ) -> bool: + """Update job status and optional fields. + + Args: + job_id: Job identifier. + status: New status value. + segments_updated: Optional segments count. + speaker_ids: Optional speaker IDs list. + error_message: Optional error message. + + Returns: + True if job was updated, False if not found. 
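+
+        Example (illustrative sketch; ``repo`` and ``job_id`` are assumed to
+        come from the surrounding unit of work)::
+
+            ok = await repo.update_status(
+                job_id,
+                JOB_STATUS_COMPLETED,
+                segments_updated=12,
+            )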
+ """ + values: dict[str, int | list[str] | str | datetime] = { + "status": status, + "updated_at": datetime.now(), + } + if segments_updated is not None: + values["segments_updated"] = segments_updated + if speaker_ids is not None: + values["speaker_ids"] = speaker_ids + if error_message is not None: + values["error_message"] = error_message + + stmt = update(DiarizationJobModel).where(DiarizationJobModel.id == job_id).values(**values) + result = await self._session.execute(stmt) + await self._session.flush() + return result.rowcount > 0 + + async def list_for_meeting(self, meeting_id: str) -> Sequence[DiarizationJob]: + """List all jobs for a meeting. + + Args: + meeting_id: Meeting identifier. + + Returns: + List of jobs ordered by creation time (newest first). + """ + stmt = ( + select(DiarizationJobModel) + .where(DiarizationJobModel.meeting_id == UUID(meeting_id)) + .order_by(DiarizationJobModel.created_at.desc()) + ) + models = await self._execute_scalars(stmt) + return [self._to_domain(model) for model in models] + + async def mark_running_as_failed(self, error_message: str = "Server restarted") -> int: + """Mark all QUEUED or RUNNING jobs as FAILED. + + Used during crash recovery to mark orphaned jobs. + + Args: + error_message: Error message to set on failed jobs. + + Returns: + Number of jobs marked as failed. + """ + stmt = ( + update(DiarizationJobModel) + .where(DiarizationJobModel.status.in_([JOB_STATUS_QUEUED, JOB_STATUS_RUNNING])) + .values( + status=JOB_STATUS_FAILED, + error_message=error_message, + updated_at=datetime.now(), + ) + ) + result = await self._session.execute(stmt) + await self._session.flush() + return result.rowcount + + async def prune_completed(self, ttl_seconds: float) -> int: + """Delete completed/failed jobs older than TTL. + + Args: + ttl_seconds: Time-to-live in seconds. + + Returns: + Number of jobs deleted. + """ + cutoff = datetime.now().timestamp() - ttl_seconds + cutoff_dt = datetime.fromtimestamp(cutoff) + + stmt = delete(DiarizationJobModel).where( + DiarizationJobModel.status.in_([JOB_STATUS_COMPLETED, JOB_STATUS_FAILED]), + DiarizationJobModel.updated_at < cutoff_dt, + ) + result = await self._session.execute(stmt) + await self._session.flush() + return result.rowcount + + # Streaming diarization turn methods + + async def add_streaming_turns(self, meeting_id: str, turns: Sequence[StreamingTurn]) -> int: + """Persist streaming diarization turns for a meeting. + + Immediately stores speaker turns as they arrive during streaming. + Used for crash resilience. + + Args: + meeting_id: Meeting identifier. + turns: Speaker turns to persist. + + Returns: + Number of turns added. + """ + if not turns: + return 0 + + meeting_uuid = UUID(meeting_id) + for turn in turns: + model = StreamingDiarizationTurnModel( + meeting_id=meeting_uuid, + speaker=turn.speaker, + start_time=turn.start_time, + end_time=turn.end_time, + confidence=turn.confidence, + ) + self._session.add(model) + + await self._session.flush() + return len(turns) + + async def get_streaming_turns(self, meeting_id: str) -> list[StreamingTurn]: + """Retrieve streaming diarization turns for a meeting. + + Used to recover streaming state after server restart. + + Args: + meeting_id: Meeting identifier. + + Returns: + List of streaming turns ordered by start time. 
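+
+        Example (illustrative sketch; ``repo`` is an open repository and
+        ``meeting`` a recovered meeting entity)::
+
+            turns = await repo.get_streaming_turns(str(meeting.id))
+            resume_from = max((t.end_time for t in turns), default=0.0)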
+ """ + stmt = ( + select(StreamingDiarizationTurnModel) + .where(StreamingDiarizationTurnModel.meeting_id == UUID(meeting_id)) + .order_by(StreamingDiarizationTurnModel.start_time) + ) + models = await self._execute_scalars(stmt) + return [ + StreamingTurn( + speaker=model.speaker, + start_time=model.start_time, + end_time=model.end_time, + confidence=model.confidence, + ) + for model in models + ] + + async def clear_streaming_turns(self, meeting_id: str) -> int: + """Delete streaming diarization turns for a meeting. + + Called when a meeting stops recording to clean up temporary turns. + + Args: + meeting_id: Meeting identifier. + + Returns: + Number of turns deleted. + """ + stmt = delete(StreamingDiarizationTurnModel).where( + StreamingDiarizationTurnModel.meeting_id == UUID(meeting_id) + ) + result = await self._session.execute(stmt) + await self._session.flush() + return result.rowcount diff --git a/src/noteflow/infrastructure/persistence/repositories/meeting_repo.py b/src/noteflow/infrastructure/persistence/repositories/meeting_repo.py index 68ac9f4..3694d5a 100644 --- a/src/noteflow/infrastructure/persistence/repositories/meeting_repo.py +++ b/src/noteflow/infrastructure/persistence/repositories/meeting_repo.py @@ -1,7 +1,5 @@ """SQLAlchemy implementation of MeetingRepository.""" -from __future__ import annotations - from collections.abc import Sequence from datetime import datetime from uuid import UUID @@ -36,6 +34,7 @@ class SqlAlchemyMeetingRepository(BaseRepository): ended_at=meeting.ended_at, metadata_=meeting.metadata, wrapped_dek=meeting.wrapped_dek, + asset_path=meeting.asset_path, ) self._session.add(model) await self._session.flush() @@ -79,6 +78,7 @@ class SqlAlchemyMeetingRepository(BaseRepository): model.ended_at = meeting.ended_at model.metadata_ = meeting.metadata model.wrapped_dek = meeting.wrapped_dek + model.asset_path = meeting.asset_path await self._session.flush() return meeting diff --git a/src/noteflow/infrastructure/persistence/repositories/preferences_repo.py b/src/noteflow/infrastructure/persistence/repositories/preferences_repo.py new file mode 100644 index 0000000..aea838f --- /dev/null +++ b/src/noteflow/infrastructure/persistence/repositories/preferences_repo.py @@ -0,0 +1,85 @@ +"""SQLAlchemy implementation of PreferencesRepository.""" + +from sqlalchemy import select + +from noteflow.infrastructure.persistence.models import UserPreferencesModel +from noteflow.infrastructure.persistence.repositories._base import BaseRepository + + +class SqlAlchemyPreferencesRepository(BaseRepository): + """SQLAlchemy implementation of PreferencesRepository. + + Provides key-value storage for user preferences. Values are stored as JSONB + for flexibility while maintaining type-safe retrieval. + """ + + async def _get_by_key(self, key: str) -> UserPreferencesModel | None: + """Get preference model by key. + + Args: + key: Preference key. + + Returns: + UserPreferencesModel or None if not found. + """ + stmt = select(UserPreferencesModel).where(UserPreferencesModel.key == key) + return await self._execute_scalar(stmt) + + async def get(self, key: str) -> object | None: + """Get a preference value by key. + + Args: + key: Preference key. + + Returns: + Preference value or None if not found. + """ + model = await self._get_by_key(key) + return None if model is None else model.value.get("value") + + async def get_bool(self, key: str, default: bool = False) -> bool: + """Get a boolean preference. + + Args: + key: Preference key. + default: Default value if not found. 
+ + Returns: + Boolean preference value. + """ + value = await self.get(key) + return default if value is None else bool(value) + + async def set(self, key: str, value: object) -> None: + """Set a preference value. + + Args: + key: Preference key. + value: Preference value (must be JSON-serializable). + """ + model = await self._get_by_key(key) + + if model is None: + model = UserPreferencesModel(key=key, value={"value": value}) + self._session.add(model) + else: + model.value = {"value": value} + + await self._session.flush() + + async def delete(self, key: str) -> bool: + """Delete a preference. + + Args: + key: Preference key. + + Returns: + True if deleted, False if not found. + """ + model = await self._get_by_key(key) + + if model is None: + return False + + await self._delete_and_flush(model) + return True diff --git a/src/noteflow/infrastructure/persistence/repositories/segment_repo.py b/src/noteflow/infrastructure/persistence/repositories/segment_repo.py index d06e392..4e194c4 100644 --- a/src/noteflow/infrastructure/persistence/repositories/segment_repo.py +++ b/src/noteflow/infrastructure/persistence/repositories/segment_repo.py @@ -1,7 +1,5 @@ """SQLAlchemy implementation of SegmentRepository.""" -from __future__ import annotations - from collections.abc import Sequence from uuid import UUID diff --git a/src/noteflow/infrastructure/persistence/repositories/summary_repo.py b/src/noteflow/infrastructure/persistence/repositories/summary_repo.py index 1998c49..2ebadee 100644 --- a/src/noteflow/infrastructure/persistence/repositories/summary_repo.py +++ b/src/noteflow/infrastructure/persistence/repositories/summary_repo.py @@ -2,6 +2,7 @@ from __future__ import annotations +from collections.abc import Sequence from typing import TYPE_CHECKING from uuid import UUID @@ -23,6 +24,53 @@ if TYPE_CHECKING: class SqlAlchemySummaryRepository(BaseRepository): """SQLAlchemy implementation of SummaryRepository.""" + async def _add_key_points(self, summary_id: int, key_points: Sequence[KeyPoint]) -> None: + """Add key points to a summary. + + Args: + summary_id: Database ID of the summary. + key_points: Key points to add. Their db_id fields are updated in place. + """ + models: list[tuple[KeyPointModel, KeyPoint]] = [] + for kp in key_points: + kp_model = KeyPointModel( + summary_id=summary_id, + text=kp.text, + start_time=kp.start_time, + end_time=kp.end_time, + segment_ids=kp.segment_ids, + ) + self._session.add(kp_model) + models.append((kp_model, kp)) + + await self._session.flush() + for kp_model, kp in models: + kp.db_id = kp_model.id + + async def _add_action_items(self, summary_id: int, action_items: Sequence[ActionItem]) -> None: + """Add action items to a summary. + + Args: + summary_id: Database ID of the summary. + action_items: Action items to add. Their db_id fields are updated in place. + """ + models: list[tuple[ActionItemModel, ActionItem]] = [] + for ai in action_items: + ai_model = ActionItemModel( + summary_id=summary_id, + text=ai.text, + assignee=ai.assignee, + due_date=ai.due_date, + priority=ai.priority, + segment_ids=ai.segment_ids, + ) + self._session.add(ai_model) + models.append((ai_model, ai)) + + await self._session.flush() + for ai_model, ai in models: + ai.db_id = ai_model.id + async def save(self, summary: Summary) -> Summary: """Save or update a meeting summary. 
@@ -50,38 +98,9 @@ class SqlAlchemySummaryRepository(BaseRepository): delete(ActionItemModel).where(ActionItemModel.summary_id == existing.id) ) - # Add new key points - kp_models: list[tuple[KeyPointModel, KeyPoint]] = [] - for kp in summary.key_points: - kp_model = KeyPointModel( - summary_id=existing.id, - text=kp.text, - start_time=kp.start_time, - end_time=kp.end_time, - segment_ids=kp.segment_ids, - ) - self._session.add(kp_model) - kp_models.append((kp_model, kp)) - - # Add new action items - ai_models: list[tuple[ActionItemModel, ActionItem]] = [] - for ai in summary.action_items: - ai_model = ActionItemModel( - summary_id=existing.id, - text=ai.text, - assignee=ai.assignee, - due_date=ai.due_date, - priority=ai.priority, - segment_ids=ai.segment_ids, - ) - self._session.add(ai_model) - ai_models.append((ai_model, ai)) - - await self._session.flush() - for kp_model, kp in kp_models: - kp.db_id = kp_model.id - for ai_model, ai in ai_models: - ai.db_id = ai_model.id + # Add new key points and action items + await self._add_key_points(existing.id, summary.key_points) + await self._add_action_items(existing.id, summary.action_items) summary.db_id = existing.id else: # Create new summary @@ -94,33 +113,9 @@ class SqlAlchemySummaryRepository(BaseRepository): self._session.add(model) await self._session.flush() - # Add key points - for kp in summary.key_points: - kp_model = KeyPointModel( - summary_id=model.id, - text=kp.text, - start_time=kp.start_time, - end_time=kp.end_time, - segment_ids=kp.segment_ids, - ) - self._session.add(kp_model) - await self._session.flush() - kp.db_id = kp_model.id - - # Add action items - for ai in summary.action_items: - ai_model = ActionItemModel( - summary_id=model.id, - text=ai.text, - assignee=ai.assignee, - due_date=ai.due_date, - priority=ai.priority, - segment_ids=ai.segment_ids, - ) - self._session.add(ai_model) - await self._session.flush() - ai.db_id = ai_model.id - + # Add key points and action items + await self._add_key_points(model.id, summary.key_points) + await self._add_action_items(model.id, summary.action_items) summary.db_id = model.id return summary diff --git a/src/noteflow/infrastructure/persistence/unit_of_work.py b/src/noteflow/infrastructure/persistence/unit_of_work.py index 1c37285..ba3941f 100644 --- a/src/noteflow/infrastructure/persistence/unit_of_work.py +++ b/src/noteflow/infrastructure/persistence/unit_of_work.py @@ -15,7 +15,9 @@ from noteflow.infrastructure.persistence.database import ( from .repositories import ( SqlAlchemyAnnotationRepository, + SqlAlchemyDiarizationJobRepository, SqlAlchemyMeetingRepository, + SqlAlchemyPreferencesRepository, SqlAlchemySegmentRepository, SqlAlchemySummaryRepository, ) @@ -43,7 +45,9 @@ class SqlAlchemyUnitOfWork: self._session_factory = session_factory self._session: AsyncSession | None = None self._annotations_repo: SqlAlchemyAnnotationRepository | None = None + self._diarization_jobs_repo: SqlAlchemyDiarizationJobRepository | None = None self._meetings_repo: SqlAlchemyMeetingRepository | None = None + self._preferences_repo: SqlAlchemyPreferencesRepository | None = None self._segments_repo: SqlAlchemySegmentRepository | None = None self._summaries_repo: SqlAlchemySummaryRepository | None = None @@ -87,6 +91,13 @@ class SqlAlchemyUnitOfWork: raise RuntimeError("UnitOfWork not in context") return self._annotations_repo + @property + def diarization_jobs(self) -> SqlAlchemyDiarizationJobRepository: + """Get diarization jobs repository.""" + if self._diarization_jobs_repo is None: + raise 
RuntimeError("UnitOfWork not in context") + return self._diarization_jobs_repo + @property def meetings(self) -> SqlAlchemyMeetingRepository: """Get meetings repository.""" @@ -101,6 +112,13 @@ class SqlAlchemyUnitOfWork: raise RuntimeError("UnitOfWork not in context") return self._segments_repo + @property + def preferences(self) -> SqlAlchemyPreferencesRepository: + """Get preferences repository.""" + if self._preferences_repo is None: + raise RuntimeError("UnitOfWork not in context") + return self._preferences_repo + @property def summaries(self) -> SqlAlchemySummaryRepository: """Get summaries repository.""" @@ -118,7 +136,9 @@ class SqlAlchemyUnitOfWork: """ self._session = self._session_factory() self._annotations_repo = SqlAlchemyAnnotationRepository(self._session) + self._diarization_jobs_repo = SqlAlchemyDiarizationJobRepository(self._session) self._meetings_repo = SqlAlchemyMeetingRepository(self._session) + self._preferences_repo = SqlAlchemyPreferencesRepository(self._session) self._segments_repo = SqlAlchemySegmentRepository(self._session) self._summaries_repo = SqlAlchemySummaryRepository(self._session) return self @@ -147,7 +167,9 @@ class SqlAlchemyUnitOfWork: await self._session.close() self._session = None self._annotations_repo = None + self._diarization_jobs_repo = None self._meetings_repo = None + self._preferences_repo = None self._segments_repo = None self._summaries_repo = None diff --git a/src/noteflow/infrastructure/security/keystore.py b/src/noteflow/infrastructure/security/keystore.py index 9a1bbbd..f7c0894 100644 --- a/src/noteflow/infrastructure/security/keystore.py +++ b/src/noteflow/infrastructure/security/keystore.py @@ -3,13 +3,13 @@ Provides secure master key storage using OS credential stores. """ -from __future__ import annotations - import base64 import binascii import logging import os import secrets +import stat +from pathlib import Path from typing import Final import keyring @@ -21,6 +21,42 @@ KEY_SIZE: Final[int] = 32 # 256-bit key SERVICE_NAME: Final[str] = "noteflow" KEY_NAME: Final[str] = "master_key" ENV_VAR_NAME: Final[str] = "NOTEFLOW_MASTER_KEY" +DEFAULT_KEY_FILE: Final[Path] = Path.home() / ".noteflow" / ".master_key" + + +def _decode_and_validate_key(encoded: str, source_name: str) -> bytes: + """Decode and validate a base64-encoded master key. + + Args: + encoded: Base64-encoded key string. + source_name: Human-readable source name for error messages. + + Returns: + Decoded key bytes. + + Raises: + RuntimeError: If decoding fails or key size is wrong. + """ + try: + decoded = base64.b64decode(encoded, validate=True) + except (binascii.Error, ValueError) as exc: + raise RuntimeError(f"{source_name} contains invalid base64") from exc + if len(decoded) != KEY_SIZE: + raise RuntimeError( + f"{source_name} has wrong key size: expected {KEY_SIZE}, got {len(decoded)}" + ) + return decoded + + +def _generate_key() -> tuple[bytes, str]: + """Generate a new random master key. + + Returns: + Tuple of (raw_key_bytes, base64_encoded_string). 
+ """ + raw_key = secrets.token_bytes(KEY_SIZE) + encoded = base64.b64encode(raw_key).decode("ascii") + return raw_key, encoded class KeyringKeyStore: @@ -61,17 +97,7 @@ class KeyringKeyStore: # Check environment variable first (for headless/container deployments) if env_key := os.environ.get(ENV_VAR_NAME): logger.debug("Using master key from environment variable") - try: - decoded = base64.b64decode(env_key, validate=True) - except (binascii.Error, ValueError) as exc: - raise RuntimeError( - f"{ENV_VAR_NAME} must be base64-encoded {KEY_SIZE}-byte key" - ) from exc - if len(decoded) != KEY_SIZE: - raise RuntimeError( - f"{ENV_VAR_NAME} must decode to {KEY_SIZE} bytes, got {len(decoded)}" - ) - return decoded + return _decode_and_validate_key(env_key, f"Environment variable {ENV_VAR_NAME}") try: # Try to retrieve existing key from keyring @@ -81,8 +107,7 @@ class KeyringKeyStore: return base64.b64decode(stored) # Generate new key - new_key = secrets.token_bytes(KEY_SIZE) - encoded = base64.b64encode(new_key).decode("ascii") + new_key, encoded = _generate_key() # Store in keyring keyring.set_password(self._service_name, self._key_name, encoded) @@ -90,10 +115,12 @@ class KeyringKeyStore: return new_key except keyring.errors.KeyringError as e: - raise RuntimeError( - f"Keyring unavailable: {e}. " - f"Set {ENV_VAR_NAME} environment variable for headless mode." - ) from e + # Fall back to file-based storage for headless environments + logger.warning( + "Keyring unavailable (%s), falling back to file-based key storage", + e, + ) + return FileKeyStore().get_or_create_master_key() def delete_master_key(self) -> None: """Delete the master key from the keychain. @@ -157,3 +184,69 @@ class InMemoryKeyStore: def has_master_key(self) -> bool: """Check if master key exists.""" return self._key is not None + + +class FileKeyStore: + """File-based key storage for headless environments. + + Stores the master key in a restricted-permissions file when keyring is + unavailable. This is a fallback for headless servers, containers, and + environments without a desktop session. + + File permissions are set to 0600 (owner read/write only). + """ + + def __init__(self, key_file: Path | None = None) -> None: + """Initialize the file keystore. + + Args: + key_file: Path to key file. Defaults to ~/.noteflow/.master_key. + """ + self._key_file = key_file or DEFAULT_KEY_FILE + + def get_or_create_master_key(self) -> bytes: + """Retrieve or generate the master encryption key. + + Returns: + 32-byte master key. + + Raises: + RuntimeError: If key file exists with wrong size or permissions fail. + """ + if self._key_file.exists(): + logger.debug("Retrieved master key from file: %s", self._key_file) + content = self._key_file.read_text().strip() + return _decode_and_validate_key(content, f"Key file {self._key_file}") + + # Generate new key + new_key, encoded = _generate_key() + + # Create parent directory if needed + self._key_file.parent.mkdir(parents=True, exist_ok=True) + + # Write key with restricted permissions + self._key_file.write_text(encoded) + self._key_file.chmod(stat.S_IRUSR | stat.S_IWUSR) # 0600 + + logger.info("Generated and stored master key in file: %s", self._key_file) + return new_key + + def delete_master_key(self) -> None: + """Delete the master key file. + + Safe to call if file doesn't exist. 
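+
+        Example (illustrative sketch; the path is hypothetical)::
+
+            store = FileKeyStore(Path("/tmp/nf_test_key"))
+            store.get_or_create_master_key()
+            store.delete_master_key()
+            assert not store.has_master_key()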
+ """ + if self._key_file.exists(): + self._key_file.unlink() + logger.info("Deleted master key file: %s", self._key_file) + else: + logger.debug("Master key file not found, nothing to delete") + + def has_master_key(self) -> bool: + """Check if master key file exists.""" + return self._key_file.exists() + + @property + def key_file(self) -> Path: + """Get the key file path.""" + return self._key_file diff --git a/src/noteflow/infrastructure/security/protocols.py b/src/noteflow/infrastructure/security/protocols.py index 348eca8..97895ad 100644 --- a/src/noteflow/infrastructure/security/protocols.py +++ b/src/noteflow/infrastructure/security/protocols.py @@ -3,8 +3,6 @@ These protocols define the contracts for key storage and encryption components. """ -from __future__ import annotations - from collections.abc import Iterator from dataclasses import dataclass from pathlib import Path diff --git a/src/noteflow/infrastructure/summarization/factory.py b/src/noteflow/infrastructure/summarization/factory.py index abd834a..a5d3e7e 100644 --- a/src/noteflow/infrastructure/summarization/factory.py +++ b/src/noteflow/infrastructure/summarization/factory.py @@ -1,7 +1,5 @@ """Factory for creating configured SummarizationService instances.""" -from __future__ import annotations - import logging from noteflow.application.services.summarization_service import ( diff --git a/src/noteflow/infrastructure/summarization/mock_provider.py b/src/noteflow/infrastructure/summarization/mock_provider.py index 9f69e46..b91b35a 100644 --- a/src/noteflow/infrastructure/summarization/mock_provider.py +++ b/src/noteflow/infrastructure/summarization/mock_provider.py @@ -1,7 +1,5 @@ """Mock summarization provider for testing.""" -from __future__ import annotations - import time from datetime import UTC, datetime diff --git a/src/noteflow/infrastructure/triggers/app_audio.py b/src/noteflow/infrastructure/triggers/app_audio.py index 1c94483..463e45b 100644 --- a/src/noteflow/infrastructure/triggers/app_audio.py +++ b/src/noteflow/infrastructure/triggers/app_audio.py @@ -103,7 +103,7 @@ class _SystemOutputSampler: try: devices = sd.query_devices() except Exception: - return self._extracted_from__select_device_5( + return self._mark_unavailable_with_warning( "Failed to query audio devices for app audio detection" ) for idx, dev in enumerate(devices): @@ -111,21 +111,27 @@ class _SystemOutputSampler: if int(dev.get("max_input_channels", 0)) <= 0: continue if "monitor" in name or "loopback" in name: - return self._extracted_from__select_device_24(idx) + return self._mark_device_available(idx) self._available = False logger.warning("No loopback audio device found - app audio detection disabled") - # TODO Rename this here and in `_select_device` - def _extracted_from__select_device_24(self, arg0): - self._device = arg0 - self._available = True - return + def _mark_device_available(self, device_index: int) -> None: + """Mark the device as available for audio capture. - # TODO Rename this here and in `_select_device` - def _extracted_from__select_device_5(self, arg0): + Args: + device_index: Index of the audio device. + """ + self._device = device_index + self._available = True + + def _mark_unavailable_with_warning(self, message: str) -> None: + """Mark device as unavailable and log a warning. + + Args: + message: Warning message to log. 
+ """ self._available = False - logger.warning(arg0) - return + logger.warning(message) def _ensure_stream(self) -> bool: if self._available is False: diff --git a/src/noteflow/infrastructure/triggers/calendar.py b/src/noteflow/infrastructure/triggers/calendar.py index 8554133..35a7877 100644 --- a/src/noteflow/infrastructure/triggers/calendar.py +++ b/src/noteflow/infrastructure/triggers/calendar.py @@ -8,7 +8,7 @@ from __future__ import annotations import json import logging from dataclasses import dataclass -from datetime import datetime, timedelta, timezone +from datetime import UTC, datetime, timedelta from typing import TYPE_CHECKING from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource @@ -63,7 +63,7 @@ class CalendarProvider: if not self._settings.events: return None - now = datetime.now(timezone.utc) + now = datetime.now(UTC) window_start = now - timedelta(minutes=self._settings.lookbehind_minutes) window_end = now + timedelta(minutes=self._settings.lookahead_minutes) @@ -145,6 +145,5 @@ def _parse_datetime(value: object) -> datetime | None: def _ensure_tz(value: datetime) -> datetime: if value.tzinfo is None: - return value.replace(tzinfo=timezone.utc) - return value.astimezone(timezone.utc) - + return value.replace(tzinfo=UTC) + return value.astimezone(UTC) diff --git a/src/noteflow/infrastructure/triggers/foreground_app.py b/src/noteflow/infrastructure/triggers/foreground_app.py index 7cd7f59..7b5ad4b 100644 --- a/src/noteflow/infrastructure/triggers/foreground_app.py +++ b/src/noteflow/infrastructure/triggers/foreground_app.py @@ -3,8 +3,6 @@ Detect meeting applications in the foreground window. """ -from __future__ import annotations - import logging from dataclasses import dataclass, field diff --git a/support/__init__.py b/support/__init__.py new file mode 100644 index 0000000..55dae89 --- /dev/null +++ b/support/__init__.py @@ -0,0 +1 @@ +"""Shared development/test support utilities.""" diff --git a/support/db_utils.py b/support/db_utils.py new file mode 100644 index 0000000..8668c00 --- /dev/null +++ b/support/db_utils.py @@ -0,0 +1,186 @@ +"""PostgreSQL testcontainer fixtures and utilities.""" + +from __future__ import annotations + +import time +from importlib import import_module +from typing import TYPE_CHECKING +from urllib.parse import quote + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine + +from noteflow.infrastructure.persistence.models import Base + +if TYPE_CHECKING: + from collections.abc import Self + + from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine + + +class PgTestContainer: + """Minimal Postgres testcontainer wrapper with custom readiness wait.""" + + def __init__( + self, + image: str = "pgvector/pgvector:pg16", + username: str = "test", + password: str = "test", + dbname: str = "noteflow_test", + port: int = 5432, + ) -> None: + """Initialize the container configuration. + + Args: + image: Docker image to use. + username: PostgreSQL username. + password: PostgreSQL password. + dbname: Database name. + port: PostgreSQL port. 
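+
+        Example (sketch; requires a reachable Docker daemon)::
+
+            container = PgTestContainer().start()
+            url = container.get_connection_url()
+            container.stop()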
+ """ + self.username = username + self.password = password + self.dbname = dbname + self.port = port + + container_module = import_module("testcontainers.core.container") + docker_container_cls = container_module.DockerContainer + self._container = ( + docker_container_cls(image) + .with_env("POSTGRES_USER", username) + .with_env("POSTGRES_PASSWORD", password) + .with_env("POSTGRES_DB", dbname) + .with_exposed_ports(port) + ) + + def start(self) -> Self: + """Start the container.""" + self._container.start() + self._wait_until_ready() + return self + + def stop(self) -> None: + """Stop the container.""" + self._container.stop() + + def get_connection_url(self) -> str: + """Return a SQLAlchemy-style connection URL.""" + host = self._container.get_container_host_ip() + port = self._container._get_exposed_port(self.port) + quoted_password = quote(self.password, safe=" +") + return ( + f"postgresql+psycopg2://{self.username}:{quoted_password}@{host}:{port}/{self.dbname}" + ) + + def _wait_until_ready(self, timeout: float = 30.0, interval: float = 0.5) -> None: + """Wait for Postgres to accept connections by running a simple query.""" + start_time = time.time() + escaped_password = self.password.replace("'", "'\"'\"'") + cmd = [ + "sh", + "-c", + ( + f"PGPASSWORD='{escaped_password}' " + f"psql --username {self.username} --dbname {self.dbname} --host 127.0.0.1 " + "-c 'select 1;'" + ), + ] + last_error: str | None = None + + while True: + result = self._container.exec(cmd) + if result.exit_code == 0: + return + if result.output: + last_error = result.output.decode(errors="ignore") + if time.time() - start_time > timeout: + raise TimeoutError( + "Postgres container did not become ready in time" + + (f": {last_error}" if last_error else "") + ) + time.sleep(interval) + + +# Module-level container singleton +_container: PgTestContainer | None = None +_database_url: str | None = None + + +def get_or_create_container() -> tuple[PgTestContainer, str]: + """Get or create the PostgreSQL container singleton. + + Returns: + Tuple of (container, async_database_url). + """ + global _container, _database_url + + if _container is None: + container = PgTestContainer().start() + _container = container + url = container.get_connection_url() + _database_url = url.replace("postgresql+psycopg2://", "postgresql+asyncpg://") + + assert _container is not None, "Container should be initialized" + assert _database_url is not None, "Database URL should be initialized" + return _container, _database_url + + +def stop_container() -> None: + """Stop and cleanup the container singleton.""" + global _container + if _container is not None: + _container.stop() + _container = None + + +async def initialize_test_schema(conn: AsyncConnection) -> None: + """Initialize test database schema. + + Creates the pgvector extension and noteflow schema with all tables. + + Args: + conn: Async database connection. + """ + await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) + await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE")) + await conn.execute(text("CREATE SCHEMA noteflow")) + await conn.run_sync(Base.metadata.create_all) + + +async def cleanup_test_schema(conn: AsyncConnection) -> None: + """Drop the test schema. + + Args: + conn: Async database connection. + """ + await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE")) + + +def create_test_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]: + """Create standard test session factory. 
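+
+    The factory disables ``expire_on_commit`` so returned entities remain
+    usable after commit, and turns off autocommit/autoflush so tests control
+    transaction boundaries explicitly.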
+ + Args: + engine: SQLAlchemy async engine. + + Returns: + Configured session factory. + """ + return async_sessionmaker( + engine, + class_=AsyncSession, + expire_on_commit=False, + autocommit=False, + autoflush=False, + ) + + +def create_test_engine(database_url: str) -> AsyncEngine: + """Create test database engine. + + Args: + database_url: Async database URL. + + Returns: + SQLAlchemy async engine. + """ + return create_async_engine(database_url, echo=False) diff --git a/tests/application/test_meeting_service.py b/tests/application/test_meeting_service.py index e0efc79..b2db411 100644 --- a/tests/application/test_meeting_service.py +++ b/tests/application/test_meeting_service.py @@ -21,19 +21,6 @@ if TYPE_CHECKING: class TestMeetingServiceCreation: """Tests for meeting creation operations.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.rollback = AsyncMock() - uow.meetings = MagicMock() - uow.segments = MagicMock() - uow.summaries = MagicMock() - return uow - async def test_create_meeting_success(self, mock_uow: MagicMock) -> None: """Test successful meeting creation.""" created_meeting = Meeting.create(title="Test Meeting") @@ -61,18 +48,6 @@ class TestMeetingServiceCreation: class TestMeetingServiceRetrieval: """Tests for meeting retrieval operations.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - uow.segments = MagicMock() - uow.summaries = MagicMock() - return uow - async def test_get_meeting_found(self, mock_uow: MagicMock) -> None: """Test retrieving existing meeting.""" meeting_id = MeetingId(uuid4()) @@ -116,16 +91,6 @@ class TestMeetingServiceRetrieval: class TestMeetingServiceStateTransitions: """Tests for meeting state transition operations.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - return uow - async def test_start_recording_success(self, mock_uow: MagicMock) -> None: """Test starting recording on existing meeting.""" meeting = Meeting.create(title="Test") @@ -224,16 +189,6 @@ class TestMeetingServiceStateTransitions: class TestMeetingServiceDeletion: """Tests for meeting deletion operations.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - return uow - async def test_delete_meeting_success(self, mock_uow: MagicMock) -> None: """Test successful meeting deletion.""" meeting_id = MeetingId(uuid4()) @@ -315,16 +270,6 @@ class TestMeetingServiceDeletion: class TestMeetingServiceSegments: """Tests for segment operations.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.segments = MagicMock() - return uow - async def test_add_segment_success(self, mock_uow: MagicMock) -> 
None: """Test adding a segment to meeting.""" meeting_id = MeetingId(uuid4()) @@ -381,16 +326,6 @@ class TestMeetingServiceSegments: class TestMeetingServiceSummaries: """Tests for summary operations.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.summaries = MagicMock() - return uow - async def test_save_summary_success(self, mock_uow: MagicMock) -> None: """Test saving a meeting summary.""" meeting_id = MeetingId(uuid4()) @@ -439,15 +374,6 @@ class TestMeetingServiceSummaries: class TestMeetingServiceSearch: """Tests for semantic search operations.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.segments = MagicMock() - return uow - async def test_search_segments_delegates(self, mock_uow: MagicMock) -> None: """Test search_segments delegates to repository.""" meeting_id = MeetingId(uuid4()) @@ -466,16 +392,6 @@ class TestMeetingServiceSearch: class TestMeetingServiceAnnotations: """Tests for annotation operations.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.annotations = MagicMock() - return uow - async def test_add_annotation_success(self, mock_uow: MagicMock) -> None: """Test adding an annotation commits and returns saved entity.""" meeting_id = MeetingId(uuid4()) @@ -537,19 +453,6 @@ class TestMeetingServiceAnnotations: class TestMeetingServiceAdditionalBranches: """Additional branch coverage for MeetingService.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create a mock UnitOfWork with all repos.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - uow.segments = MagicMock() - uow.summaries = MagicMock() - uow.annotations = MagicMock() - return uow - async def test_stop_meeting_not_found(self, mock_uow: MagicMock) -> None: """stop_meeting should return None when meeting is missing.""" mock_uow.meetings.get = AsyncMock(return_value=None) diff --git a/tests/application/test_recovery_service.py b/tests/application/test_recovery_service.py index 035b985..c2dba6b 100644 --- a/tests/application/test_recovery_service.py +++ b/tests/application/test_recovery_service.py @@ -2,26 +2,19 @@ from __future__ import annotations +from pathlib import Path from unittest.mock import AsyncMock, MagicMock import pytest -from noteflow.application.services.recovery_service import RecoveryService +from noteflow.application.services.recovery_service import ( + AudioValidationResult, + RecoveryService, +) from noteflow.domain.entities import Meeting from noteflow.domain.value_objects import MeetingState -@pytest.fixture -def mock_uow() -> MagicMock: - """Create a mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.commit = AsyncMock() - uow.meetings = MagicMock() - return uow - - class TestRecoveryServiceRecovery: """Tests for crash recovery operations.""" @@ -30,9 +23,10 @@ class TestRecoveryServiceRecovery: mock_uow.meetings.list_all = 
AsyncMock(return_value=([], 0)) service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() + meetings, audio_failures = await service.recover_crashed_meetings() - assert result == [] + assert meetings == [] + assert audio_failures == 0 mock_uow.commit.assert_not_called() async def test_recover_single_recording_meeting(self, mock_uow: MagicMock) -> None: @@ -45,13 +39,13 @@ class TestRecoveryServiceRecovery: mock_uow.meetings.update = AsyncMock(return_value=meeting) service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() + meetings, _ = await service.recover_crashed_meetings() - assert len(result) == 1 - assert result[0].state == MeetingState.ERROR - assert result[0].metadata["crash_recovered"] == "true" - assert result[0].metadata["crash_previous_state"] == "RECORDING" - assert "crash_recovery_time" in result[0].metadata + assert len(meetings) == 1 + assert meetings[0].state == MeetingState.ERROR + assert meetings[0].metadata["crash_recovered"] == "true" + assert meetings[0].metadata["crash_previous_state"] == "RECORDING" + assert "crash_recovery_time" in meetings[0].metadata mock_uow.meetings.update.assert_called_once() mock_uow.commit.assert_called_once() @@ -66,11 +60,11 @@ class TestRecoveryServiceRecovery: mock_uow.meetings.update = AsyncMock(return_value=meeting) service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() + meetings, _ = await service.recover_crashed_meetings() - assert len(result) == 1 - assert result[0].state == MeetingState.ERROR - assert result[0].metadata["crash_previous_state"] == "STOPPING" + assert len(meetings) == 1 + assert meetings[0].state == MeetingState.ERROR + assert meetings[0].metadata["crash_previous_state"] == "STOPPING" mock_uow.commit.assert_called_once() async def test_recover_multiple_crashed_meetings(self, mock_uow: MagicMock) -> None: @@ -85,18 +79,18 @@ class TestRecoveryServiceRecovery: meeting3 = Meeting.create(title="Crashed 3") meeting3.start_recording() - meetings = [meeting1, meeting2, meeting3] - mock_uow.meetings.list_all = AsyncMock(return_value=(meetings, 3)) - mock_uow.meetings.update = AsyncMock(side_effect=meetings) + crashed_meetings = [meeting1, meeting2, meeting3] + mock_uow.meetings.list_all = AsyncMock(return_value=(crashed_meetings, 3)) + mock_uow.meetings.update = AsyncMock(side_effect=crashed_meetings) service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() + meetings, _ = await service.recover_crashed_meetings() - assert len(result) == 3 - assert all(m.state == MeetingState.ERROR for m in result) - assert result[0].metadata["crash_previous_state"] == "RECORDING" - assert result[1].metadata["crash_previous_state"] == "STOPPING" - assert result[2].metadata["crash_previous_state"] == "RECORDING" + assert len(meetings) == 3 + assert all(m.state == MeetingState.ERROR for m in meetings) + assert meetings[0].metadata["crash_previous_state"] == "RECORDING" + assert meetings[1].metadata["crash_previous_state"] == "STOPPING" + assert meetings[2].metadata["crash_previous_state"] == "RECORDING" assert mock_uow.meetings.update.call_count == 3 mock_uow.commit.assert_called_once() @@ -147,12 +141,191 @@ class TestRecoveryServiceMetadata: mock_uow.meetings.update = AsyncMock(return_value=meeting) service = RecoveryService(mock_uow) - result = await service.recover_crashed_meetings() + meetings, _ = await service.recover_crashed_meetings() - assert len(result) == 1 + assert len(meetings) == 1 # Verify original 
metadata preserved - assert result[0].metadata["project"] == "NoteFlow" - assert result[0].metadata["important"] == "yes" + assert meetings[0].metadata["project"] == "NoteFlow" + assert meetings[0].metadata["important"] == "yes" # Verify recovery metadata added - assert result[0].metadata["crash_recovered"] == "true" - assert result[0].metadata["crash_previous_state"] == "RECORDING" + assert meetings[0].metadata["crash_recovered"] == "true" + assert meetings[0].metadata["crash_previous_state"] == "RECORDING" + + +class TestRecoveryServiceAudioValidation: + """Tests for audio file validation during recovery.""" + + @pytest.fixture + def meetings_dir(self, tmp_path: Path) -> Path: + """Create temporary meetings directory.""" + return tmp_path / "meetings" + + def test_audio_validation_skipped_without_meetings_dir(self, mock_uow: MagicMock) -> None: + """Test audio validation skipped when no meetings_dir configured.""" + meeting = Meeting.create(title="Test Meeting") + meeting.start_recording() + + service = RecoveryService(mock_uow, meetings_dir=None) + result = service._validate_meeting_audio(meeting) + + assert result.is_valid is True + assert result.manifest_exists is True + assert result.audio_exists is True + assert "skipped" in (result.error_message or "").lower() + + def test_audio_validation_missing_directory( + self, mock_uow: MagicMock, meetings_dir: Path + ) -> None: + """Test validation fails when meeting directory does not exist.""" + meeting = Meeting.create(title="Missing Dir") + meeting.start_recording() + + service = RecoveryService(mock_uow, meetings_dir=meetings_dir) + result = service._validate_meeting_audio(meeting) + + assert result.is_valid is False + assert result.manifest_exists is False + assert result.audio_exists is False + assert "missing" in (result.error_message or "").lower() + + def test_audio_validation_missing_manifest( + self, mock_uow: MagicMock, meetings_dir: Path + ) -> None: + """Test validation fails when only audio.enc exists.""" + meeting = Meeting.create(title="Missing Manifest") + meeting.start_recording() + + # Create meeting directory with only audio.enc + meeting_path = meetings_dir / str(meeting.id) + meeting_path.mkdir(parents=True) + (meeting_path / "audio.enc").touch() + + service = RecoveryService(mock_uow, meetings_dir=meetings_dir) + result = service._validate_meeting_audio(meeting) + + assert result.is_valid is False + assert result.manifest_exists is False + assert result.audio_exists is True + assert "manifest.json" in (result.error_message or "") + + def test_audio_validation_missing_audio(self, mock_uow: MagicMock, meetings_dir: Path) -> None: + """Test validation fails when only manifest.json exists.""" + meeting = Meeting.create(title="Missing Audio") + meeting.start_recording() + + # Create meeting directory with only manifest.json + meeting_path = meetings_dir / str(meeting.id) + meeting_path.mkdir(parents=True) + (meeting_path / "manifest.json").touch() + + service = RecoveryService(mock_uow, meetings_dir=meetings_dir) + result = service._validate_meeting_audio(meeting) + + assert result.is_valid is False + assert result.manifest_exists is True + assert result.audio_exists is False + assert "audio.enc" in (result.error_message or "") + + def test_audio_validation_success(self, mock_uow: MagicMock, meetings_dir: Path) -> None: + """Test validation succeeds when both files exist.""" + meeting = Meeting.create(title="Complete Meeting") + meeting.start_recording() + + # Create meeting directory with both files + meeting_path = 
meetings_dir / str(meeting.id) + meeting_path.mkdir(parents=True) + (meeting_path / "manifest.json").touch() + (meeting_path / "audio.enc").touch() + + service = RecoveryService(mock_uow, meetings_dir=meetings_dir) + result = service._validate_meeting_audio(meeting) + + assert result.is_valid is True + assert result.manifest_exists is True + assert result.audio_exists is True + assert result.error_message is None + + def test_audio_validation_uses_asset_path_metadata( + self, mock_uow: MagicMock, meetings_dir: Path + ) -> None: + """Test validation uses asset_path from metadata if available.""" + meeting = Meeting.create( + title="Custom Path", + metadata={"asset_path": "custom-path-123"}, + ) + meeting.start_recording() + + # Create meeting at custom asset path + meeting_path = meetings_dir / "custom-path-123" + meeting_path.mkdir(parents=True) + (meeting_path / "manifest.json").touch() + (meeting_path / "audio.enc").touch() + + service = RecoveryService(mock_uow, meetings_dir=meetings_dir) + result = service._validate_meeting_audio(meeting) + + assert result.is_valid is True + + async def test_recovery_counts_audio_failures( + self, mock_uow: MagicMock, meetings_dir: Path + ) -> None: + """Test recovery tracks audio validation failure count.""" + meeting1 = Meeting.create(title="Has Audio") + meeting1.start_recording() + + meeting2 = Meeting.create(title="Missing Audio") + meeting2.start_recording() + + # Create directory for meeting1 only + meeting1_path = meetings_dir / str(meeting1.id) + meeting1_path.mkdir(parents=True) + (meeting1_path / "manifest.json").touch() + (meeting1_path / "audio.enc").touch() + + mock_uow.meetings.list_all = AsyncMock(return_value=([meeting1, meeting2], 2)) + mock_uow.meetings.update = AsyncMock(side_effect=[meeting1, meeting2]) + + service = RecoveryService(mock_uow, meetings_dir=meetings_dir) + meetings, audio_failures = await service.recover_crashed_meetings() + + assert len(meetings) == 2 + assert audio_failures == 1 + assert meetings[0].metadata["audio_valid"] == "true" + assert meetings[1].metadata["audio_valid"] == "false" + assert "audio_error" in meetings[1].metadata + + +class TestAudioValidationResult: + """Tests for AudioValidationResult dataclass.""" + + def test_audio_validation_result_is_frozen(self) -> None: + """Test AudioValidationResult is immutable.""" + result = AudioValidationResult( + is_valid=True, + manifest_exists=True, + audio_exists=True, + ) + + with pytest.raises(AttributeError): + result.is_valid = False # type: ignore[misc] + + def test_audio_validation_result_optional_error(self) -> None: + """Test error_message defaults to None.""" + result = AudioValidationResult( + is_valid=True, + manifest_exists=True, + audio_exists=True, + ) + + assert result.error_message is None + + def test_audio_validation_result_with_error(self) -> None: + """Test AudioValidationResult stores error message.""" + result = AudioValidationResult( + is_valid=False, + manifest_exists=False, + audio_exists=False, + error_message="Test error", + ) + + assert result.error_message == "Test error" diff --git a/tests/application/test_retention_service.py b/tests/application/test_retention_service.py index 68fcdd4..2dfbcb8 100644 --- a/tests/application/test_retention_service.py +++ b/tests/application/test_retention_service.py @@ -58,15 +58,6 @@ class TestRetentionServiceProperties: class TestRetentionServiceFindExpired: """Tests for find_expired_meetings method.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create mock UnitOfWork.""" - uow = 
MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.meetings = MagicMock() - return uow - @pytest.mark.asyncio async def test_find_expired_returns_meetings(self, mock_uow: MagicMock) -> None: """find_expired_meetings should return meetings from repository.""" @@ -93,16 +84,6 @@ class TestRetentionServiceFindExpired: class TestRetentionServiceRunCleanup: """Tests for run_cleanup method.""" - @pytest.fixture - def mock_uow(self) -> MagicMock: - """Create mock UnitOfWork.""" - uow = MagicMock() - uow.__aenter__ = AsyncMock(return_value=uow) - uow.__aexit__ = AsyncMock(return_value=None) - uow.meetings = MagicMock() - uow.commit = AsyncMock() - return uow - @pytest.mark.asyncio async def test_run_cleanup_disabled_returns_empty_report(self, mock_uow: MagicMock) -> None: """run_cleanup should return empty report when disabled.""" diff --git a/tests/application/test_summarization_service.py b/tests/application/test_summarization_service.py index aae1b2a..81a1e50 100644 --- a/tests/application/test_summarization_service.py +++ b/tests/application/test_summarization_service.py @@ -144,7 +144,8 @@ class TestSummarizationServiceConfiguration: assert SummarizationMode.LOCAL not in available - def test_cloud_requires_consent(self) -> None: + @pytest.mark.asyncio + async def test_cloud_requires_consent(self) -> None: """Cloud mode should require consent to be available.""" service = SummarizationService() service.register_provider( @@ -153,22 +154,23 @@ class TestSummarizationServiceConfiguration: ) available_without_consent = service.get_available_modes() - service.grant_cloud_consent() + await service.grant_cloud_consent() available_with_consent = service.get_available_modes() assert SummarizationMode.CLOUD not in available_without_consent assert SummarizationMode.CLOUD in available_with_consent - def test_revoke_cloud_consent(self) -> None: + @pytest.mark.asyncio + async def test_revoke_cloud_consent(self) -> None: """Revoking consent should remove cloud from available modes.""" service = SummarizationService() service.register_provider( SummarizationMode.CLOUD, MockProvider(name="cloud", requires_consent=True), ) - service.grant_cloud_consent() + await service.grant_cloud_consent() - service.revoke_cloud_consent() + await service.revoke_cloud_consent() available = service.get_available_modes() assert SummarizationMode.CLOUD not in available diff --git a/tests/conftest.py b/tests/conftest.py index f6db095..e43fe89 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,7 @@ from __future__ import annotations import sys import types from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock import pytest @@ -94,3 +95,24 @@ def mock_optional_extras() -> None: pywinctl_module.getAllWindows = lambda: [] pywinctl_module.getAllTitles = lambda: [] sys.modules["pywinctl"] = pywinctl_module + + +@pytest.fixture +def mock_uow() -> MagicMock: + """Create a mock UnitOfWork for service tests. + + Provides a fully-configured mock UnitOfWork with all repository mocks + and async context manager support. 
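+
+    Example (illustrative sketch)::
+
+        async def test_missing_meeting(mock_uow: MagicMock) -> None:
+            mock_uow.meetings.get = AsyncMock(return_value=None)
+            ...  # exercise the service under test against the stubbed repo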
+ """ + uow = MagicMock() + uow.__aenter__ = AsyncMock(return_value=uow) + uow.__aexit__ = AsyncMock(return_value=None) + uow.commit = AsyncMock() + uow.rollback = AsyncMock() + uow.meetings = MagicMock() + uow.segments = MagicMock() + uow.summaries = MagicMock() + uow.annotations = MagicMock() + uow.preferences = MagicMock() + uow.diarization_jobs = MagicMock() + return uow diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 0000000..2fd2e53 --- /dev/null +++ b/tests/fixtures/__init__.py @@ -0,0 +1,15 @@ +"""Shared test fixtures and utilities.""" + +from support.db_utils import ( + PgTestContainer, + create_test_session_factory, + get_or_create_container, + initialize_test_schema, +) + +__all__ = [ + "PgTestContainer", + "create_test_session_factory", + "get_or_create_container", + "initialize_test_schema", +] diff --git a/tests/grpc/test_generate_summary.py b/tests/grpc/test_generate_summary.py index bd6e1fc..ed0c551 100644 --- a/tests/grpc/test_generate_summary.py +++ b/tests/grpc/test_generate_summary.py @@ -59,7 +59,9 @@ async def test_generate_summary_falls_back_when_provider_unavailable() -> None: meeting = store.create("Test Meeting") store.add_segment( str(meeting.id), - Segment(segment_id=1, text="Action item noted", start_time=0.0, end_time=2.0, language="en"), + Segment( + segment_id=1, text="Action item noted", start_time=0.0, end_time=2.0, language="en" + ), ) response = await servicer.GenerateSummary( diff --git a/tests/infrastructure/audio/test_writer.py b/tests/infrastructure/audio/test_writer.py index e18d214..f015ae9 100644 --- a/tests/infrastructure/audio/test_writer.py +++ b/tests/infrastructure/audio/test_writer.py @@ -80,7 +80,8 @@ class TestMeetingAudioWriterBasics: meetings_dir: Path, ) -> None: """Test audio conversion from float32 to PCM16.""" - writer = MeetingAudioWriter(crypto, meetings_dir) + # Use small buffer to force immediate flush + writer = MeetingAudioWriter(crypto, meetings_dir, buffer_size=1000) meeting_id = str(uuid4()) dek = crypto.generate_dek() wrapped_dek = crypto.wrap_dek(dek) @@ -91,10 +92,12 @@ class TestMeetingAudioWriterBasics: test_audio = np.linspace(-1.0, 1.0, 1600, dtype=np.float32) writer.write_chunk(test_audio) + # Audio is 3200 bytes, buffer is 1000, so should flush assert writer.bytes_written > 0 # PCM16 = 2 bytes/sample = 3200 bytes raw, but encrypted with overhead assert writer.bytes_written > 3200 assert writer.chunk_count == 1 + assert writer.write_count == 1 writer.close() @@ -103,25 +106,89 @@ class TestMeetingAudioWriterBasics: crypto: AesGcmCryptoBox, meetings_dir: Path, ) -> None: - """Test writing multiple audio chunks.""" - writer = MeetingAudioWriter(crypto, meetings_dir) + """Test writing multiple audio chunks with buffering.""" + # Use small buffer to test buffering behavior + writer = MeetingAudioWriter(crypto, meetings_dir, buffer_size=10000) meeting_id = str(uuid4()) dek = crypto.generate_dek() wrapped_dek = crypto.wrap_dek(dek) writer.open(meeting_id, dek, wrapped_dek) - # Write 100 chunks - for _ in range(100): + # Write 100 chunks of 1600 samples each (3200 bytes per write) + # Buffer is 10000, so ~3 writes per encrypted chunk + num_writes = 100 + bytes_per_write = 1600 * 2 # 3200 bytes + + for _ in range(num_writes): audio = np.random.uniform(-0.5, 0.5, 1600).astype(np.float32) writer.write_chunk(audio) - # Should have written significant data - assert writer.bytes_written > 100 * 3200 # At least raw PCM16 size - assert writer.chunk_count == 100 + # write_count tracks 
incoming audio frames + assert writer.write_count == num_writes + + # Due to buffering, chunk_count should be much less than write_count + # 100 writes * 3200 bytes = 320,000 bytes / 10000 buffer = ~32 flushes + # Some bytes may still be buffered + assert writer.chunk_count < num_writes + + # Flush remaining and check bytes written before close + writer.flush() + + # Total raw bytes = 100 * 3200 = 320,000 bytes + # Encrypted size includes overhead per chunk + assert writer.bytes_written > num_writes * bytes_per_write writer.close() + def test_buffering_reduces_chunk_overhead( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test that buffering reduces encryption overhead.""" + # Create two writers with different buffer sizes + small_buffer_writer = MeetingAudioWriter(crypto, meetings_dir, buffer_size=1000) + large_buffer_writer = MeetingAudioWriter(crypto, meetings_dir, buffer_size=1_000_000) + + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + # Write same audio to both + meeting_id_small = str(uuid4()) + meeting_id_large = str(uuid4()) + + small_buffer_writer.open(meeting_id_small, dek, wrapped_dek) + large_buffer_writer.open(meeting_id_large, dek, wrapped_dek) + + # Generate consistent test data + np.random.seed(42) + + # Write 50 chunks (160,000 bytes raw) + for _ in range(50): + audio = np.random.uniform(-0.5, 0.5, 1600).astype(np.float32) + small_buffer_writer.write_chunk(audio) + + np.random.seed(42) # Reset seed to generate same audio + + for _ in range(50): + audio = np.random.uniform(-0.5, 0.5, 1600).astype(np.float32) + large_buffer_writer.write_chunk(audio) + + # Flush to ensure all data is written before comparing + small_buffer_writer.flush() + large_buffer_writer.flush() + + # Large buffer should have fewer encrypted chunks (less overhead) + assert large_buffer_writer.chunk_count < small_buffer_writer.chunk_count + + # Large buffer should use less total disk space due to fewer chunks + # Each chunk has 32 bytes overhead (4 length + 12 nonce + 16 tag) + assert large_buffer_writer.bytes_written < small_buffer_writer.bytes_written + + small_buffer_writer.close() + large_buffer_writer.close() + def test_write_chunk_clamps_audio_range( self, crypto: AesGcmCryptoBox, @@ -135,7 +202,7 @@ class TestMeetingAudioWriterBasics: writer.open(meeting_id, dek, wrapped_dek) writer.write_chunk(np.array([-2.0, 0.0, 2.0], dtype=np.float32)) - writer.close() + writer.close() # Flushes buffer to disk audio_path = meetings_dir / meeting_id / "audio.enc" reader = ChunkedAssetReader(crypto) @@ -150,6 +217,37 @@ class TestMeetingAudioWriterBasics: reader.close() + def test_flush_writes_buffered_data( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test explicit flush writes buffered data to disk.""" + # Large buffer to prevent auto-flush + writer = MeetingAudioWriter(crypto, meetings_dir, buffer_size=1_000_000) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + + # Write small audio chunk (won't trigger auto-flush) + writer.write_chunk(np.zeros(1600, dtype=np.float32)) + + # Data should be buffered, not written + assert writer.buffered_bytes > 0 + assert writer.chunk_count == 0 + + # Explicit flush + writer.flush() + + # Data should now be written + assert writer.buffered_bytes == 0 + assert writer.chunk_count == 1 + assert writer.bytes_written > 0 + + writer.close() + class TestMeetingAudioWriterErrors: """Tests for MeetingAudioWriter 
error handling.""" @@ -258,7 +356,7 @@ class TestMeetingAudioWriterIntegration: meetings_dir: Path, ) -> None: """Test writing audio, then reading it back encrypted.""" - # Write audio + # Write audio (default buffer aggregates chunks) writer = MeetingAudioWriter(crypto, meetings_dir) meeting_id = str(uuid4()) dek = crypto.generate_dek() @@ -282,22 +380,21 @@ class TestMeetingAudioWriterIntegration: reader = ChunkedAssetReader(crypto) reader.open(audio_path, dek) - read_chunks: list[np.ndarray] = [] - for chunk_bytes in reader.read_chunks(): - # Convert bytes back to PCM16 then to float32 - pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16) - audio_float = pcm16.astype(np.float32) / 32767.0 - read_chunks.append(audio_float) - + # Collect all decrypted audio bytes (may be fewer chunks due to buffering) + all_audio_bytes = b"".join(reader.read_chunks()) reader.close() - # Verify we read same number of chunks - assert len(read_chunks) == len(original_chunks) + # Convert bytes back to float32 + pcm16 = np.frombuffer(all_audio_bytes, dtype=np.int16) + read_audio = pcm16.astype(np.float32) / 32767.0 + + # Concatenate original chunks for comparison + original_audio = np.concatenate(original_chunks) # Verify audio content matches (within quantization error) - for orig, read in zip(original_chunks, read_chunks, strict=True): - # PCM16 quantization adds ~0.00003 max error - assert np.allclose(orig, read, atol=0.0001) + assert len(read_audio) == len(original_audio) + # PCM16 quantization adds ~0.00003 max error + assert np.allclose(original_audio, read_audio, atol=0.0001) def test_manifest_wrapped_dek_can_decrypt_audio( self, @@ -332,3 +429,121 @@ class TestMeetingAudioWriterIntegration: assert len(chunks) == 1 # Should read the one chunk we wrote reader.close() + + +class TestMeetingAudioWriterPeriodicFlush: + """Tests for periodic flush thread functionality.""" + + def test_periodic_flush_thread_starts_on_open( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test periodic flush thread is started when writer is opened.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + assert writer._flush_thread is None + + writer.open(meeting_id, dek, wrapped_dek) + assert writer._flush_thread is not None + assert writer._flush_thread.is_alive() + + writer.close() + assert writer._flush_thread is None or not writer._flush_thread.is_alive() + + def test_periodic_flush_thread_stops_on_close( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test periodic flush thread stops cleanly on close.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + flush_thread = writer._flush_thread + assert flush_thread is not None + + writer.close() + + # Thread should be stopped + assert not flush_thread.is_alive() + assert writer._stop_flush.is_set() + + def test_flush_is_thread_safe( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test concurrent writes and flushes do not corrupt data.""" + import threading + + writer = MeetingAudioWriter(crypto, meetings_dir, buffer_size=1_000_000) + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + + errors: list[Exception] = [] + write_count = 100 + + def write_audio() -> None: + try: + 
for _ in range(write_count): + audio = np.random.uniform(-0.5, 0.5, 1600).astype(np.float32) + writer.write_chunk(audio) + except Exception as e: + errors.append(e) + + def flush_repeatedly() -> None: + try: + for _ in range(50): + writer.flush() + except Exception as e: + errors.append(e) + + write_thread = threading.Thread(target=write_audio) + flush_thread = threading.Thread(target=flush_repeatedly) + + write_thread.start() + flush_thread.start() + + write_thread.join() + flush_thread.join() + + # Check write_count before close (close resets it) + assert writer.write_count == write_count + + writer.close() + + # No exceptions should have occurred + assert not errors + + def test_flush_when_closed_raises_error( + self, + crypto: AesGcmCryptoBox, + meetings_dir: Path, + ) -> None: + """Test flush raises RuntimeError when writer is closed.""" + writer = MeetingAudioWriter(crypto, meetings_dir) + + # Should raise when not open + with pytest.raises(RuntimeError, match="not open"): + writer.flush() + + # Open, then close, then flush should also raise + meeting_id = str(uuid4()) + dek = crypto.generate_dek() + wrapped_dek = crypto.wrap_dek(dek) + + writer.open(meeting_id, dek, wrapped_dek) + writer.close() + + with pytest.raises(RuntimeError, match="not open"): + writer.flush() diff --git a/tests/infrastructure/security/test_keystore.py b/tests/infrastructure/security/test_keystore.py index c9a478c..619d0d5 100644 --- a/tests/infrastructure/security/test_keystore.py +++ b/tests/infrastructure/security/test_keystore.py @@ -54,8 +54,11 @@ def test_get_or_create_master_key_creates_and_reuses(monkeypatch: pytest.MonkeyP assert ("svc", "key") in storage -def test_get_or_create_master_key_wraps_keyring_errors(monkeypatch: pytest.MonkeyPatch) -> None: - """Keyring errors should surface as RuntimeError.""" +def test_get_or_create_master_key_falls_back_to_file( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Any, +) -> None: + """Keyring errors should fall back to file-based key storage.""" class DummyErrors: class KeyringError(Exception): ... 
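
Context for the hunk below: the renamed test encodes the new contract, namely that keyring failures fall back to file-backed key storage instead of raising RuntimeError. For reference, a minimal sketch of driving the fallback store directly; the import path is assumed from the module under test, and the key-file location is illustrative, not the production default:

    from pathlib import Path

    from noteflow.infrastructure.security.keystore import KEY_SIZE, FileKeyStore

    # Hypothetical location; production code resolves it via DEFAULT_KEY_FILE.
    store = FileKeyStore(Path.home() / ".noteflow" / ".master_key")
    key = store.get_or_create_master_key()  # creates parent dirs, stores base64 text
    assert len(key) == KEY_SIZE
    assert store.has_master_key()
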
@@ -73,10 +76,15 @@ def test_get_or_create_master_key_wraps_keyring_errors(monkeypatch: pytest.Monke
             delete_password=raise_error,
         ),
     )
+    # Use temp path for file fallback
+    key_file = tmp_path / ".master_key"
+    monkeypatch.setattr(keystore, "DEFAULT_KEY_FILE", key_file)
 
     ks = keystore.KeyringKeyStore()
-    with pytest.raises(RuntimeError, match="Keyring unavailable"):
-        ks.get_or_create_master_key()
+    key = ks.get_or_create_master_key()
+
+    assert len(key) == keystore.KEY_SIZE
+    assert key_file.exists()
 
 
 def test_delete_master_key_handles_missing(monkeypatch: pytest.MonkeyPatch) -> None:
@@ -130,3 +138,81 @@ def test_has_master_key_false_on_errors(monkeypatch: pytest.MonkeyPatch) -> None
 
     ks = keystore.KeyringKeyStore()
     assert ks.has_master_key() is False
+
+
+class TestFileKeyStore:
+    """Tests for FileKeyStore fallback implementation."""
+
+    def test_creates_and_reuses_key(self, tmp_path: Any) -> None:
+        """File key store should create key once and reuse it."""
+        key_file = tmp_path / ".master_key"
+        fks = keystore.FileKeyStore(key_file)
+
+        first = fks.get_or_create_master_key()
+        second = fks.get_or_create_master_key()
+
+        assert len(first) == keystore.KEY_SIZE
+        assert first == second
+        assert key_file.exists()
+
+    def test_creates_parent_directories(self, tmp_path: Any) -> None:
+        """File key store should create parent directories."""
+        key_file = tmp_path / "nested" / "dir" / ".master_key"
+        fks = keystore.FileKeyStore(key_file)
+
+        fks.get_or_create_master_key()
+
+        assert key_file.exists()
+
+    def test_has_master_key_true_when_exists(self, tmp_path: Any) -> None:
+        """has_master_key should return True when file exists."""
+        key_file = tmp_path / ".master_key"
+        fks = keystore.FileKeyStore(key_file)
+        fks.get_or_create_master_key()
+
+        assert fks.has_master_key() is True
+
+    def test_has_master_key_false_when_missing(self, tmp_path: Any) -> None:
+        """has_master_key should return False when file is missing."""
+        key_file = tmp_path / ".master_key"
+        fks = keystore.FileKeyStore(key_file)
+
+        assert fks.has_master_key() is False
+
+    def test_delete_master_key_removes_file(self, tmp_path: Any) -> None:
+        """delete_master_key should remove the key file."""
+        key_file = tmp_path / ".master_key"
+        fks = keystore.FileKeyStore(key_file)
+        fks.get_or_create_master_key()
+
+        fks.delete_master_key()
+
+        assert not key_file.exists()
+
+    def test_delete_master_key_safe_when_missing(self, tmp_path: Any) -> None:
+        """delete_master_key should not raise when file is missing."""
+        key_file = tmp_path / ".master_key"
+        fks = keystore.FileKeyStore(key_file)
+
+        fks.delete_master_key()  # Should not raise
+
+    def test_invalid_base64_raises_runtime_error(self, tmp_path: Any) -> None:
+        """Invalid base64 in key file should raise RuntimeError."""
+        key_file = tmp_path / ".master_key"
+        key_file.write_text("not-valid-base64!!!")
+        fks = keystore.FileKeyStore(key_file)
+
+        with pytest.raises(RuntimeError, match="invalid base64"):
+            fks.get_or_create_master_key()
+
+    def test_wrong_size_raises_runtime_error(self, tmp_path: Any) -> None:
+        """Wrong key size in file should raise RuntimeError."""
+        import base64
+
+        key_file = tmp_path / ".master_key"
+        # Write a key that's too short (9 bytes instead of 32)
+        key_file.write_text(base64.b64encode(b"short_key").decode())
+        fks = keystore.FileKeyStore(key_file)
+
+        with pytest.raises(RuntimeError, match="wrong key size"):
+            fks.get_or_create_master_key()
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 6e84b94..cc98d2b 100644
--- 
a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -2,112 +2,19 @@ from __future__ import annotations -import time from collections.abc import AsyncGenerator -from importlib import import_module -from typing import TYPE_CHECKING -from urllib.parse import quote import pytest -from sqlalchemy import text -from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker -if TYPE_CHECKING: - from collections.abc import Self - -from noteflow.infrastructure.persistence.models import Base - - -# Store container reference at module level to reuse -class PgTestContainer: - """Minimal Postgres testcontainer wrapper with custom readiness wait.""" - - def __init__( - self, - image: str = "pgvector/pgvector:pg16", - username: str = "test", - password: str = "test", - dbname: str = "noteflow_test", - port: int = 5432, - ) -> None: - self.username = username - self.password = password - self.dbname = dbname - self.port = port - - container_module = import_module("testcontainers.core.container") - docker_container_cls = container_module.DockerContainer - self._container = ( - docker_container_cls(image) - .with_env("POSTGRES_USER", username) - .with_env("POSTGRES_PASSWORD", password) - .with_env("POSTGRES_DB", dbname) - .with_exposed_ports(port) - ) - - def start(self) -> Self: - """Start the container.""" - self._container.start() - self._wait_until_ready() - return self - - def stop(self) -> None: - """Stop the container.""" - self._container.stop() - - def get_connection_url(self) -> str: - """Return a SQLAlchemy-style connection URL.""" - host = self._container.get_container_host_ip() - port = self._container._get_exposed_port(self.port) - quoted_password = quote(self.password, safe=" +") - return f"postgresql+psycopg2://{self.username}:{quoted_password}@{host}:{port}/{self.dbname}" - - def _wait_until_ready(self, timeout: float = 30.0, interval: float = 0.5) -> None: - """Wait for Postgres to accept connections by running a simple query.""" - start_time = time.time() - escaped_password = self.password.replace("'", "'\"'\"'") - cmd = [ - "sh", - "-c", - ( - f"PGPASSWORD='{escaped_password}' " - f"psql --username {self.username} --dbname {self.dbname} --host 127.0.0.1 " - "-c 'select 1;'" - ), - ] - last_error: str | None = None - - while True: - result = self._container.exec(cmd) - if result.exit_code == 0: - return - if result.output: - last_error = result.output.decode(errors="ignore") - if time.time() - start_time > timeout: - raise TimeoutError( - "Postgres container did not become ready in time" - + (f": {last_error}" if last_error else "") - ) - time.sleep(interval) - - -_container: PgTestContainer | None = None -_database_url: str | None = None - - -def get_or_create_container() -> tuple[PgTestContainer, str]: - """Get or create the PostgreSQL container.""" - global _container, _database_url - - if _container is None: - container = PgTestContainer().start() - _container = container - url = container.get_connection_url() - _database_url = url.replace("postgresql+psycopg2://", "postgresql+asyncpg://") - - assert _container is not None, "Container should be initialized" - assert _database_url is not None, "Database URL should be initialized" - return _container, _database_url +from support.db_utils import ( + cleanup_test_schema, + create_test_engine, + create_test_session_factory, + get_or_create_container, + initialize_test_schema, + stop_container, +) @pytest.fixture @@ -115,26 
+22,16 @@ async def session_factory() -> AsyncGenerator[async_sessionmaker[AsyncSession], """Create a session factory and initialize the database schema.""" _, database_url = get_or_create_container() - engine = create_async_engine(database_url, echo=False) + engine = create_test_engine(database_url) async with engine.begin() as conn: - # Create pgvector extension and schema - await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) - await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE")) - await conn.execute(text("CREATE SCHEMA noteflow")) - # Create all tables - await conn.run_sync(Base.metadata.create_all) + await initialize_test_schema(conn) + + yield create_test_session_factory(engine) - yield async_sessionmaker( - engine, - class_=AsyncSession, - expire_on_commit=False, - autocommit=False, - autoflush=False, - ) # Cleanup - drop schema to reset for next test async with engine.begin() as conn: - await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE")) + await cleanup_test_schema(conn) await engine.dispose() @@ -152,7 +49,4 @@ async def session( def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: """Cleanup container after all tests complete.""" - global _container - if _container is not None: - _container.stop() - _container = None + stop_container() diff --git a/tests/stress/conftest.py b/tests/stress/conftest.py index a3a5e7b..639bb9f 100644 --- a/tests/stress/conftest.py +++ b/tests/stress/conftest.py @@ -2,12 +2,9 @@ from __future__ import annotations -import time from dataclasses import dataclass -from importlib import import_module from pathlib import Path from typing import TYPE_CHECKING -from urllib.parse import quote from unittest.mock import MagicMock import numpy as np @@ -16,10 +13,17 @@ import pytest from noteflow.grpc.service import NoteFlowServicer from noteflow.infrastructure.security.crypto import AesGcmCryptoBox from noteflow.infrastructure.security.keystore import InMemoryKeyStore +from support.db_utils import ( + cleanup_test_schema, + create_test_engine, + create_test_session_factory, + get_or_create_container, + initialize_test_schema, + stop_container, +) if TYPE_CHECKING: from collections.abc import AsyncGenerator - from collections.abc import Self from numpy.typing import NDArray from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker @@ -38,98 +42,6 @@ class MockAsrResult: no_speech_prob: float = 0.01 -# Store container reference at module level to reuse in stress tests -class PgTestContainer: - """Minimal Postgres testcontainer wrapper with custom readiness wait.""" - - def __init__( - self, - image: str = "pgvector/pgvector:pg16", - username: str = "test", - password: str = "test", - dbname: str = "noteflow_test", - port: int = 5432, - ) -> None: - self.username = username - self.password = password - self.dbname = dbname - self.port = port - - container_module = import_module("testcontainers.core.container") - docker_container_cls = container_module.DockerContainer - self._container = ( - docker_container_cls(image) - .with_env("POSTGRES_USER", username) - .with_env("POSTGRES_PASSWORD", password) - .with_env("POSTGRES_DB", dbname) - .with_exposed_ports(port) - ) - - def start(self) -> Self: - """Start the container.""" - self._container.start() - self._wait_until_ready() - return self - - def stop(self) -> None: - """Stop the container.""" - self._container.stop() - - def get_connection_url(self) -> str: - """Return a SQLAlchemy-style connection URL.""" - host = 
self._container.get_container_host_ip() - port = self._container._get_exposed_port(self.port) - quoted_password = quote(self.password, safe=" +") - return f"postgresql+psycopg2://{self.username}:{quoted_password}@{host}:{port}/{self.dbname}" - - def _wait_until_ready(self, timeout: float = 30.0, interval: float = 0.5) -> None: - """Wait for Postgres to accept connections by running a simple query.""" - start_time = time.time() - escaped_password = self.password.replace("'", "'\"'\"'") - cmd = [ - "sh", - "-c", - ( - f"PGPASSWORD='{escaped_password}' " - f"psql --username {self.username} --dbname {self.dbname} --host 127.0.0.1 " - "-c 'select 1;'" - ), - ] - last_error: str | None = None - - while True: - result = self._container.exec(cmd) - if result.exit_code == 0: - return - if result.output: - last_error = result.output.decode(errors="ignore") - if time.time() - start_time > timeout: - raise TimeoutError( - "Postgres container did not become ready in time" - + (f": {last_error}" if last_error else "") - ) - time.sleep(interval) - - -_container: PgTestContainer | None = None -_database_url: str | None = None - - -def get_or_create_container() -> tuple[PgTestContainer, str]: - """Get or create the PostgreSQL container for stress tests.""" - global _container, _database_url - - if _container is None: - container = PgTestContainer().start() - _container = container - url = container.get_connection_url() - _database_url = url.replace("postgresql+psycopg2://", "postgresql+asyncpg://") - - assert _container is not None, "Container should be initialized" - assert _database_url is not None, "Database URL should be initialized" - return _container, _database_url - - def create_mock_asr_engine(transcribe_results: list[str] | None = None) -> MagicMock: """Create mock ASR engine with configurable transcription results. @@ -199,47 +111,27 @@ def memory_servicer(mock_asr_engine: MagicMock, tmp_path: Path) -> NoteFlowServi ) -# Import session_factory from integration tests for PostgreSQL backend -# This is lazily imported to avoid requiring testcontainers for non-integration tests @pytest.fixture async def postgres_session_factory() -> AsyncGenerator[async_sessionmaker[AsyncSession], None]: """Create PostgreSQL session factory using testcontainers. Uses a local container helper to avoid importing test modules. 
""" - # Import here to avoid requiring testcontainers for all stress tests - from sqlalchemy import text - from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine - - from noteflow.infrastructure.persistence.models import Base - _, database_url = get_or_create_container() - engine = create_async_engine(database_url, echo=False) + engine = create_test_engine(database_url) async with engine.begin() as conn: - await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) - await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE")) - await conn.execute(text("CREATE SCHEMA noteflow")) - await conn.run_sync(Base.metadata.create_all) + await initialize_test_schema(conn) - yield async_sessionmaker( - engine, - class_=AsyncSession, - expire_on_commit=False, - autocommit=False, - autoflush=False, - ) + yield create_test_session_factory(engine) async with engine.begin() as conn: - await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE")) + await cleanup_test_schema(conn) await engine.dispose() def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: """Cleanup container after stress tests complete.""" - global _container - if _container is not None: - _container.stop() - _container = None + stop_container() diff --git a/tests/stress/test_audio_integrity.py b/tests/stress/test_audio_integrity.py index 91d18e8..8130d82 100644 --- a/tests/stress/test_audio_integrity.py +++ b/tests/stress/test_audio_integrity.py @@ -409,7 +409,11 @@ class TestWriterReaderRoundTrip: @pytest.mark.stress def test_multiple_chunks_roundtrip(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: - """Multiple chunk write and read preserves data.""" + """Multiple chunk write and read preserves data. + + Note: Due to buffering, the number of encrypted chunks may differ from + the number of writes. This test verifies content integrity, not chunk count. + """ meeting_id = str(uuid4()) dek = crypto.generate_dek() wrapped_dek = crypto.wrap_dek(dek) @@ -425,14 +429,22 @@ class TestWriterReaderRoundTrip: reader = MeetingAudioReader(crypto, meetings_dir) loaded_chunks = reader.load_meeting_audio(meeting_id) - assert len(loaded_chunks) == len(original_chunks) - for original, loaded in zip(original_chunks, loaded_chunks, strict=True): - np.testing.assert_array_almost_equal(loaded.frames, original, decimal=4) + # Concatenate original and loaded audio for comparison + # Buffering may merge chunks, so we compare total content + original_audio = np.concatenate(original_chunks) + loaded_audio = np.concatenate([chunk.frames for chunk in loaded_chunks]) + + assert len(loaded_audio) == len(original_audio) + np.testing.assert_array_almost_equal(loaded_audio, original_audio, decimal=4) @pytest.mark.stress @pytest.mark.slow def test_large_audio_roundtrip(self, crypto: AesGcmCryptoBox, meetings_dir: Path) -> None: - """Large audio file (1000 chunks) write and read succeeds.""" + """Large audio file (1000 chunks) write and read succeeds. + + Note: Due to buffering, the number of encrypted chunks may differ from + the number of writes. This test verifies total duration and sample count. 
+ """ meeting_id = str(uuid4()) dek = crypto.generate_dek() wrapped_dek = crypto.wrap_dek(dek) @@ -449,11 +461,15 @@ class TestWriterReaderRoundTrip: reader = MeetingAudioReader(crypto, meetings_dir) chunks = reader.load_meeting_audio(meeting_id) - assert len(chunks) == chunk_count + # Verify total duration and sample count (not chunk count) total_duration = sum(c.duration for c in chunks) expected_duration = chunk_count * (1600 / 16000) assert abs(total_duration - expected_duration) < 0.01 + total_samples = sum(len(c.frames) for c in chunks) + expected_samples = chunk_count * 1600 + assert total_samples == expected_samples + class TestFileVersionHandling: """Test file version validation.""" diff --git a/tests/stress/test_segmenter_fuzz.py b/tests/stress/test_segmenter_fuzz.py index c46b7ae..ba690f3 100644 --- a/tests/stress/test_segmenter_fuzz.py +++ b/tests/stress/test_segmenter_fuzz.py @@ -318,9 +318,7 @@ class TestEdgeCaseConfigurations: list(segmenter.process_audio(silence, is_speech=False)) list(segmenter.process_audio(speech, is_speech=True)) - if segments := list( - segmenter.process_audio(more_silence, is_speech=False) - ): + if segments := list(segmenter.process_audio(more_silence, is_speech=False)): seg = segments[0] assert seg.duration > 0
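
For context on the writer changes exercised throughout these tests, a minimal usage sketch of the buffered flow. It assumes MeetingAudioWriter is importable from noteflow.infrastructure.audio.writer (the module this patch changes); crypto and meetings_dir mirror the test fixtures, and buffer_size is illustrative:

    from pathlib import Path
    from uuid import uuid4

    import numpy as np

    from noteflow.infrastructure.audio.writer import MeetingAudioWriter
    from noteflow.infrastructure.security.crypto import AesGcmCryptoBox


    def record_sketch(crypto: AesGcmCryptoBox, meetings_dir: Path) -> None:
        # Larger buffers yield fewer encrypted chunks, saving the 32-byte
        # per-chunk overhead (4 length + 12 nonce + 16 tag).
        writer = MeetingAudioWriter(crypto, meetings_dir, buffer_size=64_000)
        dek = crypto.generate_dek()
        writer.open(str(uuid4()), dek, crypto.wrap_dek(dek))
        try:
            for _ in range(10):
                # 100 ms of 16 kHz mono float32 audio in [-1.0, 1.0]
                writer.write_chunk(np.zeros(1600, dtype=np.float32))
            writer.flush()  # force buffered PCM16 into an encrypted chunk
        finally:
            writer.close()  # flushes the remainder, stops the periodic flush thread

The distinction the tests rely on: write_count counts write_chunk() calls, while chunk_count counts encrypted chunks flushed to disk, so the two match only when every write triggers a flush.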