- Created .dockerignore to exclude unnecessary files from Docker builds.
- Added .repomixignore for managing ignored patterns in Repomix.
- Introduced Dockerfile.dev for development environment setup with Python 3.12.
- Configured docker-compose.yaml to define services, including a PostgreSQL database.
- Established a devcontainer.json for Visual Studio Code integration.
- Implemented postCreate.sh for automatic dependency installation in the dev container.
- Added constants.py to centralize configuration constants for the project.
- Updated pyproject.toml to include new development dependencies.
- Created initial documentation files for project overview and style conventions.
- Added tests for new functionalities to ensure reliability and correctness.
1014 KiB
This file is a merged representation of a subset of the codebase, containing specifically included files, combined into a single document by Repomix.
File Summary
Purpose
This file contains a packed representation of a subset of the repository's contents that is considered the most important context. It is designed to be easily consumable by AI systems for analysis, code review, or other automated processes.
File Format
The content is organized as follows:
- This summary section
- Repository information
- Directory structure
- Repository files (if enabled)
- Multiple file entries, each consisting of:
  a. A header with the file path (## File: path/to/file)
  b. The full contents of the file in a code block
Usage Guidelines
- This file should be treated as read-only. Any changes should be made to the original repository files, not this packed version.
- When processing this file, use the file path to distinguish between different files in the repository.
- Be aware that this file may contain sensitive information. Handle it with the same level of security as you would the original repository.
Notes
- Some files may have been excluded based on .gitignore rules and Repomix's configuration
- Binary files are not included in this packed representation. Please refer to the Directory Structure section for a complete list of file paths, including binary files
- Only files matching these patterns are included: src/, tests/
- Files matching patterns in .gitignore are excluded
- Files matching default ignore patterns are excluded
- Files are sorted by Git change count (files with more changes are at the bottom)
Directory Structure
src/
  noteflow/
    application/
      services/
        __init__.py
        export_service.py
        meeting_service.py
        recovery_service.py
        retention_service.py
        summarization_service.py
        trigger_service.py
      __init__.py
    cli/
      __init__.py
      __main__.py
      retention.py
    client/
      components/
        __init__.py
        _async_mixin.py
        _thread_mixin.py
        annotation_display.py
        annotation_toolbar.py
        connection_panel.py
        meeting_library.py
        playback_controls.py
        playback_sync.py
        recording_timer.py
        summary_panel.py
        transcript.py
        vu_meter.py
      __init__.py
      _trigger_mixin.py
      app.py
      state.py
    config/
      __init__.py
      constants.py
      settings.py
    core/
      __init__.py
    domain/
      entities/
        __init__.py
        annotation.py
        meeting.py
        segment.py
        summary.py
      ports/
        __init__.py
        repositories.py
        unit_of_work.py
      summarization/
        __init__.py
        ports.py
      triggers/
        __init__.py
        entities.py
        ports.py
      __init__.py
      value_objects.py
    grpc/
      proto/
        __init__.py
        noteflow_pb2_grpc.py
        noteflow_pb2.py
        noteflow_pb2.pyi
        noteflow.proto
      __init__.py
      client.py
      meeting_store.py
      server.py
      service.py
    infrastructure/
      asr/
        __init__.py
        dto.py
        engine.py
        protocols.py
        segmenter.py
        streaming_vad.py
      audio/
        __init__.py
        capture.py
        dto.py
        levels.py
        playback.py
        protocols.py
        reader.py
        ring_buffer.py
        writer.py
      converters/
        __init__.py
        asr_converters.py
        orm_converters.py
      diarization/
        __init__.py
        assigner.py
        dto.py
        engine.py
      export/
        __init__.py
        _formatting.py
        html.py
        markdown.py
        protocols.py
      persistence/
        migrations/
          versions/
            6a9d9f408f40_initial_schema.py
            b5c3e8a2d1f0_add_annotations_table.py
            c7d4e9f3a2b1_add_speaker_fields_to_segments.py
          __init__.py
          env.py
          README
          script.py.mako
        repositories/
          __init__.py
          _base.py
          annotation_repo.py
          meeting_repo.py
          segment_repo.py
          summary_repo.py
        __init__.py
        database.py
        models.py
        unit_of_work.py
      security/
        __init__.py
        crypto.py
        keystore.py
        protocols.py
      summarization/
        __init__.py
        _parsing.py
        citation_verifier.py
        cloud_provider.py
        factory.py
        mock_provider.py
        ollama_provider.py
      triggers/
        __init__.py
        app_audio.py
        audio_activity.py
        calendar.py
        foreground_app.py
      __init__.py
    __init__.py
    noteflow_pb2.py
tests/
  application/
    __init__.py
    test_export_service.py
    test_meeting_service.py
    test_recovery_service.py
    test_retention_service.py
    test_summarization_service.py
    test_trigger_service.py
  client/
    test_async_mixin.py
    test_summary_panel.py
    test_transcript_component.py
  domain/
    __init__.py
    test_annotation.py
    test_meeting.py
    test_segment.py
    test_summary.py
    test_triggers.py
    test_value_objects.py
  grpc/
    __init__.py
    test_diarization_refine.py
    test_generate_summary.py
    test_partial_transcription.py
  infrastructure/
    asr/
      __init__.py
      test_dto.py
      test_engine.py
      test_segmenter.py
      test_streaming_vad.py
    audio/
      __init__.py
      conftest.py
      test_capture.py
      test_dto.py
      test_levels.py
      test_reader.py
      test_ring_buffer.py
      test_writer.py
    export/
      test_formatting.py
      test_html.py
      test_markdown.py
    security/
      test_crypto.py
      test_keystore.py
    summarization/
      test_citation_verifier.py
      test_cloud_provider.py
      test_mock_provider.py
      test_ollama_provider.py
    triggers/
      conftest.py
      test_audio_activity.py
      test_foreground_app.py
    __init__.py
    test_converters.py
    test_diarization.py
  integration/
    __init__.py
    conftest.py
    test_repositories.py
    test_trigger_settings.py
    test_unit_of_work.py
  __init__.py
  conftest.py
Files
File: src/noteflow/config/constants.py
"""Centralized constants for NoteFlow.
This module provides shared constants used across the codebase to avoid
magic numbers and ensure consistency.
"""
from __future__ import annotations
from typing import Final
# Audio constants
DEFAULT_SAMPLE_RATE: Final[int] = 16000
"""Default audio sample rate in Hz (16 kHz)."""
# gRPC constants
DEFAULT_GRPC_PORT: Final[int] = 50051
"""Default gRPC server port."""
MAX_GRPC_MESSAGE_SIZE: Final[int] = 100 * 1024 * 1024
"""Maximum gRPC message size in bytes (100 MB)."""
File: src/noteflow/infrastructure/triggers/app_audio.py
"""App audio activity provider.
Detects audio activity from system output while whitelisted meeting apps are active.
This is a best-effort heuristic: it combines (a) system output activity and
(b) presence of whitelisted app windows to infer a likely meeting.
"""
from __future__ import annotations
import logging
import time
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource
from noteflow.infrastructure.audio.levels import RmsLevelProvider
from noteflow.infrastructure.triggers.audio_activity import (
AudioActivityProvider,
AudioActivitySettings,
)
if TYPE_CHECKING:
import numpy as np
from numpy.typing import NDArray
logger = logging.getLogger(__name__)
@dataclass
class AppAudioSettings:
"""Configuration for app audio detection.
Attributes:
enabled: Whether app audio detection is enabled.
threshold_db: Minimum dB level to consider as activity.
window_seconds: Time window for sustained activity detection.
min_active_ratio: Minimum ratio of active samples in window.
min_samples: Minimum samples required before evaluation.
max_history: Maximum samples retained in history.
weight: Confidence weight contributed by this provider.
meeting_apps: Set of app name substrings to match (lowercase).
suppressed_apps: App substrings to ignore even if matched.
sample_rate: Sample rate for system output capture.
sample_duration_seconds: Duration of each sampling read.
chunk_duration_seconds: Duration of sub-chunks for activity history updates.
"""
enabled: bool
threshold_db: float
window_seconds: float
min_active_ratio: float
min_samples: int
max_history: int
weight: float
meeting_apps: set[str] = field(default_factory=set)
suppressed_apps: set[str] = field(default_factory=set)
sample_rate: int = 16000
sample_duration_seconds: float = 0.5
chunk_duration_seconds: float = 0.1
def __post_init__(self) -> None:
self.meeting_apps = {app.lower() for app in self.meeting_apps}
self.suppressed_apps = {app.lower() for app in self.suppressed_apps}
class _SystemOutputSampler:
"""Best-effort system output sampler using sounddevice."""
def __init__(self, sample_rate: int, channels: int = 1) -> None:
self._sample_rate = sample_rate
self._channels = channels
self._stream = None
self._extra_settings = None
self._device = None
self._available: bool | None = None
def _select_device(self) -> None:
try:
import sounddevice as sd
except ImportError:
self._mark_unavailable("sounddevice not available - app audio detection disabled")
return
# Default to output device and WASAPI loopback when available (Windows)
try:
default_output = sd.default.device[1]
except (TypeError, IndexError):
default_output = None
try:
hostapi_index = sd.default.hostapi
hostapi = sd.query_hostapis(hostapi_index) if hostapi_index is not None else None
except Exception:
hostapi = None
if hostapi and hostapi.get("type") == "Windows WASAPI" and default_output is not None:
# On WASAPI, loopback devices appear as separate input devices
# Fall through to monitor/loopback device detection below
pass
# Fallback: look for monitor/loopback devices (Linux/PulseAudio)
try:
devices = sd.query_devices()
except Exception:
self._mark_unavailable("Failed to query audio devices for app audio detection")
return
for idx, dev in enumerate(devices):
name = str(dev.get("name", "")).lower()
if int(dev.get("max_input_channels", 0)) <= 0:
continue
if "monitor" in name or "loopback" in name:
self._use_device(idx)
return
self._available = False
logger.warning("No loopback audio device found - app audio detection disabled")
def _use_device(self, device_index: int) -> None:
self._device = device_index
self._available = True
def _mark_unavailable(self, reason: str) -> None:
self._available = False
logger.warning(reason)
def _ensure_stream(self) -> bool:
if self._available is False:
return False
if self._available is None:
self._select_device()
if self._available is False:
return False
if self._stream is not None:
return True
try:
import sounddevice as sd
self._stream = sd.InputStream(
device=self._device,
channels=self._channels,
samplerate=self._sample_rate,
dtype="float32",
extra_settings=self._extra_settings,
)
self._stream.start()
return True
except Exception as exc:
logger.warning("Failed to start system output capture: %s", exc)
self._stream = None
self._available = False
return False
def read_frames(self, duration_seconds: float) -> NDArray[np.float32] | None:
if not self._ensure_stream():
return None
if self._stream is None:
return None
frames = max(1, int(self._sample_rate * duration_seconds))
try:
data, _ = self._stream.read(frames)
except Exception as exc:
logger.debug("System output read failed: %s", exc)
return None
return data.reshape(-1).astype("float32")
def close(self) -> None:
if self._stream is None:
return
try:
self._stream.stop()
self._stream.close()
except Exception:
logger.debug("Failed to close system output stream", exc_info=True)
finally:
self._stream = None
class AppAudioProvider:
"""Detect app audio activity from whitelisted meeting apps."""
def __init__(self, settings: AppAudioSettings) -> None:
self._settings = settings
self._sampler = _SystemOutputSampler(sample_rate=settings.sample_rate)
self._level_provider = RmsLevelProvider()
self._audio_activity = AudioActivityProvider(
self._level_provider,
AudioActivitySettings(
enabled=settings.enabled,
threshold_db=settings.threshold_db,
window_seconds=settings.window_seconds,
min_active_ratio=settings.min_active_ratio,
min_samples=settings.min_samples,
max_history=settings.max_history,
weight=settings.weight,
),
)
@property
def source(self) -> TriggerSource:
return TriggerSource.AUDIO_ACTIVITY
@property
def max_weight(self) -> float:
return self._settings.weight
def is_enabled(self) -> bool:
return self._settings.enabled
def get_signal(self) -> TriggerSignal | None:
if not self.is_enabled():
return None
if not self._settings.meeting_apps:
return None
app_title = self._detect_meeting_app()
if not app_title:
return None
frames = self._sampler.read_frames(self._settings.sample_duration_seconds)
if frames is None or frames.size == 0:
return None
self._update_activity_history(frames)
if self._audio_activity.get_signal() is None:
return None
return TriggerSignal(
source=self.source,
weight=self.max_weight,
app_name=app_title,
)
def _update_activity_history(self, frames: NDArray[np.float32]) -> None:
chunk_size = max(1, int(self._settings.sample_rate * self._settings.chunk_duration_seconds))
now = time.monotonic()
for offset in range(0, len(frames), chunk_size):
chunk = frames[offset : offset + chunk_size]
if chunk.size == 0:
continue
self._audio_activity.update(chunk, now)
def _detect_meeting_app(self) -> str | None:
try:
import pywinctl
except ImportError:
return None
titles: list[str] = []
try:
if hasattr(pywinctl, "getAllWindows"):
windows = pywinctl.getAllWindows()
titles = [w.title for w in windows if getattr(w, "title", None)]
elif hasattr(pywinctl, "getAllTitles"):
titles = [t for t in pywinctl.getAllTitles() if t]
except Exception as exc:
logger.debug("Failed to list windows for app detection: %s", exc)
return None
for title in titles:
title_lower = title.lower()
if any(suppressed in title_lower for suppressed in self._settings.suppressed_apps):
continue
if any(app in title_lower for app in self._settings.meeting_apps):
return title
return None
def close(self) -> None:
"""Release system audio resources."""
self._sampler.close()
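# --- Usage sketch (illustrative values; not part of the module above) ---
# AppAudioSettings.__post_init__ lowercases the app-name sets, so matching
# against window titles is case-insensitive. The provider is best-effort:
# it yields a TriggerSignal only when a whitelisted app window is present
# AND sustained output activity is detected.
settings = AppAudioSettings(
    enabled=True,
    threshold_db=-45.0,
    window_seconds=10.0,
    min_active_ratio=0.3,
    min_samples=5,
    max_history=100,
    weight=0.4,
    meeting_apps={"Zoom", "Microsoft Teams"},
)
assert settings.meeting_apps == {"zoom", "microsoft teams"}

provider = AppAudioProvider(settings)
try:
    signal = provider.get_signal()  # None unless a meeting looks likely
finally:
    provider.close()  # release the system audio stream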
File: src/noteflow/infrastructure/triggers/calendar.py
"""Calendar trigger provider.
Best-effort calendar integration using configured event windows.
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import TYPE_CHECKING
from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource
if TYPE_CHECKING:
from collections.abc import Iterable
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class CalendarEvent:
"""Simple calendar event window."""
start: datetime
end: datetime
title: str | None = None
@dataclass
class CalendarSettings:
"""Configuration for calendar trigger detection."""
enabled: bool
weight: float
lookahead_minutes: int
lookbehind_minutes: int
events: list[CalendarEvent]
class CalendarProvider:
"""Provide trigger signal based on calendar proximity."""
def __init__(self, settings: CalendarSettings) -> None:
self._settings = settings
@property
def source(self) -> TriggerSource:
return TriggerSource.CALENDAR
@property
def max_weight(self) -> float:
return self._settings.weight
def is_enabled(self) -> bool:
return self._settings.enabled
def get_signal(self) -> TriggerSignal | None:
if not self.is_enabled():
return None
if not self._settings.events:
return None
now = datetime.now(timezone.utc)
window_start = now - timedelta(minutes=self._settings.lookbehind_minutes)
window_end = now + timedelta(minutes=self._settings.lookahead_minutes)
return next(
(
TriggerSignal(
source=self.source,
weight=self.max_weight,
app_name=event.title,
)
for event in self._settings.events
if self._event_overlaps_window(event, window_start, window_end)
),
None,
)
@staticmethod
def _event_overlaps_window(
event: CalendarEvent,
window_start: datetime,
window_end: datetime,
) -> bool:
event_start = _ensure_tz(event.start)
event_end = _ensure_tz(event.end)
return event_start <= window_end and event_end >= window_start
def parse_calendar_events(raw_events: object) -> list[CalendarEvent]:
"""Parse calendar events from config/env payloads."""
if raw_events is None:
return []
if isinstance(raw_events, str):
raw_events = _load_events_from_json(raw_events)
if isinstance(raw_events, dict):
raw_events = [raw_events]
if not isinstance(raw_events, Iterable):
return []
events: list[CalendarEvent] = []
for item in raw_events:
if isinstance(item, CalendarEvent):
events.append(item)
continue
if isinstance(item, dict):
start = _parse_datetime(item.get("start"))
end = _parse_datetime(item.get("end"))
if start and end:
events.append(CalendarEvent(start=start, end=end, title=item.get("title")))
return events
def _load_events_from_json(raw: str) -> list[dict[str, object]]:
try:
parsed = json.loads(raw)
except json.JSONDecodeError:
logger.debug("Failed to parse calendar events JSON")
return []
if isinstance(parsed, list):
return [item for item in parsed if isinstance(item, dict)]
return [parsed] if isinstance(parsed, dict) else []
def _parse_datetime(value: object) -> datetime | None:
if isinstance(value, datetime):
return value
if not isinstance(value, str) or not value:
return None
cleaned = value.strip()
if cleaned.endswith("Z"):
cleaned = f"{cleaned[:-1]}+00:00"
try:
return datetime.fromisoformat(cleaned)
except ValueError:
return None
def _ensure_tz(value: datetime) -> datetime:
if value.tzinfo is None:
return value.replace(tzinfo=timezone.utc)
return value.astimezone(timezone.utc)
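# --- Usage sketch (payload values are illustrative) ---
# parse_calendar_events accepts a JSON string (e.g. from an env var), a
# single dict, or an iterable of dicts. Datetimes are ISO 8601; a trailing
# "Z" is rewritten to "+00:00" before parsing.
raw = '[{"start": "2024-01-15T10:00:00Z", "end": "2024-01-15T11:00:00Z", "title": "Standup"}]'
events = parse_calendar_events(raw)
assert len(events) == 1 and events[0].title == "Standup"

provider = CalendarProvider(
    CalendarSettings(
        enabled=True,
        weight=0.5,
        lookahead_minutes=10,
        lookbehind_minutes=5,
        events=events,
    )
)
signal = provider.get_signal()  # TriggerSignal if "now" falls near an event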
File: tests/grpc/test_diarization_refine.py
"""Tests for RefineSpeakerDiarization RPC guards."""
from __future__ import annotations
import pytest
from noteflow.grpc.proto import noteflow_pb2
from noteflow.grpc.service import NoteFlowServicer
class _DummyContext:
"""Minimal gRPC context that raises if abort is invoked."""
async def abort(self, code, details): # type: ignore[override]
raise AssertionError(f"abort called: {code} - {details}")
@pytest.mark.asyncio
async def test_refine_speaker_diarization_rejects_active_meeting() -> None:
"""Refinement should be blocked while a meeting is still recording."""
servicer = NoteFlowServicer(diarization_engine=object())
store = servicer._get_memory_store()
meeting = store.create("Active meeting")
meeting.start_recording()
store.update(meeting)
response = await servicer.RefineSpeakerDiarization(
noteflow_pb2.RefineSpeakerDiarizationRequest(meeting_id=str(meeting.id)),
_DummyContext(),
)
assert response.segments_updated == 0
assert response.error_message
assert "stopped" in response.error_message.lower()
File: tests/infrastructure/test_diarization.py
"""Tests for speaker diarization infrastructure.
Tests the SpeakerTurn DTO and speaker assignment utilities.
"""
from __future__ import annotations
import pytest
from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker, assign_speakers_batch
class TestSpeakerTurn:
"""Tests for the SpeakerTurn dataclass."""
def test_create_valid_turn(self) -> None:
"""Create a valid speaker turn."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0)
assert turn.speaker == "SPEAKER_00"
assert turn.start == 0.0
assert turn.end == 5.0
assert turn.confidence == 1.0
def test_create_turn_with_confidence(self) -> None:
"""Create a turn with custom confidence."""
turn = SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0, confidence=0.85)
assert turn.confidence == 0.85
def test_invalid_end_before_start_raises(self) -> None:
"""End time before start time raises ValueError."""
with pytest.raises(ValueError, match=r"end.*<.*start"):
SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=5.0)
def test_invalid_confidence_negative_raises(self) -> None:
"""Negative confidence raises ValueError."""
with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"):
SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=-0.1)
def test_invalid_confidence_above_one_raises(self) -> None:
"""Confidence above 1.0 raises ValueError."""
with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"):
SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=1.5)
def test_duration_property(self) -> None:
"""Duration property calculates correctly."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=2.5, end=7.5)
assert turn.duration == 5.0
def test_overlaps_returns_true_for_overlap(self) -> None:
"""overlaps() returns True when ranges overlap."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
assert turn.overlaps(3.0, 7.0)
assert turn.overlaps(7.0, 12.0)
assert turn.overlaps(5.0, 10.0)
assert turn.overlaps(0.0, 15.0)
def test_overlaps_returns_false_for_no_overlap(self) -> None:
"""overlaps() returns False when ranges don't overlap."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
assert not turn.overlaps(0.0, 5.0)
assert not turn.overlaps(10.0, 15.0)
assert not turn.overlaps(0.0, 3.0)
assert not turn.overlaps(12.0, 20.0)
def test_overlap_duration_full_overlap(self) -> None:
"""overlap_duration() for full overlap returns turn duration."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
assert turn.overlap_duration(0.0, 15.0) == 5.0
def test_overlap_duration_partial_overlap_left(self) -> None:
"""overlap_duration() for partial overlap on left side."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
assert turn.overlap_duration(3.0, 7.0) == 2.0
def test_overlap_duration_partial_overlap_right(self) -> None:
"""overlap_duration() for partial overlap on right side."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
assert turn.overlap_duration(8.0, 15.0) == 2.0
def test_overlap_duration_contained(self) -> None:
"""overlap_duration() when range is contained within turn."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=20.0)
assert turn.overlap_duration(5.0, 10.0) == 5.0
def test_overlap_duration_no_overlap(self) -> None:
"""overlap_duration() returns 0.0 when no overlap."""
turn = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
assert turn.overlap_duration(0.0, 3.0) == 0.0
assert turn.overlap_duration(12.0, 20.0) == 0.0
class TestAssignSpeaker:
"""Tests for the assign_speaker function."""
def test_empty_turns_returns_none(self) -> None:
"""Empty turns list returns None with 0 confidence."""
speaker, confidence = assign_speaker(0.0, 5.0, [])
assert speaker is None
assert confidence == 0.0
def test_zero_duration_segment_returns_none(self) -> None:
"""Zero duration segment returns None."""
turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
speaker, confidence = assign_speaker(5.0, 5.0, turns)
assert speaker is None
assert confidence == 0.0
def test_single_turn_full_overlap(self) -> None:
"""Single turn with full overlap returns high confidence."""
turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
speaker, confidence = assign_speaker(2.0, 8.0, turns)
assert speaker == "SPEAKER_00"
assert confidence == 1.0
def test_single_turn_partial_overlap(self) -> None:
"""Single turn with partial overlap returns proportional confidence."""
turns = [SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)]
speaker, confidence = assign_speaker(0.0, 10.0, turns)
assert speaker == "SPEAKER_00"
assert confidence == 0.5
def test_multiple_turns_chooses_dominant_speaker(self) -> None:
"""Multiple turns chooses speaker with most overlap."""
turns = [
SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0),
SpeakerTurn(speaker="SPEAKER_01", start=3.0, end=10.0),
]
speaker, confidence = assign_speaker(0.0, 10.0, turns)
assert speaker == "SPEAKER_01"
assert confidence == 0.7
def test_no_overlap_returns_none(self) -> None:
"""No overlapping turns returns None."""
turns = [
SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0),
]
speaker, confidence = assign_speaker(6.0, 9.0, turns)
assert speaker is None
assert confidence == 0.0
def test_equal_overlap_chooses_first_encountered(self) -> None:
"""Equal overlap chooses first speaker encountered."""
turns = [
SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0),
]
speaker, confidence = assign_speaker(3.0, 7.0, turns)
# SPEAKER_00: overlap 2.0, SPEAKER_01: overlap 2.0
# First one wins: a tie does not replace the current best (strict > comparison)
assert speaker == "SPEAKER_00"
assert confidence == 0.5
class TestAssignSpeakersBatch:
"""Tests for the assign_speakers_batch function."""
def test_empty_segments(self) -> None:
"""Empty segments list returns empty results."""
turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
results = assign_speakers_batch([], turns)
assert results == []
def test_empty_turns(self) -> None:
"""Empty turns returns all None speakers."""
segments = [(0.0, 5.0), (5.0, 10.0)]
results = assign_speakers_batch(segments, [])
assert len(results) == 2
assert all(speaker is None for speaker, _ in results)
assert all(conf == 0.0 for _, conf in results)
def test_batch_assignment(self) -> None:
"""Batch assignment processes all segments."""
turns = [
SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0),
SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=15.0),
]
segments = [(0.0, 5.0), (5.0, 10.0), (10.0, 15.0)]
results = assign_speakers_batch(segments, turns)
assert len(results) == 3
assert results[0] == ("SPEAKER_00", 1.0)
assert results[1] == ("SPEAKER_01", 1.0)
assert results[2] == ("SPEAKER_00", 1.0)
def test_batch_with_gaps(self) -> None:
"""Batch assignment handles gaps between turns."""
turns = [
SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0),
SpeakerTurn(speaker="SPEAKER_01", start=7.0, end=10.0),
]
segments = [(0.0, 3.0), (3.0, 7.0), (7.0, 10.0)]
results = assign_speakers_batch(segments, turns)
assert results[0] == ("SPEAKER_00", 1.0)
assert results[1] == (None, 0.0)
assert results[2] == ("SPEAKER_01", 1.0)
File: src/noteflow/application/services/export_service.py
"""Export application service.
Orchestrates transcript export to various formats.
"""
from __future__ import annotations
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING
from noteflow.infrastructure.export import HtmlExporter, MarkdownExporter, TranscriptExporter
if TYPE_CHECKING:
from noteflow.domain.entities import Meeting, Segment
from noteflow.domain.ports.unit_of_work import UnitOfWork
from noteflow.domain.value_objects import MeetingId
class ExportFormat(Enum):
"""Supported export formats."""
MARKDOWN = "markdown"
HTML = "html"
class ExportService:
"""Application service for transcript export operations.
Provides use cases for exporting meeting transcripts to various formats.
"""
def __init__(self, uow: UnitOfWork) -> None:
"""Initialize the export service.
Args:
uow: Unit of work for persistence.
"""
self._uow = uow
self._exporters: dict[ExportFormat, TranscriptExporter] = {
ExportFormat.MARKDOWN: MarkdownExporter(),
ExportFormat.HTML: HtmlExporter(),
}
def _get_exporter(self, fmt: ExportFormat) -> TranscriptExporter:
"""Get exporter for format.
Args:
fmt: Export format.
Returns:
Exporter instance.
Raises:
ValueError: If format is not supported.
"""
exporter = self._exporters.get(fmt)
if exporter is None:
raise ValueError(f"Unsupported export format: {fmt}")
return exporter
async def export_transcript(
self,
meeting_id: MeetingId,
fmt: ExportFormat = ExportFormat.MARKDOWN,
) -> str:
"""Export meeting transcript to string.
Args:
meeting_id: Meeting identifier.
fmt: Export format.
Returns:
Formatted transcript string.
Raises:
ValueError: If meeting not found.
"""
async with self._uow:
meeting = await self._uow.meetings.get(meeting_id)
if meeting is None:
raise ValueError(f"Meeting {meeting_id} not found")
segments = await self._uow.segments.get_by_meeting(meeting_id)
exporter = self._get_exporter(fmt)
return exporter.export(meeting, segments)
async def export_to_file(
self,
meeting_id: MeetingId,
output_path: Path,
fmt: ExportFormat | None = None,
) -> Path:
"""Export meeting transcript to file.
Args:
meeting_id: Meeting identifier.
output_path: Output file path (extension determines format if not specified).
fmt: Export format (optional, inferred from extension if not provided).
Returns:
Path to the exported file.
Raises:
ValueError: If meeting not found or format cannot be determined.
"""
# Determine format from extension if not provided
if fmt is None:
fmt = self._infer_format_from_extension(output_path.suffix)
content = await self.export_transcript(meeting_id, fmt)
# Ensure correct extension
exporter = self._get_exporter(fmt)
if output_path.suffix != exporter.file_extension:
output_path = output_path.with_suffix(exporter.file_extension)
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(content, encoding="utf-8")
return output_path
def _infer_format_from_extension(self, extension: str) -> ExportFormat:
"""Infer export format from file extension.
Args:
extension: File extension (e.g., '.md', '.html').
Returns:
Inferred export format.
Raises:
ValueError: If extension is not recognized.
"""
extension_map = {
".md": ExportFormat.MARKDOWN,
".markdown": ExportFormat.MARKDOWN,
".html": ExportFormat.HTML,
".htm": ExportFormat.HTML,
}
fmt = extension_map.get(extension.lower())
if fmt is None:
raise ValueError(
f"Cannot infer format from extension '{extension}'. "
f"Supported: {', '.join(extension_map.keys())}"
)
return fmt
def get_supported_formats(self) -> list[tuple[str, str]]:
"""Get list of supported export formats.
Returns:
List of (format_name, file_extension) tuples.
"""
return [(e.format_name, e.file_extension) for e in self._exporters.values()]
async def preview_export(
self,
meeting: Meeting,
segments: list[Segment],
fmt: ExportFormat = ExportFormat.MARKDOWN,
) -> str:
"""Preview export without fetching from database.
Useful for previewing exports with in-memory data.
Args:
meeting: Meeting entity.
segments: List of segments.
fmt: Export format.
Returns:
Formatted transcript string.
"""
exporter = self._get_exporter(fmt)
return exporter.export(meeting, segments)
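# --- Usage sketch (must run in an async context) ---
# `uow` and `meeting_id` are placeholders for a concrete UnitOfWork
# implementation and a known MeetingId; neither is defined here.
async def export_example(uow, meeting_id) -> None:
    service = ExportService(uow)
    # Format inferred from the .html extension via _infer_format_from_extension:
    path = await service.export_to_file(meeting_id, Path("minutes.html"))
    # Or render to a string with an explicit format:
    text = await service.export_transcript(meeting_id, ExportFormat.MARKDOWN)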
File: src/noteflow/application/services/recovery_service.py
"""Recovery service for crash recovery on startup.
Detect and recover meetings left in active states after server restart.
"""
from __future__ import annotations
import logging
from datetime import UTC, datetime
from typing import TYPE_CHECKING, ClassVar
from noteflow.domain.value_objects import MeetingState
if TYPE_CHECKING:
from noteflow.domain.entities import Meeting
from noteflow.domain.ports.unit_of_work import UnitOfWork
logger = logging.getLogger(__name__)
class RecoveryService:
"""Recover meetings from crash states on server startup.
Find meetings left in RECORDING or STOPPING state and mark them as ERROR.
This handles the case where the server crashed during an active meeting.
"""
ACTIVE_STATES: ClassVar[list[MeetingState]] = [
MeetingState.RECORDING,
MeetingState.STOPPING,
]
def __init__(self, uow: UnitOfWork) -> None:
"""Initialize recovery service.
Args:
uow: Unit of work for persistence.
"""
self._uow = uow
async def recover_crashed_meetings(self) -> list[Meeting]:
"""Find and recover meetings left in active states.
Mark all meetings in RECORDING or STOPPING state as ERROR
with metadata explaining the crash recovery.
Returns:
List of recovered meetings.
"""
async with self._uow:
# Find all meetings in active states
meetings, total = await self._uow.meetings.list_all(
states=self.ACTIVE_STATES,
limit=1000, # Handle up to 1000 crashed meetings
)
if total == 0:
logger.info("No crashed meetings found during recovery")
return []
logger.warning(
"Found %d meetings in active state during startup, marking as ERROR",
total,
)
recovered: list[Meeting] = []
recovery_time = datetime.now(UTC).isoformat()
for meeting in meetings:
previous_state = meeting.state.name
meeting.mark_error()
# Add crash recovery metadata
meeting.metadata["crash_recovered"] = "true"
meeting.metadata["crash_recovery_time"] = recovery_time
meeting.metadata["crash_previous_state"] = previous_state
await self._uow.meetings.update(meeting)
recovered.append(meeting)
logger.info(
"Recovered crashed meeting: id=%s, previous_state=%s",
meeting.id,
previous_state,
)
await self._uow.commit()
logger.info("Crash recovery complete: %d meetings recovered", len(recovered))
return recovered
async def count_crashed_meetings(self) -> int:
"""Count meetings currently in crash states.
Returns:
Number of meetings in RECORDING or STOPPING state.
"""
async with self._uow:
total = 0
for state in self.ACTIVE_STATES:
total += await self._uow.meetings.count_by_state(state)
return total
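# --- Startup wiring sketch ---
# `uow` is a placeholder for a concrete UnitOfWork implementation; running
# recovery before serving requests is the intended call site.
async def run_startup_recovery(uow) -> None:
    recovery = RecoveryService(uow)
    if await recovery.count_crashed_meetings():
        recovered = await recovery.recover_crashed_meetings()
        logger.info("Marked %d interrupted meetings as ERROR", len(recovered))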
File: src/noteflow/application/services/trigger_service.py
"""Trigger evaluation and decision service.
Orchestrate trigger detection with rate limiting and snooze support.
"""
from __future__ import annotations
import logging
import time
from dataclasses import dataclass
from typing import TYPE_CHECKING
from noteflow.domain.triggers.entities import TriggerAction, TriggerDecision, TriggerSignal
if TYPE_CHECKING:
from noteflow.domain.triggers.ports import SignalProvider
logger = logging.getLogger(__name__)
@dataclass
class TriggerServiceSettings:
"""Configuration for trigger service.
Attributes:
enabled: Whether trigger detection is enabled.
auto_start_enabled: Whether to auto-start recording at high confidence.
rate_limit_seconds: Minimum seconds between trigger prompts.
snooze_seconds: Default snooze duration.
threshold_ignore: Confidence below which triggers are ignored.
threshold_auto_start: Confidence at or above which auto-start is allowed.
"""
enabled: bool
auto_start_enabled: bool
rate_limit_seconds: int
snooze_seconds: int
threshold_ignore: float
threshold_auto_start: float
def __post_init__(self) -> None:
if self.threshold_auto_start < self.threshold_ignore:
msg = "threshold_auto_start must be >= threshold_ignore"
raise ValueError(msg)
class TriggerService:
"""Orchestrate trigger detection with rate limiting and snooze.
Evaluates all signal providers and determines the appropriate action
based on combined confidence scores, rate limits, and snooze state.
Threshold behavior is driven by TriggerServiceSettings:
- Confidence < threshold_ignore: IGNORE
- Confidence >= threshold_auto_start: AUTO_START (if enabled, else NOTIFY)
- Otherwise: NOTIFY
"""
def __init__(
self,
providers: list[SignalProvider],
settings: TriggerServiceSettings,
) -> None:
"""Initialize trigger service.
Args:
providers: List of signal providers to evaluate.
settings: Configuration settings for trigger behavior.
"""
self._providers = providers
self._settings = settings
self._last_prompt: float | None = None
self._snoozed_until: float | None = None
@property
def is_enabled(self) -> bool:
"""Check if trigger service is enabled."""
return self._settings.enabled
@property
def is_snoozed(self) -> bool:
"""Check if triggers are currently snoozed."""
if self._snoozed_until is None:
return False
return time.monotonic() < self._snoozed_until
@property
def snooze_remaining_seconds(self) -> float:
"""Get remaining snooze time in seconds, or 0 if not snoozed."""
if self._snoozed_until is None:
return 0.0
remaining = self._snoozed_until - time.monotonic()
return max(0.0, remaining)
def evaluate(self) -> TriggerDecision:
"""Evaluate all providers and determine action.
Returns:
TriggerDecision with action and confidence details.
"""
now = time.monotonic()
# Check if disabled
if not self._settings.enabled:
return self._make_decision(TriggerAction.IGNORE, 0.0, ())
# Check if snoozed
if self._snoozed_until is not None and now < self._snoozed_until:
return self._make_decision(TriggerAction.IGNORE, 0.0, ())
# Collect signals from all enabled providers
signals = []
for provider in self._providers:
if not provider.is_enabled():
continue
if signal := provider.get_signal():
signals.append(signal)
# Calculate total confidence
confidence = sum(s.weight for s in signals)
# Determine action
action = self._determine_action(confidence, now)
# Record prompt time for rate limiting
if action in (TriggerAction.NOTIFY, TriggerAction.AUTO_START):
self._last_prompt = now
logger.info(
"Trigger %s: confidence=%.2f, signals=%d",
action.value,
confidence,
len(signals),
)
return self._make_decision(action, confidence, tuple(signals))
def _determine_action(self, confidence: float, now: float) -> TriggerAction:
"""Determine action based on confidence and rate limits.
Args:
confidence: Total confidence from all signals.
now: Current monotonic time.
Returns:
TriggerAction to take.
"""
# Check threshold_ignore first
if confidence < self._settings.threshold_ignore:
return TriggerAction.IGNORE
# AUTO_START bypasses rate limit (high-confidence trigger should not be delayed)
if confidence >= self._settings.threshold_auto_start and self._settings.auto_start_enabled:
return TriggerAction.AUTO_START
# Rate limit applies only to NOTIFY actions
if self._last_prompt is not None:
elapsed = now - self._last_prompt
if elapsed < self._settings.rate_limit_seconds:
return TriggerAction.IGNORE
return TriggerAction.NOTIFY
def _make_decision(
self,
action: TriggerAction,
confidence: float,
signals: tuple[TriggerSignal, ...],
) -> TriggerDecision:
"""Create a TriggerDecision with the given parameters."""
return TriggerDecision(
action=action,
confidence=confidence,
signals=signals,
)
def snooze(self, seconds: int | None = None) -> None:
"""Snooze triggers for the specified duration.
Args:
seconds: Snooze duration in seconds (uses default if None).
"""
duration = seconds if seconds is not None else self._settings.snooze_seconds
self._snoozed_until = time.monotonic() + duration
logger.info("Triggers snoozed for %d seconds", duration)
def clear_snooze(self) -> None:
"""Clear any active snooze."""
if self._snoozed_until is not None:
self._snoozed_until = None
logger.info("Trigger snooze cleared")
def set_enabled(self, enabled: bool) -> None:
"""Enable or disable trigger detection.
Args:
enabled: Whether triggers should be enabled.
"""
self._settings.enabled = enabled
logger.info("Triggers %s", "enabled" if enabled else "disabled")
def set_auto_start(self, enabled: bool) -> None:
"""Enable or disable auto-start on high confidence.
Args:
enabled: Whether auto-start should be enabled.
"""
self._settings.auto_start_enabled = enabled
logger.info("Auto-start %s", "enabled" if enabled else "disabled")
File: src/noteflow/application/__init__.py
"""NoteFlow application layer.
Contains application services that orchestrate use cases.
"""
File: src/noteflow/cli/__init__.py
"""NoteFlow CLI tools."""
File: src/noteflow/cli/__main__.py
"""Main entry point for NoteFlow CLI."""
from noteflow.cli.retention import main
if __name__ == "__main__":
main()
File: src/noteflow/client/components/_thread_mixin.py
"""Mixin for background worker thread lifecycle management.
Provides standardized thread start/stop patterns for UI components
that need background polling or timer threads.
"""
from __future__ import annotations
import threading
from collections.abc import Callable
class BackgroundWorkerMixin:
"""Mixin providing background worker thread lifecycle management.
Manages thread creation, start, stop, and cleanup for components
that need background polling loops.
Usage:
class MyComponent(BackgroundWorkerMixin):
def __init__(self):
self._init_worker()
def start_polling(self):
self._start_worker(self._poll_loop, "MyPoller")
def stop_polling(self):
self._stop_worker()
def _poll_loop(self):
while self._should_run():
# Do work
self._wait_interval(0.1)
"""
_worker_thread: threading.Thread | None
_stop_event: threading.Event
def _init_worker(self) -> None:
"""Initialize worker attributes.
Call this in __init__ of classes using this mixin.
"""
self._worker_thread = None
self._stop_event = threading.Event()
def _start_worker(self, target: Callable[[], None], name: str) -> None:
"""Start background worker thread.
No-op if worker is already running.
Args:
target: Callable to run in background thread.
name: Thread name for debugging.
"""
if self._worker_thread and self._worker_thread.is_alive():
return
self._stop_event.clear()
self._worker_thread = threading.Thread(
target=target,
daemon=True,
name=name,
)
self._worker_thread.start()
def _stop_worker(self, timeout: float = 1.0) -> None:
"""Stop background worker thread.
Signals stop event and waits for thread to finish.
Args:
timeout: Maximum seconds to wait for thread join.
"""
self._stop_event.set()
if self._worker_thread:
self._worker_thread.join(timeout=timeout)
self._worker_thread = None
def _should_run(self) -> bool:
"""Check if worker loop should continue.
Returns:
True if worker should continue, False if stop requested.
"""
return not self._stop_event.is_set()
def _wait_interval(self, seconds: float) -> None:
"""Wait for interval, returning early if stop requested.
Use this instead of time.sleep() in worker loops.
Args:
seconds: Seconds to wait (returns early if stop signaled).
"""
self._stop_event.wait(seconds)
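# --- Runnable sketch of the pattern from the class docstring above ---
# The component name and poll body are illustrative.
import time

class HeartbeatPoller(BackgroundWorkerMixin):
    def __init__(self) -> None:
        self._init_worker()

    def start(self) -> None:
        self._start_worker(self._loop, "HeartbeatPoller")

    def stop(self) -> None:
        self._stop_worker()

    def _loop(self) -> None:
        while self._should_run():
            # ... do one unit of work, then wait (wakes early on stop) ...
            self._wait_interval(0.5)

poller = HeartbeatPoller()
poller.start()
time.sleep(1.0)
poller.stop()  # sets the stop event; _wait_interval returns early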
File: src/noteflow/client/components/connection_panel.py
"""Server connection management panel.
Uses NoteFlowClient directly (not wrapped) and follows the same callback pattern.
Does not recreate any types; it imports and uses existing ones.
"""
from __future__ import annotations
import logging
import threading
from collections.abc import Callable
from typing import TYPE_CHECKING, Final
import flet as ft
# REUSE existing types - do not recreate
from noteflow.grpc.client import NoteFlowClient, ServerInfo
if TYPE_CHECKING:
from noteflow.client.state import AppState
logger = logging.getLogger(__name__)
RECONNECT_ATTEMPTS: Final[int] = 3
RECONNECT_DELAY_SECONDS: Final[float] = 2.0
class ConnectionPanelComponent:
"""Server connection management panel.
Uses NoteFlowClient directly (not wrapped) and follows the same callback pattern.
"""
def __init__(
self,
state: AppState,
on_connected: Callable[[NoteFlowClient, ServerInfo], None] | None = None,
on_disconnected: Callable[[], None] | None = None,
on_transcript_callback: Callable[..., None] | None = None,
on_connection_change_callback: Callable[[bool, str], None] | None = None,
) -> None:
"""Initialize connection panel.
Args:
state: Centralized application state.
on_connected: Callback when connected with client and server info.
on_disconnected: Callback when disconnected.
on_transcript_callback: Callback to pass to NoteFlowClient for transcripts.
on_connection_change_callback: Callback to pass to NoteFlowClient for connection changes.
"""
self._state = state
self._on_connected = on_connected
self._on_disconnected = on_disconnected
self._on_transcript_callback = on_transcript_callback
self._on_connection_change_callback = on_connection_change_callback
self._client: NoteFlowClient | None = None
self._manual_disconnect = False
self._auto_reconnect_enabled = False
self._reconnect_thread: threading.Thread | None = None
self._reconnect_stop_event = threading.Event()
self._reconnect_lock = threading.Lock()
self._reconnect_in_progress = False
self._suppress_connection_events = False
self._server_field: ft.TextField | None = None
self._connect_btn: ft.ElevatedButton | None = None
self._status_text: ft.Text | None = None
self._server_info_text: ft.Text | None = None
@property
def client(self) -> NoteFlowClient | None:
"""Get current gRPC client instance."""
return self._client
def build(self) -> ft.Column:
"""Build connection panel UI.
Returns:
Column containing connection controls and status.
"""
self._status_text = ft.Text(
"Not connected",
size=14,
color=ft.Colors.GREY_600,
)
self._server_info_text = ft.Text(
"",
size=12,
color=ft.Colors.GREY_500,
)
self._server_field = ft.TextField(
value=self._state.server_address,
label="Server Address",
width=300,
on_change=self._on_server_change,
)
self._connect_btn = ft.ElevatedButton(
"Connect",
on_click=self._on_connect_click,
icon=ft.Icons.CLOUD_OFF,
)
return ft.Column(
[
self._status_text,
self._server_info_text,
ft.Row([self._server_field, self._connect_btn]),
],
spacing=10,
)
def update_button_state(self) -> None:
"""Update connect button state based on connection status."""
if self._connect_btn:
if self._state.connected:
self._connect_btn.text = "Disconnect"
self._connect_btn.icon = ft.Icons.CLOUD_DONE
else:
self._connect_btn.text = "Connect"
self._connect_btn.icon = ft.Icons.CLOUD_OFF
self._state.request_update()
def disconnect(self) -> None:
"""Disconnect from server."""
self._manual_disconnect = True
self._auto_reconnect_enabled = False
self._cancel_reconnect()
if self._client:
self._suppress_connection_events = True
try:
self._client.disconnect()
finally:
self._suppress_connection_events = False
self._client = None
self._state.connected = False
self._state.server_info = None
self._update_status("Disconnected", ft.Colors.GREY_600)
self.update_button_state()
# Follow NoteFlowClient callback pattern with error handling
if self._on_disconnected:
try:
self._on_disconnected()
except Exception as e:
logger.error("on_disconnected callback error: %s", e)
def _on_server_change(self, e: ft.ControlEvent) -> None:
"""Handle server address change.
Args:
e: Control event.
"""
self._state.server_address = str(e.control.value)
def _on_connect_click(self, e: ft.ControlEvent) -> None:
"""Handle connect/disconnect button click.
Args:
e: Control event.
"""
if self._state.connected:
self.disconnect()
else:
self._manual_disconnect = False
self._cancel_reconnect()
threading.Thread(target=self._connect, daemon=True).start()
def _connect(self) -> None:
"""Connect to server (background thread)."""
self._update_status("Connecting...", ft.Colors.ORANGE)
try:
if self._client:
self._suppress_connection_events = True
try:
self._client.disconnect()
finally:
self._suppress_connection_events = False
# Create client with callbacks - use NoteFlowClient directly
self._client = NoteFlowClient(
server_address=self._state.server_address,
on_transcript=self._on_transcript_callback,
on_connection_change=self._handle_connection_change,
)
if self._client.connect(timeout=10.0):
if info := self._client.get_server_info():
self._state.connected = True
self._state.server_info = info
self._state.run_on_ui_thread(lambda: self._on_connect_success(info))
else:
self._update_status("Failed to get server info", ft.Colors.RED)
if self._client:
self._suppress_connection_events = True
try:
self._client.disconnect()
finally:
self._suppress_connection_events = False
self._client = None
self._state.connected = False
self._state.run_on_ui_thread(self.update_button_state)
else:
self._update_status("Connection failed", ft.Colors.RED)
except Exception as exc:
logger.error("Connection error: %s", exc)
self._update_status(f"Error: {exc}", ft.Colors.RED)
def _handle_connection_change(self, connected: bool, message: str) -> None:
"""Handle connection state change from NoteFlowClient.
Args:
connected: Connection state.
message: Status message.
"""
if self._suppress_connection_events:
return
self._state.connected = connected
if connected:
self._auto_reconnect_enabled = True
self._manual_disconnect = False
self._reconnect_stop_event.set()
self._reconnect_in_progress = False
self._state.run_on_ui_thread(
lambda: self._update_status(f"Connected: {message}", ft.Colors.GREEN)
)
elif self._manual_disconnect or not self._auto_reconnect_enabled:
self._state.run_on_ui_thread(
lambda: self._update_status(f"Disconnected: {message}", ft.Colors.RED)
)
elif not self._reconnect_in_progress:
self._start_reconnect_loop(message)
self._state.run_on_ui_thread(self.update_button_state)
# Forward to external callback if provided
if (callback := self._on_connection_change_callback) is not None:
try:
self._state.run_on_ui_thread(lambda: callback(connected, message))
except Exception as e:
logger.error("on_connection_change callback error: %s", e)
def _on_connect_success(self, info: ServerInfo) -> None:
"""Handle successful connection (UI thread).
Args:
info: Server info from connection.
"""
self._auto_reconnect_enabled = True
self._reconnect_stop_event.set()
self._reconnect_in_progress = False
self.update_button_state()
self._update_status("Connected", ft.Colors.GREEN)
# Update server info display
if self._server_info_text:
asr_status = "ready" if info.asr_ready else "not ready"
self._server_info_text.value = (
f"Server v{info.version} | "
f"ASR: {info.asr_model} ({asr_status}) | "
f"Active meetings: {info.active_meetings}"
)
self._state.request_update()
# Follow NoteFlowClient callback pattern with error handling
if self._on_connected and self._client:
try:
self._on_connected(self._client, info)
except Exception as e:
logger.error("on_connected callback error: %s", e)
def _start_reconnect_loop(self, message: str) -> None:
"""Start background reconnect attempts."""
with self._reconnect_lock:
if self._reconnect_in_progress:
return
self._reconnect_in_progress = True
self._reconnect_stop_event.clear()
self._reconnect_thread = threading.Thread(
target=self._reconnect_worker,
args=(message,),
daemon=True,
)
self._reconnect_thread.start()
def _reconnect_worker(self, message: str) -> None:
"""Attempt to reconnect several times before giving up."""
if not self._client:
self._reconnect_in_progress = False
return
# Stop streaming here to avoid audio queue growth while reconnecting.
self._client.stop_streaming()
for attempt in range(1, RECONNECT_ATTEMPTS + 1):
if self._reconnect_stop_event.is_set():
self._reconnect_in_progress = False
return
warning = f"Disconnected: {message}. Reconnecting ({attempt}/{RECONNECT_ATTEMPTS})"
if self._state.recording:
warning += " - recording will stop if not reconnected."
self._update_status(warning, ft.Colors.ORANGE)
if self._attempt_reconnect():
self._reconnect_in_progress = False
return
self._reconnect_stop_event.wait(RECONNECT_DELAY_SECONDS)
self._reconnect_in_progress = False
self._auto_reconnect_enabled = False
if self._state.recording:
final_message = "Reconnection failed. Recording stopped."
else:
final_message = "Reconnection failed."
self._finalize_disconnect(final_message)
def _attempt_reconnect(self) -> bool:
"""Attempt a single reconnect.
Returns:
True if reconnected successfully.
"""
if not self._client:
return False
self._suppress_connection_events = True
try:
self._client.disconnect()
finally:
self._suppress_connection_events = False
if not self._client.connect(timeout=10.0):
return False
info = self._client.get_server_info()
if not info:
self._suppress_connection_events = True
try:
self._client.disconnect()
finally:
self._suppress_connection_events = False
return False
self._state.connected = True
self._state.server_info = info
self._state.run_on_ui_thread(lambda: self._on_connect_success(info))
return True
def _finalize_disconnect(self, message: str) -> None:
"""Finalize disconnect after failed reconnect attempts."""
self._state.connected = False
self._state.server_info = None
self._update_status(message, ft.Colors.RED)
self._state.run_on_ui_thread(self.update_button_state)
def handle_disconnect() -> None:
if self._on_disconnected:
try:
self._on_disconnected()
except Exception as e:
logger.error("on_disconnected callback error: %s", e)
if self._client:
threading.Thread(target=self._disconnect_client, daemon=True).start()
self._state.run_on_ui_thread(handle_disconnect)
def _disconnect_client(self) -> None:
"""Disconnect client without triggering connection callbacks."""
if not self._client:
return
self._suppress_connection_events = True
try:
self._client.disconnect()
finally:
self._suppress_connection_events = False
self._client = None
def _cancel_reconnect(self) -> None:
"""Stop any in-progress reconnect attempt."""
self._reconnect_stop_event.set()
def _update_status(self, message: str, color: str) -> None:
"""Update status text.
Args:
message: Status message.
color: Text color.
"""
def update() -> None:
if self._status_text:
self._status_text.value = message
self._status_text.color = color
self._state.request_update()
self._state.run_on_ui_thread(update)
File: src/noteflow/client/components/meeting_library.py
"""Meeting library component for browsing and exporting meetings.
Uses MeetingInfo, ExportResult from grpc.client and format_datetime from _formatting.
Does not recreate any types; it imports and uses existing ones.
"""
from __future__ import annotations
import logging
import threading
import time
from collections.abc import Callable
from datetime import datetime
from typing import TYPE_CHECKING
import flet as ft
# REUSE existing formatting - do not recreate
from noteflow.infrastructure.export._formatting import format_datetime
if TYPE_CHECKING:
from noteflow.client.state import AppState
from noteflow.grpc.client import MeetingInfo, NoteFlowClient
logger = logging.getLogger(__name__)
class MeetingLibraryComponent:
"""Meeting library for browsing and exporting meetings.
Uses NoteFlowClient.list_meetings() and export_transcript() for data.
"""
DIARIZATION_POLL_INTERVAL_SECONDS: float = 2.0
def __init__(
self,
state: AppState,
get_client: Callable[[], NoteFlowClient | None],
on_meeting_selected: Callable[[MeetingInfo], None] | None = None,
) -> None:
"""Initialize meeting library.
Args:
state: Centralized application state.
get_client: Callable that returns current gRPC client or None.
on_meeting_selected: Callback when a meeting is selected.
"""
self._state = state
self._get_client = get_client
self._on_meeting_selected = on_meeting_selected
# UI elements
self._search_field: ft.TextField | None = None
self._list_view: ft.ListView | None = None
self._export_btn: ft.ElevatedButton | None = None
self._analyze_btn: ft.ElevatedButton | None = None
self._rename_btn: ft.ElevatedButton | None = None
self._refresh_btn: ft.IconButton | None = None
self._column: ft.Column | None = None
# Export dialog
self._export_dialog: ft.AlertDialog | None = None
self._format_dropdown: ft.Dropdown | None = None
# Analyze speakers dialog
self._analyze_dialog: ft.AlertDialog | None = None
self._num_speakers_field: ft.TextField | None = None
# Rename speakers dialog
self._rename_dialog: ft.AlertDialog | None = None
self._rename_fields: dict[str, ft.TextField] = {}
def build(self) -> ft.Column:
"""Build meeting library UI.
Returns:
Column containing search, list, and export controls.
"""
self._search_field = ft.TextField(
label="Search meetings",
prefix_icon=ft.Icons.SEARCH,
on_change=self._on_search_change,
expand=True,
)
self._refresh_btn = ft.IconButton(
icon=ft.Icons.REFRESH,
tooltip="Refresh meetings",
on_click=self._on_refresh_click,
)
self._export_btn = ft.ElevatedButton(
"Export",
icon=ft.Icons.DOWNLOAD,
on_click=self._show_export_dialog,
disabled=True,
)
self._analyze_btn = ft.ElevatedButton(
"Refine Speakers",
icon=ft.Icons.RECORD_VOICE_OVER,
on_click=self._show_analyze_dialog,
disabled=True,
)
self._rename_btn = ft.ElevatedButton(
"Rename Speakers",
icon=ft.Icons.EDIT,
on_click=self._show_rename_dialog,
disabled=True,
)
self._list_view = ft.ListView(
spacing=5,
padding=10,
height=200,
)
self._column = ft.Column(
[
ft.Row([self._search_field, self._refresh_btn]),
ft.Container(
content=self._list_view,
border=ft.border.all(1, ft.Colors.GREY_400),
border_radius=8,
),
ft.Row(
[self._analyze_btn, self._rename_btn, self._export_btn],
alignment=ft.MainAxisAlignment.END,
spacing=10,
),
],
spacing=10,
)
return self._column
def refresh_meetings(self) -> None:
"""Refresh meeting list from server."""
client = self._get_client()
if not client:
logger.warning("No gRPC client available")
return
try:
meetings = client.list_meetings(limit=50)
self._state.meetings = meetings
self._state.run_on_ui_thread(self._render_meetings)
except Exception as exc:
logger.error("Error fetching meetings: %s", exc)
def _on_search_change(self, e: ft.ControlEvent) -> None:
"""Handle search field change."""
self._render_meetings()
def _on_refresh_click(self, e: ft.ControlEvent) -> None:
"""Handle refresh button click."""
self.refresh_meetings()
def _render_meetings(self) -> None:
"""Render meeting list (UI thread only)."""
if not self._list_view:
return
self._list_view.controls.clear()
# Filter by search query
search_query = (self._search_field.value or "").lower() if self._search_field else ""
filtered_meetings = [m for m in self._state.meetings if search_query in m.title.lower()]
for meeting in filtered_meetings:
self._list_view.controls.append(self._create_meeting_row(meeting))
self._state.request_update()
def _create_meeting_row(self, meeting: MeetingInfo) -> ft.Container:
"""Create a row for a meeting.
Args:
meeting: Meeting info to display.
Returns:
Container with meeting details.
"""
# Format datetime from timestamp
created_dt = datetime.fromtimestamp(meeting.created_at) if meeting.created_at else None
date_str = format_datetime(created_dt)
# Format duration
duration = meeting.duration_seconds
duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else "--:--"
is_selected = self._state.selected_meeting and self._state.selected_meeting.id == meeting.id
row = ft.Row(
[
ft.Column(
[
ft.Text(meeting.title, weight=ft.FontWeight.BOLD, size=14),
ft.Text(
f"{date_str} | {meeting.state} | {meeting.segment_count} segments | {duration_str}",
size=11,
color=ft.Colors.GREY_600,
),
],
spacing=2,
expand=True,
),
]
)
return ft.Container(
content=row,
padding=10,
border_radius=4,
bgcolor=ft.Colors.BLUE_50 if is_selected else None,
on_click=lambda e, m=meeting: self._on_meeting_click(m),
ink=True,
)
def _on_meeting_click(self, meeting: MeetingInfo) -> None:
"""Handle meeting row click.
Args:
meeting: Selected meeting.
"""
self._state.selected_meeting = meeting
# Enable action buttons
if self._export_btn:
self._export_btn.disabled = False
if self._analyze_btn:
self._analyze_btn.disabled = not self._can_refine_speakers(meeting)
if self._rename_btn:
self._rename_btn.disabled = not self._can_refine_speakers(meeting)
# Re-render to update selection
self._render_meetings()
# Notify callback
if self._on_meeting_selected:
self._on_meeting_selected(meeting)
def _show_export_dialog(self, e: ft.ControlEvent) -> None:
"""Show export format selection dialog."""
if not self._state.selected_meeting:
return
self._format_dropdown = ft.Dropdown(
label="Export Format",
options=[
ft.dropdown.Option("markdown", "Markdown (.md)"),
ft.dropdown.Option("html", "HTML (.html)"),
],
value="markdown",
width=200,
)
self._export_dialog = ft.AlertDialog(
title=ft.Text("Export Transcript"),
content=ft.Column(
[
ft.Text(f"Meeting: {self._state.selected_meeting.title}"),
self._format_dropdown,
],
spacing=10,
tight=True,
),
actions=[
ft.TextButton("Cancel", on_click=self._close_export_dialog),
ft.ElevatedButton("Export", on_click=self._do_export),
],
actions_alignment=ft.MainAxisAlignment.END,
)
if self._state._page:
self._state._page.dialog = self._export_dialog
self._export_dialog.open = True
self._state.request_update()
def _close_export_dialog(self, e: ft.ControlEvent | None = None) -> None:
"""Close the export dialog."""
if self._export_dialog:
self._export_dialog.open = False
self._state.request_update()
def _do_export(self, e: ft.ControlEvent) -> None:
"""Perform the export."""
if not self._state.selected_meeting or not self._format_dropdown:
return
format_name = self._format_dropdown.value or "markdown"
meeting_id = self._state.selected_meeting.id
self._close_export_dialog()
client = self._get_client()
if not client:
logger.warning("No gRPC client available for export")
return
try:
if result := client.export_transcript(meeting_id, format_name):
self._save_export(result.content, result.file_extension)
else:
logger.error("Export failed - no result returned")
except Exception as exc:
logger.error("Error exporting transcript: %s", exc)
def _save_export(self, content: str, extension: str) -> None:
"""Save exported content to file.
Args:
content: Export content.
extension: File extension.
"""
if not self._state.selected_meeting:
return
# Create filename from meeting title
safe_title = "".join(
c if c.isalnum() or c in " -_" else "_" for c in self._state.selected_meeting.title
)
filename = f"{safe_title}.{extension}"
# Use FilePicker for save dialog
if self._state._page:
def on_save(e: ft.FilePickerResultEvent) -> None:
if e.path:
try:
with open(e.path, "w", encoding="utf-8") as f:
f.write(content)
logger.info("Exported to: %s", e.path)
except OSError as exc:
logger.error("Error saving export: %s", exc)
picker = ft.FilePicker(on_result=on_save)
self._state._page.overlay.append(picker)
self._state._page.update()
picker.save_file(
file_name=filename,
allowed_extensions=[extension],
)
# =========================================================================
# Speaker Refinement Methods
# =========================================================================
def _show_analyze_dialog(self, e: ft.ControlEvent) -> None:
"""Show speaker refinement dialog."""
if not self._state.selected_meeting:
return
if not self._can_refine_speakers(self._state.selected_meeting):
self._show_simple_dialog(
"Meeting still active",
ft.Text("Stop the meeting before refining speakers."),
)
return
self._num_speakers_field = ft.TextField(
label="Number of speakers (optional)",
hint_text="Leave empty for auto-detect",
width=200,
keyboard_type=ft.KeyboardType.NUMBER,
)
self._analyze_dialog = ft.AlertDialog(
title=ft.Text("Refine Speakers"),
content=ft.Column(
[
ft.Text(f"Meeting: {self._state.selected_meeting.title}"),
ft.Text(
"Refine speaker labels using offline diarization.",
size=12,
color=ft.Colors.GREY_600,
),
self._num_speakers_field,
],
spacing=10,
tight=True,
),
actions=[
ft.TextButton("Cancel", on_click=self._close_analyze_dialog),
ft.ElevatedButton("Analyze", on_click=self._do_analyze),
],
actions_alignment=ft.MainAxisAlignment.END,
)
if self._state._page:
self._state._page.dialog = self._analyze_dialog
self._analyze_dialog.open = True
self._state.request_update()
def _close_analyze_dialog(self, e: ft.ControlEvent | None = None) -> None:
"""Close the analyze dialog."""
if self._analyze_dialog:
self._analyze_dialog.open = False
self._state.request_update()
def _do_analyze(self, e: ft.ControlEvent) -> None:
"""Perform speaker analysis."""
if not self._state.selected_meeting:
return
# Parse number of speakers (optional)
num_speakers: int | None = None
if self._num_speakers_field and self._num_speakers_field.value:
try:
num_speakers = int(self._num_speakers_field.value)
if num_speakers < 1:
num_speakers = None
except ValueError:
logger.debug("Invalid speaker count input '%s', using auto-detection", self._num_speakers_field.value)
meeting_id = self._state.selected_meeting.id
self._close_analyze_dialog()
client = self._get_client()
if not client:
logger.warning("No gRPC client available for analysis")
return
# Show progress indicator
self._show_analysis_progress("Starting...")
try:
result = client.refine_speaker_diarization(meeting_id, num_speakers)
except Exception as exc:
logger.error("Error analyzing speakers: %s", exc)
self._show_analysis_error(str(exc))
return
if not result:
self._show_analysis_error("Analysis failed - no response from server")
return
if result.is_terminal:
if result.success:
self._show_analysis_result(result.segments_updated, result.speaker_ids)
else:
self._show_analysis_error(result.error_message or "Analysis failed")
return
if not result.job_id:
self._show_analysis_error(result.error_message or "Server did not return job ID")
return
# Job queued/running - poll for completion
self._show_analysis_progress(self._format_job_status(result.status))
self._start_diarization_poll(result.job_id)
def _show_analysis_progress(self, status: str = "Refining...") -> None:
"""Show refinement in progress indicator."""
if self._analyze_btn:
self._analyze_btn.disabled = True
self._analyze_btn.text = status
self._state.request_update()
def _show_analysis_result(self, segments_updated: int, speaker_ids: list[str]) -> None:
"""Show refinement success result.
Args:
segments_updated: Number of segments with speaker labels.
speaker_ids: List of detected speaker IDs.
"""
if self._analyze_btn:
self._analyze_btn.disabled = False
self._analyze_btn.text = "Refine Speakers"
speaker_list = ", ".join(speaker_ids) if speaker_ids else "None found"
result_dialog = ft.AlertDialog(
title=ft.Text("Refinement Complete"),
content=ft.Column(
[
ft.Text(f"Segments updated: {segments_updated}"),
ft.Text(f"Speakers found: {speaker_list}"),
ft.Text(
"Reload the meeting to see speaker labels.",
size=12,
color=ft.Colors.GREY_600,
italic=True,
),
],
spacing=5,
tight=True,
),
actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))],
)
if self._state._page:
self._state._page.dialog = result_dialog
result_dialog.open = True
self._state.request_update()
def _show_analysis_error(self, error_message: str) -> None:
"""Show analysis error.
Args:
error_message: Error description.
"""
if self._analyze_btn:
self._analyze_btn.disabled = False
self._analyze_btn.text = "Refine Speakers"
self._show_simple_dialog("Refinement Failed", ft.Text(error_message))
def _close_result_dialog(self, e: ft.ControlEvent) -> None:
"""Close any result dialog."""
if self._state._page and self._state._page.dialog:
self._state._page.dialog.open = False
self._state.request_update()
def _start_diarization_poll(self, job_id: str) -> None:
"""Start polling for diarization job completion."""
page = self._state._page
if page and hasattr(page, "run_thread"):
page.run_thread(lambda: self._poll_diarization_job(job_id))
return
threading.Thread(
target=self._poll_diarization_job,
args=(job_id,),
daemon=True,
name="diarization-poll",
).start()
def _poll_diarization_job(self, job_id: str) -> None:
"""Poll background diarization job until completion."""
client = self._get_client()
if not client:
self._state.run_on_ui_thread(
lambda: self._show_analysis_error("No gRPC client available for polling")
)
return
while True:
result = client.get_diarization_job_status(job_id)
if not result:
self._state.run_on_ui_thread(
lambda: self._show_analysis_error("Failed to fetch diarization status")
)
return
if result.is_terminal:
if result.success:
self._state.run_on_ui_thread(
lambda r=result: self._show_analysis_result(
r.segments_updated,
r.speaker_ids,
)
)
else:
self._state.run_on_ui_thread(
lambda r=result: self._show_analysis_error(
r.error_message or "Diarization failed"
)
)
return
# Update status text while running
self._state.run_on_ui_thread(
lambda r=result: self._show_analysis_progress(self._format_job_status(r.status))
)
time.sleep(self.DIARIZATION_POLL_INTERVAL_SECONDS)
@staticmethod
def _format_job_status(status: str) -> str:
"""Format job status for button label."""
return {
"queued": "Queued...",
"running": "Refining...",
}.get(status, "Refining...")
def _show_simple_dialog(self, title: str, content: ft.Control) -> None:
"""Show a simple dialog with title, content, and OK button.
Args:
title: Dialog title.
content: Dialog content control.
"""
dialog = ft.AlertDialog(
title=ft.Text(title),
content=content,
actions=[ft.TextButton("OK", on_click=self._close_result_dialog)],
)
if self._state._page:
self._state._page.dialog = dialog
dialog.open = True
self._state.request_update()
# =========================================================================
# Speaker Rename Methods
# =========================================================================
def _show_rename_dialog(self, e: ft.ControlEvent) -> None:
"""Show speaker rename dialog with current speaker IDs."""
if not self._state.selected_meeting:
return
if not self._can_refine_speakers(self._state.selected_meeting):
self._show_simple_dialog(
"Meeting still active",
ft.Text("Stop the meeting before renaming speakers."),
)
return
client = self._get_client()
if not client:
logger.warning("No gRPC client available")
return
# Get segments to extract distinct speaker IDs
meeting_id = self._state.selected_meeting.id
segments = client.get_meeting_segments(meeting_id)
# Extract distinct speaker IDs
speaker_ids = sorted({s.speaker_id for s in segments if s.speaker_id})
if not speaker_ids:
self._show_no_speakers_message()
return
# Create text fields for each speaker
self._rename_fields.clear()
speaker_controls: list[ft.Control] = []
for speaker_id in speaker_ids:
field = ft.TextField(
label=f"{speaker_id}",
hint_text="Enter new name",
width=200,
)
self._rename_fields[speaker_id] = field
speaker_controls.append(
ft.Row(
[
ft.Text(speaker_id, width=120, size=12),
ft.Icon(ft.Icons.ARROW_RIGHT, size=16),
field,
],
alignment=ft.MainAxisAlignment.START,
)
)
self._rename_dialog = ft.AlertDialog(
title=ft.Text("Rename Speakers"),
content=ft.Column(
[
ft.Text(f"Meeting: {self._state.selected_meeting.title}"),
ft.Text(
"Enter new names for speakers (leave blank to keep current):",
size=12,
color=ft.Colors.GREY_600,
),
ft.Divider(),
*speaker_controls,
],
spacing=10,
scroll=ft.ScrollMode.AUTO,
height=300,
),
actions=[
ft.TextButton("Cancel", on_click=self._close_rename_dialog),
ft.ElevatedButton("Apply", on_click=self._do_rename),
],
actions_alignment=ft.MainAxisAlignment.END,
)
if self._state._page:
self._state._page.dialog = self._rename_dialog
self._rename_dialog.open = True
self._state.request_update()
def _close_rename_dialog(self, e: ft.ControlEvent | None = None) -> None:
"""Close the rename dialog."""
if self._rename_dialog:
self._rename_dialog.open = False
self._state.request_update()
def _show_no_speakers_message(self) -> None:
"""Show message when no speakers found."""
self._show_simple_dialog(
"No Speakers Found",
ft.Text(
"This meeting has no speaker labels. "
"Run 'Refine Speakers' first to identify speakers."
),
)
def _do_rename(self, e: ft.ControlEvent) -> None:
"""Apply speaker renames."""
if not self._state.selected_meeting:
return
client = self._get_client()
if not client:
logger.warning("No gRPC client available")
return
meeting_id = self._state.selected_meeting.id
self._close_rename_dialog()
# Collect renames (only non-empty values)
renames: list[tuple[str, str]] = []
for old_id, field in self._rename_fields.items():
new_name = (field.value or "").strip()
if new_name and new_name != old_id:
renames.append((old_id, new_name))
if not renames:
return
# Apply renames
total_updated = 0
errors: list[str] = []
for old_id, new_name in renames:
try:
result = client.rename_speaker(meeting_id, old_id, new_name)
if result and result.success:
total_updated += result.segments_updated
else:
errors.append(f"{old_id}: rename failed")
except Exception as exc:
logger.error("Error renaming speaker %s: %s", old_id, exc)
errors.append(f"{old_id}: {exc}")
# Show result
if errors:
self._show_rename_errors(errors)
else:
self._show_rename_success(total_updated, len(renames))
def _show_rename_success(self, segments_updated: int, speakers_renamed: int) -> None:
"""Show rename success message.
Args:
segments_updated: Total number of segments updated.
speakers_renamed: Number of speakers renamed.
"""
success_dialog = ft.AlertDialog(
title=ft.Text("Rename Complete"),
content=ft.Column(
[
ft.Text(f"Renamed {speakers_renamed} speaker(s)"),
ft.Text(f"Updated {segments_updated} segment(s)"),
ft.Text(
"Reload the meeting to see the new speaker names.",
size=12,
color=ft.Colors.GREY_600,
italic=True,
),
],
spacing=5,
tight=True,
),
actions=[ft.TextButton("OK", on_click=lambda e: self._close_result_dialog(e))],
)
if self._state._page:
self._state._page.dialog = success_dialog
success_dialog.open = True
self._state.request_update()
def _show_rename_errors(self, errors: list[str]) -> None:
"""Show rename errors.
Args:
errors: List of error messages.
"""
self._show_simple_dialog("Rename Errors", ft.Text("\n".join(errors)))
@staticmethod
def _can_refine_speakers(meeting: MeetingInfo) -> bool:
"""Return True when meeting is stopped/completed and safe to refine/rename."""
return meeting.state in {"stopped", "completed", "error"}
File: src/noteflow/client/components/playback_sync.py
"""Playback-transcript synchronization controller.
Polls playback position and updates transcript highlight state.
Follows RecordingTimerComponent pattern for background threading.
"""
from __future__ import annotations
import logging
import threading
from collections.abc import Callable
from typing import TYPE_CHECKING, Final
from noteflow.infrastructure.audio import PlaybackState
if TYPE_CHECKING:
from noteflow.client.state import AppState
logger = logging.getLogger(__name__)
POSITION_POLL_INTERVAL: Final[float] = 0.1 # 100ms for smooth highlighting
class PlaybackSyncController:
"""Synchronize playback position with transcript highlighting.
Polls playback position and updates state.highlighted_segment_index.
Triggers UI updates via state.run_on_ui_thread().
"""
def __init__(
self,
state: AppState,
on_highlight_change: Callable[[int | None], None] | None = None,
) -> None:
"""Initialize sync controller.
Args:
state: Centralized application state.
on_highlight_change: Callback when highlighted segment changes.
"""
self._state = state
self._on_highlight_change = on_highlight_change
self._sync_thread: threading.Thread | None = None
self._stop_event = threading.Event()
def start(self) -> None:
"""Start position sync polling."""
if self._sync_thread and self._sync_thread.is_alive():
return
self._stop_event.clear()
self._sync_thread = threading.Thread(
target=self._sync_loop,
daemon=True,
name="PlaybackSyncController",
)
self._sync_thread.start()
logger.debug("Started playback sync controller")
def stop(self) -> None:
"""Stop position sync polling."""
self._stop_event.set()
if self._sync_thread:
self._sync_thread.join(timeout=2.0)
self._sync_thread = None
logger.debug("Stopped playback sync controller")
def _sync_loop(self) -> None:
"""Background sync loop - polls position and updates highlight."""
while not self._stop_event.is_set():
playback = self._state.playback
if playback.state == PlaybackState.PLAYING:
position = playback.current_position
self._update_position(position)
elif playback.state == PlaybackState.STOPPED:
# Clear highlight when stopped
if self._state.highlighted_segment_index is not None:
self._state.highlighted_segment_index = None
self._state.run_on_ui_thread(self._notify_highlight_change)
self._stop_event.wait(POSITION_POLL_INTERVAL)
def _update_position(self, position: float) -> None:
"""Update state with current position and find matching segment."""
self._state.playback_position = position
new_index = self._state.find_segment_at_position(position)
old_index = self._state.highlighted_segment_index
if new_index != old_index:
self._state.highlighted_segment_index = new_index
self._state.run_on_ui_thread(self._notify_highlight_change)
def _notify_highlight_change(self) -> None:
"""Notify UI of highlight change (UI thread only)."""
if self._on_highlight_change:
try:
self._on_highlight_change(self._state.highlighted_segment_index)
except Exception as e:
logger.error("Highlight change callback error: %s", e)
self._state.request_update()
def seek_to_segment(self, segment_index: int) -> bool:
"""Seek playback to start of specified segment.
Args:
segment_index: Index into state.transcript_segments.
Returns:
True if seek was successful.
"""
segments = self._state.transcript_segments
if not (0 <= segment_index < len(segments)):
logger.warning("Invalid segment index: %d", segment_index)
return False
playback = self._state.playback
segment = segments[segment_index]
if playback.seek(segment.start_time):
self._state.highlighted_segment_index = segment_index
self._state.playback_position = segment.start_time
self._state.run_on_ui_thread(self._notify_highlight_change)
return True
return False
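The controller relies on a stop-event poll loop rather than a timer: `Event.wait(interval)` doubles as an interruptible sleep, so `stop()` takes effect without waiting out a full interval. A self-contained sketch of that pattern, runnable as-is:
```python
import threading
import time

stop = threading.Event()

def poll_loop(interval: float = 0.1) -> None:
    # Mirrors _sync_loop: do work, then sleep interruptibly.
    while not stop.is_set():
        # ... poll playback position and update highlight here ...
        stop.wait(interval)  # returns early the moment stop.set() runs

thread = threading.Thread(target=poll_loop, daemon=True, name="poll-demo")
thread.start()
time.sleep(0.35)
stop.set()                 # loop observes the event on its next check
thread.join(timeout=2.0)   # same join-with-timeout shutdown as stop()
```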
File: src/noteflow/client/components/vu_meter.py
"""VU meter component for audio level visualization.
Uses RmsLevelProvider from AppState (not a new instance).
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import flet as ft
import numpy as np
from numpy.typing import NDArray
if TYPE_CHECKING:
from noteflow.client.state import AppState
class VuMeterComponent:
"""Audio level visualization component.
Uses RmsLevelProvider from AppState (not a new instance).
"""
def __init__(self, state: AppState) -> None:
"""Initialize VU meter component.
Args:
state: Centralized application state with level_provider.
"""
self._state = state
# REUSE level_provider from state - do not create new instance
self._progress_bar: ft.ProgressBar | None = None
self._label: ft.Text | None = None
def build(self) -> ft.Row:
"""Build VU meter UI elements.
Returns:
Row containing progress bar and level label.
"""
self._progress_bar = ft.ProgressBar(
value=0,
width=300,
bar_height=20,
color=ft.Colors.GREEN,
bgcolor=ft.Colors.GREY_300,
)
self._label = ft.Text("-60 dB", size=12, width=60)
return ft.Row(
[
ft.Text("Level:", size=12),
self._progress_bar,
self._label,
]
)
def on_audio_frames(self, frames: NDArray[np.float32]) -> None:
"""Process incoming audio frames for level metering.
Uses state.level_provider.get_db() - existing RmsLevelProvider method.
Args:
frames: Audio samples as float32 array.
"""
# REUSE existing RmsLevelProvider from state
db_level = self._state.level_provider.get_db(frames)
self._state.current_db_level = db_level
self._state.run_on_ui_thread(self._update_display)
def _update_display(self) -> None:
"""Update VU meter display (UI thread only)."""
if not self._progress_bar or not self._label:
return
db = self._state.current_db_level
# Convert dB to 0-1 range (-60 to 0 dB)
normalized = max(0.0, min(1.0, (db + 60) / 60))
self._progress_bar.value = normalized
self._progress_bar.color = (
ft.Colors.RED if db > -6 else ft.Colors.YELLOW if db > -20 else ft.Colors.GREEN
)
self._label.value = f"{db:.0f} dB"
self._state.request_update()
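A quick self-contained check of the dB-to-bar mapping used in `_update_display`: -60 dB maps to 0.0, 0 dB maps to 1.0, and out-of-range values clamp.
```python
def normalize_db(db: float) -> float:
    # Same expression as _update_display's `normalized`.
    return max(0.0, min(1.0, (db + 60) / 60))

assert normalize_db(-60.0) == 0.0
assert normalize_db(-30.0) == 0.5
assert normalize_db(0.0) == 1.0
assert normalize_db(5.0) == 1.0    # clamped high
assert normalize_db(-80.0) == 0.0  # clamped low
```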
File: src/noteflow/client/__init__.py
"""NoteFlow client application."""
File: src/noteflow/client/_trigger_mixin.py
"""Trigger detection mixin for NoteFlow client.
Extracts trigger detection logic from app.py to keep file under 750 lines.
Handles meeting detection triggers via app audio activity and calendar proximity.
"""
from __future__ import annotations
import asyncio
import logging
from typing import TYPE_CHECKING, Protocol
import flet as ft
from noteflow.application.services import TriggerService, TriggerServiceSettings
from noteflow.config.settings import TriggerSettings, get_trigger_settings
from noteflow.domain.triggers import TriggerAction, TriggerDecision
from noteflow.infrastructure.triggers import (
AppAudioProvider,
AppAudioSettings,
CalendarProvider,
CalendarSettings,
)
from noteflow.infrastructure.triggers.calendar import parse_calendar_events
if TYPE_CHECKING:
from noteflow.client.state import AppState
logger = logging.getLogger(__name__)
class TriggerHost(Protocol):
"""Protocol for app hosting trigger mixin."""
_state: AppState
_trigger_settings: TriggerSettings | None
_trigger_service: TriggerService | None
_app_audio: AppAudioProvider | None
_calendar_provider: CalendarProvider | None
_trigger_poll_interval: float
_trigger_task: asyncio.Task | None
def _start_recording(self) -> None:
"""Start recording audio."""
...
def _ensure_audio_capture(self) -> bool:
"""Ensure audio capture is running."""
...
class TriggerMixin:
"""Mixin providing trigger detection functionality.
Requires host to implement TriggerHost protocol.
"""
def _initialize_triggers(self: TriggerHost) -> None:
"""Initialize trigger settings, providers, and service."""
self._trigger_settings = get_trigger_settings()
self._state.trigger_enabled = self._trigger_settings.trigger_enabled
self._trigger_poll_interval = self._trigger_settings.trigger_poll_interval_seconds
meeting_apps = {app.lower() for app in self._trigger_settings.trigger_meeting_apps}
suppressed_apps = {app.lower() for app in self._trigger_settings.trigger_suppressed_apps}
app_audio_settings = AppAudioSettings(
enabled=self._trigger_settings.trigger_audio_enabled,
threshold_db=self._trigger_settings.trigger_audio_threshold_db,
window_seconds=self._trigger_settings.trigger_audio_window_seconds,
min_active_ratio=self._trigger_settings.trigger_audio_min_active_ratio,
min_samples=self._trigger_settings.trigger_audio_min_samples,
max_history=self._trigger_settings.trigger_audio_max_history,
weight=self._trigger_settings.trigger_weight_audio,
meeting_apps=meeting_apps,
suppressed_apps=suppressed_apps,
)
calendar_settings = CalendarSettings(
enabled=self._trigger_settings.trigger_calendar_enabled,
weight=self._trigger_settings.trigger_weight_calendar,
lookahead_minutes=self._trigger_settings.trigger_calendar_lookahead_minutes,
lookbehind_minutes=self._trigger_settings.trigger_calendar_lookbehind_minutes,
events=parse_calendar_events(self._trigger_settings.trigger_calendar_events),
)
self._app_audio = AppAudioProvider(app_audio_settings)
self._calendar_provider = CalendarProvider(calendar_settings)
self._trigger_service = TriggerService(
providers=[self._app_audio, self._calendar_provider],
settings=TriggerServiceSettings(
enabled=self._trigger_settings.trigger_enabled,
auto_start_enabled=self._trigger_settings.trigger_auto_start,
rate_limit_seconds=self._trigger_settings.trigger_rate_limit_minutes * 60,
snooze_seconds=self._trigger_settings.trigger_snooze_minutes * 60,
threshold_ignore=self._trigger_settings.trigger_confidence_ignore,
threshold_auto_start=self._trigger_settings.trigger_confidence_auto,
),
)
def _should_keep_capture_running(self: TriggerHost) -> bool:
"""Return True if background audio capture should remain active."""
return False
async def _trigger_check_loop(self: TriggerHost) -> None:
"""Background loop to check trigger conditions.
Runs every poll interval while not recording.
"""
check_interval = self._trigger_poll_interval
try:
while True:
await asyncio.sleep(check_interval)
# Skip if recording or trigger pending
if self._state.recording or self._state.trigger_pending:
continue
# Skip if triggers disabled
if not self._state.trigger_enabled or not self._trigger_service:
continue
# Evaluate triggers
decision = self._trigger_service.evaluate()
self._state.trigger_decision = decision
if decision.action == TriggerAction.IGNORE:
continue
if decision.action == TriggerAction.AUTO_START:
# Auto-start if connected
if self._state.connected:
logger.info(
"Auto-starting recording (confidence=%.2f)", decision.confidence
)
self._start_recording()
elif decision.action == TriggerAction.NOTIFY:
# Show prompt to user
self._show_trigger_prompt(decision)
except asyncio.CancelledError:
logger.debug("Trigger loop cancelled")
raise
def _show_trigger_prompt(self: TriggerHost, decision: TriggerDecision) -> None:
"""Show trigger notification prompt to user.
Args:
decision: Trigger decision with confidence and signals.
"""
self._state.trigger_pending = True
# Build signal description
signal_desc = ", ".join(s.app_name or s.source.value for s in decision.signals)
def handle_start(_: ft.ControlEvent) -> None:
self._state.trigger_pending = False
if dialog.open:
dialog.open = False
self._state.request_update()
if self._state.connected:
self._start_recording()
def handle_snooze(_: ft.ControlEvent) -> None:
self._state.trigger_pending = False
if self._trigger_service:
self._trigger_service.snooze()
if dialog.open:
dialog.open = False
self._state.request_update()
def handle_dismiss(_: ft.ControlEvent) -> None:
self._state.trigger_pending = False
if dialog.open:
dialog.open = False
self._state.request_update()
dialog = ft.AlertDialog(
title=ft.Text("Meeting Detected"),
content=ft.Text(
"Detected: "
f"{signal_desc}\n"
f"Confidence: {decision.confidence:.0%}\n\n"
"Start recording?"
),
actions=[
ft.TextButton("Start", on_click=handle_start),
ft.TextButton("Snooze", on_click=handle_snooze),
ft.TextButton("Dismiss", on_click=handle_dismiss),
],
actions_alignment=ft.MainAxisAlignment.END,
)
if self._state._page:
self._state._page.dialog = dialog
dialog.open = True
self._state.request_update()
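A composition sketch showing how a host class might satisfy the `TriggerHost` protocol; this `DemoApp` is hypothetical, and the real host lives in `app.py`.
```python
class DemoApp(TriggerMixin):
    """Hypothetical host providing the attributes TriggerHost requires."""

    def __init__(self, state: AppState) -> None:
        self._state = state
        self._trigger_settings = None
        self._trigger_service = None
        self._app_audio = None
        self._calendar_provider = None
        self._trigger_poll_interval = 5.0
        self._trigger_task = None
        self._initialize_triggers()  # provided by the mixin

    def _start_recording(self) -> None:
        ...  # the real app starts capture and gRPC streaming here

    def _ensure_audio_capture(self) -> bool:
        return True
```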
File: src/noteflow/core/__init__.py
"""Core types and protocols for NoteFlow."""
File: src/noteflow/domain/entities/__init__.py
"""Domain entities for NoteFlow."""
from .annotation import Annotation
from .meeting import Meeting
from .segment import Segment, WordTiming
from .summary import ActionItem, KeyPoint, Summary
__all__ = [
"ActionItem",
"Annotation",
"KeyPoint",
"Meeting",
"Segment",
"Summary",
"WordTiming",
]
File: src/noteflow/domain/entities/annotation.py
"""Annotation entity for user-created annotations during recording.
Distinct from LLM-extracted ActionItem/KeyPoint in summaries.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId
@dataclass
class Annotation:
"""User-created annotation during recording.
Evidence-linked to specific transcript segments for navigation.
Unlike ActionItem/KeyPoint (LLM-extracted from Summary), annotations
are created in real-time during recording and belong directly to Meeting.
"""
id: AnnotationId
meeting_id: MeetingId
annotation_type: AnnotationType
text: str
start_time: float
end_time: float
segment_ids: list[int] = field(default_factory=list)
created_at: datetime = field(default_factory=datetime.now)
# Database primary key (set after persistence)
db_id: int | None = None
def __post_init__(self) -> None:
"""Validate annotation data."""
if self.end_time < self.start_time:
raise ValueError(
f"end_time ({self.end_time}) must be >= start_time ({self.start_time})"
)
@property
def duration(self) -> float:
"""Annotation duration in seconds."""
return self.end_time - self.start_time
def has_segments(self) -> bool:
"""Check if annotation is linked to transcript segments."""
return len(self.segment_ids) > 0
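A usage sketch for the entity above; `AnnotationType.NOTE` is a hypothetical enum member, and the Id types are assumed to wrap UUIDs as elsewhere in `domain.value_objects`.
```python
from uuid import uuid4

ann = Annotation(
    id=AnnotationId(uuid4()),
    meeting_id=MeetingId(uuid4()),
    annotation_type=AnnotationType.NOTE,  # hypothetical member
    text="Follow up on budget",
    start_time=12.5,
    end_time=18.0,
    segment_ids=[3, 4],
)
assert ann.duration == 5.5
assert ann.has_segments()
# Swapping start_time/end_time would raise ValueError in __post_init__.
```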
File: src/noteflow/domain/entities/meeting.py
"""Meeting aggregate root entity."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import TYPE_CHECKING
from uuid import UUID, uuid4
from noteflow.domain.value_objects import MeetingId, MeetingState
if TYPE_CHECKING:
from noteflow.domain.entities.segment import Segment
from noteflow.domain.entities.summary import Summary
@dataclass
class Meeting:
"""Meeting aggregate root.
The central entity representing a recorded meeting with its
transcript segments and optional summary.
"""
id: MeetingId
title: str
state: MeetingState = MeetingState.CREATED
created_at: datetime = field(default_factory=datetime.now)
started_at: datetime | None = None
ended_at: datetime | None = None
segments: list[Segment] = field(default_factory=list)
summary: Summary | None = None
metadata: dict[str, str] = field(default_factory=dict)
wrapped_dek: bytes | None = None # Encrypted data encryption key
@classmethod
def create(
cls,
title: str = "",
metadata: dict[str, str] | None = None,
) -> Meeting:
"""Factory method to create a new meeting.
Args:
title: Optional meeting title.
metadata: Optional metadata dictionary.
Returns:
New Meeting instance.
"""
meeting_id = MeetingId(uuid4())
now = datetime.now()
if not title:
title = f"Meeting {now.strftime('%Y-%m-%d %H:%M')}"
return cls(
id=meeting_id,
title=title,
state=MeetingState.CREATED,
created_at=now,
metadata=metadata or {},
)
@classmethod
def from_uuid_str(
cls,
uuid_str: str,
title: str = "",
state: MeetingState = MeetingState.CREATED,
created_at: datetime | None = None,
started_at: datetime | None = None,
ended_at: datetime | None = None,
metadata: dict[str, str] | None = None,
wrapped_dek: bytes | None = None,
) -> Meeting:
"""Create meeting with existing UUID string.
Args:
uuid_str: UUID string for meeting ID.
title: Meeting title.
state: Meeting state.
created_at: Creation timestamp.
started_at: Start timestamp.
ended_at: End timestamp.
metadata: Meeting metadata.
wrapped_dek: Encrypted data encryption key.
Returns:
Meeting instance with specified ID.
"""
meeting_id = MeetingId(UUID(uuid_str))
return cls(
id=meeting_id,
title=title,
state=state,
created_at=created_at or datetime.now(),
started_at=started_at,
ended_at=ended_at,
metadata=metadata or {},
wrapped_dek=wrapped_dek,
)
def start_recording(self) -> None:
"""Transition to recording state.
Raises:
ValueError: If transition is not valid.
"""
if not self.state.can_transition_to(MeetingState.RECORDING):
raise ValueError(f"Cannot start recording from state {self.state.name}")
self.state = MeetingState.RECORDING
self.started_at = datetime.now()
def begin_stopping(self) -> None:
"""Transition to stopping state for graceful shutdown.
This intermediate state allows audio writers and other resources
to flush and close properly before the meeting is fully stopped.
Raises:
ValueError: If transition is not valid.
"""
if not self.state.can_transition_to(MeetingState.STOPPING):
raise ValueError(f"Cannot begin stopping from state {self.state.name}")
self.state = MeetingState.STOPPING
def stop_recording(self) -> None:
"""Transition to stopped state (from STOPPING).
Raises:
ValueError: If transition is not valid.
"""
if not self.state.can_transition_to(MeetingState.STOPPED):
raise ValueError(f"Cannot stop recording from state {self.state.name}")
self.state = MeetingState.STOPPED
if self.ended_at is None:
self.ended_at = datetime.now()
def complete(self) -> None:
"""Transition to completed state.
Raises:
ValueError: If transition is not valid.
"""
if not self.state.can_transition_to(MeetingState.COMPLETED):
raise ValueError(f"Cannot complete from state {self.state.name}")
self.state = MeetingState.COMPLETED
def mark_error(self) -> None:
"""Transition to error state."""
self.state = MeetingState.ERROR
def add_segment(self, segment: Segment) -> None:
"""Add a transcript segment.
Args:
segment: Segment to add.
"""
self.segments.append(segment)
def set_summary(self, summary: Summary) -> None:
"""Set the meeting summary.
Args:
summary: Summary to set.
"""
self.summary = summary
@property
def duration_seconds(self) -> float:
"""Calculate meeting duration in seconds."""
if self.ended_at and self.started_at:
return (self.ended_at - self.started_at).total_seconds()
if self.started_at:
return (datetime.now() - self.started_at).total_seconds()
return 0.0
@property
def next_segment_id(self) -> int:
"""Get the next available segment ID."""
return max(s.segment_id for s in self.segments) + 1 if self.segments else 0
@property
def segment_count(self) -> int:
"""Number of transcript segments."""
return len(self.segments)
@property
def full_transcript(self) -> str:
"""Concatenate all segment text."""
return " ".join(s.text for s in self.segments)
def is_active(self) -> bool:
"""Check if meeting is in an active state (created or recording).
Note: STOPPING is not considered active as it's transitioning to stopped.
"""
return self.state in (MeetingState.CREATED, MeetingState.RECORDING)
def has_summary(self) -> bool:
"""Check if meeting has a summary."""
return self.summary is not None
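A lifecycle walk-through of the aggregate's state machine, assuming `MeetingState.can_transition_to` permits the chain the methods above imply (CREATED to RECORDING to STOPPING to STOPPED to COMPLETED):
```python
meeting = Meeting.create(title="Weekly sync")
meeting.start_recording()  # CREATED -> RECORDING, sets started_at
meeting.begin_stopping()   # RECORDING -> STOPPING (lets writers flush)
meeting.stop_recording()   # STOPPING -> STOPPED, sets ended_at
meeting.complete()         # STOPPED -> COMPLETED

assert not meeting.is_active()
assert meeting.duration_seconds >= 0.0
assert meeting.next_segment_id == 0  # no segments added yet
```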
File: src/noteflow/domain/entities/summary.py
"""Summary-related entities for meeting summaries."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from noteflow.domain.value_objects import MeetingId
@dataclass
class KeyPoint:
"""A key point extracted from the meeting.
Evidence-linked to specific transcript segments for verification.
"""
text: str
segment_ids: list[int] = field(default_factory=list)
start_time: float = 0.0
end_time: float = 0.0
# Database primary key (set after persistence)
db_id: int | None = None
def has_evidence(self) -> bool:
"""Check if key point is backed by transcript evidence."""
return len(self.segment_ids) > 0
@dataclass
class ActionItem:
"""An action item extracted from the meeting.
Evidence-linked to specific transcript segments for verification.
"""
text: str
assignee: str = ""
due_date: datetime | None = None
priority: int = 0 # 0=unspecified, 1=low, 2=medium, 3=high
segment_ids: list[int] = field(default_factory=list)
# Database primary key (set after persistence)
db_id: int | None = None
def has_evidence(self) -> bool:
"""Check if action item is backed by transcript evidence."""
return len(self.segment_ids) > 0
def is_assigned(self) -> bool:
"""Check if action item has an assignee."""
return bool(self.assignee)
def has_due_date(self) -> bool:
"""Check if action item has a due date."""
return self.due_date is not None
@dataclass
class Summary:
"""Meeting summary entity.
Contains executive summary, key points, and action items,
all evidence-linked to transcript segments.
"""
meeting_id: MeetingId
executive_summary: str = ""
key_points: list[KeyPoint] = field(default_factory=list)
action_items: list[ActionItem] = field(default_factory=list)
generated_at: datetime | None = None
model_version: str = ""
# Database primary key (set after persistence)
db_id: int | None = None
def all_points_have_evidence(self) -> bool:
"""Check if all key points have transcript evidence."""
return all(kp.has_evidence() for kp in self.key_points)
def all_actions_have_evidence(self) -> bool:
"""Check if all action items have transcript evidence."""
return all(ai.has_evidence() for ai in self.action_items)
def is_fully_evidenced(self) -> bool:
"""Check if entire summary is backed by transcript evidence."""
return self.all_points_have_evidence() and self.all_actions_have_evidence()
@property
def key_point_count(self) -> int:
"""Number of key points."""
return len(self.key_points)
@property
def action_item_count(self) -> int:
"""Number of action items."""
return len(self.action_items)
@property
def unevidenced_points(self) -> list[KeyPoint]:
"""Key points without transcript evidence."""
return [kp for kp in self.key_points if not kp.has_evidence()]
@property
def unevidenced_actions(self) -> list[ActionItem]:
"""Action items without transcript evidence."""
return [ai for ai in self.action_items if not ai.has_evidence()]
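An evidence-linking sketch: a summary is "fully evidenced" only when every key point and action item cites at least one segment id.
```python
from uuid import uuid4

summary = Summary(
    meeting_id=MeetingId(uuid4()),
    executive_summary="Discussed Q3 roadmap.",
    key_points=[KeyPoint(text="Ship v2 in August", segment_ids=[1, 2])],
    action_items=[ActionItem(text="Draft release notes", assignee="sam")],
)
assert summary.all_points_have_evidence()
assert not summary.all_actions_have_evidence()  # no segment_ids cited
assert summary.unevidenced_actions[0].text == "Draft release notes"
assert not summary.is_fully_evidenced()
```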
File: src/noteflow/domain/ports/__init__.py
"""Domain ports (interfaces) for NoteFlow."""
from .repositories import (
AnnotationRepository,
MeetingRepository,
SegmentRepository,
SummaryRepository,
)
from .unit_of_work import UnitOfWork
__all__ = [
"AnnotationRepository",
"MeetingRepository",
"SegmentRepository",
"SummaryRepository",
"UnitOfWork",
]
File: src/noteflow/domain/ports/unit_of_work.py
"""Unit of Work protocol for transaction management."""
from __future__ import annotations
from typing import TYPE_CHECKING, Protocol, Self
if TYPE_CHECKING:
from .repositories import (
AnnotationRepository,
MeetingRepository,
SegmentRepository,
SummaryRepository,
)
class UnitOfWork(Protocol):
"""Unit of Work protocol for managing transactions across repositories.
Provides transactional consistency when operating on multiple
aggregates. Use as a context manager for automatic commit/rollback.
Example:
async with uow:
meeting = await uow.meetings.get(meeting_id)
await uow.segments.add(meeting_id, segment)
await uow.commit()
"""
annotations: AnnotationRepository
meetings: MeetingRepository
segments: SegmentRepository
summaries: SummaryRepository
async def __aenter__(self) -> Self:
"""Enter the unit of work context.
Returns:
Self for use in async with statement.
"""
...
async def __aexit__(
self,
exc_type: type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: object,
) -> None:
"""Exit the unit of work context.
Rolls back on exception, otherwise commits.
Args:
exc_type: Exception type if raised.
exc_val: Exception value if raised.
exc_tb: Exception traceback if raised.
"""
...
async def commit(self) -> None:
"""Commit the current transaction.
Persists all changes made within the unit of work.
"""
...
async def rollback(self) -> None:
"""Rollback the current transaction.
Discards all changes made within the unit of work.
"""
...
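A rollback sketch building on the docstring example: because `__aexit__` rolls back on exception, the title change below is discarded. Here `uow` can be any implementation, and the `meetings.get` call follows the repository API used in the docstring above.
```python
async def rename_meeting(uow: UnitOfWork, meeting_id: MeetingId) -> None:
    async with uow:
        meeting = await uow.meetings.get(meeting_id)
        meeting.title = "Renamed"
        raise RuntimeError("boom")  # triggers rollback; commit() never runs
```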
File: src/noteflow/domain/summarization/__init__.py
"""Summarization domain module.
Provides protocols and data transfer objects for meeting summarization.
"""
from noteflow.domain.summarization.ports import (
CitationVerificationResult,
CitationVerifier,
InvalidResponseError,
ProviderUnavailableError,
SummarizationError,
SummarizationRequest,
SummarizationResult,
SummarizationTimeoutError,
SummarizerProvider,
)
__all__ = [
"CitationVerificationResult",
"CitationVerifier",
"InvalidResponseError",
"ProviderUnavailableError",
"SummarizationError",
"SummarizationRequest",
"SummarizationResult",
"SummarizationTimeoutError",
"SummarizerProvider",
]
File: src/noteflow/domain/summarization/ports.py
"""Summarization provider port protocols."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from collections.abc import Sequence
from noteflow.domain.entities import Segment, Summary
from noteflow.domain.value_objects import MeetingId
@dataclass(frozen=True)
class SummarizationRequest:
"""Request for meeting summarization.
Contains the meeting context needed for summary generation.
"""
meeting_id: MeetingId
segments: Sequence[Segment]
max_key_points: int = 5
max_action_items: int = 10
@property
def transcript_text(self) -> str:
"""Concatenate all segment text into a single transcript."""
return " ".join(seg.text for seg in self.segments)
@property
def segment_count(self) -> int:
"""Number of segments in the request."""
return len(self.segments)
@property
def total_duration(self) -> float:
"""Total duration of all segments in seconds."""
if not self.segments:
return 0.0
return self.segments[-1].end_time - self.segments[0].start_time
@dataclass(frozen=True)
class SummarizationResult:
"""Result from summarization provider.
Contains the generated summary along with metadata.
"""
summary: Summary
model_name: str
provider_name: str
tokens_used: int | None = None
latency_ms: float = 0.0
@property
def is_success(self) -> bool:
"""Check if summarization succeeded with content."""
return bool(self.summary.executive_summary)
@dataclass(frozen=True)
class CitationVerificationResult:
"""Result of citation verification.
Identifies which citations are valid and which are invalid.
"""
is_valid: bool
invalid_key_point_indices: tuple[int, ...] = field(default_factory=tuple)
invalid_action_item_indices: tuple[int, ...] = field(default_factory=tuple)
missing_segment_ids: tuple[int, ...] = field(default_factory=tuple)
@property
def invalid_count(self) -> int:
"""Total number of invalid citations."""
return len(self.invalid_key_point_indices) + len(self.invalid_action_item_indices)
class SummarizerProvider(Protocol):
"""Protocol for LLM summarization providers.
Implementations must provide async summarization with evidence linking.
"""
@property
def provider_name(self) -> str:
"""Provider identifier (e.g., 'mock', 'ollama', 'openai')."""
...
@property
def is_available(self) -> bool:
"""Check if provider is configured and available."""
...
@property
def requires_cloud_consent(self) -> bool:
"""Return True if data is sent to external services.
Cloud providers must return True to ensure explicit user consent.
"""
...
async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
"""Generate evidence-linked summary from transcript segments.
Args:
request: Summarization request with segments and constraints.
Returns:
SummarizationResult with generated summary and metadata.
Raises:
SummarizationError: If summarization fails.
"""
...
class CitationVerifier(Protocol):
"""Protocol for verifying evidence citations.
Validates that segment_ids in summaries reference actual segments.
"""
def verify_citations(
self,
summary: Summary,
segments: Sequence[Segment],
) -> CitationVerificationResult:
"""Verify all segment_ids exist in the transcript.
Args:
summary: Summary with key points and action items to verify.
segments: Available transcript segments.
Returns:
CitationVerificationResult with validation status and details.
"""
...
class SummarizationError(Exception):
"""Base exception for summarization errors."""
pass
class ProviderUnavailableError(SummarizationError):
"""Provider is not available or not configured."""
pass
class SummarizationTimeoutError(SummarizationError):
"""Summarization operation timed out."""
pass
class InvalidResponseError(SummarizationError):
"""Provider returned an invalid or unparseable response."""
pass
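A minimal provider sketch that satisfies `SummarizerProvider` structurally; it is illustrative only, distinct from the real mock/ollama/openai providers elsewhere in the repository.
```python
class EchoSummarizer:
    """Toy provider: echoes the transcript head as the executive summary."""

    @property
    def provider_name(self) -> str:
        return "echo"

    @property
    def is_available(self) -> bool:
        return True

    @property
    def requires_cloud_consent(self) -> bool:
        return False  # nothing leaves the machine

    async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
        summary = Summary(
            meeting_id=request.meeting_id,
            executive_summary=request.transcript_text[:200],
        )
        return SummarizationResult(
            summary=summary,
            model_name="echo-0",
            provider_name=self.provider_name,
        )
```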
File: src/noteflow/domain/triggers/__init__.py
"""Trigger domain package."""
from noteflow.domain.triggers.entities import (
TriggerAction,
TriggerDecision,
TriggerSignal,
TriggerSource,
)
from noteflow.domain.triggers.ports import SignalProvider
__all__ = [
"SignalProvider",
"TriggerAction",
"TriggerDecision",
"TriggerSignal",
"TriggerSource",
]
File: src/noteflow/domain/triggers/ports.py
"""Trigger signal provider port protocol.
Define the interface for signal providers that detect meeting conditions.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource
class SignalProvider(Protocol):
"""Protocol for trigger signal providers.
Signal providers detect specific conditions (audio activity, foreground app, etc.)
and return weighted signals used in trigger evaluation.
Each provider:
- Has a specific source type
- Has a maximum weight contribution
- Can be enabled/disabled
- Returns a signal when conditions are met, None otherwise
"""
@property
def source(self) -> TriggerSource:
"""Get the source type for this provider."""
...
@property
def max_weight(self) -> float:
"""Get the maximum weight this provider can contribute."""
...
def get_signal(self) -> TriggerSignal | None:
"""Get current signal if conditions are met.
Returns:
TriggerSignal if provider conditions are satisfied, None otherwise.
"""
...
def is_enabled(self) -> bool:
"""Check if this provider is enabled.
Returns:
True if provider is enabled and can produce signals.
"""
...
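A structural sketch of a provider. Caution: `TriggerSignal`'s constructor fields and the `MANUAL` enum member are assumptions for illustration; `entities.py` defines the real shapes.
```python
class ManualProvider:
    """Hypothetical provider that fires only when toggled on."""

    def __init__(self) -> None:
        self.active = False

    @property
    def source(self) -> TriggerSource:
        return TriggerSource.MANUAL  # hypothetical member

    @property
    def max_weight(self) -> float:
        return 1.0

    def is_enabled(self) -> bool:
        return True

    def get_signal(self) -> TriggerSignal | None:
        if not self.active:
            return None
        return TriggerSignal(source=self.source, weight=self.max_weight)  # assumed fields
```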
File: src/noteflow/domain/__init__.py
"""NoteFlow domain layer."""
from .value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState
__all__ = ["AnnotationId", "AnnotationType", "MeetingId", "MeetingState"]
File: src/noteflow/grpc/proto/__init__.py
"""Generated protobuf and gRPC code."""
File: src/noteflow/grpc/__init__.py
"""NoteFlow gRPC server and client components."""
from noteflow.domain.value_objects import MeetingState
from .client import (
AnnotationInfo,
DiarizationResult,
ExportResult,
MeetingInfo,
NoteFlowClient,
RenameSpeakerResult,
ServerInfo,
TranscriptSegment,
)
from .meeting_store import MeetingStore
from .service import NoteFlowServicer
__all__ = [
"AnnotationInfo",
"DiarizationResult",
"ExportResult",
"MeetingInfo",
"MeetingState",
"MeetingStore",
"NoteFlowClient",
"NoteFlowServicer",
"RenameSpeakerResult",
"ServerInfo",
"TranscriptSegment",
]
File: src/noteflow/infrastructure/asr/__init__.py
"""ASR infrastructure module.
Provides speech-to-text transcription using faster-whisper.
"""
from noteflow.infrastructure.asr.dto import (
AsrResult,
PartialUpdate,
VadEvent,
VadEventType,
WordTiming,
)
from noteflow.infrastructure.asr.engine import FasterWhisperEngine
from noteflow.infrastructure.asr.protocols import AsrEngine
from noteflow.infrastructure.asr.segmenter import (
AudioSegment,
Segmenter,
SegmenterConfig,
SegmenterState,
)
from noteflow.infrastructure.asr.streaming_vad import (
EnergyVad,
EnergyVadConfig,
StreamingVad,
VadEngine,
)
__all__ = [
"AsrEngine",
"AsrResult",
"AudioSegment",
"EnergyVad",
"EnergyVadConfig",
"FasterWhisperEngine",
"PartialUpdate",
"Segmenter",
"SegmenterConfig",
"SegmenterState",
"StreamingVad",
"VadEngine",
"VadEvent",
"VadEventType",
"WordTiming",
]
File: src/noteflow/infrastructure/asr/dto.py
"""Data Transfer Objects for ASR.
These DTOs define the data structures used by ASR components.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
@dataclass(frozen=True)
class WordTiming:
"""Word-level timing information."""
word: str
start: float # Start time in seconds
end: float # End time in seconds
probability: float # Confidence (0.0-1.0)
def __post_init__(self) -> None:
"""Validate timing data."""
if self.end < self.start:
raise ValueError(f"Word end ({self.end}) < start ({self.start})")
if not 0.0 <= self.probability <= 1.0:
raise ValueError(f"Probability must be 0.0-1.0, got {self.probability}")
@dataclass(frozen=True)
class AsrResult:
"""ASR transcription result for a segment."""
text: str
start: float # Start time in seconds
end: float # End time in seconds
words: tuple[WordTiming, ...] = field(default_factory=tuple)
language: str = "en"
language_probability: float = 1.0
avg_logprob: float = 0.0
no_speech_prob: float = 0.0
def __post_init__(self) -> None:
"""Validate result data."""
if self.end < self.start:
raise ValueError(f"Segment end ({self.end}) < start ({self.start})")
@property
def duration(self) -> float:
"""Duration of the segment in seconds."""
return self.end - self.start
@dataclass
class PartialUpdate:
"""Unstable partial transcript (may be replaced)."""
text: str
start: float
end: float
def __post_init__(self) -> None:
"""Validate partial data."""
if self.end < self.start:
raise ValueError(f"Partial end ({self.end}) < start ({self.start})")
class VadEventType(Enum):
"""Voice Activity Detection event types."""
SPEECH_START = "speech_start"
SPEECH_END = "speech_end"
@dataclass(frozen=True)
class VadEvent:
"""Voice Activity Detection event.
Represents a speech/silence transition detected by VAD.
"""
event_type: VadEventType
timestamp: float # Seconds from stream start
confidence: float = 1.0 # Detection confidence (0.0-1.0)
def __post_init__(self) -> None:
"""Validate event data."""
if self.timestamp < 0:
raise ValueError(f"Timestamp must be non-negative, got {self.timestamp}")
if not 0.0 <= self.confidence <= 1.0:
raise ValueError(f"Confidence must be 0.0-1.0, got {self.confidence}")
File: src/noteflow/infrastructure/asr/engine.py
"""ASR engine implementation using faster-whisper.
Provides Whisper-based transcription with word-level timestamps.
"""
from __future__ import annotations
import asyncio
import logging
from collections.abc import Iterator
from typing import TYPE_CHECKING, Final
if TYPE_CHECKING:
import numpy as np
from numpy.typing import NDArray
from noteflow.infrastructure.asr.dto import AsrResult, WordTiming
logger = logging.getLogger(__name__)
# Available model sizes
VALID_MODEL_SIZES: Final[tuple[str, ...]] = (
"tiny",
"tiny.en",
"base",
"base.en",
"small",
"small.en",
"medium",
"medium.en",
"large-v1",
"large-v2",
"large-v3",
)
class FasterWhisperEngine:
"""faster-whisper based ASR engine.
Uses CTranslate2 for efficient Whisper inference on CPU or GPU.
"""
def __init__(
self,
compute_type: str = "int8",
device: str = "cpu",
num_workers: int = 1,
) -> None:
"""Initialize the engine.
Args:
compute_type: Computation type ("int8", "float16", "float32").
device: Device to use ("cpu" or "cuda").
num_workers: Number of worker threads.
"""
self._compute_type = compute_type
self._device = device
self._num_workers = num_workers
self._model = None
self._model_size: str | None = None
def load_model(self, model_size: str = "base") -> None:
"""Load the ASR model.
Args:
model_size: Model size (e.g., "tiny", "base", "small").
Raises:
ValueError: If model_size is invalid.
RuntimeError: If model loading fails.
"""
from faster_whisper import WhisperModel
if model_size not in VALID_MODEL_SIZES:
raise ValueError(
f"Invalid model size: {model_size}. Valid sizes: {', '.join(VALID_MODEL_SIZES)}"
)
logger.info(
"Loading Whisper model '%s' on %s with %s compute...",
model_size,
self._device,
self._compute_type,
)
try:
self._model = WhisperModel(
model_size,
device=self._device,
compute_type=self._compute_type,
num_workers=self._num_workers,
)
self._model_size = model_size
logger.info("Model loaded successfully")
except Exception as e:
raise RuntimeError(f"Failed to load model: {e}") from e
def transcribe(
self,
audio: NDArray[np.float32],
language: str | None = None,
) -> Iterator[AsrResult]:
"""Transcribe audio and yield results.
Args:
audio: Audio samples as float32 array (16kHz mono, normalized).
language: Optional language code (e.g., "en").
Yields:
AsrResult segments with word-level timestamps.
"""
if self._model is None:
raise RuntimeError("Model not loaded. Call load_model() first.")
# Transcribe with word timestamps
segments, info = self._model.transcribe(
audio,
language=language,
word_timestamps=True,
beam_size=5,
vad_filter=True, # Filter out non-speech
)
logger.debug(
"Detected language: %s (prob: %.2f)",
info.language,
info.language_probability,
)
for segment in segments:
# Convert word info to WordTiming objects
words: list[WordTiming] = []
if segment.words:
words = [
WordTiming(
word=word.word,
start=word.start,
end=word.end,
probability=word.probability,
)
for word in segment.words
]
yield AsrResult(
text=segment.text.strip(),
start=segment.start,
end=segment.end,
words=tuple(words),
language=info.language,
language_probability=info.language_probability,
avg_logprob=segment.avg_logprob,
no_speech_prob=segment.no_speech_prob,
)
async def transcribe_async(
self,
audio: NDArray[np.float32],
language: str | None = None,
) -> list[AsrResult]:
"""Transcribe audio asynchronously using executor.
Offloads blocking transcription to a thread pool executor to avoid
blocking the asyncio event loop.
Args:
audio: Audio samples as float32 array (16kHz mono, normalized).
language: Optional language code (e.g., "en").
Returns:
List of AsrResult segments with word-level timestamps.
"""
loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            lambda: list(self.transcribe(audio, language)),
        )
@property
def is_loaded(self) -> bool:
"""Return True if model is loaded."""
return self._model is not None
@property
def model_size(self) -> str | None:
"""Return the loaded model size, or None if not loaded."""
return self._model_size
def unload(self) -> None:
"""Unload the model to free memory."""
self._model = None
self._model_size = None
logger.info("Model unloaded")
@property
def compute_type(self) -> str:
"""Return the compute type."""
return self._compute_type
@property
def device(self) -> str:
"""Return the device."""
return self._device
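A usage sketch for the engine; the first `load_model` call downloads weights into faster-whisper's cache, and the silent placeholder buffer will typically yield no segments because `vad_filter=True` drops non-speech.
```python
import numpy as np

engine = FasterWhisperEngine(compute_type="int8", device="cpu")
engine.load_model("base")

audio = np.zeros(16000, dtype=np.float32)  # 1 s of silence (placeholder input)
for result in engine.transcribe(audio, language="en"):
    print(f"[{result.start:.2f}-{result.end:.2f}] {result.text}")

engine.unload()
```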
File: src/noteflow/infrastructure/asr/protocols.py
"""ASR protocols defining contracts for ASR components."""
from __future__ import annotations
from collections.abc import Iterator
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
import numpy as np
from numpy.typing import NDArray
from noteflow.infrastructure.asr.dto import AsrResult
class AsrEngine(Protocol):
"""Protocol for ASR transcription engine.
Implementations should handle model loading, caching, and inference.
"""
def load_model(self, model_size: str = "base") -> None:
"""Load the ASR model.
Downloads the model if not cached.
Args:
model_size: Model size ("tiny", "base", "small", "medium", "large").
Raises:
ValueError: If model_size is invalid.
RuntimeError: If model loading fails.
"""
...
def transcribe(
self,
audio: NDArray[np.float32],
language: str | None = None,
) -> Iterator[AsrResult]:
"""Transcribe audio and yield results.
Args:
audio: Audio samples as float32 array (16kHz mono, normalized).
language: Optional language code (e.g., "en"). Auto-detected if None.
Yields:
AsrResult segments.
Raises:
RuntimeError: If model not loaded.
"""
...
@property
def is_loaded(self) -> bool:
"""Return True if model is loaded."""
...
@property
def model_size(self) -> str | None:
"""Return the loaded model size, or None if not loaded."""
...
def unload(self) -> None:
"""Unload the model to free memory."""
...
File: src/noteflow/infrastructure/audio/capture.py
"""Audio capture implementation using sounddevice.
Provide cross-platform audio input capture with device handling.
"""
from __future__ import annotations
import logging
import time
from typing import TYPE_CHECKING
import numpy as np
import sounddevice as sd
from noteflow.infrastructure.audio.dto import AudioDeviceInfo, AudioFrameCallback
if TYPE_CHECKING:
from numpy.typing import NDArray
logger = logging.getLogger(__name__)
class SoundDeviceCapture:
"""sounddevice-based implementation of AudioCapture.
Handle device enumeration, stream management, and device change detection.
Use PortAudio under the hood for cross-platform audio capture.
"""
def __init__(self) -> None:
"""Initialize the capture instance."""
self._stream: sd.InputStream | None = None
self._callback: AudioFrameCallback | None = None
self._device_id: int | None = None
self._sample_rate: int = 16000
self._channels: int = 1
def list_devices(self) -> list[AudioDeviceInfo]:
"""List available audio input devices.
Returns:
List of AudioDeviceInfo for all available input devices.
"""
devices: list[AudioDeviceInfo] = []
device_list = sd.query_devices()
# Get default input device index
try:
default_input = sd.default.device[0] # Input device index
except (TypeError, IndexError):
default_input = -1
devices.extend(
AudioDeviceInfo(
device_id=idx,
name=dev["name"],
channels=int(dev["max_input_channels"]),
sample_rate=int(dev["default_samplerate"]),
is_default=(idx == default_input),
)
for idx, dev in enumerate(device_list)
if int(dev["max_input_channels"]) > 0
)
return devices
def get_default_device(self) -> AudioDeviceInfo | None:
"""Get the default input device.
Returns:
Default input device info, or None if no input devices available.
"""
devices = self.list_devices()
for dev in devices:
if dev.is_default:
return dev
return devices[0] if devices else None
def start(
self,
device_id: int | None,
on_frames: AudioFrameCallback,
sample_rate: int = 16000,
channels: int = 1,
chunk_duration_ms: int = 100,
) -> None:
"""Start capturing audio from the specified device.
Args:
device_id: Device ID to capture from, or None for default device.
on_frames: Callback receiving (frames, timestamp) for each chunk.
sample_rate: Sample rate in Hz (default 16kHz for ASR).
channels: Number of channels (default 1 for mono).
chunk_duration_ms: Duration of each audio chunk in milliseconds.
Raises:
RuntimeError: If already capturing.
ValueError: If device_id is invalid.
"""
if self._stream is not None:
raise RuntimeError("Already capturing audio")
self._callback = on_frames
self._device_id = device_id
self._sample_rate = sample_rate
self._channels = channels
# Calculate block size from chunk duration
blocksize = int(sample_rate * chunk_duration_ms / 1000)
def _stream_callback(
indata: NDArray[np.float32],
frames: int,
time_info: object, # cffi CData from sounddevice, unused
status: sd.CallbackFlags,
) -> None:
"""Internal sounddevice callback."""
# Suppress unused parameter warnings
_ = frames, time_info
if status:
logger.warning("Audio stream status: %s", status)
if self._callback is not None:
# Copy the data and flatten to 1D array
audio_data = indata.copy().flatten().astype(np.float32)
timestamp = time.monotonic()
self._callback(audio_data, timestamp)
try:
self._stream = sd.InputStream(
device=device_id,
channels=channels,
samplerate=sample_rate,
blocksize=blocksize,
dtype=np.float32,
callback=_stream_callback,
)
self._stream.start()
logger.info(
"Started audio capture: device=%s, rate=%d, channels=%d, blocksize=%d",
device_id,
sample_rate,
channels,
blocksize,
)
except sd.PortAudioError as e:
self._stream = None
self._callback = None
raise RuntimeError(f"Failed to start audio capture: {e}") from e
def stop(self) -> None:
"""Stop audio capture.
Safe to call even if not capturing.
"""
if self._stream is not None:
try:
self._stream.stop()
self._stream.close()
except sd.PortAudioError as e:
logger.warning("Error stopping audio stream: %s", e)
finally:
self._stream = None
self._callback = None
logger.info("Stopped audio capture")
def is_capturing(self) -> bool:
"""Check if currently capturing audio.
Returns:
True if capture is active.
"""
return self._stream is not None and self._stream.active
@property
def current_device_id(self) -> int | None:
"""Get the current device ID being used for capture."""
return self._device_id
@property
def sample_rate(self) -> int:
"""Get the current sample rate."""
return self._sample_rate
@property
def channels(self) -> int:
"""Get the current number of channels."""
return self._channels
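Usage sketch (not part of the repository): enumerate input devices, capture two seconds from the default device through a frame callback, then stop. Assumes a working PortAudio input device is available.

import time
import numpy as np
from numpy.typing import NDArray
from noteflow.infrastructure.audio.capture import SoundDeviceCapture

def on_frames(frames: NDArray[np.float32], timestamp: float) -> None:
    # Each callback delivers ~100 ms of mono float32 audio at 16 kHz.
    print(f"{timestamp:.3f}: {len(frames)} samples, peak={np.abs(frames).max():.3f}")

capture = SoundDeviceCapture()
default = capture.get_default_device()
if default is not None:
    capture.start(device_id=default.device_id, on_frames=on_frames)
    time.sleep(2.0)  # Capture for two seconds
    capture.stop()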
File: src/noteflow/infrastructure/audio/dto.py
"""Data Transfer Objects for audio capture.
Define data structures used by audio capture components.
"""
from __future__ import annotations
from collections.abc import Callable
from dataclasses import dataclass
import numpy as np
from numpy.typing import NDArray
@dataclass(frozen=True)
class AudioDeviceInfo:
"""Information about an audio input device."""
device_id: int
name: str
channels: int
sample_rate: int
is_default: bool
@dataclass
class TimestampedAudio:
"""Audio frames with capture timestamp."""
frames: NDArray[np.float32]
timestamp: float # Monotonic time when captured
duration: float # Duration in seconds
def __post_init__(self) -> None:
"""Validate audio data."""
if self.duration < 0:
raise ValueError("Duration must be non-negative")
if self.timestamp < 0:
raise ValueError("Timestamp must be non-negative")
# Type alias for audio frame callback
AudioFrameCallback = Callable[[NDArray[np.float32], float], None]
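A minimal sketch of constructing a TimestampedAudio chunk, illustrating the relationship between frame count, sample rate, and the duration field (the caller is responsible for computing duration; the dataclass only validates non-negativity):

import time
import numpy as np
from noteflow.infrastructure.audio.dto import TimestampedAudio

SAMPLE_RATE = 16_000
frames = np.zeros(1_600, dtype=np.float32)  # 100 ms of silence at 16 kHz
chunk = TimestampedAudio(
    frames=frames,
    timestamp=time.monotonic(),
    duration=len(frames) / SAMPLE_RATE,  # 1600 / 16000 = 0.1 seconds
)
assert chunk.duration == 0.1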
File: src/noteflow/infrastructure/audio/playback.py
"""Audio playback implementation using sounddevice.
Provide cross-platform audio output playback from ring buffer audio.
"""
from __future__ import annotations
import logging
import threading
from enum import Enum, auto
from typing import TYPE_CHECKING
import numpy as np
import sounddevice as sd
from numpy.typing import NDArray
if TYPE_CHECKING:
from noteflow.infrastructure.audio.dto import TimestampedAudio
logger = logging.getLogger(__name__)
class PlaybackState(Enum):
"""Playback state machine states."""
STOPPED = auto()
PLAYING = auto()
PAUSED = auto()
class SoundDevicePlayback:
"""sounddevice-based implementation of AudioPlayback.
Handle audio output playback with position tracking and state management.
Thread-safe for UI callbacks.
"""
def __init__(self, sample_rate: int = 16000, channels: int = 1) -> None:
"""Initialize the playback instance.
Args:
sample_rate: Sample rate in Hz (default 16kHz for ASR audio).
channels: Number of channels (default 1 for mono).
"""
self._sample_rate = sample_rate
self._channels = channels
# Playback state
self._state = PlaybackState.STOPPED
self._lock = threading.Lock()
# Audio data
self._audio_data: NDArray[np.float32] | None = None
self._total_samples: int = 0
self._current_sample: int = 0
# Stream
self._stream: sd.OutputStream | None = None
def play(self, audio: list[TimestampedAudio]) -> None:
"""Start playback of audio chunks.
Args:
audio: List of TimestampedAudio chunks to play, ordered oldest to newest.
"""
if not audio:
logger.warning("No audio chunks to play")
return
with self._lock:
# Stop any existing playback
self._stop_internal()
# Concatenate all audio frames
frames = [chunk.frames for chunk in audio]
self._audio_data = np.concatenate(frames).astype(np.float32)
self._total_samples = len(self._audio_data)
self._current_sample = 0
# Create and start stream
self._start_stream()
self._state = PlaybackState.PLAYING
logger.info(
"Started playback: %d samples (%.2f seconds)",
self._total_samples,
self.total_duration,
)
def pause(self) -> None:
"""Pause playback.
Safe to call even if not playing.
"""
with self._lock:
if self._state == PlaybackState.PLAYING and self._stream is not None:
self._stream.stop()
self._state = PlaybackState.PAUSED
logger.debug("Paused playback at %.2f seconds", self.current_position)
def resume(self) -> None:
"""Resume paused playback.
No-op if not paused.
"""
with self._lock:
if self._state == PlaybackState.PAUSED and self._stream is not None:
self._stream.start()
self._state = PlaybackState.PLAYING
logger.debug("Resumed playback from %.2f seconds", self.current_position)
def stop(self) -> None:
"""Stop playback and reset position.
Safe to call even if not playing.
"""
with self._lock:
self._stop_internal()
def _stop_internal(self) -> None:
"""Internal stop without lock (caller must hold lock)."""
if self._stream is not None:
try:
self._stream.stop()
self._stream.close()
except sd.PortAudioError as e:
logger.warning("Error stopping playback stream: %s", e)
finally:
self._stream = None
self._state = PlaybackState.STOPPED
self._current_sample = 0
self._audio_data = None
self._total_samples = 0
logger.debug("Stopped playback")
def _start_stream(self) -> None:
"""Start the output stream (caller must hold lock)."""
def _stream_callback(
outdata: NDArray[np.float32],
frames: int,
time_info: object,
status: sd.CallbackFlags,
) -> None:
"""Internal sounddevice output callback."""
_ = time_info # Unused
if status:
logger.warning("Playback stream status: %s", status)
with self._lock:
if self._audio_data is None or self._state != PlaybackState.PLAYING:
# Output silence
outdata.fill(0)
return
# Calculate how many samples we can provide
available = self._total_samples - self._current_sample
to_copy = min(frames, available)
                if to_copy > 0:
                    # Copy audio data to the output buffer, broadcasting the
                    # mono signal across all channels (writing only column 0,
                    # as before, left other channels unzeroed when channels > 1)
                    outdata[:to_copy] = self._audio_data[
                        self._current_sample : self._current_sample + to_copy
                    ].reshape(-1, 1)
self._current_sample += to_copy
# Fill remaining with silence
if to_copy < frames:
outdata[to_copy:] = 0
# Check if playback is complete
if self._current_sample >= self._total_samples:
# Schedule stop on another thread to avoid deadlock
threading.Thread(target=self._on_playback_complete, daemon=True).start()
try:
self._stream = sd.OutputStream(
channels=self._channels,
samplerate=self._sample_rate,
dtype=np.float32,
callback=_stream_callback,
)
self._stream.start()
except sd.PortAudioError as e:
self._stream = None
raise RuntimeError(f"Failed to start playback stream: {e}") from e
def _on_playback_complete(self) -> None:
"""Handle playback completion."""
logger.info("Playback completed")
self.stop()
def seek(self, position: float) -> bool:
"""Seek to a specific position in the audio.
Thread-safe. Can be called from any thread.
Args:
position: Position in seconds from start of audio.
Returns:
True if seek was successful, False if no audio loaded or position out of bounds.
"""
with self._lock:
if self._audio_data is None:
logger.warning("Cannot seek: no audio loaded")
return False
# Clamp position to valid range
max_position = self._total_samples / self._sample_rate
clamped_position = max(0.0, min(position, max_position))
# Convert to sample position
self._current_sample = int(clamped_position * self._sample_rate)
logger.debug(
"Seeked to %.2f seconds (sample %d)",
clamped_position,
self._current_sample,
)
return True
def is_playing(self) -> bool:
"""Check if currently playing audio.
Returns:
True if playback is active (not paused or stopped).
"""
with self._lock:
return self._state == PlaybackState.PLAYING
@property
def current_position(self) -> float:
"""Current playback position in seconds from start of loaded audio."""
with self._lock:
return self._current_sample / self._sample_rate
@property
def total_duration(self) -> float:
"""Total duration of loaded audio in seconds."""
with self._lock:
return self._total_samples / self._sample_rate
@property
def state(self) -> PlaybackState:
"""Current playback state."""
with self._lock:
return self._state
@property
def sample_rate(self) -> int:
"""Sample rate in Hz."""
return self._sample_rate
@property
def channels(self) -> int:
"""Number of channels."""
return self._channels
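Usage sketch (not part of the repository): play one second of a generated tone, seek back mid-playback, and wait for completion. Assumes a working audio output device.

import time
import numpy as np
from noteflow.infrastructure.audio.dto import TimestampedAudio
from noteflow.infrastructure.audio.playback import SoundDevicePlayback

SAMPLE_RATE = 16_000
# One second of a 440 Hz sine tone as a single timestamped chunk
t = np.arange(SAMPLE_RATE, dtype=np.float32) / SAMPLE_RATE
tone = (0.2 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)
chunk = TimestampedAudio(frames=tone, timestamp=time.monotonic(), duration=1.0)

playback = SoundDevicePlayback(sample_rate=SAMPLE_RATE)
playback.play([chunk])
time.sleep(0.5)
playback.seek(0.25)  # Jump back to the quarter-second mark
print(f"position: {playback.current_position:.2f}s of {playback.total_duration:.2f}s")
while playback.is_playing():
    time.sleep(0.05)  # stop() is triggered internally on completion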
File: src/noteflow/infrastructure/audio/protocols.py
"""Audio protocols defining contracts for audio components.
Define Protocol interfaces for audio capture, level metering, and buffering.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
import numpy as np
from numpy.typing import NDArray
from noteflow.infrastructure.audio.dto import (
AudioDeviceInfo,
AudioFrameCallback,
TimestampedAudio,
)
class AudioCapture(Protocol):
"""Protocol for audio input capture.
Implementations should handle device enumeration, stream management,
and device change detection.
"""
def list_devices(self) -> list[AudioDeviceInfo]:
"""List available audio input devices.
Returns:
List of AudioDeviceInfo for all available input devices.
"""
...
def start(
self,
device_id: int | None,
on_frames: AudioFrameCallback,
sample_rate: int = 16000,
channels: int = 1,
chunk_duration_ms: int = 100,
) -> None:
"""Start capturing audio from the specified device.
Args:
device_id: Device ID to capture from, or None for default device.
on_frames: Callback receiving (frames, timestamp) for each chunk.
sample_rate: Sample rate in Hz (default 16kHz for ASR).
channels: Number of channels (default 1 for mono).
chunk_duration_ms: Duration of each audio chunk in milliseconds.
Raises:
RuntimeError: If already capturing.
ValueError: If device_id is invalid.
"""
...
def stop(self) -> None:
"""Stop audio capture.
Safe to call even if not capturing.
"""
...
def is_capturing(self) -> bool:
"""Check if currently capturing audio.
Returns:
True if capture is active.
"""
...
class AudioLevelProvider(Protocol):
"""Protocol for computing audio levels (VU meter data)."""
def get_rms(self, frames: NDArray[np.float32]) -> float:
"""Calculate RMS level from audio frames.
Args:
frames: Audio samples as float32 array (normalized -1.0 to 1.0).
Returns:
RMS level normalized to 0.0-1.0 range.
"""
...
def get_db(self, frames: NDArray[np.float32]) -> float:
"""Calculate dB level from audio frames.
Args:
frames: Audio samples as float32 array (normalized -1.0 to 1.0).
Returns:
Level in dB (typically -60 to 0 range).
"""
...
class RingBuffer(Protocol):
"""Protocol for timestamped audio ring buffer.
Ring buffers store recent audio with timestamps for ASR processing
and playback sync.
"""
def push(self, audio: TimestampedAudio) -> None:
"""Add audio to the buffer.
Old audio is discarded if buffer exceeds max_duration.
Args:
audio: Timestamped audio chunk to add.
"""
...
def get_window(self, duration_seconds: float) -> list[TimestampedAudio]:
"""Get the last N seconds of audio.
Args:
duration_seconds: How many seconds of audio to retrieve.
Returns:
List of TimestampedAudio chunks, ordered oldest to newest.
"""
...
def clear(self) -> None:
"""Clear all audio from the buffer."""
...
@property
def duration(self) -> float:
"""Total duration of buffered audio in seconds."""
...
@property
def max_duration(self) -> float:
"""Maximum buffer duration in seconds."""
...
class AudioPlayback(Protocol):
"""Protocol for audio output playback.
Implementations should handle output device management, playback state,
and position tracking for sync with UI.
"""
def play(self, audio: list[TimestampedAudio]) -> None:
"""Start playback of audio chunks.
Args:
audio: List of TimestampedAudio chunks to play, ordered oldest to newest.
"""
...
def pause(self) -> None:
"""Pause playback.
Safe to call even if not playing.
"""
...
def resume(self) -> None:
"""Resume paused playback.
No-op if not paused.
"""
...
def stop(self) -> None:
"""Stop playback and reset position.
Safe to call even if not playing.
"""
...
def is_playing(self) -> bool:
"""Check if currently playing audio.
Returns:
True if playback is active (not paused or stopped).
"""
...
@property
def current_position(self) -> float:
"""Current playback position in seconds from start of loaded audio."""
...
@property
def total_duration(self) -> float:
"""Total duration of loaded audio in seconds."""
...
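Because these are structural Protocols, a test double needs no inheritance; it only has to match the method signatures. A hypothetical in-memory fake for unit tests (the FakeCapture name and its feed() test hook are illustrative, not part of the repository):

from __future__ import annotations
import numpy as np
from numpy.typing import NDArray
from noteflow.infrastructure.audio.dto import AudioDeviceInfo, AudioFrameCallback
from noteflow.infrastructure.audio.protocols import AudioCapture

class FakeCapture:
    """In-memory stand-in that structurally satisfies AudioCapture."""
    def __init__(self) -> None:
        self._active = False
        self._callback: AudioFrameCallback | None = None
    def list_devices(self) -> list[AudioDeviceInfo]:
        return [AudioDeviceInfo(0, "fake mic", 1, 16000, True)]
    def start(
        self,
        device_id: int | None,
        on_frames: AudioFrameCallback,
        sample_rate: int = 16000,
        channels: int = 1,
        chunk_duration_ms: int = 100,
    ) -> None:
        self._callback = on_frames
        self._active = True
    def stop(self) -> None:
        self._active = False
        self._callback = None
    def is_capturing(self) -> bool:
        return self._active
    def feed(self, frames: NDArray[np.float32], timestamp: float) -> None:
        # Test hook: push synthetic frames through the registered callback.
        if self._active and self._callback is not None:
            self._callback(frames, timestamp)

capture: AudioCapture = FakeCapture()  # accepted by static structural typing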
File: src/noteflow/infrastructure/audio/ring_buffer.py
"""Timestamped audio ring buffer implementation.
Store recent audio with timestamps for ASR processing and playback sync.
"""
from __future__ import annotations
from collections import deque
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from noteflow.infrastructure.audio.dto import TimestampedAudio
class TimestampedRingBuffer:
"""Ring buffer for timestamped audio chunks.
Automatically discard old audio when the buffer exceeds max_duration.
    Note: CPython deque operations are atomic, but iterating the buffer
    (get_window/get_all) while another thread pushes can raise RuntimeError,
    so guard concurrent producer/consumer access with a lock.
"""
def __init__(self, max_duration: float = 30.0) -> None:
"""Initialize ring buffer.
Args:
max_duration: Maximum audio duration to keep in seconds.
Raises:
ValueError: If max_duration is not positive.
"""
if max_duration <= 0:
raise ValueError("max_duration must be positive")
self._max_duration = max_duration
self._buffer: deque[TimestampedAudio] = deque()
self._total_duration: float = 0.0
def push(self, audio: TimestampedAudio) -> None:
"""Add audio to the buffer.
Old audio is discarded if buffer exceeds max_duration.
Args:
audio: Timestamped audio chunk to add.
"""
self._buffer.append(audio)
self._total_duration += audio.duration
# Evict old chunks if over capacity
while self._total_duration > self._max_duration and self._buffer:
old = self._buffer.popleft()
self._total_duration -= old.duration
def get_window(self, duration_seconds: float) -> list[TimestampedAudio]:
"""Get the last N seconds of audio.
Args:
duration_seconds: How many seconds of audio to retrieve.
Returns:
List of TimestampedAudio chunks, ordered oldest to newest.
"""
if duration_seconds <= 0:
return []
result: list[TimestampedAudio] = []
accumulated_duration = 0.0
# Iterate from newest to oldest
for audio in reversed(self._buffer):
result.append(audio)
accumulated_duration += audio.duration
if accumulated_duration >= duration_seconds:
break
# Return in chronological order (oldest first)
result.reverse()
return result
def get_all(self) -> list[TimestampedAudio]:
"""Get all buffered audio.
Returns:
List of all TimestampedAudio chunks, ordered oldest to newest.
"""
return list(self._buffer)
def clear(self) -> None:
"""Clear all audio from the buffer."""
self._buffer.clear()
self._total_duration = 0.0
@property
def duration(self) -> float:
"""Total duration of buffered audio in seconds."""
return self._total_duration
@property
def max_duration(self) -> float:
"""Maximum buffer duration in seconds."""
return self._max_duration
@property
def chunk_count(self) -> int:
"""Number of audio chunks in the buffer."""
return len(self._buffer)
def __len__(self) -> int:
"""Return number of chunks in buffer."""
return len(self._buffer)
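A short sketch of the eviction and windowing behaviour (not part of the repository): pushing fifteen 100 ms chunks into a 1-second buffer leaves only the newest ten, and get_window returns the most recent chunks oldest-first.

import time
import numpy as np
from noteflow.infrastructure.audio.dto import TimestampedAudio
from noteflow.infrastructure.audio.ring_buffer import TimestampedRingBuffer

buf = TimestampedRingBuffer(max_duration=1.0)  # Keep at most 1 s of audio
for _ in range(15):
    frames = np.zeros(1_600, dtype=np.float32)  # 100 ms chunks at 16 kHz
    buf.push(TimestampedAudio(frames, time.monotonic(), duration=0.1))

print(buf.duration)           # ~1.0 -- the five oldest chunks were evicted
print(buf.chunk_count)        # 10
window = buf.get_window(0.3)  # The most recent ~300 ms, oldest first
print(len(window))            # 3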
File: src/noteflow/infrastructure/audio/writer.py
"""Streaming encrypted audio file writer for meetings."""
from __future__ import annotations
import json
import logging
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING
import numpy as np
from noteflow.infrastructure.security.crypto import ChunkedAssetWriter
if TYPE_CHECKING:
from numpy.typing import NDArray
from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
logger = logging.getLogger(__name__)
class MeetingAudioWriter:
"""Write audio chunks to encrypted meeting file.
Manage meeting directory creation, manifest file, and encrypted audio storage.
Uses ChunkedAssetWriter for the actual encryption.
Directory structure:
~/.noteflow/meetings/<meeting-uuid>/
├── manifest.json # Meeting metadata + wrapped DEK
└── audio.enc # Encrypted PCM16 chunks (NFAE format)
"""
def __init__(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Initialize audio writer.
Args:
crypto: CryptoBox instance for encryption operations.
meetings_dir: Root directory for all meetings (e.g., ~/.noteflow/meetings).
"""
self._crypto = crypto
self._meetings_dir = meetings_dir
self._asset_writer: ChunkedAssetWriter | None = None
self._meeting_dir: Path | None = None
self._sample_rate: int = 16000
self._chunk_count: int = 0
def open(
self,
meeting_id: str,
dek: bytes,
wrapped_dek: bytes,
sample_rate: int = 16000,
) -> None:
"""Open meeting for audio writing.
Create meeting directory, write manifest, open encrypted audio file.
Args:
meeting_id: Meeting UUID string.
dek: Unwrapped data encryption key (32 bytes).
wrapped_dek: Encrypted DEK to store in manifest.
sample_rate: Audio sample rate (default 16000 Hz).
Raises:
RuntimeError: If already open.
OSError: If directory creation fails.
"""
if self._asset_writer is not None:
raise RuntimeError("Writer already open")
# Create meeting directory
self._meeting_dir = self._meetings_dir / meeting_id
self._meeting_dir.mkdir(parents=True, exist_ok=True)
# Write manifest.json
manifest = {
"meeting_id": meeting_id,
"created_at": datetime.now(UTC).isoformat(),
"sample_rate": sample_rate,
"channels": 1,
"format": "pcm16",
"wrapped_dek": wrapped_dek.hex(), # Store as hex string
}
manifest_path = self._meeting_dir / "manifest.json"
manifest_path.write_text(json.dumps(manifest, indent=2))
# Open encrypted audio file
audio_path = self._meeting_dir / "audio.enc"
self._asset_writer = ChunkedAssetWriter(self._crypto)
self._asset_writer.open(audio_path, dek)
self._sample_rate = sample_rate
self._chunk_count = 0
logger.info(
"Opened audio writer: meeting=%s, dir=%s",
meeting_id,
self._meeting_dir,
)
def write_chunk(self, audio: NDArray[np.float32]) -> None:
"""Write audio chunk (convert float32 → PCM16).
Args:
audio: Audio samples as float32 array (-1.0 to 1.0).
Raises:
RuntimeError: If not open.
"""
if self._asset_writer is None or not self._asset_writer.is_open:
raise RuntimeError("Writer not open")
# Convert float32 [-1.0, 1.0] to int16 [-32768, 32767]
# Clamp to prevent overflow on conversion
audio_clamped = np.clip(audio, -1.0, 1.0)
pcm16 = (audio_clamped * 32767.0).astype(np.int16)
# Write as raw bytes (platform-native endianness, typically little-endian)
self._asset_writer.write_chunk(pcm16.tobytes())
self._chunk_count += 1
def close(self) -> None:
"""Close audio writer and finalize files.
Safe to call if already closed or never opened.
"""
if self._asset_writer is not None:
bytes_written = self._asset_writer.bytes_written
self._asset_writer.close()
self._asset_writer = None
logger.info(
"Closed audio writer: dir=%s, chunks=%d, bytes=%d",
self._meeting_dir,
self._chunk_count,
bytes_written,
)
self._meeting_dir = None
self._chunk_count = 0
@property
def is_open(self) -> bool:
"""Check if writer is currently open for writing."""
return self._asset_writer is not None and self._asset_writer.is_open
@property
def bytes_written(self) -> int:
"""Total encrypted bytes written to audio.enc file."""
return 0 if self._asset_writer is None else self._asset_writer.bytes_written
@property
def chunk_count(self) -> int:
"""Number of audio chunks written."""
return self._chunk_count
@property
def meeting_dir(self) -> Path | None:
"""Current meeting directory, or None if not open."""
return self._meeting_dir
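A wiring sketch for the writer (not part of the repository), assuming InMemoryKeyStore takes no constructor arguments; the meeting ID and directory are placeholders.

from pathlib import Path
import numpy as np
from noteflow.infrastructure.audio.writer import MeetingAudioWriter
from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
from noteflow.infrastructure.security.keystore import InMemoryKeyStore

crypto = AesGcmCryptoBox(InMemoryKeyStore())  # assumed no-arg constructor
dek = crypto.generate_dek()
writer = MeetingAudioWriter(crypto, meetings_dir=Path("/tmp/noteflow-meetings"))
writer.open(
    meeting_id="00000000-0000-0000-0000-000000000001",  # placeholder UUID
    dek=dek,
    wrapped_dek=crypto.wrap_dek(dek),  # stored hex-encoded in manifest.json
)
writer.write_chunk(np.zeros(1_600, dtype=np.float32))  # 100 ms of silence
writer.close()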
File: src/noteflow/infrastructure/converters/__init__.py
"""Infrastructure converters for data transformation between layers."""
from noteflow.infrastructure.converters.asr_converters import AsrConverter
from noteflow.infrastructure.converters.orm_converters import OrmConverter
__all__ = [
"AsrConverter",
"OrmConverter",
]
File: src/noteflow/infrastructure/converters/asr_converters.py
"""Convert ASR DTOs to domain entities."""
from __future__ import annotations
from typing import TYPE_CHECKING
from noteflow.domain.entities import WordTiming
if TYPE_CHECKING:
from noteflow.infrastructure.asr import dto
from noteflow.infrastructure.asr.dto import AsrResult
class AsrConverter:
"""Convert ASR DTOs to domain entities."""
@staticmethod
def word_timing_to_domain(asr_word: dto.WordTiming) -> WordTiming:
"""Convert ASR WordTiming DTO to domain WordTiming entity.
Map field names from ASR convention (start/end) to domain
convention (start_time/end_time).
Args:
asr_word: ASR WordTiming DTO from faster-whisper engine.
Returns:
Domain WordTiming entity with validated timing.
Raises:
ValueError: If timing validation fails.
"""
return WordTiming(
word=asr_word.word,
start_time=asr_word.start,
end_time=asr_word.end,
probability=asr_word.probability,
)
@staticmethod
def result_to_domain_words(result: AsrResult) -> list[WordTiming]:
"""Convert all words from ASR result to domain entities.
Args:
result: ASR transcription result with word timings.
Returns:
List of domain WordTiming entities.
"""
return [AsrConverter.word_timing_to_domain(word) for word in result.words]
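A small conversion sketch (not part of the repository), assuming the ASR dto.WordTiming is constructible with the word/start/end/probability fields the converter reads above:

from noteflow.infrastructure.asr import dto
from noteflow.infrastructure.converters import AsrConverter

asr_word = dto.WordTiming(word="hello", start=0.32, end=0.58, probability=0.91)
domain_word = AsrConverter.word_timing_to_domain(asr_word)
# Field names are remapped: start/end -> start_time/end_time
assert domain_word.start_time == 0.32 and domain_word.end_time == 0.58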
File: src/noteflow/infrastructure/diarization/__init__.py
"""Speaker diarization infrastructure module.
Provides speaker diarization using pyannote.audio (offline) and diart (streaming).
"""
from noteflow.infrastructure.diarization.assigner import (
assign_speaker,
assign_speakers_batch,
)
from noteflow.infrastructure.diarization.dto import SpeakerTurn
from noteflow.infrastructure.diarization.engine import DiarizationEngine
__all__ = [
"DiarizationEngine",
"SpeakerTurn",
"assign_speaker",
"assign_speakers_batch",
]
File: src/noteflow/infrastructure/diarization/assigner.py
"""Speaker assignment utilities for mapping diarization to segments.
Provides functions to assign speaker labels to transcript segments based on
diarization output using timestamp overlap matching.
"""
from __future__ import annotations
from collections.abc import Sequence
from noteflow.infrastructure.diarization.dto import SpeakerTurn
def assign_speaker(
start_time: float,
end_time: float,
turns: Sequence[SpeakerTurn],
) -> tuple[str | None, float]:
"""Assign a speaker to a time range based on diarization turns.
Uses maximum overlap duration to determine the dominant speaker
for the given time range.
Args:
start_time: Segment start time in seconds.
end_time: Segment end time in seconds.
turns: Sequence of speaker turns from diarization.
Returns:
Tuple of (speaker_id, confidence) where speaker_id is None if
no overlapping turns found. Confidence is the ratio of overlap
duration to segment duration.
"""
if not turns:
return None, 0.0
segment_duration = end_time - start_time
if segment_duration <= 0:
return None, 0.0
best_speaker: str | None = None
best_overlap: float = 0.0
for turn in turns:
overlap = turn.overlap_duration(start_time, end_time)
if overlap > best_overlap:
best_overlap = overlap
best_speaker = turn.speaker
if best_speaker is None:
return None, 0.0
confidence = best_overlap / segment_duration
return best_speaker, confidence
def assign_speakers_batch(
segments: Sequence[tuple[float, float]],
turns: Sequence[SpeakerTurn],
) -> list[tuple[str | None, float]]:
"""Assign speakers to multiple segments in batch.
Args:
segments: Sequence of (start_time, end_time) tuples.
turns: Sequence of speaker turns from diarization.
Returns:
List of (speaker_id, confidence) tuples, one per segment.
"""
return [assign_speaker(start, end, turns) for start, end in segments]
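A worked example of the maximum-overlap rule (not part of the repository): a 3-second segment spanning a speaker change is assigned to the speaker with the larger overlap, with confidence equal to overlap over segment duration.

from noteflow.infrastructure.diarization.assigner import assign_speaker
from noteflow.infrastructure.diarization.dto import SpeakerTurn

turns = [
    SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=4.0),
    SpeakerTurn(speaker="SPEAKER_01", start=4.0, end=10.0),
]
# Segment 3.0-6.0 s: 1 s overlap with SPEAKER_00, 2 s with SPEAKER_01.
speaker, confidence = assign_speaker(3.0, 6.0, turns)
assert speaker == "SPEAKER_01"
assert confidence == 2.0 / 3.0  # best overlap / segment duration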
File: src/noteflow/infrastructure/diarization/dto.py
"""Data Transfer Objects for speaker diarization.
These DTOs define the data structures used by diarization components.
"""
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class SpeakerTurn:
"""Speaker turn from diarization output.
Represents a time segment where a specific speaker is talking.
"""
speaker: str # Speaker label (e.g., "SPEAKER_00")
start: float # Start time in seconds
end: float # End time in seconds
confidence: float = 1.0 # Confidence score (0.0-1.0)
def __post_init__(self) -> None:
"""Validate turn data."""
if self.end < self.start:
raise ValueError(f"Turn end ({self.end}) < start ({self.start})")
if not 0.0 <= self.confidence <= 1.0:
raise ValueError(f"Confidence must be 0.0-1.0, got {self.confidence}")
@property
def duration(self) -> float:
"""Duration of the turn in seconds."""
return self.end - self.start
def overlaps(self, start: float, end: float) -> bool:
"""Check if this turn overlaps with a time range.
Args:
start: Range start time in seconds.
end: Range end time in seconds.
Returns:
True if there is any overlap.
"""
return self.start < end and self.end > start
def overlap_duration(self, start: float, end: float) -> float:
"""Calculate overlap duration with a time range.
Args:
start: Range start time in seconds.
end: Range end time in seconds.
Returns:
Overlap duration in seconds (0.0 if no overlap).
"""
overlap_start = max(self.start, start)
overlap_end = min(self.end, end)
return max(0.0, overlap_end - overlap_start)
File: src/noteflow/infrastructure/diarization/engine.py
"""Diarization engine implementation using pyannote.audio and diart.
Provides speaker diarization for both streaming (real-time) and
offline (post-meeting) processing.
Requires optional dependencies: pip install noteflow[diarization]
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from noteflow.infrastructure.diarization.dto import SpeakerTurn
if TYPE_CHECKING:
from collections.abc import Sequence
import numpy as np
from numpy.typing import NDArray
from pyannote.core import Annotation
logger = logging.getLogger(__name__)
class DiarizationEngine:
"""Speaker diarization engine using pyannote.audio and diart.
Supports both streaming (real-time via diart) and offline
(post-meeting via pyannote.audio) diarization modes.
"""
def __init__(
self,
device: str = "auto",
hf_token: str | None = None,
streaming_latency: float = 0.5,
min_speakers: int = 1,
max_speakers: int = 10,
) -> None:
"""Initialize the diarization engine.
Args:
device: Device to use ("auto", "cpu", "cuda", "mps").
"auto" selects CUDA > MPS > CPU based on availability.
hf_token: HuggingFace token for pyannote model access.
streaming_latency: Latency for streaming diarization in seconds.
min_speakers: Minimum expected speakers for offline diarization.
max_speakers: Maximum expected speakers for offline diarization.
"""
self._device_preference = device
self._device: str | None = None
self._hf_token = hf_token
self._streaming_latency = streaming_latency
self._min_speakers = min_speakers
self._max_speakers = max_speakers
# Lazy-loaded models
self._streaming_pipeline = None
self._offline_pipeline = None
def _resolve_device(self) -> str:
"""Resolve the actual device to use based on availability.
Returns:
Device string ("cuda", "mps", or "cpu").
"""
if self._device is not None:
return self._device
import torch
if self._device_preference == "auto":
if torch.cuda.is_available():
self._device = "cuda"
elif torch.backends.mps.is_available():
self._device = "mps"
else:
self._device = "cpu"
else:
self._device = self._device_preference
logger.info("Diarization device resolved to: %s", self._device)
return self._device
def load_streaming_model(self) -> None:
"""Load the streaming diarization model (diart).
Raises:
RuntimeError: If model loading fails.
ValueError: If HuggingFace token is not provided.
"""
if self._streaming_pipeline is not None:
logger.debug("Streaming model already loaded")
return
if not self._hf_token:
raise ValueError("HuggingFace token required for pyannote models")
device = self._resolve_device()
logger.info(
"Loading streaming diarization model on %s with latency %.2fs...",
device,
self._streaming_latency,
)
try:
from diart import SpeakerDiarization, SpeakerDiarizationConfig
from diart.models import EmbeddingModel, SegmentationModel
segmentation = SegmentationModel.from_pretrained(
"pyannote/segmentation-3.0",
use_hf_token=self._hf_token,
)
embedding = EmbeddingModel.from_pretrained(
"pyannote/wespeaker-voxceleb-resnet34-LM",
use_hf_token=self._hf_token,
)
config = SpeakerDiarizationConfig(
segmentation=segmentation,
embedding=embedding,
step=self._streaming_latency,
latency=self._streaming_latency,
device=device,
)
self._streaming_pipeline = SpeakerDiarization(config)
logger.info("Streaming diarization model loaded successfully")
except Exception as e:
raise RuntimeError(f"Failed to load streaming diarization model: {e}") from e
def load_offline_model(self) -> None:
"""Load the offline diarization model (pyannote.audio).
Raises:
RuntimeError: If model loading fails.
ValueError: If HuggingFace token is not provided.
"""
if self._offline_pipeline is not None:
logger.debug("Offline model already loaded")
return
if not self._hf_token:
raise ValueError("HuggingFace token required for pyannote models")
device = self._resolve_device()
logger.info("Loading offline diarization model on %s...", device)
try:
import torch
from pyannote.audio import Pipeline
self._offline_pipeline = Pipeline.from_pretrained(
"pyannote/speaker-diarization-3.1",
use_auth_token=self._hf_token,
)
torch_device = torch.device(device)
self._offline_pipeline.to(torch_device)
logger.info("Offline diarization model loaded successfully")
except Exception as e:
raise RuntimeError(f"Failed to load offline diarization model: {e}") from e
def process_chunk(
self,
audio: NDArray[np.float32],
sample_rate: int = 16000,
) -> Sequence[SpeakerTurn]:
"""Process an audio chunk for streaming diarization.
Args:
audio: Audio samples as float32 array (mono).
sample_rate: Audio sample rate in Hz.
Returns:
Sequence of speaker turns detected in this chunk.
Raises:
RuntimeError: If streaming model not loaded.
"""
if self._streaming_pipeline is None:
raise RuntimeError("Streaming model not loaded. Call load_streaming_model() first.")
        from pyannote.core import SlidingWindow, SlidingWindowFeature
        # Reshape audio for diart: (samples,) -> (1, samples)
        if audio.ndim == 1:
            audio = audio.reshape(1, -1)
        # Create SlidingWindowFeature for diart
duration = audio.shape[1] / sample_rate
window = SlidingWindow(start=0.0, duration=duration, step=duration)
waveform = SlidingWindowFeature(audio, window)
# Process through pipeline
results = self._streaming_pipeline([waveform])
turns: list[SpeakerTurn] = []
for annotation, _ in results:
turns.extend(self._annotation_to_turns(annotation))
return turns
def diarize_full(
self,
audio: NDArray[np.float32],
sample_rate: int = 16000,
num_speakers: int | None = None,
) -> Sequence[SpeakerTurn]:
"""Diarize a complete audio recording.
Args:
audio: Audio samples as float32 array (mono).
sample_rate: Audio sample rate in Hz.
num_speakers: Known number of speakers (None for auto-detect).
Returns:
Sequence of speaker turns for the full recording.
Raises:
RuntimeError: If offline model not loaded.
"""
if self._offline_pipeline is None:
raise RuntimeError("Offline model not loaded. Call load_offline_model() first.")
import torch
# Prepare audio tensor: (samples,) -> (channels, samples)
if audio.ndim == 1:
audio_tensor = torch.from_numpy(audio).unsqueeze(0)
else:
audio_tensor = torch.from_numpy(audio)
# Create waveform dict for pyannote
waveform = {"waveform": audio_tensor, "sample_rate": sample_rate}
logger.debug(
"Running offline diarization on %.2fs audio",
audio_tensor.shape[1] / sample_rate,
)
# Run diarization with speaker hints
if num_speakers is not None:
annotation = self._offline_pipeline(waveform, num_speakers=num_speakers)
else:
annotation = self._offline_pipeline(
waveform,
min_speakers=self._min_speakers,
max_speakers=self._max_speakers,
)
return self._annotation_to_turns(annotation)
def _annotation_to_turns(self, annotation: Annotation) -> list[SpeakerTurn]:
"""Convert pyannote Annotation to SpeakerTurn list.
Args:
annotation: Pyannote diarization annotation.
Returns:
List of SpeakerTurn objects.
"""
turns: list[SpeakerTurn] = []
# itertracks(yield_label=True) returns 3-tuples: (segment, track, label)
for track in annotation.itertracks(yield_label=True):
# Unpack with len check for type safety with pyannote's union return
if len(track) == 3:
segment, _, speaker = track
turns.append(
SpeakerTurn(
speaker=str(speaker),
start=segment.start,
end=segment.end,
)
)
return turns
def reset_streaming(self) -> None:
"""Reset streaming pipeline state for a new recording."""
if self._streaming_pipeline is not None:
self._streaming_pipeline.reset()
logger.debug("Streaming pipeline state reset")
def unload(self) -> None:
"""Unload all models to free memory."""
self._streaming_pipeline = None
self._offline_pipeline = None
self._device = None
logger.info("Diarization models unloaded")
@property
def is_streaming_loaded(self) -> bool:
"""Return True if streaming model is loaded."""
return self._streaming_pipeline is not None
@property
def is_offline_loaded(self) -> bool:
"""Return True if offline model is loaded."""
return self._offline_pipeline is not None
@property
def device(self) -> str | None:
"""Return the resolved device, or None if not yet resolved."""
return self._device
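An offline-diarization usage sketch (not part of the repository). Requires the optional diarization extras and a valid HuggingFace token with access to the pyannote models; the token string and audio are placeholders.

import numpy as np
from noteflow.infrastructure.diarization.engine import DiarizationEngine

engine = DiarizationEngine(device="auto", hf_token="hf_...")  # placeholder token
engine.load_offline_model()

audio = np.zeros(16_000 * 60, dtype=np.float32)  # e.g. one minute of mono audio
turns = engine.diarize_full(audio, sample_rate=16_000, num_speakers=2)
for turn in turns:
    print(f"{turn.speaker}: {turn.start:.1f}-{turn.end:.1f}s")
engine.unload()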
File: src/noteflow/infrastructure/export/__init__.py
"""Export infrastructure module.
Provide transcript export functionality to various file formats.
"""
from noteflow.infrastructure.export.html import HtmlExporter
from noteflow.infrastructure.export.markdown import MarkdownExporter
from noteflow.infrastructure.export.protocols import TranscriptExporter
__all__ = [
"HtmlExporter",
"MarkdownExporter",
"TranscriptExporter",
]
File: src/noteflow/infrastructure/export/_formatting.py
"""Shared formatting utilities for export modules."""
from __future__ import annotations
from datetime import datetime
def format_timestamp(seconds: float) -> str:
"""Format seconds as MM:SS or HH:MM:SS.
Args:
seconds: Time in seconds.
Returns:
Formatted time string.
"""
total_seconds = int(seconds)
hours, remainder = divmod(total_seconds, 3600)
minutes, secs = divmod(remainder, 60)
if hours > 0:
return f"{hours:d}:{minutes:02d}:{secs:02d}"
return f"{minutes:d}:{secs:02d}"
def format_datetime(dt: datetime | None) -> str:
"""Format datetime for display.
Args:
dt: Datetime to format.
Returns:
Formatted datetime string or empty string.
"""
return "" if dt is None else dt.strftime("%Y-%m-%d %H:%M:%S")
File: src/noteflow/infrastructure/export/protocols.py
"""Export protocols defining contracts for transcript exporters.
Define Protocol interfaces for exporting meeting transcripts to various formats.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from collections.abc import Sequence
from noteflow.domain.entities.meeting import Meeting
from noteflow.domain.entities.segment import Segment
class TranscriptExporter(Protocol):
"""Protocol for exporting meeting transcripts to file formats.
Implementations should produce formatted output for the target format
(e.g., Markdown, HTML) from meeting data.
"""
def export(
self,
meeting: Meeting,
segments: Sequence[Segment],
) -> str:
"""Export meeting transcript to formatted string.
Args:
meeting: Meeting entity with metadata.
segments: Ordered list of transcript segments.
Returns:
Formatted transcript string in target format.
"""
...
@property
def format_name(self) -> str:
"""Human-readable format name (e.g., 'Markdown', 'HTML')."""
...
@property
def file_extension(self) -> str:
"""File extension for this format (e.g., '.md', '.html')."""
...
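A minimal exporter satisfying the protocol structurally (not part of the repository), assuming Meeting exposes a title and Segment exposes start_time/text, consistent with the columns in the initial schema migration:

from collections.abc import Sequence
from noteflow.domain.entities.meeting import Meeting
from noteflow.domain.entities.segment import Segment
from noteflow.infrastructure.export._formatting import format_timestamp

class PlainTextExporter:
    """Structurally satisfies TranscriptExporter; no inheritance needed."""
    def export(self, meeting: Meeting, segments: Sequence[Segment]) -> str:
        lines = [meeting.title]
        lines.extend(
            f"[{format_timestamp(seg.start_time)}] {seg.text}" for seg in segments
        )
        return "\n".join(lines)
    @property
    def format_name(self) -> str:
        return "Plain text"
    @property
    def file_extension(self) -> str:
        return ".txt"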
File: src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py
"""initial_schema
Revision ID: 6a9d9f408f40
Revises:
Create Date: 2025-12-16 19:10:55.135444
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = "6a9d9f408f40"
down_revision: str | Sequence[str] | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# Vector dimension for embeddings (OpenAI compatible)
EMBEDDING_DIM = 1536
def upgrade() -> None:
"""Create NoteFlow schema and tables."""
# Create schema
op.execute("CREATE SCHEMA IF NOT EXISTS noteflow")
# Enable pgvector extension
try:
op.execute("CREATE EXTENSION IF NOT EXISTS vector")
except sa.exc.ProgrammingError as e:
raise RuntimeError(
f"Failed to create pgvector extension: {e}. "
"Ensure the database user has CREATE EXTENSION privileges, or "
"install pgvector manually: CREATE EXTENSION vector;"
) from e
# Create meetings table
op.create_table(
"meetings",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column("title", sa.String(255), nullable=False),
sa.Column("state", sa.Integer(), nullable=False, server_default="1"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"metadata",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="{}",
),
sa.Column("wrapped_dek", sa.LargeBinary(), nullable=True),
schema="noteflow",
)
# Create segments table
op.create_table(
"segments",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"meeting_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("segment_id", sa.Integer(), nullable=False),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("start_time", sa.Float(), nullable=False),
sa.Column("end_time", sa.Float(), nullable=False),
sa.Column("language", sa.String(10), nullable=False, server_default="en"),
sa.Column("language_confidence", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("avg_logprob", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("no_speech_prob", sa.Float(), nullable=False, server_default="0.0"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
schema="noteflow",
)
# Add vector column for embeddings (pgvector)
op.execute(f"ALTER TABLE noteflow.segments ADD COLUMN embedding vector({EMBEDDING_DIM})")
# Create index for vector similarity search
op.execute(
"CREATE INDEX IF NOT EXISTS ix_segments_embedding "
"ON noteflow.segments USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)"
)
# Create index for meeting_id lookups
op.create_index(
"ix_segments_meeting_id",
"segments",
["meeting_id"],
schema="noteflow",
)
# Create word_timings table
op.create_table(
"word_timings",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"segment_pk",
sa.Integer(),
sa.ForeignKey("noteflow.segments.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("word", sa.String(255), nullable=False),
sa.Column("start_time", sa.Float(), nullable=False),
sa.Column("end_time", sa.Float(), nullable=False),
sa.Column("probability", sa.Float(), nullable=False),
schema="noteflow",
)
# Create index for segment_pk lookups
op.create_index(
"ix_word_timings_segment_pk",
"word_timings",
["segment_pk"],
schema="noteflow",
)
# Create summaries table
op.create_table(
"summaries",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"meeting_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
unique=True,
),
sa.Column("executive_summary", sa.Text(), nullable=True),
sa.Column(
"generated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column("model_version", sa.String(50), nullable=True),
schema="noteflow",
)
# Create key_points table
op.create_table(
"key_points",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"summary_id",
sa.Integer(),
sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("start_time", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("end_time", sa.Float(), nullable=False, server_default="0.0"),
sa.Column(
"segment_ids",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="[]",
),
schema="noteflow",
)
# Create index for summary_id lookups
op.create_index(
"ix_key_points_summary_id",
"key_points",
["summary_id"],
schema="noteflow",
)
# Create action_items table
op.create_table(
"action_items",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"summary_id",
sa.Integer(),
sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("assignee", sa.String(255), nullable=False, server_default=""),
sa.Column("due_date", sa.DateTime(timezone=True), nullable=True),
sa.Column("priority", sa.Integer(), nullable=False, server_default="0"),
sa.Column(
"segment_ids",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="[]",
),
schema="noteflow",
)
# Create index for summary_id lookups
op.create_index(
"ix_action_items_summary_id",
"action_items",
["summary_id"],
schema="noteflow",
)
def downgrade() -> None:
"""Drop all NoteFlow tables and schema."""
# Drop tables in reverse order (respecting foreign keys)
op.drop_table("action_items", schema="noteflow")
op.drop_table("key_points", schema="noteflow")
op.drop_table("summaries", schema="noteflow")
op.drop_table("word_timings", schema="noteflow")
op.drop_table("segments", schema="noteflow")
op.drop_table("meetings", schema="noteflow")
# Drop schema
op.execute("DROP SCHEMA IF EXISTS noteflow CASCADE")
File: src/noteflow/infrastructure/persistence/migrations/versions/b5c3e8a2d1f0_add_annotations_table.py
"""add_annotations_table
Revision ID: b5c3e8a2d1f0
Revises: 6a9d9f408f40
Create Date: 2025-12-17 10:00:00.000000
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = "b5c3e8a2d1f0"
down_revision: str | Sequence[str] | None = "6a9d9f408f40"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Create annotations table for user-created annotations during recording."""
op.create_table(
"annotations",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"annotation_id",
postgresql.UUID(as_uuid=True),
nullable=False,
unique=True,
),
sa.Column(
"meeting_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("annotation_type", sa.String(50), nullable=False),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("start_time", sa.Float(), nullable=False),
sa.Column("end_time", sa.Float(), nullable=False),
sa.Column(
"segment_ids",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="[]",
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
schema="noteflow",
)
# Create index for meeting_id lookups
op.create_index(
"ix_annotations_meeting_id",
"annotations",
["meeting_id"],
schema="noteflow",
)
# Create index for time-based queries
op.create_index(
"ix_annotations_time_range",
"annotations",
["meeting_id", "start_time", "end_time"],
schema="noteflow",
)
def downgrade() -> None:
"""Drop annotations table."""
op.drop_index("ix_annotations_time_range", table_name="annotations", schema="noteflow")
op.drop_index("ix_annotations_meeting_id", table_name="annotations", schema="noteflow")
op.drop_table("annotations", schema="noteflow")
File: src/noteflow/infrastructure/persistence/migrations/versions/c7d4e9f3a2b1_add_speaker_fields_to_segments.py
"""add_speaker_fields_to_segments
Revision ID: c7d4e9f3a2b1
Revises: b5c3e8a2d1f0
Create Date: 2025-12-18 16:00:00.000000
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "c7d4e9f3a2b1"
down_revision: str | Sequence[str] | None = "b5c3e8a2d1f0"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Add speaker_id and speaker_confidence columns to segments table."""
op.add_column(
"segments",
sa.Column("speaker_id", sa.String(50), nullable=True),
schema="noteflow",
)
op.add_column(
"segments",
sa.Column("speaker_confidence", sa.Float(), nullable=False, server_default="0.0"),
schema="noteflow",
)
def downgrade() -> None:
"""Remove speaker_id and speaker_confidence columns from segments table."""
op.drop_column("segments", "speaker_confidence", schema="noteflow")
op.drop_column("segments", "speaker_id", schema="noteflow")
File: src/noteflow/infrastructure/persistence/migrations/__init__.py
"""Alembic database migrations for NoteFlow."""
File: src/noteflow/infrastructure/persistence/migrations/README
Generic single-database configuration with an async dbapi.
File: src/noteflow/infrastructure/persistence/migrations/script.py.mako
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}
File: src/noteflow/infrastructure/persistence/repositories/__init__.py
"""Repository implementations for NoteFlow."""
from .annotation_repo import SqlAlchemyAnnotationRepository
from .meeting_repo import SqlAlchemyMeetingRepository
from .segment_repo import SqlAlchemySegmentRepository
from .summary_repo import SqlAlchemySummaryRepository
__all__ = [
"SqlAlchemyAnnotationRepository",
"SqlAlchemyMeetingRepository",
"SqlAlchemySegmentRepository",
"SqlAlchemySummaryRepository",
]
File: src/noteflow/infrastructure/persistence/repositories/_base.py
"""Base repository providing common SQLAlchemy patterns."""
from __future__ import annotations
from typing import TYPE_CHECKING, TypeVar
from sqlalchemy.ext.asyncio import AsyncSession
if TYPE_CHECKING:
from sqlalchemy.sql import Select
TModel = TypeVar("TModel")
class BaseRepository:
"""Base class for SQLAlchemy repositories.
Provides common session management and helper methods for
executing queries and persisting models.
"""
def __init__(self, session: AsyncSession) -> None:
"""Initialize repository with database session.
Args:
session: SQLAlchemy async session.
"""
self._session = session
async def _execute_scalar(
self,
stmt: Select[tuple[TModel]],
) -> TModel | None:
"""Execute statement and return single scalar result.
Args:
stmt: SQLAlchemy select statement.
Returns:
Single model instance or None if not found.
"""
result = await self._session.execute(stmt)
return result.scalar_one_or_none()
async def _execute_scalars(
self,
stmt: Select[tuple[TModel]],
) -> list[TModel]:
"""Execute statement and return all scalar results.
Args:
stmt: SQLAlchemy select statement.
Returns:
List of model instances.
"""
result = await self._session.execute(stmt)
return list(result.scalars().all())
async def _add_and_flush(self, model: TModel) -> TModel:
"""Add model to session and flush.
Args:
model: ORM model instance to persist.
Returns:
The persisted model with generated fields populated.
"""
self._session.add(model)
await self._session.flush()
return model
async def _delete_and_flush(self, model: object) -> None:
"""Delete model from session and flush.
Args:
model: ORM model instance to delete.
"""
await self._session.delete(model)
await self._session.flush()
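A sketch of how a concrete repository would build on these helpers (not part of the repository). The MeetingModel import path is an assumption for illustration; the real ORM models live elsewhere in the package.

from sqlalchemy import select
from noteflow.infrastructure.persistence.models import MeetingModel  # assumed path
from noteflow.infrastructure.persistence.repositories._base import BaseRepository

class ExampleMeetingRepository(BaseRepository):
    """Illustrative subclass using the scalar-query helper."""
    async def get_by_title(self, title: str) -> MeetingModel | None:
        stmt = select(MeetingModel).where(MeetingModel.title == title)
        return await self._execute_scalar(stmt)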
File: src/noteflow/infrastructure/persistence/__init__.py
"""Persistence infrastructure for NoteFlow."""
from .database import create_async_engine, get_async_session_factory
from .unit_of_work import SqlAlchemyUnitOfWork
__all__ = [
"SqlAlchemyUnitOfWork",
"create_async_engine",
"get_async_session_factory",
]
File: src/noteflow/infrastructure/persistence/database.py
"""Database connection and session management."""
from __future__ import annotations
from collections.abc import AsyncGenerator
from typing import TYPE_CHECKING
from sqlalchemy.ext.asyncio import (
AsyncEngine,
AsyncSession,
async_sessionmaker,
)
from sqlalchemy.ext.asyncio import (
create_async_engine as sa_create_async_engine,
)
if TYPE_CHECKING:
from noteflow.config import Settings
def create_async_engine(settings: Settings) -> AsyncEngine:
"""Create an async SQLAlchemy engine.
Args:
settings: Application settings with database URL.
Returns:
Configured async engine.
"""
return sa_create_async_engine(
settings.database_url_str,
pool_size=settings.db_pool_size,
echo=settings.db_echo,
pool_pre_ping=True, # Verify connections before use
)
def get_async_session_factory(
engine: AsyncEngine,
) -> async_sessionmaker[AsyncSession]:
"""Create an async session factory.
Args:
engine: SQLAlchemy async engine.
Returns:
Session factory for creating async sessions.
"""
return async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
autocommit=False,
autoflush=False,
)
async def get_async_session(
session_factory: async_sessionmaker[AsyncSession],
) -> AsyncGenerator[AsyncSession, None]:
"""Yield an async database session.
Args:
session_factory: Factory for creating sessions.
Yields:
Async database session that is closed after use.
"""
async with session_factory() as session:
yield session
def create_async_session_factory(
database_url: str,
pool_size: int = 5,
echo: bool = False,
) -> async_sessionmaker[AsyncSession]:
"""Create an async session factory from a database URL string.
Convenience function for creating a session factory directly from a URL.
Args:
database_url: PostgreSQL database URL.
pool_size: Connection pool size.
echo: Enable SQL echo logging.
Returns:
Async session factory.
"""
engine = sa_create_async_engine(
database_url,
pool_size=pool_size,
echo=echo,
pool_pre_ping=True,
)
return async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
autocommit=False,
autoflush=False,
)
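A usage sketch for the URL-based factory and session generator (not part of the repository); the connection URL is a placeholder.

import asyncio
from noteflow.infrastructure.persistence.database import (
    create_async_session_factory,
    get_async_session,
)

async def main() -> None:
    factory = create_async_session_factory(
        "postgresql+asyncpg://noteflow:noteflow@localhost/noteflow",  # example URL
    )
    async for session in get_async_session(factory):
        # Use the session; it is closed when the generator exits.
        print(session.is_active)

asyncio.run(main())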
File: src/noteflow/infrastructure/security/__init__.py
"""Security infrastructure module.
Provides encryption and key management using OS credential stores.
"""
from noteflow.infrastructure.security.crypto import (
AesGcmCryptoBox,
ChunkedAssetReader,
ChunkedAssetWriter,
)
from noteflow.infrastructure.security.keystore import InMemoryKeyStore, KeyringKeyStore
from noteflow.infrastructure.security.protocols import (
CryptoBox,
EncryptedAssetReader,
EncryptedAssetWriter,
EncryptedChunk,
KeyStore,
)
__all__ = [
"AesGcmCryptoBox",
"ChunkedAssetReader",
"ChunkedAssetWriter",
"CryptoBox",
"EncryptedAssetReader",
"EncryptedAssetWriter",
"EncryptedChunk",
"InMemoryKeyStore",
"KeyStore",
"KeyringKeyStore",
]
File: src/noteflow/infrastructure/security/crypto.py
"""Cryptographic operations implementation using cryptography library.
Provides AES-GCM encryption for audio data with envelope encryption.
"""
from __future__ import annotations
import logging
import secrets
import struct
from collections.abc import Iterator
from pathlib import Path
from typing import TYPE_CHECKING, BinaryIO, Final
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from noteflow.infrastructure.security.protocols import EncryptedChunk
if TYPE_CHECKING:
from noteflow.infrastructure.security.keystore import InMemoryKeyStore, KeyringKeyStore
logger = logging.getLogger(__name__)
# Constants
KEY_SIZE: Final[int] = 32 # 256-bit key
NONCE_SIZE: Final[int] = 12 # 96-bit nonce for AES-GCM
TAG_SIZE: Final[int] = 16 # 128-bit authentication tag
# File format magic number and version
FILE_MAGIC: Final[bytes] = b"NFAE" # NoteFlow Audio Encrypted
FILE_VERSION: Final[int] = 1
class AesGcmCryptoBox:
"""AES-GCM based encryption with envelope encryption.
Uses a master key to wrap/unwrap per-meeting Data Encryption Keys (DEKs).
Each audio chunk is encrypted with AES-256-GCM using the DEK.
"""
def __init__(self, keystore: KeyringKeyStore | InMemoryKeyStore) -> None:
"""Initialize the crypto box.
Args:
keystore: KeyStore instance for master key access.
"""
self._keystore = keystore
self._master_cipher: AESGCM | None = None
def _get_master_cipher(self) -> AESGCM:
"""Get or create the master key cipher."""
if self._master_cipher is None:
master_key = self._keystore.get_or_create_master_key()
self._master_cipher = AESGCM(master_key)
return self._master_cipher
def generate_dek(self) -> bytes:
"""Generate a new Data Encryption Key.
Returns:
32-byte random DEK.
"""
return secrets.token_bytes(KEY_SIZE)
def wrap_dek(self, dek: bytes) -> bytes:
"""Encrypt DEK with master key.
Args:
dek: Data Encryption Key to wrap.
Returns:
Encrypted DEK (nonce || ciphertext || tag).
"""
cipher = self._get_master_cipher()
nonce = secrets.token_bytes(NONCE_SIZE)
ciphertext = cipher.encrypt(nonce, dek, associated_data=None)
# Return nonce || ciphertext (tag is appended by AESGCM)
return nonce + ciphertext
def unwrap_dek(self, wrapped_dek: bytes) -> bytes:
"""Decrypt DEK with master key.
Args:
wrapped_dek: Encrypted DEK from wrap_dek().
Returns:
Original DEK.
Raises:
ValueError: If decryption fails.
"""
if len(wrapped_dek) < NONCE_SIZE + KEY_SIZE + TAG_SIZE:
raise ValueError("Invalid wrapped DEK: too short")
cipher = self._get_master_cipher()
nonce = wrapped_dek[:NONCE_SIZE]
ciphertext = wrapped_dek[NONCE_SIZE:]
try:
return cipher.decrypt(nonce, ciphertext, associated_data=None)
except Exception as e:
raise ValueError(f"DEK unwrap failed: {e}") from e
def encrypt_chunk(self, plaintext: bytes, dek: bytes) -> EncryptedChunk:
"""Encrypt a chunk of data with AES-GCM.
Args:
plaintext: Data to encrypt.
dek: Data Encryption Key.
Returns:
EncryptedChunk with nonce, ciphertext, and tag.
"""
cipher = AESGCM(dek)
nonce = secrets.token_bytes(NONCE_SIZE)
# AESGCM appends the tag to ciphertext
ciphertext_with_tag = cipher.encrypt(nonce, plaintext, associated_data=None)
# Split ciphertext and tag
ciphertext = ciphertext_with_tag[:-TAG_SIZE]
tag = ciphertext_with_tag[-TAG_SIZE:]
return EncryptedChunk(nonce=nonce, ciphertext=ciphertext, tag=tag)
def decrypt_chunk(self, chunk: EncryptedChunk, dek: bytes) -> bytes:
"""Decrypt a chunk of data.
Args:
chunk: EncryptedChunk to decrypt.
dek: Data Encryption Key.
Returns:
Original plaintext.
Raises:
ValueError: If decryption fails.
"""
cipher = AESGCM(dek)
# Reconstruct ciphertext with tag for AESGCM
ciphertext_with_tag = chunk.ciphertext + chunk.tag
try:
return cipher.decrypt(chunk.nonce, ciphertext_with_tag, associated_data=None)
except Exception as e:
raise ValueError(f"Chunk decryption failed: {e}") from e
class ChunkedAssetWriter:
"""Streaming encrypted asset writer.
File format:
- 4 bytes: magic ("NFAE")
- 1 byte: version
- For each chunk:
- 4 bytes: chunk length (big-endian)
- 12 bytes: nonce
- N bytes: ciphertext
- 16 bytes: tag
"""
def __init__(self, crypto: AesGcmCryptoBox) -> None:
"""Initialize the writer.
Args:
crypto: CryptoBox instance for encryption.
"""
self._crypto = crypto
self._file: Path | None = None
self._dek: bytes | None = None
self._handle: BinaryIO | None = None
self._bytes_written: int = 0
def open(self, path: Path, dek: bytes) -> None:
"""Open file for writing.
Args:
path: Path to the encrypted file.
dek: Data Encryption Key for this file.
"""
if self._handle is not None:
raise RuntimeError("Already open")
self._file = path
self._dek = dek
self._handle = path.open("wb")
self._bytes_written = 0
# Write header
self._handle.write(FILE_MAGIC)
self._handle.write(struct.pack("B", FILE_VERSION))
logger.debug("Opened encrypted file for writing: %s", path)
def write_chunk(self, audio_bytes: bytes) -> None:
"""Write and encrypt an audio chunk."""
if self._handle is None or self._dek is None:
raise RuntimeError("File not open")
# Encrypt the chunk
chunk = self._crypto.encrypt_chunk(audio_bytes, self._dek)
# Calculate total chunk size (nonce + ciphertext + tag)
chunk_data = chunk.nonce + chunk.ciphertext + chunk.tag
chunk_length = len(chunk_data)
# Write length prefix and chunk data
self._handle.write(struct.pack(">I", chunk_length))
self._handle.write(chunk_data)
self._handle.flush()
self._bytes_written += 4 + chunk_length
def close(self) -> None:
"""Finalize and close the file."""
if self._handle is not None:
self._handle.close()
self._handle = None
logger.debug("Closed encrypted file, wrote %d bytes", self._bytes_written)
self._dek = None
@property
def is_open(self) -> bool:
"""Check if file is open for writing."""
return self._handle is not None
@property
def bytes_written(self) -> int:
"""Total encrypted bytes written."""
return self._bytes_written
class ChunkedAssetReader:
"""Streaming encrypted asset reader."""
def __init__(self, crypto: AesGcmCryptoBox) -> None:
"""Initialize the reader.
Args:
crypto: CryptoBox instance for decryption.
"""
self._crypto = crypto
self._file: Path | None = None
self._dek: bytes | None = None
self._handle: BinaryIO | None = None
def open(self, path: Path, dek: bytes) -> None:
"""Open file for reading."""
if self._handle is not None:
raise RuntimeError("Already open")
self._file = path
self._dek = dek
self._handle = path.open("rb")
# Read and validate header
magic = self._handle.read(4)
if magic != FILE_MAGIC:
self._handle.close()
self._handle = None
raise ValueError(f"Invalid file format: expected {FILE_MAGIC!r}, got {magic!r}")
        version_byte = self._handle.read(1)
        if len(version_byte) != 1:
            self._handle.close()
            self._handle = None
            raise ValueError("Invalid file format: truncated header")
        version = struct.unpack("B", version_byte)[0]
        if version != FILE_VERSION:
            self._handle.close()
            self._handle = None
            raise ValueError(f"Unsupported file version: {version}")
logger.debug("Opened encrypted file for reading: %s", path)
def read_chunks(self) -> Iterator[bytes]:
"""Yield decrypted audio chunks."""
if self._handle is None or self._dek is None:
raise RuntimeError("File not open")
while True:
# Read chunk length
length_bytes = self._handle.read(4)
if len(length_bytes) < 4:
break # End of file
chunk_length = struct.unpack(">I", length_bytes)[0]
# Read chunk data
chunk_data = self._handle.read(chunk_length)
if len(chunk_data) < chunk_length:
raise ValueError("Truncated chunk")
# Parse chunk (nonce + ciphertext + tag)
nonce = chunk_data[:NONCE_SIZE]
ciphertext = chunk_data[NONCE_SIZE:-TAG_SIZE]
tag = chunk_data[-TAG_SIZE:]
chunk = EncryptedChunk(nonce=nonce, ciphertext=ciphertext, tag=tag)
# Decrypt and yield
yield self._crypto.decrypt_chunk(chunk, self._dek)
def close(self) -> None:
"""Close the file."""
if self._handle is not None:
self._handle.close()
self._handle = None
logger.debug("Closed encrypted file")
self._dek = None
@property
def is_open(self) -> bool:
"""Check if file is open for reading."""
return self._handle is not None
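# --- Usage sketch (illustrative, not part of the original module): round-trip
# one chunk through the envelope-encryption pipeline above. Assumes the
# InMemoryKeyStore from keystore.py and a writable working directory.
if __name__ == "__main__":
    from pathlib import Path as _Path
    from noteflow.infrastructure.security.keystore import InMemoryKeyStore
    crypto = AesGcmCryptoBox(InMemoryKeyStore())
    dek = crypto.generate_dek()
    wrapped = crypto.wrap_dek(dek)  # the wrapped DEK is what gets persisted
    writer = ChunkedAssetWriter(crypto)
    writer.open(_Path("audio.enc"), dek)
    writer.write_chunk(b"\x00" * 3200)  # one raw audio chunk
    writer.close()
    reader = ChunkedAssetReader(crypto)
    reader.open(_Path("audio.enc"), crypto.unwrap_dek(wrapped))
    assert list(reader.read_chunks()) == [b"\x00" * 3200]
    reader.close()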
File: src/noteflow/infrastructure/security/keystore.py
"""Keystore implementation using the keyring library.
Provides secure master key storage using OS credential stores.
"""
from __future__ import annotations
import base64
import logging
import os
import secrets
from typing import Final
import keyring
logger = logging.getLogger(__name__)
# Constants
KEY_SIZE: Final[int] = 32 # 256-bit key
SERVICE_NAME: Final[str] = "noteflow"
KEY_NAME: Final[str] = "master_key"
ENV_VAR_NAME: Final[str] = "NOTEFLOW_MASTER_KEY"
class KeyringKeyStore:
"""keyring-based key storage using OS credential store.
Uses:
- macOS: Keychain
- Windows: Credential Manager
- Linux: SecretService (GNOME Keyring, KWallet)
"""
def __init__(
self,
service_name: str = SERVICE_NAME,
key_name: str = KEY_NAME,
) -> None:
"""Initialize the keystore.
Args:
service_name: Service identifier for keyring.
key_name: Key identifier within the service.
"""
self._service_name = service_name
self._key_name = key_name
def get_or_create_master_key(self) -> bytes:
"""Retrieve or generate the master encryption key.
Checks for an environment variable first (for headless/container deployments),
then falls back to the OS keyring.
Returns:
32-byte master key.
Raises:
RuntimeError: If keychain is unavailable and no env var is set.
"""
# Check environment variable first (for headless/container deployments)
if env_key := os.environ.get(ENV_VAR_NAME):
logger.debug("Using master key from environment variable")
return base64.b64decode(env_key)
try:
# Try to retrieve existing key from keyring
stored = keyring.get_password(self._service_name, self._key_name)
if stored is not None:
logger.debug("Retrieved existing master key from keyring")
return base64.b64decode(stored)
# Generate new key
new_key = secrets.token_bytes(KEY_SIZE)
encoded = base64.b64encode(new_key).decode("ascii")
# Store in keyring
keyring.set_password(self._service_name, self._key_name, encoded)
logger.info("Generated and stored new master key in keyring")
return new_key
except keyring.errors.KeyringError as e:
raise RuntimeError(
f"Keyring unavailable: {e}. "
f"Set {ENV_VAR_NAME} environment variable for headless mode."
) from e
def delete_master_key(self) -> None:
"""Delete the master key from the keychain.
Safe to call if key doesn't exist.
"""
try:
keyring.delete_password(self._service_name, self._key_name)
logger.info("Deleted master key")
except keyring.errors.PasswordDeleteError:
# Key doesn't exist, that's fine
logger.debug("Master key not found, nothing to delete")
except keyring.errors.KeyringError as e:
logger.warning("Failed to delete master key: %s", e)
def has_master_key(self) -> bool:
"""Check if master key exists in the keychain.
Returns:
True if master key exists.
"""
try:
stored = keyring.get_password(self._service_name, self._key_name)
return stored is not None
except keyring.errors.KeyringError:
return False
@property
def service_name(self) -> str:
"""Get the service name used for keyring."""
return self._service_name
@property
def key_name(self) -> str:
"""Get the key name used for keyring."""
return self._key_name
class InMemoryKeyStore:
"""In-memory key storage for testing.
Keys are lost when the process exits.
"""
def __init__(self) -> None:
"""Initialize the in-memory keystore."""
self._key: bytes | None = None
def get_or_create_master_key(self) -> bytes:
"""Retrieve or generate the master encryption key."""
if self._key is None:
self._key = secrets.token_bytes(KEY_SIZE)
logger.debug("Generated in-memory master key")
return self._key
def delete_master_key(self) -> None:
"""Delete the master key."""
self._key = None
logger.debug("Deleted in-memory master key")
def has_master_key(self) -> bool:
"""Check if master key exists."""
return self._key is not None
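# --- Usage sketch (illustrative, not part of the original module): headless
# deployments can supply the master key via NOTEFLOW_MASTER_KEY instead of an
# OS keyring; get_or_create_master_key() checks the env var first.
if __name__ == "__main__":
    encoded = base64.b64encode(secrets.token_bytes(KEY_SIZE)).decode("ascii")
    os.environ[ENV_VAR_NAME] = encoded  # e.g. exported by a container entrypoint
    store = KeyringKeyStore()
    assert store.get_or_create_master_key() == base64.b64decode(encoded)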
File: src/noteflow/infrastructure/security/protocols.py
"""Security protocols and data types.
These protocols define the contracts for key storage and encryption components.
"""
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol
@dataclass(frozen=True)
class EncryptedChunk:
"""An encrypted chunk of data with authentication tag."""
nonce: bytes # Unique nonce for this chunk
ciphertext: bytes # Encrypted data
tag: bytes # Authentication tag
class KeyStore(Protocol):
"""Protocol for OS keychain access.
Implementations should use the OS credential store (Keychain, Credential Manager)
to securely store the master encryption key.
"""
def get_or_create_master_key(self) -> bytes:
"""Retrieve or generate the master encryption key.
If the master key doesn't exist, generates a new 32-byte key
and stores it in the OS keychain.
Returns:
32-byte master key.
Raises:
RuntimeError: If keychain is unavailable or locked.
"""
...
def delete_master_key(self) -> None:
"""Delete the master key from the keychain.
This renders all encrypted data permanently unrecoverable.
Safe to call if key doesn't exist.
"""
...
def has_master_key(self) -> bool:
"""Check if master key exists in the keychain.
Returns:
True if master key exists.
"""
...
class CryptoBox(Protocol):
"""Protocol for envelope encryption with per-meeting keys.
Uses a master key to wrap/unwrap Data Encryption Keys (DEKs),
which are used to encrypt actual meeting data.
"""
def generate_dek(self) -> bytes:
"""Generate a new Data Encryption Key.
Returns:
32-byte random DEK.
"""
...
def wrap_dek(self, dek: bytes) -> bytes:
"""Encrypt DEK with master key.
Args:
dek: Data Encryption Key to wrap.
Returns:
Encrypted DEK (can be stored in DB).
"""
...
def unwrap_dek(self, wrapped_dek: bytes) -> bytes:
"""Decrypt DEK with master key.
Args:
wrapped_dek: Encrypted DEK from wrap_dek().
Returns:
Original DEK.
Raises:
ValueError: If decryption fails (invalid or tampered).
"""
...
def encrypt_chunk(self, plaintext: bytes, dek: bytes) -> EncryptedChunk:
"""Encrypt a chunk of data with AES-GCM.
Args:
plaintext: Data to encrypt.
dek: Data Encryption Key.
Returns:
EncryptedChunk with nonce, ciphertext, and tag.
"""
...
def decrypt_chunk(self, chunk: EncryptedChunk, dek: bytes) -> bytes:
"""Decrypt a chunk of data.
Args:
chunk: EncryptedChunk to decrypt.
dek: Data Encryption Key.
Returns:
Original plaintext.
Raises:
ValueError: If decryption fails (invalid or tampered).
"""
...
class EncryptedAssetWriter(Protocol):
"""Protocol for streaming encrypted audio writer.
Writes audio chunks encrypted with a DEK to a file.
"""
def open(self, path: Path, dek: bytes) -> None:
"""Open file for writing.
Args:
path: Path to the encrypted file.
dek: Data Encryption Key for this file.
Raises:
RuntimeError: If already open.
OSError: If file cannot be created.
"""
...
def write_chunk(self, audio_bytes: bytes) -> None:
"""Write and encrypt an audio chunk.
Args:
audio_bytes: Raw audio data to encrypt and write.
Raises:
RuntimeError: If not open.
"""
...
def close(self) -> None:
"""Finalize and close the file.
Safe to call if already closed.
"""
...
@property
def is_open(self) -> bool:
"""Check if file is open for writing."""
...
@property
def bytes_written(self) -> int:
"""Total encrypted bytes written."""
...
class EncryptedAssetReader(Protocol):
"""Protocol for streaming encrypted audio reader.
Reads and decrypts audio chunks from a file.
"""
def open(self, path: Path, dek: bytes) -> None:
"""Open file for reading.
Args:
path: Path to the encrypted file.
dek: Data Encryption Key for this file.
Raises:
RuntimeError: If already open.
OSError: If file cannot be read.
ValueError: If file format is invalid.
"""
...
def read_chunks(self) -> Iterator[bytes]:
"""Yield decrypted audio chunks.
Yields:
Decrypted audio data chunks.
Raises:
RuntimeError: If not open.
ValueError: If decryption fails.
"""
...
def close(self) -> None:
"""Close the file.
Safe to call if already closed.
"""
...
@property
def is_open(self) -> bool:
"""Check if file is open for reading."""
...
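# --- Usage sketch (illustrative, not part of the original module): the
# Protocol classes are structural, so helpers can be typed against the
# contract without importing a concrete implementation. reencrypt_chunk is a
# hypothetical key-rotation helper, not part of the codebase.
def reencrypt_chunk(
    box: CryptoBox,
    chunk: EncryptedChunk,
    old_dek: bytes,
    new_dek: bytes,
) -> EncryptedChunk:
    """Decrypt with one DEK and re-encrypt with another (sketch only)."""
    return box.encrypt_chunk(box.decrypt_chunk(chunk, old_dek), new_dek)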
File: src/noteflow/infrastructure/summarization/citation_verifier.py
"""Citation verification implementation."""
from __future__ import annotations
from typing import TYPE_CHECKING
from noteflow.domain.summarization import CitationVerificationResult
if TYPE_CHECKING:
from collections.abc import Sequence
from noteflow.domain.entities import Segment, Summary
class SegmentCitationVerifier:
"""Verify that summary citations reference valid segments.
Checks that all segment_ids in key points and action items
correspond to actual segments in the transcript.
"""
def verify_citations(
self,
summary: Summary,
segments: Sequence[Segment],
) -> CitationVerificationResult:
"""Verify all segment_ids exist in the transcript.
Args:
summary: Summary with key points and action items to verify.
segments: Available transcript segments.
Returns:
CitationVerificationResult with validation status and details.
"""
# Build set of valid segment IDs
valid_segment_ids = {seg.segment_id for seg in segments}
# Track invalid citations
invalid_key_point_indices: list[int] = []
invalid_action_item_indices: list[int] = []
missing_segment_ids: set[int] = set()
# Verify key points
for idx, key_point in enumerate(summary.key_points):
for seg_id in key_point.segment_ids:
if seg_id not in valid_segment_ids:
if idx not in invalid_key_point_indices:
invalid_key_point_indices.append(idx)
missing_segment_ids.add(seg_id)
# Verify action items
for idx, action_item in enumerate(summary.action_items):
for seg_id in action_item.segment_ids:
if seg_id not in valid_segment_ids:
if idx not in invalid_action_item_indices:
invalid_action_item_indices.append(idx)
missing_segment_ids.add(seg_id)
is_valid = not invalid_key_point_indices and not invalid_action_item_indices
return CitationVerificationResult(
is_valid=is_valid,
invalid_key_point_indices=tuple(invalid_key_point_indices),
invalid_action_item_indices=tuple(invalid_action_item_indices),
missing_segment_ids=tuple(sorted(missing_segment_ids)),
)
def filter_invalid_citations(
self,
summary: Summary,
segments: Sequence[Segment],
) -> Summary:
"""Return a copy of the summary with invalid citations removed.
Invalid segment_ids are removed from key points and action items.
Items with no remaining citations keep empty segment_ids lists.
Args:
summary: Summary to filter.
segments: Available transcript segments.
Returns:
New Summary with invalid citations removed.
"""
valid_segment_ids = {seg.segment_id for seg in segments}
        # Runtime import: the top-level entity imports are TYPE_CHECKING-only.
        from noteflow.domain.entities import ActionItem, KeyPoint
        from noteflow.domain.entities import Summary as SummaryEntity
        # Filter key point citations
        filtered_key_points = [
KeyPoint(
text=kp.text,
segment_ids=[sid for sid in kp.segment_ids if sid in valid_segment_ids],
start_time=kp.start_time,
end_time=kp.end_time,
db_id=kp.db_id,
)
for kp in summary.key_points
]
# Filter action item citations
filtered_action_items = [
ActionItem(
text=ai.text,
assignee=ai.assignee,
due_date=ai.due_date,
priority=ai.priority,
segment_ids=[sid for sid in ai.segment_ids if sid in valid_segment_ids],
db_id=ai.db_id,
)
for ai in summary.action_items
]
return SummaryEntity(
meeting_id=summary.meeting_id,
executive_summary=summary.executive_summary,
key_points=filtered_key_points,
action_items=filtered_action_items,
generated_at=summary.generated_at,
model_version=summary.model_version,
db_id=summary.db_id,
)
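# --- Usage sketch (illustrative, not part of the original module): verify
# first, then strip any citations that point at nonexistent segments.
if __name__ == "__main__":
    from uuid import uuid4
    from noteflow.domain.entities import KeyPoint, Segment, Summary
    from noteflow.domain.value_objects import MeetingId
    segments = [Segment(segment_id=0, text="hello world", start_time=0.0, end_time=1.0)]
    summary = Summary(
        meeting_id=MeetingId(uuid4()),
        key_points=[KeyPoint(text="cited point", segment_ids=[0, 99])],  # 99 is invalid
    )
    verifier = SegmentCitationVerifier()
    result = verifier.verify_citations(summary, segments)
    assert not result.is_valid and result.missing_segment_ids == (99,)
    cleaned = verifier.filter_invalid_citations(summary, segments)
    assert cleaned.key_points[0].segment_ids == [0]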
File: src/noteflow/infrastructure/summarization/factory.py
"""Factory for creating configured SummarizationService instances."""
from __future__ import annotations
import logging
from noteflow.application.services.summarization_service import (
SummarizationMode,
SummarizationService,
SummarizationServiceSettings,
)
from noteflow.infrastructure.summarization.citation_verifier import SegmentCitationVerifier
from noteflow.infrastructure.summarization.mock_provider import MockSummarizer
from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer
logger = logging.getLogger(__name__)
def create_summarization_service(
default_mode: SummarizationMode = SummarizationMode.LOCAL,
include_local: bool = True,
include_mock: bool = True,
verify_citations: bool = True,
filter_invalid_citations: bool = True,
) -> SummarizationService:
"""Create a fully-configured SummarizationService.
Auto-detects provider availability. Falls back to MOCK if LOCAL unavailable.
Args:
default_mode: Preferred summarization mode.
include_local: Register OllamaSummarizer (checked at runtime).
include_mock: Register MockSummarizer (always available).
verify_citations: Enable citation verification.
filter_invalid_citations: Remove invalid citations from output.
Returns:
Configured SummarizationService ready for use.
"""
service = SummarizationService(
settings=SummarizationServiceSettings(
default_mode=default_mode,
fallback_to_local=True, # Enables LOCAL → MOCK fallback
verify_citations=verify_citations,
filter_invalid_citations=filter_invalid_citations,
),
)
# Always register MOCK as fallback
if include_mock:
service.register_provider(SummarizationMode.MOCK, MockSummarizer())
logger.debug("Registered MOCK summarization provider")
# Register LOCAL (Ollama) - availability checked at runtime
if include_local:
ollama = OllamaSummarizer()
service.register_provider(SummarizationMode.LOCAL, ollama)
if ollama.is_available:
logger.info("Registered LOCAL (Ollama) summarization provider - available")
else:
logger.info(
"Registered LOCAL (Ollama) summarization provider - unavailable, will fallback"
)
# Set citation verifier
if verify_citations:
service.set_verifier(SegmentCitationVerifier())
logger.debug("Citation verification enabled")
return service
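# --- Usage sketch (illustrative, not part of the original module): force the
# MOCK provider, e.g. in tests where no Ollama instance is running.
if __name__ == "__main__":
    service = create_summarization_service(
        default_mode=SummarizationMode.MOCK,
        include_local=False,  # skip Ollama registration entirely
    )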
File: src/noteflow/infrastructure/triggers/__init__.py
"""Trigger infrastructure module.
Provides signal providers for meeting detection triggers.
"""
from noteflow.infrastructure.triggers.app_audio import AppAudioProvider, AppAudioSettings
from noteflow.infrastructure.triggers.audio_activity import (
AudioActivityProvider,
AudioActivitySettings,
)
from noteflow.infrastructure.triggers.calendar import CalendarProvider, CalendarSettings
from noteflow.infrastructure.triggers.foreground_app import (
ForegroundAppProvider,
ForegroundAppSettings,
)
__all__ = [
"AppAudioProvider",
"AppAudioSettings",
"AudioActivityProvider",
"AudioActivitySettings",
"CalendarProvider",
"CalendarSettings",
"ForegroundAppProvider",
"ForegroundAppSettings",
]
File: src/noteflow/infrastructure/triggers/foreground_app.py
"""Foreground app detection using PyWinCtl.
Detects meeting applications in the foreground window.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource
logger = logging.getLogger(__name__)
@dataclass
class ForegroundAppSettings:
"""Configuration for foreground app detection.
Attributes:
enabled: Whether foreground app detection is enabled.
weight: Confidence weight contributed by this provider.
meeting_apps: Set of app name substrings to match (lowercase).
suppressed_apps: Apps to ignore even if they match meeting_apps.
"""
enabled: bool
weight: float
meeting_apps: set[str] = field(default_factory=set)
suppressed_apps: set[str] = field(default_factory=set)
def __post_init__(self) -> None:
self.meeting_apps = {app.lower() for app in self.meeting_apps}
self.suppressed_apps = {app.lower() for app in self.suppressed_apps}
class ForegroundAppProvider:
"""Detect meeting apps in foreground using PyWinCtl.
PyWinCtl provides cross-platform active window detection for
Linux (X11/Wayland), macOS, and Windows.
"""
def __init__(self, settings: ForegroundAppSettings) -> None:
"""Initialize foreground app provider.
Args:
settings: Configuration settings for foreground app detection.
"""
self._settings = settings
self._available: bool | None = None
@property
def source(self) -> TriggerSource:
"""Get the source type for this provider."""
return TriggerSource.FOREGROUND_APP
@property
def max_weight(self) -> float:
"""Get the maximum weight this provider can contribute."""
return self._settings.weight
def is_enabled(self) -> bool:
"""Check if this provider is enabled and available."""
return self._settings.enabled and self._is_available()
def _is_available(self) -> bool:
"""Check if PyWinCtl is available and working."""
if self._available is not None:
return self._available
try:
import pywinctl
# Try to get active window to verify it works
_ = pywinctl.getActiveWindow()
self._available = True
logger.debug("PyWinCtl available for foreground detection")
except ImportError:
self._available = False
logger.warning("PyWinCtl not installed - foreground detection disabled")
except Exception as e:
self._available = False
logger.warning("PyWinCtl unavailable: %s - foreground detection disabled", e)
return self._available
def get_signal(self) -> TriggerSignal | None:
"""Get current signal if meeting app is in foreground.
Returns:
TriggerSignal if a meeting app is detected, None otherwise.
"""
if not self.is_enabled():
return None
try:
import pywinctl
window = pywinctl.getActiveWindow()
if not window:
return None
title = window.title
if not title:
return None
title_lower = title.lower()
# Check if app is suppressed
for suppressed in self._settings.suppressed_apps:
if suppressed in title_lower:
return None
# Check if it's a meeting app
for app in self._settings.meeting_apps:
if app in title_lower:
return TriggerSignal(
source=self.source,
weight=self.max_weight,
app_name=title,
)
except Exception as e:
logger.debug("Foreground detection error: %s", e)
return None
def suppress_app(self, app_name: str) -> None:
"""Add an app to the suppression list.
Args:
app_name: App name substring to suppress (will be lowercased).
"""
self._settings.suppressed_apps.add(app_name.lower())
logger.info("Suppressed app: %s", app_name)
def unsuppress_app(self, app_name: str) -> None:
"""Remove an app from the suppression list.
Args:
app_name: App name substring to unsuppress.
"""
self._settings.suppressed_apps.discard(app_name.lower())
def add_meeting_app(self, app_name: str) -> None:
"""Add an app to the meeting apps list.
Args:
app_name: App name substring to add (will be lowercased).
"""
self._settings.meeting_apps.add(app_name.lower())
@property
def suppressed_apps(self) -> frozenset[str]:
"""Get current suppressed apps."""
return frozenset(self._settings.suppressed_apps)
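# --- Usage sketch (illustrative, not part of the original module): poll the
# provider for a signal. The weight of 0.4 is an arbitrary example value.
if __name__ == "__main__":
    settings = ForegroundAppSettings(
        enabled=True,
        weight=0.4,
        meeting_apps={"Zoom", "Microsoft Teams"},  # lowercased by __post_init__
    )
    provider = ForegroundAppProvider(settings)
    if provider.is_enabled():
        signal = provider.get_signal()  # TriggerSignal or None
        print(signal)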
File: src/noteflow/__init__.py
"""NoteFlow - Intelligent Meeting Notetaker."""
__version__ = "0.1.0"
File: src/noteflow_pb2.py
# Compatibility shim for generated gRPC stubs.
# The generated `noteflow_pb2_grpc.py` imports a top-level `noteflow_pb2` module.
# Re-export the packaged definitions to satisfy that import while keeping the
# compiled protobufs under `noteflow.grpc.proto`.
from noteflow.grpc.proto.noteflow_pb2 import * # noqa: F401,F403
File: tests/application/__init__.py
"""Application layer unit tests."""
File: tests/application/test_recovery_service.py
"""Tests for RecoveryService application service."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
from noteflow.application.services.recovery_service import RecoveryService
from noteflow.domain.entities import Meeting
from noteflow.domain.value_objects import MeetingState
@pytest.fixture
def mock_uow() -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.meetings = MagicMock()
return uow
class TestRecoveryServiceRecovery:
"""Tests for crash recovery operations."""
async def test_recover_no_crashed_meetings(self, mock_uow: MagicMock) -> None:
"""Test recovery with no crashed meetings."""
mock_uow.meetings.list_all = AsyncMock(return_value=([], 0))
service = RecoveryService(mock_uow)
result = await service.recover_crashed_meetings()
assert result == []
mock_uow.commit.assert_not_called()
async def test_recover_single_recording_meeting(self, mock_uow: MagicMock) -> None:
"""Test recovery of a meeting left in RECORDING state."""
meeting = Meeting.create(title="Crashed Recording")
meeting.start_recording() # Put in RECORDING state
assert meeting.state == MeetingState.RECORDING
mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1))
mock_uow.meetings.update = AsyncMock(return_value=meeting)
service = RecoveryService(mock_uow)
result = await service.recover_crashed_meetings()
assert len(result) == 1
assert result[0].state == MeetingState.ERROR
assert result[0].metadata["crash_recovered"] == "true"
assert result[0].metadata["crash_previous_state"] == "RECORDING"
assert "crash_recovery_time" in result[0].metadata
mock_uow.meetings.update.assert_called_once()
mock_uow.commit.assert_called_once()
async def test_recover_single_stopping_meeting(self, mock_uow: MagicMock) -> None:
"""Test recovery of a meeting left in STOPPING state."""
meeting = Meeting.create(title="Crashed Stopping")
meeting.start_recording()
meeting.begin_stopping() # Put in STOPPING state
assert meeting.state == MeetingState.STOPPING
mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1))
mock_uow.meetings.update = AsyncMock(return_value=meeting)
service = RecoveryService(mock_uow)
result = await service.recover_crashed_meetings()
assert len(result) == 1
assert result[0].state == MeetingState.ERROR
assert result[0].metadata["crash_previous_state"] == "STOPPING"
mock_uow.commit.assert_called_once()
async def test_recover_multiple_crashed_meetings(self, mock_uow: MagicMock) -> None:
"""Test recovery of multiple crashed meetings."""
meeting1 = Meeting.create(title="Crashed 1")
meeting1.start_recording()
meeting2 = Meeting.create(title="Crashed 2")
meeting2.start_recording()
meeting2.begin_stopping()
meeting3 = Meeting.create(title="Crashed 3")
meeting3.start_recording()
meetings = [meeting1, meeting2, meeting3]
mock_uow.meetings.list_all = AsyncMock(return_value=(meetings, 3))
mock_uow.meetings.update = AsyncMock(side_effect=meetings)
service = RecoveryService(mock_uow)
result = await service.recover_crashed_meetings()
assert len(result) == 3
assert all(m.state == MeetingState.ERROR for m in result)
assert result[0].metadata["crash_previous_state"] == "RECORDING"
assert result[1].metadata["crash_previous_state"] == "STOPPING"
assert result[2].metadata["crash_previous_state"] == "RECORDING"
assert mock_uow.meetings.update.call_count == 3
mock_uow.commit.assert_called_once()
class TestRecoveryServiceCounting:
"""Tests for counting crashed meetings."""
async def test_count_no_crashed_meetings(self, mock_uow: MagicMock) -> None:
"""Test counting with no crashed meetings."""
mock_uow.meetings.count_by_state = AsyncMock(return_value=0)
service = RecoveryService(mock_uow)
result = await service.count_crashed_meetings()
assert result == 0
assert mock_uow.meetings.count_by_state.call_count == 2
async def test_count_crashed_meetings_both_states(self, mock_uow: MagicMock) -> None:
"""Test counting meetings in both active states."""
async def count_by_state(state: MeetingState) -> int:
state_counts = {
MeetingState.RECORDING: 3,
MeetingState.STOPPING: 2,
}
return state_counts.get(state, 0)
mock_uow.meetings.count_by_state = AsyncMock(side_effect=count_by_state)
service = RecoveryService(mock_uow)
result = await service.count_crashed_meetings()
assert result == 5 # 3 RECORDING + 2 STOPPING
class TestRecoveryServiceMetadata:
"""Tests for recovery metadata handling."""
async def test_recovery_preserves_existing_metadata(self, mock_uow: MagicMock) -> None:
"""Test recovery preserves existing meeting metadata."""
meeting = Meeting.create(
title="Has Metadata",
metadata={"project": "NoteFlow", "important": "yes"},
)
meeting.start_recording()
mock_uow.meetings.list_all = AsyncMock(return_value=([meeting], 1))
mock_uow.meetings.update = AsyncMock(return_value=meeting)
service = RecoveryService(mock_uow)
result = await service.recover_crashed_meetings()
assert len(result) == 1
# Verify original metadata preserved
assert result[0].metadata["project"] == "NoteFlow"
assert result[0].metadata["important"] == "yes"
# Verify recovery metadata added
assert result[0].metadata["crash_recovered"] == "true"
assert result[0].metadata["crash_previous_state"] == "RECORDING"
File: tests/domain/__init__.py
"""Domain unit tests."""
File: tests/domain/test_annotation.py
"""Tests for Annotation entity."""
from __future__ import annotations
from uuid import uuid4
import pytest
from noteflow.domain.entities.annotation import Annotation
from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId
class TestAnnotation:
"""Tests for Annotation entity."""
def test_annotation_valid(self) -> None:
"""Annotation can be created with valid fields."""
annotation = Annotation(
id=AnnotationId(uuid4()),
meeting_id=MeetingId(uuid4()),
annotation_type=AnnotationType.NOTE,
text="Important point",
start_time=1.0,
end_time=2.0,
)
assert annotation.text == "Important point"
assert annotation.duration == 1.0
assert annotation.has_segments() is False
def test_annotation_invalid_times_raises(self) -> None:
"""Annotation raises when end_time < start_time."""
with pytest.raises(ValueError, match=r"end_time .* must be >= start_time"):
Annotation(
id=AnnotationId(uuid4()),
meeting_id=MeetingId(uuid4()),
annotation_type=AnnotationType.DECISION,
text="Bad timing",
start_time=5.0,
end_time=2.0,
)
def test_annotation_has_segments(self) -> None:
"""has_segments reflects segment_ids list."""
annotation = Annotation(
id=AnnotationId(uuid4()),
meeting_id=MeetingId(uuid4()),
annotation_type=AnnotationType.ACTION_ITEM,
text="Follow up",
start_time=0.0,
end_time=1.0,
segment_ids=[1, 2],
)
assert annotation.has_segments() is True
assert annotation.duration == 1.0
File: tests/domain/test_segment.py
"""Tests for Segment and WordTiming entities."""
from __future__ import annotations
import pytest
from noteflow.domain.entities.segment import Segment, WordTiming
class TestWordTiming:
"""Tests for WordTiming entity."""
def test_word_timing_valid(self) -> None:
"""Test creating valid WordTiming."""
word = WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=0.95)
assert word.word == "hello"
assert word.start_time == 0.0
assert word.end_time == 0.5
assert word.probability == 0.95
def test_word_timing_invalid_times_raises(self) -> None:
"""Test WordTiming raises on end_time < start_time."""
with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"):
WordTiming(word="hello", start_time=1.0, end_time=0.5, probability=0.9)
@pytest.mark.parametrize("prob", [-0.1, 1.1, 2.0])
def test_word_timing_invalid_probability_raises(self, prob: float) -> None:
"""Test WordTiming raises on invalid probability."""
with pytest.raises(ValueError, match="probability must be between 0 and 1"):
WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=prob)
@pytest.mark.parametrize("prob", [0.0, 0.5, 1.0])
def test_word_timing_valid_probability_bounds(self, prob: float) -> None:
"""Test WordTiming accepts probability at boundaries."""
word = WordTiming(word="test", start_time=0.0, end_time=0.5, probability=prob)
assert word.probability == prob
class TestSegment:
"""Tests for Segment entity."""
def test_segment_valid(self) -> None:
"""Test creating valid Segment."""
segment = Segment(
segment_id=0,
text="Hello world",
start_time=0.0,
end_time=2.5,
language="en",
)
assert segment.segment_id == 0
assert segment.text == "Hello world"
assert segment.start_time == 0.0
assert segment.end_time == 2.5
assert segment.language == "en"
def test_segment_invalid_times_raises(self) -> None:
"""Test Segment raises on end_time < start_time."""
with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"):
Segment(segment_id=0, text="test", start_time=5.0, end_time=1.0)
def test_segment_invalid_id_raises(self) -> None:
"""Test Segment raises on negative segment_id."""
with pytest.raises(ValueError, match="segment_id must be non-negative"):
Segment(segment_id=-1, text="test", start_time=0.0, end_time=1.0)
def test_segment_duration(self) -> None:
"""Test duration property calculation."""
segment = Segment(segment_id=0, text="test", start_time=1.5, end_time=4.0)
assert segment.duration == 2.5
def test_segment_word_count_from_text(self) -> None:
"""Test word_count from text when no words list."""
segment = Segment(segment_id=0, text="Hello beautiful world", start_time=0.0, end_time=1.0)
assert segment.word_count == 3
def test_segment_word_count_from_words(self) -> None:
"""Test word_count from words list when provided."""
words = [
WordTiming(word="Hello", start_time=0.0, end_time=0.3, probability=0.9),
WordTiming(word="world", start_time=0.3, end_time=0.5, probability=0.95),
]
segment = Segment(
segment_id=0,
text="Hello world",
start_time=0.0,
end_time=0.5,
words=words,
)
assert segment.word_count == 2
def test_segment_has_embedding_false(self) -> None:
"""Test has_embedding returns False when no embedding."""
segment = Segment(segment_id=0, text="test", start_time=0.0, end_time=1.0)
assert segment.has_embedding() is False
def test_segment_has_embedding_empty_list(self) -> None:
"""Test has_embedding returns False for empty embedding list."""
segment = Segment(segment_id=0, text="test", start_time=0.0, end_time=1.0, embedding=[])
assert segment.has_embedding() is False
def test_segment_has_embedding_true(self) -> None:
"""Test has_embedding returns True when embedding exists."""
segment = Segment(
segment_id=0,
text="test",
start_time=0.0,
end_time=1.0,
embedding=[0.1, 0.2, 0.3],
)
assert segment.has_embedding() is True
File: tests/domain/test_summary.py
"""Tests for Summary, KeyPoint, and ActionItem entities."""
from __future__ import annotations
from datetime import datetime
from uuid import uuid4
import pytest
from noteflow.domain.entities.summary import ActionItem, KeyPoint, Summary
from noteflow.domain.value_objects import MeetingId
class TestKeyPoint:
"""Tests for KeyPoint entity."""
def test_key_point_basic(self) -> None:
"""Test creating basic KeyPoint."""
kp = KeyPoint(text="Important discussion about architecture")
assert kp.text == "Important discussion about architecture"
assert kp.segment_ids == []
assert kp.start_time == 0.0
assert kp.end_time == 0.0
def test_key_point_has_evidence_false(self) -> None:
"""Test has_evidence returns False when no segment_ids."""
kp = KeyPoint(text="No evidence")
assert kp.has_evidence() is False
def test_key_point_has_evidence_true(self) -> None:
"""Test has_evidence returns True with segment_ids."""
kp = KeyPoint(text="With evidence", segment_ids=[1, 2, 3])
assert kp.has_evidence() is True
def test_key_point_with_timing(self) -> None:
"""Test KeyPoint with timing information."""
kp = KeyPoint(
text="Timed point",
segment_ids=[0, 1],
start_time=10.5,
end_time=25.0,
)
assert kp.start_time == 10.5
assert kp.end_time == 25.0
class TestActionItem:
"""Tests for ActionItem entity."""
def test_action_item_basic(self) -> None:
"""Test creating basic ActionItem."""
ai = ActionItem(text="Review PR #123")
assert ai.text == "Review PR #123"
assert ai.assignee == ""
assert ai.due_date is None
assert ai.priority == 0
assert ai.segment_ids == []
def test_action_item_has_evidence_false(self) -> None:
"""Test has_evidence returns False when no segment_ids."""
ai = ActionItem(text="Task without evidence")
assert ai.has_evidence() is False
def test_action_item_has_evidence_true(self) -> None:
"""Test has_evidence returns True with segment_ids."""
ai = ActionItem(text="Task with evidence", segment_ids=[5])
assert ai.has_evidence() is True
def test_action_item_is_assigned_false(self) -> None:
"""Test is_assigned returns False when no assignee."""
ai = ActionItem(text="Unassigned task")
assert ai.is_assigned() is False
def test_action_item_is_assigned_true(self) -> None:
"""Test is_assigned returns True with assignee."""
ai = ActionItem(text="Assigned task", assignee="Alice")
assert ai.is_assigned() is True
def test_action_item_has_due_date_false(self) -> None:
"""Test has_due_date returns False when no due_date."""
ai = ActionItem(text="No deadline")
assert ai.has_due_date() is False
def test_action_item_has_due_date_true(self) -> None:
"""Test has_due_date returns True with due_date."""
ai = ActionItem(text="With deadline", due_date=datetime(2024, 12, 31))
assert ai.has_due_date() is True
class TestSummary:
"""Tests for Summary entity."""
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Provide a meeting ID for tests."""
return MeetingId(uuid4())
def test_summary_basic(self, meeting_id: MeetingId) -> None:
"""Test creating basic Summary."""
summary = Summary(meeting_id=meeting_id)
assert summary.meeting_id == meeting_id
assert summary.executive_summary == ""
assert summary.key_points == []
assert summary.action_items == []
assert summary.generated_at is None
assert summary.model_version == ""
def test_summary_key_point_count(self, meeting_id: MeetingId) -> None:
"""Test key_point_count property."""
summary = Summary(
meeting_id=meeting_id,
key_points=[
KeyPoint(text="Point 1"),
KeyPoint(text="Point 2"),
KeyPoint(text="Point 3"),
],
)
assert summary.key_point_count == 3
def test_summary_action_item_count(self, meeting_id: MeetingId) -> None:
"""Test action_item_count property."""
summary = Summary(
meeting_id=meeting_id,
action_items=[
ActionItem(text="Task 1"),
ActionItem(text="Task 2"),
],
)
assert summary.action_item_count == 2
def test_all_points_have_evidence_true(self, meeting_id: MeetingId) -> None:
"""Test all_points_have_evidence returns True when all evidenced."""
summary = Summary(
meeting_id=meeting_id,
key_points=[
KeyPoint(text="Point 1", segment_ids=[0]),
KeyPoint(text="Point 2", segment_ids=[1, 2]),
],
)
assert summary.all_points_have_evidence() is True
def test_all_points_have_evidence_false(self, meeting_id: MeetingId) -> None:
"""Test all_points_have_evidence returns False when some unevidenced."""
summary = Summary(
meeting_id=meeting_id,
key_points=[
KeyPoint(text="Point 1", segment_ids=[0]),
KeyPoint(text="Point 2"), # No evidence
],
)
assert summary.all_points_have_evidence() is False
def test_all_actions_have_evidence_true(self, meeting_id: MeetingId) -> None:
"""Test all_actions_have_evidence returns True when all evidenced."""
summary = Summary(
meeting_id=meeting_id,
action_items=[
ActionItem(text="Task 1", segment_ids=[0]),
],
)
assert summary.all_actions_have_evidence() is True
def test_all_actions_have_evidence_false(self, meeting_id: MeetingId) -> None:
"""Test all_actions_have_evidence returns False when some unevidenced."""
summary = Summary(
meeting_id=meeting_id,
action_items=[
ActionItem(text="Task 1"), # No evidence
],
)
assert summary.all_actions_have_evidence() is False
def test_is_fully_evidenced_true(self, meeting_id: MeetingId) -> None:
"""Test is_fully_evidenced returns True when all items evidenced."""
summary = Summary(
meeting_id=meeting_id,
key_points=[KeyPoint(text="KP", segment_ids=[0])],
action_items=[ActionItem(text="AI", segment_ids=[1])],
)
assert summary.is_fully_evidenced() is True
def test_is_fully_evidenced_false_points(self, meeting_id: MeetingId) -> None:
"""Test is_fully_evidenced returns False with unevidenced points."""
summary = Summary(
meeting_id=meeting_id,
key_points=[KeyPoint(text="KP")], # No evidence
action_items=[ActionItem(text="AI", segment_ids=[1])],
)
assert summary.is_fully_evidenced() is False
def test_unevidenced_points(self, meeting_id: MeetingId) -> None:
"""Test unevidenced_points property filters correctly."""
kp_no_evidence = KeyPoint(text="No evidence")
kp_with_evidence = KeyPoint(text="With evidence", segment_ids=[0])
summary = Summary(
meeting_id=meeting_id,
key_points=[kp_no_evidence, kp_with_evidence],
)
unevidenced = summary.unevidenced_points
assert len(unevidenced) == 1
assert unevidenced[0] == kp_no_evidence
def test_unevidenced_actions(self, meeting_id: MeetingId) -> None:
"""Test unevidenced_actions property filters correctly."""
ai_no_evidence = ActionItem(text="No evidence")
ai_with_evidence = ActionItem(text="With evidence", segment_ids=[0])
summary = Summary(
meeting_id=meeting_id,
action_items=[ai_no_evidence, ai_with_evidence],
)
unevidenced = summary.unevidenced_actions
assert len(unevidenced) == 1
assert unevidenced[0] == ai_no_evidence
File: tests/domain/test_value_objects.py
"""Tests for domain value objects."""
from __future__ import annotations
from uuid import UUID
import pytest
from noteflow.domain.value_objects import MeetingId, MeetingState
class TestMeetingState:
"""Tests for MeetingState enum."""
@pytest.mark.parametrize(
("current", "target", "expected"),
[
# UNSPECIFIED transitions
(MeetingState.UNSPECIFIED, MeetingState.CREATED, True),
(MeetingState.UNSPECIFIED, MeetingState.RECORDING, False),
# CREATED transitions
(MeetingState.CREATED, MeetingState.RECORDING, True),
(MeetingState.CREATED, MeetingState.ERROR, True),
(MeetingState.CREATED, MeetingState.STOPPED, False),
# RECORDING transitions (must go through STOPPING)
(MeetingState.RECORDING, MeetingState.STOPPING, True),
(MeetingState.RECORDING, MeetingState.STOPPED, False),
(MeetingState.RECORDING, MeetingState.ERROR, True),
(MeetingState.RECORDING, MeetingState.CREATED, False),
# STOPPING transitions
(MeetingState.STOPPING, MeetingState.STOPPED, True),
(MeetingState.STOPPING, MeetingState.ERROR, True),
(MeetingState.STOPPING, MeetingState.RECORDING, False),
(MeetingState.STOPPING, MeetingState.CREATED, False),
# STOPPED transitions
(MeetingState.STOPPED, MeetingState.COMPLETED, True),
(MeetingState.STOPPED, MeetingState.ERROR, True),
(MeetingState.STOPPED, MeetingState.RECORDING, False),
# COMPLETED transitions
(MeetingState.COMPLETED, MeetingState.ERROR, True),
(MeetingState.COMPLETED, MeetingState.RECORDING, False),
# ERROR is terminal
(MeetingState.ERROR, MeetingState.CREATED, False),
(MeetingState.ERROR, MeetingState.RECORDING, False),
],
)
def test_can_transition_to(
self,
current: MeetingState,
target: MeetingState,
expected: bool,
) -> None:
"""Test state transition validation."""
assert current.can_transition_to(target) == expected
@pytest.mark.parametrize(
("value", "expected"),
[
(0, MeetingState.UNSPECIFIED),
(1, MeetingState.CREATED),
(2, MeetingState.RECORDING),
(3, MeetingState.STOPPED),
(4, MeetingState.COMPLETED),
(5, MeetingState.ERROR),
(6, MeetingState.STOPPING),
],
)
def test_from_int_valid(self, value: int, expected: MeetingState) -> None:
"""Test conversion from valid integers."""
assert MeetingState.from_int(value) == expected
def test_from_int_invalid_raises(self) -> None:
"""Test conversion from invalid integer raises ValueError."""
with pytest.raises(ValueError, match="Invalid meeting state"):
MeetingState.from_int(99)
class TestMeetingId:
"""Tests for MeetingId NewType."""
def test_meeting_id_is_uuid(self) -> None:
"""Test MeetingId wraps UUID."""
uuid = UUID("12345678-1234-5678-1234-567812345678")
meeting_id = MeetingId(uuid)
assert meeting_id == uuid
def test_meeting_id_string_conversion(self) -> None:
"""Test MeetingId can be converted to string."""
uuid = UUID("12345678-1234-5678-1234-567812345678")
meeting_id = MeetingId(uuid)
assert str(meeting_id) == "12345678-1234-5678-1234-567812345678"
File: tests/grpc/__init__.py
"""gRPC service tests."""
File: tests/grpc/test_partial_transcription.py
"""Tests for partial transcription in the gRPC service."""
from __future__ import annotations
import time
from dataclasses import dataclass
from unittest.mock import MagicMock
import numpy as np
import pytest
from numpy.typing import NDArray
from noteflow.grpc.service import NoteFlowServicer
@dataclass
class MockAsrResult:
"""Mock ASR transcription result."""
text: str
start: float = 0.0
end: float = 1.0
language: str = "en"
language_probability: float = 0.99
avg_logprob: float = -0.5
no_speech_prob: float = 0.01
def _create_mock_asr_engine(transcribe_results: list[str] | None = None) -> MagicMock:
"""Create mock ASR engine with configurable transcription results."""
engine = MagicMock()
engine.is_loaded = True
engine.model_size = "base"
results = transcribe_results or ["Test transcription"]
def _transcribe(_audio: NDArray[np.float32]) -> list[MockAsrResult]:
return [MockAsrResult(text=text) for text in results]
async def _transcribe_async(
_audio: NDArray[np.float32],
_language: str | None = None,
) -> list[MockAsrResult]:
return [MockAsrResult(text=text) for text in results]
engine.transcribe = _transcribe
engine.transcribe_async = _transcribe_async
return engine
class TestPartialTranscriptionState:
"""Tests for partial transcription state initialization."""
def test_init_streaming_state_creates_partial_buffer(self) -> None:
"""Initialize streaming state should create empty partial buffer."""
servicer = NoteFlowServicer()
servicer._init_streaming_state("meeting-123", next_segment_id=0)
assert "meeting-123" in servicer._partial_buffers
assert servicer._partial_buffers["meeting-123"] == []
def test_init_streaming_state_creates_last_partial_time(self) -> None:
"""Initialize streaming state should set last partial time to now."""
servicer = NoteFlowServicer()
before = time.time()
servicer._init_streaming_state("meeting-123", next_segment_id=0)
assert "meeting-123" in servicer._last_partial_time
assert servicer._last_partial_time["meeting-123"] >= before
def test_init_streaming_state_creates_empty_last_text(self) -> None:
"""Initialize streaming state should set last partial text to empty."""
servicer = NoteFlowServicer()
servicer._init_streaming_state("meeting-123", next_segment_id=0)
assert "meeting-123" in servicer._last_partial_text
assert servicer._last_partial_text["meeting-123"] == ""
def test_cleanup_streaming_state_removes_partial_state(self) -> None:
"""Cleanup streaming state should remove all partial-related state."""
servicer = NoteFlowServicer()
servicer._init_streaming_state("meeting-123", next_segment_id=0)
servicer._cleanup_streaming_state("meeting-123")
assert "meeting-123" not in servicer._partial_buffers
assert "meeting-123" not in servicer._last_partial_time
assert "meeting-123" not in servicer._last_partial_text
class TestClearPartialBuffer:
"""Tests for _clear_partial_buffer method."""
def test_clear_partial_buffer_empties_buffer(self) -> None:
"""Clear partial buffer should empty the audio buffer."""
servicer = NoteFlowServicer()
servicer._partial_buffers["meeting-123"] = [np.zeros(1600, dtype=np.float32)]
servicer._clear_partial_buffer("meeting-123")
assert servicer._partial_buffers["meeting-123"] == []
def test_clear_partial_buffer_resets_last_text(self) -> None:
"""Clear partial buffer should reset last partial text."""
servicer = NoteFlowServicer()
servicer._last_partial_text["meeting-123"] = "Previous partial"
servicer._clear_partial_buffer("meeting-123")
assert servicer._last_partial_text["meeting-123"] == ""
def test_clear_partial_buffer_updates_time(self) -> None:
"""Clear partial buffer should update last partial time."""
servicer = NoteFlowServicer()
servicer._last_partial_time["meeting-123"] = 0.0
before = time.time()
servicer._clear_partial_buffer("meeting-123")
assert servicer._last_partial_time["meeting-123"] >= before
def test_clear_partial_buffer_handles_missing_meeting(self) -> None:
"""Clear partial buffer should handle missing meeting gracefully."""
servicer = NoteFlowServicer()
servicer._clear_partial_buffer("nonexistent") # Should not raise
class TestMaybeEmitPartial:
"""Tests for _maybe_emit_partial method."""
@pytest.mark.asyncio
async def test_returns_none_when_asr_not_loaded(self) -> None:
"""Return None when ASR engine is not loaded."""
servicer = NoteFlowServicer()
servicer._init_streaming_state("meeting-123", next_segment_id=0)
result = await servicer._maybe_emit_partial("meeting-123")
assert result is None
@pytest.mark.asyncio
async def test_returns_none_when_cadence_not_reached(self) -> None:
"""Return None when not enough time has passed since last partial."""
engine = _create_mock_asr_engine(["Test"])
servicer = NoteFlowServicer(asr_engine=engine)
servicer._init_streaming_state("meeting-123", next_segment_id=0)
# Set last time to now (cadence not reached)
servicer._last_partial_time["meeting-123"] = time.time()
# Add some audio
audio = np.ones(16000, dtype=np.float32) * 0.1 # 1 second of audio
servicer._partial_buffers["meeting-123"].append(audio)
result = await servicer._maybe_emit_partial("meeting-123")
assert result is None
@pytest.mark.asyncio
async def test_returns_none_when_buffer_empty(self) -> None:
"""Return None when partial buffer is empty."""
engine = _create_mock_asr_engine(["Test"])
servicer = NoteFlowServicer(asr_engine=engine)
servicer._init_streaming_state("meeting-123", next_segment_id=0)
# Set last time to past (cadence reached)
servicer._last_partial_time["meeting-123"] = time.time() - 10.0
result = await servicer._maybe_emit_partial("meeting-123")
assert result is None
@pytest.mark.asyncio
async def test_returns_none_when_audio_too_short(self) -> None:
"""Return None when buffered audio is less than minimum."""
engine = _create_mock_asr_engine(["Test"])
servicer = NoteFlowServicer(asr_engine=engine)
servicer._init_streaming_state("meeting-123", next_segment_id=0)
servicer._last_partial_time["meeting-123"] = time.time() - 10.0
# Add only 0.1 seconds of audio (minimum is 0.5s)
audio = np.ones(1600, dtype=np.float32) * 0.1 # 0.1 second
servicer._partial_buffers["meeting-123"].append(audio)
result = await servicer._maybe_emit_partial("meeting-123")
assert result is None
@pytest.mark.asyncio
async def test_emits_partial_when_conditions_met(self) -> None:
"""Emit partial when cadence reached and sufficient audio buffered."""
engine = _create_mock_asr_engine(["Hello world"])
servicer = NoteFlowServicer(asr_engine=engine)
servicer._init_streaming_state("meeting-123", next_segment_id=0)
servicer._last_partial_time["meeting-123"] = time.time() - 10.0
# Add 1 second of audio (above minimum of 0.5s)
audio = np.ones(16000, dtype=np.float32) * 0.1
servicer._partial_buffers["meeting-123"].append(audio)
result = await servicer._maybe_emit_partial("meeting-123")
assert result is not None
assert result.update_type == 1 # UPDATE_TYPE_PARTIAL
assert result.partial_text == "Hello world"
assert result.meeting_id == "meeting-123"
@pytest.mark.asyncio
async def test_debounces_duplicate_text(self) -> None:
"""Return None when text is same as last partial (debounce)."""
engine = _create_mock_asr_engine(["Same text"])
servicer = NoteFlowServicer(asr_engine=engine)
servicer._init_streaming_state("meeting-123", next_segment_id=0)
servicer._last_partial_time["meeting-123"] = time.time() - 10.0
servicer._last_partial_text["meeting-123"] = "Same text" # Same as transcription
audio = np.ones(16000, dtype=np.float32) * 0.1
servicer._partial_buffers["meeting-123"].append(audio)
result = await servicer._maybe_emit_partial("meeting-123")
assert result is None
@pytest.mark.asyncio
async def test_updates_last_partial_state(self) -> None:
"""Emitting partial should update last text and time."""
engine = _create_mock_asr_engine(["New text"])
servicer = NoteFlowServicer(asr_engine=engine)
servicer._init_streaming_state("meeting-123", next_segment_id=0)
servicer._last_partial_time["meeting-123"] = time.time() - 10.0
audio = np.ones(16000, dtype=np.float32) * 0.1
servicer._partial_buffers["meeting-123"].append(audio)
before = time.time()
await servicer._maybe_emit_partial("meeting-123")
assert servicer._last_partial_text["meeting-123"] == "New text"
assert servicer._last_partial_time["meeting-123"] >= before
class TestPartialCadence:
"""Tests for partial transcription cadence constants."""
def test_partial_cadence_is_2_seconds(self) -> None:
"""Partial cadence should be 2 seconds per spec."""
assert NoteFlowServicer.PARTIAL_CADENCE_SECONDS == 2.0
def test_min_partial_audio_is_half_second(self) -> None:
"""Minimum partial audio should be 0.5 seconds."""
assert NoteFlowServicer.MIN_PARTIAL_AUDIO_SECONDS == 0.5
class TestPartialBufferAccumulation:
"""Tests for audio buffer accumulation during speech."""
@pytest.mark.asyncio
async def test_speech_audio_added_to_buffer(self) -> None:
"""Speech audio should be accumulated in partial buffer."""
engine = _create_mock_asr_engine()
servicer = NoteFlowServicer(asr_engine=engine)
servicer._init_streaming_state("meeting-123", next_segment_id=0)
# Simulate speech detection by processing audio
audio = np.ones(1600, dtype=np.float32) * 0.1
# Mock VAD to return True (is_speech)
servicer._vad_instances["meeting-123"].process_chunk = MagicMock(return_value=True)
updates = []
async for update in servicer._process_audio_with_vad("meeting-123", audio):
updates.append(update)
# Buffer should have audio added
assert len(servicer._partial_buffers["meeting-123"]) >= 1
@pytest.mark.asyncio
async def test_silence_does_not_add_to_buffer(self) -> None:
"""Silent audio should not be added to partial buffer."""
engine = _create_mock_asr_engine()
servicer = NoteFlowServicer(asr_engine=engine)
servicer._init_streaming_state("meeting-123", next_segment_id=0)
audio = np.zeros(1600, dtype=np.float32) # Silence
# Mock VAD to return False (is_silence)
servicer._vad_instances["meeting-123"].process_chunk = MagicMock(return_value=False)
updates = []
async for update in servicer._process_audio_with_vad("meeting-123", audio):
updates.append(update)
# Buffer should still be empty
assert servicer._partial_buffers["meeting-123"] == []
class TestPartialIntegrationWithFinal:
"""Tests for partial buffer clearing when final segment emitted."""
@pytest.mark.asyncio
async def test_buffer_cleared_on_final_segment(self) -> None:
"""Partial buffer should be cleared when a final segment is produced."""
servicer = NoteFlowServicer()
servicer._init_streaming_state("meeting-123", next_segment_id=0)
# Add some audio to buffer
audio = np.ones(16000, dtype=np.float32) * 0.1
servicer._partial_buffers["meeting-123"].append(audio)
servicer._last_partial_text["meeting-123"] = "Some partial"
# Clear buffer (simulates final segment emission)
servicer._clear_partial_buffer("meeting-123")
assert servicer._partial_buffers["meeting-123"] == []
assert servicer._last_partial_text["meeting-123"] == ""
File: tests/infrastructure/asr/__init__.py
"""ASR infrastructure tests."""
File: tests/infrastructure/audio/__init__.py
"""Audio infrastructure tests package."""
File: tests/infrastructure/audio/conftest.py
"""Test fixtures for audio infrastructure tests."""
from __future__ import annotations
import numpy as np
import pytest
from numpy.typing import NDArray
from noteflow.infrastructure.audio import TimestampedAudio
@pytest.fixture
def silence_audio() -> NDArray[np.float32]:
"""Return silent audio (all zeros)."""
return np.zeros(1600, dtype=np.float32) # 100ms at 16kHz
@pytest.fixture
def full_scale_audio() -> NDArray[np.float32]:
"""Return full-scale audio (all ones)."""
return np.ones(1600, dtype=np.float32)
@pytest.fixture
def half_scale_audio() -> NDArray[np.float32]:
"""Return half-scale audio (all 0.5)."""
return np.full(1600, 0.5, dtype=np.float32)
@pytest.fixture
def sample_timestamped_audio() -> TimestampedAudio:
"""Return sample timestamped audio chunk."""
return TimestampedAudio(
frames=np.zeros(1600, dtype=np.float32),
timestamp=0.0,
duration=0.1,
)
@pytest.fixture
def timestamped_audio_sequence() -> list[TimestampedAudio]:
"""Return sequence of timestamped audio chunks for buffer tests."""
return [
TimestampedAudio(
frames=np.zeros(1600, dtype=np.float32),
timestamp=float(i) * 0.1,
duration=0.1,
)
for i in range(10)
]
File: tests/infrastructure/audio/test_dto.py
"""Tests for audio DTOs."""
from __future__ import annotations
from dataclasses import FrozenInstanceError
import numpy as np
import pytest
from noteflow.infrastructure.audio import AudioDeviceInfo, TimestampedAudio
class TestAudioDeviceInfo:
"""Tests for AudioDeviceInfo dataclass."""
def test_audio_device_info_creation(self) -> None:
"""Test AudioDeviceInfo can be created with all fields."""
device = AudioDeviceInfo(
device_id=0,
name="Test Microphone",
channels=2,
sample_rate=48000,
is_default=True,
)
assert device.device_id == 0
assert device.name == "Test Microphone"
assert device.channels == 2
assert device.sample_rate == 48000
assert device.is_default is True
def test_audio_device_info_frozen(self) -> None:
"""Test AudioDeviceInfo is immutable (frozen)."""
device = AudioDeviceInfo(
device_id=0,
name="Test",
channels=1,
sample_rate=16000,
is_default=False,
)
with pytest.raises(FrozenInstanceError):
# Intentionally assign to frozen field to verify immutability
device.name = "Modified" # type: ignore[misc]
class TestTimestampedAudio:
"""Tests for TimestampedAudio dataclass."""
def test_timestamped_audio_creation(self) -> None:
"""Test TimestampedAudio can be created with valid values."""
frames = np.zeros(1600, dtype=np.float32)
audio = TimestampedAudio(
frames=frames,
timestamp=1.0,
duration=0.1,
)
assert len(audio.frames) == 1600
assert audio.timestamp == 1.0
assert audio.duration == 0.1
def test_timestamped_audio_negative_duration_raises(self) -> None:
"""Test TimestampedAudio raises on negative duration."""
frames = np.zeros(1600, dtype=np.float32)
with pytest.raises(ValueError, match="Duration must be non-negative"):
TimestampedAudio(
frames=frames,
timestamp=0.0,
duration=-0.1,
)
def test_timestamped_audio_negative_timestamp_raises(self) -> None:
"""Test TimestampedAudio raises on negative timestamp."""
frames = np.zeros(1600, dtype=np.float32)
with pytest.raises(ValueError, match="Timestamp must be non-negative"):
TimestampedAudio(
frames=frames,
timestamp=-1.0,
duration=0.1,
)
def test_timestamped_audio_zero_duration_valid(self) -> None:
"""Test TimestampedAudio accepts zero duration."""
frames = np.zeros(0, dtype=np.float32)
audio = TimestampedAudio(
frames=frames,
timestamp=0.0,
duration=0.0,
)
assert audio.duration == 0.0
def test_timestamped_audio_zero_timestamp_valid(self) -> None:
"""Test TimestampedAudio accepts zero timestamp."""
frames = np.zeros(1600, dtype=np.float32)
audio = TimestampedAudio(
frames=frames,
timestamp=0.0,
duration=0.1,
)
assert audio.timestamp == 0.0
File: tests/infrastructure/audio/test_reader.py
"""Tests for MeetingAudioReader."""
from __future__ import annotations
import json
from pathlib import Path
from uuid import uuid4
import numpy as np
import pytest
from noteflow.infrastructure.audio.reader import MeetingAudioReader
from noteflow.infrastructure.audio.writer import MeetingAudioWriter
from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
from noteflow.infrastructure.security.keystore import InMemoryKeyStore
@pytest.fixture
def crypto() -> AesGcmCryptoBox:
"""Create crypto instance with in-memory keystore."""
keystore = InMemoryKeyStore()
return AesGcmCryptoBox(keystore)
@pytest.fixture
def meetings_dir(tmp_path: Path) -> Path:
"""Create temporary meetings directory."""
return tmp_path / "meetings"
def test_audio_exists_requires_manifest(
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""audio_exists should require both audio.enc and manifest.json."""
meeting_id = str(uuid4())
meeting_dir = meetings_dir / meeting_id
meeting_dir.mkdir(parents=True, exist_ok=True)
# Only audio.enc present -> False
(meeting_dir / "audio.enc").write_bytes(b"")
reader = MeetingAudioReader(crypto, meetings_dir)
assert reader.audio_exists(meeting_id) is False
# Add manifest.json -> True
(meeting_dir / "manifest.json").write_text(json.dumps({"sample_rate": 16000}))
assert reader.audio_exists(meeting_id) is True
def test_reader_uses_manifest_sample_rate(
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Reader should expose sample_rate from manifest and use it for durations."""
meeting_id = str(uuid4())
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer = MeetingAudioWriter(crypto, meetings_dir)
writer.open(meeting_id, dek, wrapped_dek, sample_rate=48000)
writer.write_chunk(np.zeros(1600, dtype=np.float32)) # 1600 samples @ 48kHz
writer.close()
reader = MeetingAudioReader(crypto, meetings_dir)
chunks = reader.load_meeting_audio(meeting_id)
assert reader.sample_rate == 48000
assert len(chunks) == 1
assert chunks[0].duration == pytest.approx(1600 / 48000, rel=1e-6)
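# --- Editor's sketch (illustration only) ---
# The existence check exercised above reduces to "both artifacts present";
# a plausible shape, assuming the on-disk layout the tests create.
from pathlib import Path


def _audio_exists_sketch(meetings_dir: Path, meeting_id: str) -> bool:
    meeting_dir = meetings_dir / meeting_id
    # Require the encrypted payload and the manifest that describes it.
    return (meeting_dir / "audio.enc").exists() and (meeting_dir / "manifest.json").exists()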
File: tests/infrastructure/audio/test_ring_buffer.py
"""Tests for TimestampedRingBuffer."""
from __future__ import annotations
import numpy as np
import pytest
from noteflow.infrastructure.audio import TimestampedAudio, TimestampedRingBuffer
class TestTimestampedRingBuffer:
"""Tests for TimestampedRingBuffer class."""
def test_init_with_valid_duration(self) -> None:
"""Test buffer initialization with valid max_duration."""
buffer = TimestampedRingBuffer(max_duration=10.0)
assert buffer.max_duration == 10.0
assert buffer.duration == 0.0
assert buffer.chunk_count == 0
def test_init_with_default_duration(self) -> None:
"""Test buffer uses default max_duration of 30 seconds."""
buffer = TimestampedRingBuffer()
assert buffer.max_duration == 30.0
def test_init_with_invalid_duration_raises(self) -> None:
"""Test buffer raises on non-positive max_duration."""
with pytest.raises(ValueError, match="max_duration must be positive"):
TimestampedRingBuffer(max_duration=0.0)
with pytest.raises(ValueError, match="max_duration must be positive"):
TimestampedRingBuffer(max_duration=-1.0)
def test_push_single_chunk(self, sample_timestamped_audio: TimestampedAudio) -> None:
"""Test pushing single audio chunk."""
buffer = TimestampedRingBuffer(max_duration=10.0)
buffer.push(sample_timestamped_audio)
assert buffer.chunk_count == 1
assert buffer.duration == sample_timestamped_audio.duration
def test_push_multiple_chunks(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None:
"""Test pushing multiple audio chunks."""
buffer = TimestampedRingBuffer(max_duration=10.0)
for audio in timestamped_audio_sequence:
buffer.push(audio)
assert buffer.chunk_count == 10
assert buffer.duration == pytest.approx(1.0, rel=1e-9) # 10 chunks * 0.1s
def test_push_evicts_old_at_capacity(self) -> None:
"""Test old chunks are evicted when buffer exceeds max_duration."""
buffer = TimestampedRingBuffer(max_duration=0.5) # 500ms max
# Push 10 chunks of 0.1s each (1.0s total)
for i in range(10):
audio = TimestampedAudio(
frames=np.zeros(1600, dtype=np.float32),
timestamp=float(i) * 0.1,
duration=0.1,
)
buffer.push(audio)
# Should only keep ~5 chunks (0.5s worth)
assert buffer.duration <= 0.5
assert buffer.chunk_count <= 6 # May keep one extra during eviction
def test_get_window_returns_requested_duration(
self, timestamped_audio_sequence: list[TimestampedAudio]
) -> None:
"""Test get_window returns chunks for requested duration."""
buffer = TimestampedRingBuffer(max_duration=10.0)
for audio in timestamped_audio_sequence:
buffer.push(audio)
# Request 0.3 seconds (should get ~3 chunks)
window = buffer.get_window(0.3)
total_duration = sum(a.duration for a in window)
assert total_duration >= 0.3
assert len(window) >= 3
def test_get_window_empty_returns_empty(self) -> None:
"""Test get_window on empty buffer returns empty list."""
buffer = TimestampedRingBuffer(max_duration=10.0)
window = buffer.get_window(1.0)
assert window == []
def test_get_window_negative_returns_empty(
self, sample_timestamped_audio: TimestampedAudio
) -> None:
"""Test get_window with negative duration returns empty list."""
buffer = TimestampedRingBuffer(max_duration=10.0)
buffer.push(sample_timestamped_audio)
window = buffer.get_window(-1.0)
assert window == []
def test_get_window_zero_returns_empty(
self, sample_timestamped_audio: TimestampedAudio
) -> None:
"""Test get_window with zero duration returns empty list."""
buffer = TimestampedRingBuffer(max_duration=10.0)
buffer.push(sample_timestamped_audio)
window = buffer.get_window(0.0)
assert window == []
def test_get_window_exceeds_buffer_returns_all(
self, timestamped_audio_sequence: list[TimestampedAudio]
) -> None:
"""Test get_window with duration > buffer returns all chunks."""
buffer = TimestampedRingBuffer(max_duration=10.0)
for audio in timestamped_audio_sequence:
buffer.push(audio)
window = buffer.get_window(100.0) # Request more than available
assert len(window) == 10
def test_get_window_chronological_order(
self, timestamped_audio_sequence: list[TimestampedAudio]
) -> None:
"""Test get_window returns chunks in chronological order."""
buffer = TimestampedRingBuffer(max_duration=10.0)
for audio in timestamped_audio_sequence:
buffer.push(audio)
window = buffer.get_window(1.0)
# Verify timestamps are increasing
for i in range(1, len(window)):
assert window[i].timestamp >= window[i - 1].timestamp
def test_get_all_returns_all_chunks(
self, timestamped_audio_sequence: list[TimestampedAudio]
) -> None:
"""Test get_all returns all buffered chunks."""
buffer = TimestampedRingBuffer(max_duration=10.0)
for audio in timestamped_audio_sequence:
buffer.push(audio)
all_chunks = buffer.get_all()
assert len(all_chunks) == 10
def test_clear_removes_all(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None:
"""Test clear removes all chunks and resets duration."""
buffer = TimestampedRingBuffer(max_duration=10.0)
for audio in timestamped_audio_sequence:
buffer.push(audio)
buffer.clear()
assert buffer.chunk_count == 0
assert buffer.duration == 0.0
assert len(buffer) == 0
def test_duration_property(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None:
"""Test duration property tracks total buffered duration."""
buffer = TimestampedRingBuffer(max_duration=10.0)
assert buffer.duration == 0.0
for i, audio in enumerate(timestamped_audio_sequence):
buffer.push(audio)
expected = (i + 1) * 0.1
assert buffer.duration == pytest.approx(expected, rel=1e-9)
def test_chunk_count_property(self, timestamped_audio_sequence: list[TimestampedAudio]) -> None:
"""Test chunk_count property tracks number of chunks."""
buffer = TimestampedRingBuffer(max_duration=10.0)
for i, audio in enumerate(timestamped_audio_sequence):
buffer.push(audio)
assert buffer.chunk_count == i + 1
def test_max_duration_property(self) -> None:
"""Test max_duration property returns configured value."""
buffer = TimestampedRingBuffer(max_duration=15.0)
assert buffer.max_duration == 15.0
def test_len_returns_chunk_count(
self, timestamped_audio_sequence: list[TimestampedAudio]
) -> None:
"""Test __len__ returns chunk count."""
buffer = TimestampedRingBuffer(max_duration=10.0)
for audio in timestamped_audio_sequence:
buffer.push(audio)
assert len(buffer) == buffer.chunk_count
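# --- Editor's sketch (illustration only) ---
# The eviction the tests assert fits a deque that drops oldest chunks once the
# buffered duration exceeds max_duration; an assumption, not the implementation.
from collections import deque

from noteflow.infrastructure.audio import TimestampedAudio


def _evict_sketch(
    chunks: deque[TimestampedAudio], total: float, max_duration: float
) -> float:
    # Pop from the left (oldest) until the buffered duration fits the cap.
    while chunks and total > max_duration:
        total -= chunks.popleft().duration
    return total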
File: tests/infrastructure/audio/test_writer.py
"""Tests for MeetingAudioWriter."""
from __future__ import annotations
import json
from pathlib import Path
from uuid import uuid4
import numpy as np
import pytest
from noteflow.infrastructure.audio.writer import MeetingAudioWriter
from noteflow.infrastructure.security.crypto import AesGcmCryptoBox, ChunkedAssetReader
from noteflow.infrastructure.security.keystore import InMemoryKeyStore
@pytest.fixture
def crypto() -> AesGcmCryptoBox:
"""Create crypto instance with in-memory keystore."""
keystore = InMemoryKeyStore()
return AesGcmCryptoBox(keystore)
@pytest.fixture
def meetings_dir(tmp_path: Path) -> Path:
"""Create temporary meetings directory."""
return tmp_path / "meetings"
class TestMeetingAudioWriterBasics:
"""Tests for MeetingAudioWriter basic operations."""
def test_writer_creates_meeting_directory(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test writer creates meeting directory structure."""
writer = MeetingAudioWriter(crypto, meetings_dir)
meeting_id = str(uuid4())
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(meeting_id, dek, wrapped_dek)
meeting_dir = meetings_dir / meeting_id
assert meeting_dir.exists()
assert (meeting_dir / "manifest.json").exists()
assert (meeting_dir / "audio.enc").exists()
writer.close()
def test_manifest_contains_correct_metadata(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test manifest.json has required fields."""
writer = MeetingAudioWriter(crypto, meetings_dir)
meeting_id = str(uuid4())
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(meeting_id, dek, wrapped_dek, sample_rate=16000)
writer.close()
manifest_path = meetings_dir / meeting_id / "manifest.json"
manifest = json.loads(manifest_path.read_text())
assert manifest["meeting_id"] == meeting_id
assert manifest["sample_rate"] == 16000
assert manifest["channels"] == 1
assert manifest["format"] == "pcm16"
assert "wrapped_dek" in manifest
assert "created_at" in manifest
def test_write_chunk_converts_float32_to_pcm16(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test audio conversion from float32 to PCM16."""
writer = MeetingAudioWriter(crypto, meetings_dir)
meeting_id = str(uuid4())
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(meeting_id, dek, wrapped_dek)
# Create test audio: 1600 samples = 0.1 seconds at 16kHz
test_audio = np.linspace(-1.0, 1.0, 1600, dtype=np.float32)
writer.write_chunk(test_audio)
assert writer.bytes_written > 0
        # PCM16 = 2 bytes/sample = 3200 bytes raw; encryption adds framing overhead
assert writer.bytes_written > 3200
assert writer.chunk_count == 1
writer.close()
def test_multiple_chunks_written(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test writing multiple audio chunks."""
writer = MeetingAudioWriter(crypto, meetings_dir)
meeting_id = str(uuid4())
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(meeting_id, dek, wrapped_dek)
# Write 100 chunks
for _ in range(100):
audio = np.random.uniform(-0.5, 0.5, 1600).astype(np.float32)
writer.write_chunk(audio)
# Should have written significant data
assert writer.bytes_written > 100 * 3200 # At least raw PCM16 size
assert writer.chunk_count == 100
writer.close()
def test_write_chunk_clamps_audio_range(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test audio values outside [-1, 1] are clamped before encoding."""
writer = MeetingAudioWriter(crypto, meetings_dir)
meeting_id = str(uuid4())
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(meeting_id, dek, wrapped_dek)
writer.write_chunk(np.array([-2.0, 0.0, 2.0], dtype=np.float32))
writer.close()
audio_path = meetings_dir / meeting_id / "audio.enc"
reader = ChunkedAssetReader(crypto)
reader.open(audio_path, dek)
chunk_bytes = next(reader.read_chunks())
pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16)
audio_float = pcm16.astype(np.float32) / 32767.0
assert audio_float.min() >= -1.0
assert audio_float.max() <= 1.0
reader.close()
class TestMeetingAudioWriterErrors:
"""Tests for MeetingAudioWriter error handling."""
def test_writer_raises_if_already_open(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test writer raises RuntimeError if opened twice."""
writer = MeetingAudioWriter(crypto, meetings_dir)
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(str(uuid4()), dek, wrapped_dek)
with pytest.raises(RuntimeError, match="already open"):
writer.open(str(uuid4()), dek, wrapped_dek)
writer.close()
def test_writer_raises_if_write_when_not_open(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test writer raises RuntimeError if write called before open."""
writer = MeetingAudioWriter(crypto, meetings_dir)
audio = np.zeros(1600, dtype=np.float32)
with pytest.raises(RuntimeError, match="not open"):
writer.write_chunk(audio)
def test_close_is_idempotent(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test close can be called multiple times safely."""
writer = MeetingAudioWriter(crypto, meetings_dir)
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(str(uuid4()), dek, wrapped_dek)
writer.close()
writer.close() # Should not raise
writer.close() # Should not raise
class TestMeetingAudioWriterProperties:
"""Tests for MeetingAudioWriter properties."""
def test_is_open_property(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test is_open property reflects writer state."""
writer = MeetingAudioWriter(crypto, meetings_dir)
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
assert writer.is_open is False
writer.open(str(uuid4()), dek, wrapped_dek)
assert writer.is_open is True
writer.close()
assert writer.is_open is False
def test_meeting_dir_property(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test meeting_dir property returns correct path."""
writer = MeetingAudioWriter(crypto, meetings_dir)
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
meeting_id = str(uuid4())
assert writer.meeting_dir is None
writer.open(meeting_id, dek, wrapped_dek)
assert writer.meeting_dir == meetings_dir / meeting_id
writer.close()
assert writer.meeting_dir is None
def test_bytes_written_when_closed(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test bytes_written returns 0 when not open."""
writer = MeetingAudioWriter(crypto, meetings_dir)
assert writer.bytes_written == 0
class TestMeetingAudioWriterIntegration:
"""Integration tests for audio roundtrip."""
def test_audio_roundtrip_encryption_decryption(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test writing audio, then reading it back encrypted."""
# Write audio
writer = MeetingAudioWriter(crypto, meetings_dir)
meeting_id = str(uuid4())
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(meeting_id, dek, wrapped_dek)
# Write 10 chunks of known audio
original_chunks: list[np.ndarray] = []
for i in range(10):
audio = np.sin(2 * np.pi * 440 * np.linspace(i, i + 0.1, 1600)).astype(np.float32)
original_chunks.append(audio)
writer.write_chunk(audio)
writer.close()
# Read audio back
audio_path = meetings_dir / meeting_id / "audio.enc"
assert audio_path.exists()
reader = ChunkedAssetReader(crypto)
reader.open(audio_path, dek)
read_chunks: list[np.ndarray] = []
for chunk_bytes in reader.read_chunks():
# Convert bytes back to PCM16 then to float32
pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16)
audio_float = pcm16.astype(np.float32) / 32767.0
read_chunks.append(audio_float)
reader.close()
# Verify we read same number of chunks
assert len(read_chunks) == len(original_chunks)
# Verify audio content matches (within quantization error)
for orig, read in zip(original_chunks, read_chunks, strict=True):
# PCM16 quantization adds ~0.00003 max error
assert np.allclose(orig, read, atol=0.0001)
def test_manifest_wrapped_dek_can_decrypt_audio(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Test that wrapped_dek from manifest can decrypt audio file."""
# Write audio
writer = MeetingAudioWriter(crypto, meetings_dir)
meeting_id = str(uuid4())
dek = crypto.generate_dek()
wrapped_dek = crypto.wrap_dek(dek)
writer.open(meeting_id, dek, wrapped_dek)
writer.write_chunk(np.zeros(1600, dtype=np.float32))
writer.close()
# Read manifest
manifest_path = meetings_dir / meeting_id / "manifest.json"
manifest = json.loads(manifest_path.read_text())
wrapped_dek_hex = manifest["wrapped_dek"]
# Unwrap DEK from manifest
unwrapped_dek = crypto.unwrap_dek(bytes.fromhex(wrapped_dek_hex))
# Use unwrapped DEK to read audio
audio_path = meetings_dir / meeting_id / "audio.enc"
reader = ChunkedAssetReader(crypto)
reader.open(audio_path, unwrapped_dek)
chunks = list(reader.read_chunks())
assert len(chunks) == 1 # Should read the one chunk we wrote
reader.close()
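# --- Editor's sketch (illustration only) ---
# The clamping and roundtrip tests imply a float32 -> PCM16 encoding of roughly
# this shape (clip, scale by 32767, cast to int16); the writer's actual
# conversion may differ in rounding details.
import numpy as np


def _encode_pcm16_sketch(frames: np.ndarray) -> bytes:
    clipped = np.clip(frames, -1.0, 1.0)
    return (clipped * 32767.0).astype(np.int16).tobytes()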
File: tests/infrastructure/export/test_formatting.py
"""Tests for export formatting helpers."""
from __future__ import annotations
from datetime import datetime
from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp
class TestFormatTimestamp:
"""Tests for format_timestamp."""
def test_format_timestamp_under_hour(self) -> None:
assert format_timestamp(0) == "0:00"
assert format_timestamp(59) == "0:59"
assert format_timestamp(60) == "1:00"
assert format_timestamp(125) == "2:05"
def test_format_timestamp_over_hour(self) -> None:
assert format_timestamp(3600) == "1:00:00"
assert format_timestamp(3661) == "1:01:01"
class TestFormatDatetime:
"""Tests for format_datetime."""
def test_format_datetime_none(self) -> None:
assert format_datetime(None) == ""
def test_format_datetime_value(self) -> None:
dt = datetime(2024, 1, 1, 12, 30, 15)
assert format_datetime(dt) == "2024-01-01 12:30:15"
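# --- Editor's sketch (illustration only) ---
# A rule consistent with the expected strings above: m:ss under an hour,
# h:mm:ss from an hour up. The shipped helper may be written differently.
def _format_timestamp_sketch(seconds: float) -> str:
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours}:{minutes:02d}:{secs:02d}" if hours else f"{minutes}:{secs:02d}"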
File: tests/infrastructure/export/test_html.py
"""Tests for HTML exporter."""
from __future__ import annotations
from noteflow.domain.entities import ActionItem, KeyPoint, Meeting, Segment, Summary
from noteflow.infrastructure.export.html import HtmlExporter
class TestHtmlExporter:
"""Tests for HtmlExporter output."""
def test_export_escapes_html(self) -> None:
meeting = Meeting.create(title="<Weekly & Sync>")
segments = [
Segment(segment_id=0, text="Hello <team>", start_time=0.0, end_time=1.0),
]
summary = Summary(
meeting_id=meeting.id,
executive_summary="Summary with <b>bold</b>",
key_points=[KeyPoint(text="Key <point>")],
action_items=[ActionItem(text="Do <thing>", assignee="bob<")],
)
meeting.summary = summary
exporter = HtmlExporter()
output = exporter.export(meeting, segments)
assert "<Weekly & Sync>" in output
assert "Hello <team>" in output
assert "Summary with <b>bold</b>" in output
assert "Key <point>" in output
assert "@bob<" in output
File: tests/infrastructure/export/test_markdown.py
"""Tests for Markdown exporter."""
from __future__ import annotations
from datetime import datetime
from noteflow.domain.entities import ActionItem, KeyPoint, Meeting, Segment, Summary
from noteflow.infrastructure.export.markdown import MarkdownExporter
class TestMarkdownExporter:
"""Tests for MarkdownExporter output."""
def test_export_includes_sections(self) -> None:
meeting = Meeting.create(title="Weekly Sync")
meeting.started_at = datetime(2024, 1, 1, 9, 0, 0)
meeting.ended_at = datetime(2024, 1, 1, 9, 30, 0)
segments = [
Segment(segment_id=0, text="Hello team", start_time=0.0, end_time=1.0),
Segment(segment_id=1, text="Next steps", start_time=1.0, end_time=2.0),
]
summary = Summary(
meeting_id=meeting.id,
executive_summary="Great meeting.",
key_points=[KeyPoint(text="KP1")],
action_items=[ActionItem(text="Do thing", assignee="alice")],
)
meeting.summary = summary
exporter = MarkdownExporter()
output = exporter.export(meeting, segments)
assert "# Weekly Sync" in output
assert "## Meeting Info" in output
assert "## Transcript" in output
assert "**[0:00]** Hello team" in output
assert "## Summary" in output
assert "### Key Points" in output
assert "- KP1" in output
assert "### Action Items" in output
assert "- [ ] Do thing (@alice)" in output
assert "Exported from NoteFlow" in output
File: tests/infrastructure/security/test_crypto.py
"""Tests for crypto error paths and asset reader behavior."""
from __future__ import annotations
import struct
from pathlib import Path
import pytest
from noteflow.infrastructure.security.crypto import (
FILE_MAGIC,
FILE_VERSION,
AesGcmCryptoBox,
ChunkedAssetReader,
ChunkedAssetWriter,
)
from noteflow.infrastructure.security.keystore import InMemoryKeyStore
@pytest.fixture
def crypto() -> AesGcmCryptoBox:
"""Crypto box with in-memory key store."""
return AesGcmCryptoBox(InMemoryKeyStore())
class TestAesGcmCryptoBox:
"""Tests for AesGcmCryptoBox edge cases."""
def test_unwrap_dek_too_short_raises(self, crypto: AesGcmCryptoBox) -> None:
"""unwrap_dek rejects payloads shorter than nonce+ciphertext+tag."""
with pytest.raises(ValueError, match="Invalid wrapped DEK"):
crypto.unwrap_dek(b"short")
class TestChunkedAssetReader:
"""Tests for ChunkedAssetReader validation."""
def test_open_invalid_magic_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None:
"""Reader rejects files with invalid magic."""
path = tmp_path / "bad_magic.enc"
path.write_bytes(b"BAD!" + bytes([FILE_VERSION]))
reader = ChunkedAssetReader(crypto)
with pytest.raises(ValueError, match="Invalid file format"):
reader.open(path, crypto.generate_dek())
def test_open_invalid_version_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None:
"""Reader rejects unsupported file versions."""
path = tmp_path / "bad_version.enc"
path.write_bytes(FILE_MAGIC + bytes([FILE_VERSION + 1]))
reader = ChunkedAssetReader(crypto)
with pytest.raises(ValueError, match="Unsupported file version"):
reader.open(path, crypto.generate_dek())
def test_read_truncated_chunk_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None:
"""Reader errors on truncated chunk data."""
path = tmp_path / "truncated.enc"
with path.open("wb") as handle:
handle.write(FILE_MAGIC)
handle.write(struct.pack("B", FILE_VERSION))
handle.write(struct.pack(">I", 10)) # claim 10 bytes
handle.write(b"12345") # only 5 bytes provided
reader = ChunkedAssetReader(crypto)
reader.open(path, crypto.generate_dek())
with pytest.raises(ValueError, match="Truncated chunk"):
list(reader.read_chunks())
reader.close()
def test_read_with_wrong_dek_raises(self, crypto: AesGcmCryptoBox, tmp_path: Path) -> None:
"""Decrypting with the wrong key fails."""
path = tmp_path / "wrong_key.enc"
dek = crypto.generate_dek()
other_dek = crypto.generate_dek()
writer = ChunkedAssetWriter(crypto)
writer.open(path, dek)
writer.write_chunk(b"hello")
writer.close()
reader = ChunkedAssetReader(crypto)
reader.open(path, other_dek)
with pytest.raises(ValueError, match="Chunk decryption failed"):
list(reader.read_chunks())
reader.close()
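# --- Editor's sketch (illustration only) ---
# The framing these tests poke at: a 4-byte magic, a 1-byte version, then
# repeated [big-endian uint32 length][encrypted chunk] records. Inferred from
# the byte-level fixtures above, not copied from the implementation.
import struct


def _frame_chunk_sketch(ciphertext: bytes) -> bytes:
    return struct.pack(">I", len(ciphertext)) + ciphertext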
File: tests/infrastructure/summarization/test_citation_verifier.py
"""Tests for citation verification."""
from __future__ import annotations
from uuid import uuid4
import pytest
from noteflow.domain.entities import ActionItem, KeyPoint, Segment, Summary
from noteflow.domain.value_objects import MeetingId
from noteflow.infrastructure.summarization import SegmentCitationVerifier
def _segment(segment_id: int, text: str = "Test") -> Segment:
"""Create a test segment."""
return Segment(
segment_id=segment_id,
text=text,
start_time=segment_id * 5.0,
end_time=(segment_id + 1) * 5.0,
)
def _key_point(text: str, segment_ids: list[int]) -> KeyPoint:
"""Create a test key point."""
return KeyPoint(text=text, segment_ids=segment_ids)
def _action_item(text: str, segment_ids: list[int]) -> ActionItem:
"""Create a test action item."""
return ActionItem(text=text, segment_ids=segment_ids)
def _summary(
key_points: list[KeyPoint] | None = None,
action_items: list[ActionItem] | None = None,
) -> Summary:
"""Create a test summary."""
return Summary(
meeting_id=MeetingId(uuid4()),
executive_summary="Test summary",
key_points=key_points or [],
action_items=action_items or [],
)
class TestSegmentCitationVerifier:
"""Tests for SegmentCitationVerifier."""
@pytest.fixture
def verifier(self) -> SegmentCitationVerifier:
"""Create verifier instance."""
return SegmentCitationVerifier()
def test_verify_valid_citations(self, verifier: SegmentCitationVerifier) -> None:
"""All citations valid should return is_valid=True."""
segments = [_segment(0), _segment(1), _segment(2)]
summary = _summary(
key_points=[_key_point("Point 1", [0, 1])],
action_items=[_action_item("Action 1", [2])],
)
result = verifier.verify_citations(summary, segments)
assert result.is_valid is True
assert result.invalid_key_point_indices == ()
assert result.invalid_action_item_indices == ()
assert result.missing_segment_ids == ()
def test_verify_invalid_key_point_citation(self, verifier: SegmentCitationVerifier) -> None:
"""Invalid segment_id in key point should be detected."""
segments = [_segment(0), _segment(1)]
summary = _summary(
key_points=[_key_point("Point 1", [0, 99])], # 99 doesn't exist
)
result = verifier.verify_citations(summary, segments)
assert result.is_valid is False
assert result.invalid_key_point_indices == (0,)
assert result.invalid_action_item_indices == ()
assert result.missing_segment_ids == (99,)
def test_verify_invalid_action_item_citation(self, verifier: SegmentCitationVerifier) -> None:
"""Invalid segment_id in action item should be detected."""
segments = [_segment(0), _segment(1)]
summary = _summary(
action_items=[_action_item("Action 1", [50])], # 50 doesn't exist
)
result = verifier.verify_citations(summary, segments)
assert result.is_valid is False
assert result.invalid_key_point_indices == ()
assert result.invalid_action_item_indices == (0,)
assert result.missing_segment_ids == (50,)
def test_verify_multiple_invalid_citations(self, verifier: SegmentCitationVerifier) -> None:
"""Multiple invalid citations should all be detected."""
segments = [_segment(0)]
summary = _summary(
key_points=[
_key_point("Point 1", [0]),
_key_point("Point 2", [1]), # Invalid
_key_point("Point 3", [2]), # Invalid
],
action_items=[
_action_item("Action 1", [3]), # Invalid
],
)
result = verifier.verify_citations(summary, segments)
assert result.is_valid is False
assert result.invalid_key_point_indices == (1, 2)
assert result.invalid_action_item_indices == (0,)
assert result.missing_segment_ids == (1, 2, 3)
def test_verify_empty_summary(self, verifier: SegmentCitationVerifier) -> None:
"""Empty summary should be valid."""
segments = [_segment(0)]
summary = _summary()
result = verifier.verify_citations(summary, segments)
assert result.is_valid is True
def test_verify_empty_segments(self, verifier: SegmentCitationVerifier) -> None:
"""Summary with citations but no segments should be invalid."""
segments: list[Segment] = []
summary = _summary(key_points=[_key_point("Point 1", [0])])
result = verifier.verify_citations(summary, segments)
assert result.is_valid is False
assert result.missing_segment_ids == (0,)
def test_verify_empty_citations(self, verifier: SegmentCitationVerifier) -> None:
"""Key points/actions with empty segment_ids should be valid."""
segments = [_segment(0)]
summary = _summary(
key_points=[_key_point("Point 1", [])], # No citations
action_items=[_action_item("Action 1", [])], # No citations
)
result = verifier.verify_citations(summary, segments)
assert result.is_valid is True
def test_invalid_count_property(self, verifier: SegmentCitationVerifier) -> None:
"""invalid_count should sum key point and action item invalid counts."""
segments = [_segment(0)]
summary = _summary(
key_points=[
_key_point("Point 1", [1]), # Invalid
_key_point("Point 2", [2]), # Invalid
],
action_items=[
_action_item("Action 1", [3]), # Invalid
],
)
result = verifier.verify_citations(summary, segments)
assert result.invalid_count == 3
class TestFilterInvalidCitations:
"""Tests for filter_invalid_citations method."""
@pytest.fixture
def verifier(self) -> SegmentCitationVerifier:
"""Create verifier instance."""
return SegmentCitationVerifier()
def test_filter_removes_invalid_segment_ids(self, verifier: SegmentCitationVerifier) -> None:
"""Invalid segment_ids should be removed from citations."""
segments = [_segment(0), _segment(1)]
summary = _summary(
key_points=[_key_point("Point 1", [0, 1, 99])], # 99 invalid
action_items=[_action_item("Action 1", [1, 50])], # 50 invalid
)
filtered = verifier.filter_invalid_citations(summary, segments)
assert filtered.key_points[0].segment_ids == [0, 1]
assert filtered.action_items[0].segment_ids == [1]
def test_filter_preserves_valid_citations(self, verifier: SegmentCitationVerifier) -> None:
"""Valid citations should be preserved."""
segments = [_segment(0), _segment(1), _segment(2)]
summary = _summary(
key_points=[_key_point("Point 1", [0, 1])],
action_items=[_action_item("Action 1", [2])],
)
filtered = verifier.filter_invalid_citations(summary, segments)
assert filtered.key_points[0].segment_ids == [0, 1]
assert filtered.action_items[0].segment_ids == [2]
def test_filter_preserves_other_fields(self, verifier: SegmentCitationVerifier) -> None:
"""Non-citation fields should be preserved."""
segments = [_segment(0)]
summary = Summary(
meeting_id=MeetingId(uuid4()),
executive_summary="Important meeting",
key_points=[KeyPoint(text="Key point", segment_ids=[0], start_time=1.0, end_time=2.0)],
action_items=[ActionItem(text="Action", segment_ids=[0], assignee="Alice", priority=2)],
model_version="test-1.0",
)
filtered = verifier.filter_invalid_citations(summary, segments)
assert filtered.executive_summary == "Important meeting"
assert filtered.key_points[0].text == "Key point"
assert filtered.key_points[0].start_time == 1.0
assert filtered.action_items[0].assignee == "Alice"
assert filtered.action_items[0].priority == 2
assert filtered.model_version == "test-1.0"
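# --- Editor's sketch (illustration only) ---
# Verification reduces to membership checks against the known segment ids; a
# minimal version of the key-point pass, assuming the entity shapes used above.
from noteflow.domain.entities import Segment, Summary


def _invalid_key_points_sketch(summary: Summary, segments: list[Segment]) -> tuple[int, ...]:
    valid_ids = {segment.segment_id for segment in segments}
    return tuple(
        index
        for index, point in enumerate(summary.key_points)
        if any(sid not in valid_ids for sid in point.segment_ids)
    )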
File: tests/infrastructure/summarization/test_mock_provider.py
"""Tests for mock summarization provider."""
from __future__ import annotations
from uuid import uuid4
import pytest
from noteflow.domain.entities import Segment
from noteflow.domain.summarization import SummarizationRequest
from noteflow.domain.value_objects import MeetingId
from noteflow.infrastructure.summarization import MockSummarizer
def _segment(
segment_id: int,
text: str,
start: float = 0.0,
end: float = 5.0,
) -> Segment:
"""Create a test segment."""
return Segment(
segment_id=segment_id,
text=text,
start_time=start,
end_time=end,
)
class TestMockSummarizer:
"""Tests for MockSummarizer."""
@pytest.fixture
def summarizer(self) -> MockSummarizer:
"""Create MockSummarizer instance."""
return MockSummarizer(latency_ms=0.0)
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Create a test meeting ID."""
return MeetingId(uuid4())
def test_provider_name(self, summarizer: MockSummarizer) -> None:
"""Provider name should be 'mock'."""
assert summarizer.provider_name == "mock"
def test_is_available(self, summarizer: MockSummarizer) -> None:
"""Mock provider should always be available."""
assert summarizer.is_available is True
def test_requires_cloud_consent(self, summarizer: MockSummarizer) -> None:
"""Mock provider should not require cloud consent."""
assert summarizer.requires_cloud_consent is False
@pytest.mark.asyncio
async def test_summarize_returns_result(
self,
summarizer: MockSummarizer,
meeting_id: MeetingId,
) -> None:
"""Summarize should return a SummarizationResult."""
segments = [
_segment(0, "First segment text.", 0.0, 5.0),
_segment(1, "Second segment text.", 5.0, 10.0),
]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert result.provider_name == "mock"
assert result.model_name == "mock-1.0"
assert result.summary.meeting_id == meeting_id
@pytest.mark.asyncio
async def test_summarize_generates_executive_summary(
self,
summarizer: MockSummarizer,
meeting_id: MeetingId,
) -> None:
"""Summarize should generate executive summary with segment count."""
segments = [
_segment(0, "Hello", 0.0, 5.0),
_segment(1, "World", 5.0, 10.0),
_segment(2, "Test", 10.0, 15.0),
]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert "3 segments" in result.summary.executive_summary
assert "15.0 seconds" in result.summary.executive_summary
@pytest.mark.asyncio
async def test_summarize_generates_key_points_with_citations(
self,
summarizer: MockSummarizer,
meeting_id: MeetingId,
) -> None:
"""Key points should have valid segment_id citations."""
segments = [
_segment(0, "First point", 0.0, 5.0),
_segment(1, "Second point", 5.0, 10.0),
]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert len(result.summary.key_points) == 2
assert result.summary.key_points[0].segment_ids == [0]
assert result.summary.key_points[1].segment_ids == [1]
@pytest.mark.asyncio
async def test_summarize_respects_max_key_points(
self,
summarizer: MockSummarizer,
meeting_id: MeetingId,
) -> None:
"""Key points should be limited to max_key_points."""
segments = [_segment(i, f"Segment {i}", i * 5.0, (i + 1) * 5.0) for i in range(10)]
request = SummarizationRequest(
meeting_id=meeting_id,
segments=segments,
max_key_points=3,
)
result = await summarizer.summarize(request)
assert len(result.summary.key_points) == 3
@pytest.mark.asyncio
async def test_summarize_extracts_action_items(
self,
summarizer: MockSummarizer,
meeting_id: MeetingId,
) -> None:
"""Action items should be extracted from segments with action keywords."""
segments = [
_segment(0, "General discussion", 0.0, 5.0),
_segment(1, "We need to fix the bug", 5.0, 10.0),
_segment(2, "TODO: Review the code", 10.0, 15.0),
_segment(3, "The meeting went well", 15.0, 20.0),
]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert len(result.summary.action_items) == 2
assert result.summary.action_items[0].segment_ids == [1]
assert result.summary.action_items[1].segment_ids == [2]
@pytest.mark.asyncio
async def test_summarize_respects_max_action_items(
self,
summarizer: MockSummarizer,
meeting_id: MeetingId,
) -> None:
"""Action items should be limited to max_action_items."""
segments = [_segment(i, f"TODO: task {i}", i * 5.0, (i + 1) * 5.0) for i in range(10)]
request = SummarizationRequest(
meeting_id=meeting_id,
segments=segments,
max_action_items=2,
)
result = await summarizer.summarize(request)
assert len(result.summary.action_items) == 2
@pytest.mark.asyncio
async def test_summarize_sets_generated_at(
self,
summarizer: MockSummarizer,
meeting_id: MeetingId,
) -> None:
"""Summary should have generated_at timestamp."""
segments = [_segment(0, "Test", 0.0, 5.0)]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert result.summary.generated_at is not None
@pytest.mark.asyncio
async def test_summarize_empty_segments(
self,
summarizer: MockSummarizer,
meeting_id: MeetingId,
) -> None:
"""Summarize should handle empty segments list."""
request = SummarizationRequest(meeting_id=meeting_id, segments=[])
result = await summarizer.summarize(request)
assert result.summary.key_points == []
assert result.summary.action_items == []
assert "0 segments" in result.summary.executive_summary
File: tests/infrastructure/summarization/test_ollama_provider.py
"""Tests for Ollama summarization provider."""
from __future__ import annotations
import json
import sys
import types
from typing import Any
from uuid import uuid4
import pytest
from noteflow.domain.entities import Segment
from noteflow.domain.summarization import (
InvalidResponseError,
ProviderUnavailableError,
SummarizationRequest,
)
from noteflow.domain.value_objects import MeetingId
def _segment(
segment_id: int,
text: str,
start: float = 0.0,
end: float = 5.0,
) -> Segment:
"""Create a test segment."""
return Segment(
segment_id=segment_id,
text=text,
start_time=start,
end_time=end,
)
def _valid_json_response(
summary: str = "Test summary.",
key_points: list[dict[str, Any]] | None = None,
action_items: list[dict[str, Any]] | None = None,
) -> str:
"""Build a valid JSON response string."""
return json.dumps(
{
"executive_summary": summary,
"key_points": key_points or [],
"action_items": action_items or [],
}
)
class TestOllamaSummarizerProperties:
"""Tests for OllamaSummarizer properties."""
@pytest.fixture
def mock_ollama_module(self, monkeypatch: pytest.MonkeyPatch) -> types.ModuleType:
"""Mock ollama module."""
mock_client = types.SimpleNamespace(
list=lambda: {"models": []},
chat=lambda **_: {"message": {"content": _valid_json_response()}},
)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
return mock_module
def test_provider_name(self, mock_ollama_module: types.ModuleType) -> None:
"""Provider name should be 'ollama'."""
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
assert summarizer.provider_name == "ollama"
def test_requires_cloud_consent_false(self, mock_ollama_module: types.ModuleType) -> None:
"""Ollama should not require cloud consent (local processing)."""
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
assert summarizer.requires_cloud_consent is False
def test_is_available_when_server_responds(self, mock_ollama_module: types.ModuleType) -> None:
"""is_available should be True when server responds."""
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
assert summarizer.is_available is True
def test_is_available_false_when_connection_fails(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""is_available should be False when server unreachable."""
def raise_error() -> None:
raise ConnectionError("Connection refused")
mock_client = types.SimpleNamespace(list=raise_error)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
assert summarizer.is_available is False
class TestOllamaSummarizerSummarize:
"""Tests for OllamaSummarizer.summarize method."""
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Create test meeting ID."""
return MeetingId(uuid4())
@pytest.mark.asyncio
async def test_summarize_empty_segments(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Empty segments should return empty summary without calling LLM."""
call_count = 0
def mock_chat(**_: Any) -> dict[str, Any]:
nonlocal call_count
call_count += 1
return {"message": {"content": _valid_json_response()}}
mock_client = types.SimpleNamespace(list=lambda: {}, chat=mock_chat)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
request = SummarizationRequest(meeting_id=meeting_id, segments=[])
result = await summarizer.summarize(request)
assert result.summary.key_points == []
assert result.summary.action_items == []
assert call_count == 0
@pytest.mark.asyncio
async def test_summarize_returns_result(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Summarize should return SummarizationResult."""
response = _valid_json_response(
summary="Meeting discussed project updates.",
key_points=[{"text": "Project on track", "segment_ids": [0]}],
action_items=[
{"text": "Review code", "assignee": "Alice", "priority": 2, "segment_ids": [1]}
],
)
mock_client = types.SimpleNamespace(
list=lambda: {},
chat=lambda **_: {"message": {"content": response}},
)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
segments = [
_segment(0, "Project is on track.", 0.0, 5.0),
_segment(1, "Alice needs to review the code.", 5.0, 10.0),
]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert result.provider_name == "ollama"
assert result.summary.meeting_id == meeting_id
assert result.summary.executive_summary == "Meeting discussed project updates."
assert len(result.summary.key_points) == 1
assert result.summary.key_points[0].segment_ids == [0]
assert len(result.summary.action_items) == 1
assert result.summary.action_items[0].assignee == "Alice"
assert result.summary.action_items[0].priority == 2
@pytest.mark.asyncio
async def test_summarize_filters_invalid_segment_ids(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Invalid segment_ids in response should be filtered out."""
response = _valid_json_response(
summary="Test",
key_points=[{"text": "Point", "segment_ids": [0, 99, 100]}], # 99, 100 invalid
)
mock_client = types.SimpleNamespace(
list=lambda: {},
chat=lambda **_: {"message": {"content": response}},
)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
segments = [_segment(0, "Only segment")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert result.summary.key_points[0].segment_ids == [0]
@pytest.mark.asyncio
async def test_summarize_respects_max_limits(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Response items exceeding max limits should be truncated."""
response = _valid_json_response(
summary="Test",
key_points=[{"text": f"Point {i}", "segment_ids": [0]} for i in range(10)],
action_items=[{"text": f"Action {i}", "segment_ids": [0]} for i in range(10)],
)
mock_client = types.SimpleNamespace(
list=lambda: {},
chat=lambda **_: {"message": {"content": response}},
)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
segments = [_segment(0, "Test segment")]
request = SummarizationRequest(
meeting_id=meeting_id,
segments=segments,
max_key_points=3,
max_action_items=2,
)
result = await summarizer.summarize(request)
assert len(result.summary.key_points) == 3
assert len(result.summary.action_items) == 2
@pytest.mark.asyncio
async def test_summarize_handles_markdown_fenced_json(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Markdown code fences around JSON should be stripped."""
json_content = _valid_json_response(summary="Fenced response")
response = f"```json\n{json_content}\n```"
mock_client = types.SimpleNamespace(
list=lambda: {},
chat=lambda **_: {"message": {"content": response}},
)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert result.summary.executive_summary == "Fenced response"
class TestOllamaSummarizerErrors:
"""Tests for OllamaSummarizer error handling."""
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Create test meeting ID."""
return MeetingId(uuid4())
@pytest.mark.asyncio
async def test_raises_unavailable_when_package_missing(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Should raise ProviderUnavailableError when ollama not installed."""
# Remove ollama from sys.modules if present
monkeypatch.delitem(sys.modules, "ollama", raising=False)
# Make import fail
import builtins
original_import = builtins.__import__
def mock_import(name: str, *args: Any, **kwargs: Any) -> Any:
if name == "ollama":
raise ImportError("No module named 'ollama'")
return original_import(name, *args, **kwargs)
monkeypatch.setattr(builtins, "__import__", mock_import)
        # The provider imports ollama lazily; clear the cached client so the
        # next summarize() call attempts (and fails) the import.
        from noteflow.infrastructure.summarization import ollama_provider

        summarizer = ollama_provider.OllamaSummarizer()
        summarizer._client = None  # Force a fresh import attempt
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
with pytest.raises(ProviderUnavailableError, match="ollama package not installed"):
await summarizer.summarize(request)
@pytest.mark.asyncio
async def test_raises_unavailable_on_connection_error(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Should raise ProviderUnavailableError on connection failure."""
def raise_connection_error(**_: Any) -> None:
raise ConnectionRefusedError("Connection refused")
mock_client = types.SimpleNamespace(
list=lambda: {},
chat=raise_connection_error,
)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
with pytest.raises(ProviderUnavailableError, match="Cannot connect"):
await summarizer.summarize(request)
@pytest.mark.asyncio
async def test_raises_invalid_response_on_bad_json(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Should raise InvalidResponseError on malformed JSON."""
mock_client = types.SimpleNamespace(
list=lambda: {},
chat=lambda **_: {"message": {"content": "not valid json {{{"}},
)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
with pytest.raises(InvalidResponseError, match="Invalid JSON"):
await summarizer.summarize(request)
@pytest.mark.asyncio
async def test_raises_invalid_response_on_empty_content(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Should raise InvalidResponseError on empty response."""
mock_client = types.SimpleNamespace(
list=lambda: {},
chat=lambda **_: {"message": {"content": ""}},
)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer()
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
with pytest.raises(InvalidResponseError, match="Empty response"):
await summarizer.summarize(request)
class TestOllamaSummarizerConfiguration:
"""Tests for OllamaSummarizer configuration."""
@pytest.mark.asyncio
async def test_custom_model_name(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""Custom model name should be used."""
captured_model = None
def capture_chat(**kwargs: Any) -> dict[str, Any]:
nonlocal captured_model
captured_model = kwargs.get("model")
return {"message": {"content": _valid_json_response()}}
mock_client = types.SimpleNamespace(list=lambda: {}, chat=capture_chat)
mock_module = types.ModuleType("ollama")
mock_module.Client = lambda host: mock_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer(model="mistral")
meeting_id = MeetingId(uuid4())
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
await summarizer.summarize(request)
assert captured_model == "mistral"
def test_custom_host(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""Custom host should be passed to client."""
captured_host = None
def capture_client(host: str) -> types.SimpleNamespace:
nonlocal captured_host
captured_host = host
return types.SimpleNamespace(
list=lambda: {},
chat=lambda **_: {"message": {"content": _valid_json_response()}},
)
mock_module = types.ModuleType("ollama")
mock_module.Client = capture_client
monkeypatch.setitem(sys.modules, "ollama", mock_module)
from noteflow.infrastructure.summarization import OllamaSummarizer
summarizer = OllamaSummarizer(host="http://custom:8080")
_ = summarizer.is_available
assert captured_host == "http://custom:8080"
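# --- Editor's sketch (illustration only) ---
# The fenced-JSON test implies the provider strips markdown code fences before
# parsing; one plausible approach (the fence string is built with chr(96) to
# keep this sketch free of literal backtick runs):
_FENCE = chr(96) * 3


def _strip_fences_sketch(content: str) -> str:
    text = content.strip()
    if text.startswith(_FENCE):
        # Drop the opening fence line, e.g. one carrying a "json" language tag.
        _, _, text = text.partition("\n")
    if text.endswith(_FENCE):
        text = text[: -len(_FENCE)].rstrip()
    return text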
File: tests/infrastructure/triggers/conftest.py
"""Test fixtures for trigger infrastructure tests."""
from __future__ import annotations
import sys
import types
from collections.abc import Callable
from dataclasses import dataclass
import pytest
@dataclass
class DummyWindow:
"""Mock window object for pywinctl tests."""
title: str | None
@pytest.fixture
def mock_pywinctl(monkeypatch: pytest.MonkeyPatch) -> Callable[[str | None], None]:
"""Factory fixture to install mocked pywinctl module.
Usage:
mock_pywinctl("Zoom Meeting") # Window with title
mock_pywinctl(None) # No active window
"""
def _install(title: str | None) -> None:
window = DummyWindow(title) if title is not None else None
module = types.SimpleNamespace(getActiveWindow=lambda: window)
monkeypatch.setitem(sys.modules, "pywinctl", module)
return _install
@pytest.fixture
def mock_pywinctl_unavailable(monkeypatch: pytest.MonkeyPatch) -> None:
    """Make pywinctl unimportable.

    A None entry in sys.modules makes ``import pywinctl`` raise
    ImportError, so no raising helper is needed.
    """
    monkeypatch.setitem(sys.modules, "pywinctl", None)
@pytest.fixture
def mock_pywinctl_raises(monkeypatch: pytest.MonkeyPatch) -> None:
"""Install pywinctl mock that raises RuntimeError on getActiveWindow."""
def raise_runtime_error() -> None:
msg = "No display available"
raise RuntimeError(msg)
module = types.SimpleNamespace(getActiveWindow=raise_runtime_error)
monkeypatch.setitem(sys.modules, "pywinctl", module)
File: tests/infrastructure/triggers/test_audio_activity.py
"""Tests for audio activity trigger provider."""
from __future__ import annotations
import time
import numpy as np
import pytest
from noteflow.infrastructure.audio import RmsLevelProvider
from noteflow.infrastructure.triggers.audio_activity import (
AudioActivityProvider,
AudioActivitySettings,
)
def _settings(**overrides: object) -> AudioActivitySettings:
defaults: dict[str, object] = {
"enabled": True,
"threshold_db": -20.0,
"window_seconds": 10.0,
"min_active_ratio": 0.6,
"min_samples": 3,
"max_history": 10,
"weight": 0.3,
} | overrides
return AudioActivitySettings(**defaults)
def test_audio_activity_settings_validation() -> None:
"""Settings should reject min_samples greater than max_history."""
with pytest.raises(ValueError, match="min_samples"):
AudioActivitySettings(
enabled=True,
threshold_db=-20.0,
window_seconds=5.0,
min_active_ratio=0.5,
min_samples=11,
max_history=10,
weight=0.3,
)
def test_audio_activity_provider_disabled_ignores_updates() -> None:
"""Disabled provider should not emit signals."""
provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=False))
frames = np.ones(10, dtype=np.float32)
provider.update(frames, timestamp=1.0)
assert provider.get_signal() is None
def test_audio_activity_provider_emits_signal(monkeypatch: pytest.MonkeyPatch) -> None:
"""Provider emits a signal when sustained activity passes ratio threshold."""
provider = AudioActivityProvider(RmsLevelProvider(), _settings())
active = np.ones(10, dtype=np.float32)
inactive = np.zeros(10, dtype=np.float32)
provider.update(active, timestamp=1.0)
provider.update(active, timestamp=2.0)
provider.update(inactive, timestamp=3.0)
monkeypatch.setattr(time, "monotonic", lambda: 4.0)
signal = provider.get_signal()
assert signal is not None
assert signal.weight == pytest.approx(0.3)
def test_audio_activity_provider_window_excludes_old_samples(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Samples outside the window should not contribute to activity ratio."""
provider = AudioActivityProvider(RmsLevelProvider(), _settings(window_seconds=2.0))
active = np.ones(10, dtype=np.float32)
provider.update(active, timestamp=1.0)
provider.update(active, timestamp=2.0)
provider.update(active, timestamp=3.0)
monkeypatch.setattr(time, "monotonic", lambda: 10.0)
assert provider.get_signal() is None
def test_audio_activity_provider_source_property() -> None:
"""Provider source should be AUDIO_ACTIVITY."""
from noteflow.domain.triggers.entities import TriggerSource
provider = AudioActivityProvider(RmsLevelProvider(), _settings())
assert provider.source == TriggerSource.AUDIO_ACTIVITY
def test_audio_activity_provider_max_weight_property() -> None:
"""Provider max_weight should reflect configured weight."""
provider = AudioActivityProvider(RmsLevelProvider(), _settings(weight=0.5))
assert provider.max_weight == pytest.approx(0.5)
def test_audio_activity_provider_is_enabled_reflects_settings() -> None:
"""is_enabled should reflect settings.enabled."""
enabled_provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=True))
disabled_provider = AudioActivityProvider(RmsLevelProvider(), _settings(enabled=False))
assert enabled_provider.is_enabled() is True
assert disabled_provider.is_enabled() is False
def test_audio_activity_provider_clear_history() -> None:
"""clear_history should reset the activity history."""
provider = AudioActivityProvider(RmsLevelProvider(), _settings())
active = np.ones(10, dtype=np.float32)
provider.update(active, timestamp=1.0)
provider.update(active, timestamp=2.0)
provider.update(active, timestamp=3.0)
provider.clear_history()
# After clearing, signal should be None due to insufficient samples
assert provider.get_signal() is None
def test_audio_activity_provider_insufficient_samples() -> None:
"""Provider should return None when history has fewer than min_samples."""
provider = AudioActivityProvider(RmsLevelProvider(), _settings(min_samples=5))
active = np.ones(10, dtype=np.float32)
# Add only 3 samples (less than min_samples=5)
provider.update(active, timestamp=1.0)
provider.update(active, timestamp=2.0)
provider.update(active, timestamp=3.0)
assert provider.get_signal() is None
def test_audio_activity_provider_below_activity_ratio() -> None:
"""Provider should return None when active ratio < min_active_ratio."""
provider = AudioActivityProvider(RmsLevelProvider(), _settings(min_active_ratio=0.7))
active = np.ones(10, dtype=np.float32)
inactive = np.zeros(10, dtype=np.float32)
# Add 3 active, 7 inactive = 30% active ratio (below 70% threshold)
provider.update(active, timestamp=1.0)
provider.update(active, timestamp=2.0)
provider.update(active, timestamp=3.0)
provider.update(inactive, timestamp=4.0)
provider.update(inactive, timestamp=5.0)
provider.update(inactive, timestamp=6.0)
provider.update(inactive, timestamp=7.0)
provider.update(inactive, timestamp=8.0)
provider.update(inactive, timestamp=9.0)
provider.update(inactive, timestamp=10.0)
assert provider.get_signal() is None
def test_audio_activity_provider_boundary_activity_ratio(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Provider should emit signal when ratio exactly equals min_active_ratio."""
provider = AudioActivityProvider(
RmsLevelProvider(),
_settings(min_active_ratio=0.6, min_samples=5, max_history=10),
)
active = np.ones(10, dtype=np.float32)
inactive = np.zeros(10, dtype=np.float32)
# Add 6 active, 4 inactive = 60% active ratio (exactly at threshold)
provider.update(active, timestamp=1.0)
provider.update(active, timestamp=2.0)
provider.update(active, timestamp=3.0)
provider.update(active, timestamp=4.0)
provider.update(active, timestamp=5.0)
provider.update(active, timestamp=6.0)
provider.update(inactive, timestamp=7.0)
provider.update(inactive, timestamp=8.0)
provider.update(inactive, timestamp=9.0)
provider.update(inactive, timestamp=10.0)
monkeypatch.setattr(time, "monotonic", lambda: 11.0)
signal = provider.get_signal()
assert signal is not None
assert signal.weight == pytest.approx(0.3)
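# --- Editor's sketch (illustration only) ---
# The gate these tests exercise: window recent (timestamp, active) samples,
# require a minimum count, then compare the active fraction against the
# threshold (inclusive, per the boundary test). Names are assumptions.
def _ratio_gate_sketch(
    samples: list[tuple[float, bool]],
    now: float,
    window_seconds: float,
    min_samples: int,
    min_active_ratio: float,
) -> bool:
    recent = [active for ts, active in samples if now - ts <= window_seconds]
    if len(recent) < min_samples:
        return False
    return sum(recent) / len(recent) >= min_active_ratio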
File: tests/infrastructure/triggers/test_foreground_app.py
"""Tests for foreground app trigger provider."""
from __future__ import annotations
import sys
import types
import pytest
from noteflow.domain.triggers.entities import TriggerSource
from noteflow.infrastructure.triggers.foreground_app import (
ForegroundAppProvider,
ForegroundAppSettings,
)
class DummyWindow:
"""Mock window object for pywinctl tests."""
def __init__(self, title: str | None) -> None:
self.title = title
def _install_pywinctl(monkeypatch: pytest.MonkeyPatch, title: str | None) -> None:
"""Install mocked pywinctl with specified window title."""
window = DummyWindow(title) if title is not None else None
module = types.SimpleNamespace(getActiveWindow=lambda: window)
monkeypatch.setitem(sys.modules, "pywinctl", module)
def _settings(**overrides: object) -> ForegroundAppSettings:
"""Create ForegroundAppSettings with defaults and overrides."""
defaults: dict[str, object] = {
"enabled": True,
"weight": 0.4,
"meeting_apps": {"zoom"},
"suppressed_apps": set(),
} | overrides
return ForegroundAppSettings(**defaults)
# --- Existing Tests ---
def test_foreground_app_provider_emits_signal(monkeypatch: pytest.MonkeyPatch) -> None:
"""Provider emits signal when a meeting app is in foreground."""
_install_pywinctl(monkeypatch, "Zoom Meeting")
provider = ForegroundAppProvider(_settings())
signal = provider.get_signal()
assert signal is not None
assert signal.weight == pytest.approx(0.4)
assert signal.app_name == "Zoom Meeting"
def test_foreground_app_provider_suppressed(monkeypatch: pytest.MonkeyPatch) -> None:
"""Suppressed apps should not emit signals."""
_install_pywinctl(monkeypatch, "Zoom Meeting")
provider = ForegroundAppProvider(_settings(suppressed_apps={"zoom"}))
assert provider.get_signal() is None
def test_foreground_app_provider_unavailable(monkeypatch: pytest.MonkeyPatch) -> None:
"""Unavailable provider should report disabled."""
provider = ForegroundAppProvider(_settings())
monkeypatch.setattr(provider, "_is_available", lambda: False)
assert provider.is_enabled() is False
# --- New Tests ---
def test_foreground_app_provider_source_property() -> None:
"""Provider source should be FOREGROUND_APP."""
provider = ForegroundAppProvider(_settings())
assert provider.source == TriggerSource.FOREGROUND_APP
def test_foreground_app_provider_max_weight_property() -> None:
"""Provider max_weight should reflect configured weight."""
provider = ForegroundAppProvider(_settings(weight=0.5))
assert provider.max_weight == pytest.approx(0.5)
def test_foreground_app_settings_lowercases_apps() -> None:
"""Settings __post_init__ should lowercase meeting_apps and suppressed_apps."""
settings = ForegroundAppSettings(
enabled=True,
weight=0.4,
meeting_apps={"ZOOM", "Teams", "GoToMeeting"},
suppressed_apps={"SLACK", "Discord"},
)
assert "zoom" in settings.meeting_apps
assert "teams" in settings.meeting_apps
assert "gotomeeting" in settings.meeting_apps
assert "slack" in settings.suppressed_apps
assert "discord" in settings.suppressed_apps
# Original case should not be present
assert "ZOOM" not in settings.meeting_apps
assert "SLACK" not in settings.suppressed_apps
def test_foreground_app_provider_disabled_returns_none(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Provider should return None when enabled=False."""
_install_pywinctl(monkeypatch, "Zoom Meeting")
provider = ForegroundAppProvider(_settings(enabled=False))
assert provider.get_signal() is None
def test_foreground_app_provider_no_window_returns_none(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Provider should return None when getActiveWindow() returns None."""
_install_pywinctl(monkeypatch, None)
provider = ForegroundAppProvider(_settings())
# Force availability check to succeed
provider._available = True
assert provider.get_signal() is None
def test_foreground_app_provider_empty_title_returns_none(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Provider should return None when window title is empty string."""
_install_pywinctl(monkeypatch, "")
provider = ForegroundAppProvider(_settings())
provider._available = True
assert provider.get_signal() is None
def test_foreground_app_provider_non_meeting_app_returns_none(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Provider should return None when foreground app is not a meeting app."""
_install_pywinctl(monkeypatch, "Firefox Browser")
provider = ForegroundAppProvider(_settings(meeting_apps={"zoom", "teams"}))
provider._available = True
assert provider.get_signal() is None
def test_foreground_app_provider_suppress_app() -> None:
"""suppress_app should add lowercased app to suppressed_apps."""
provider = ForegroundAppProvider(_settings(suppressed_apps=set()))
provider.suppress_app("ZOOM")
provider.suppress_app("Teams")
assert "zoom" in provider.suppressed_apps
assert "teams" in provider.suppressed_apps
def test_foreground_app_provider_unsuppress_app() -> None:
"""unsuppress_app should remove app from suppressed_apps."""
provider = ForegroundAppProvider(_settings(suppressed_apps={"zoom", "teams"}))
provider.unsuppress_app("zoom")
assert "zoom" not in provider.suppressed_apps
assert "teams" in provider.suppressed_apps
def test_foreground_app_provider_add_meeting_app() -> None:
"""add_meeting_app should add lowercased app to meeting_apps."""
provider = ForegroundAppProvider(_settings(meeting_apps={"zoom"}))
provider.add_meeting_app("WEBEX")
provider.add_meeting_app("RingCentral")
assert "webex" in provider._settings.meeting_apps
assert "ringcentral" in provider._settings.meeting_apps
def test_foreground_app_provider_suppressed_apps_property() -> None:
"""suppressed_apps property should return frozenset."""
provider = ForegroundAppProvider(_settings(suppressed_apps={"zoom", "teams"}))
result = provider.suppressed_apps
assert isinstance(result, frozenset)
assert "zoom" in result
assert "teams" in result
def test_foreground_app_provider_case_insensitive_matching(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Provider should match meeting apps case-insensitively."""
_install_pywinctl(monkeypatch, "ZOOM MEETING - Conference Room")
provider = ForegroundAppProvider(_settings(meeting_apps={"zoom"}))
provider._available = True
signal = provider.get_signal()
assert signal is not None
assert signal.app_name == "ZOOM MEETING - Conference Room"
def test_foreground_app_provider_is_enabled_when_enabled_and_available(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""is_enabled should return True when both enabled and available."""
_install_pywinctl(monkeypatch, "Some Window")
provider = ForegroundAppProvider(_settings(enabled=True))
assert provider.is_enabled() is True
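The matching rule these tests imply is a lowercase substring check with suppression taking precedence. A hedged reconstruction (the real provider wraps this in availability and enablement checks):

def matches(title: str, meeting_apps: set[str], suppressed_apps: set[str]) -> bool:
    # Settings lowercase both sets in __post_init__, so only the title needs lowering.
    lowered = title.lower()
    if any(app in lowered for app in suppressed_apps):
        return False  # suppressed apps never emit a signal
    return any(app in lowered for app in meeting_apps)

assert matches("ZOOM MEETING - Conference Room", {"zoom"}, set())
assert not matches("Zoom Meeting", {"zoom"}, {"zoom"})
assert not matches("Firefox Browser", {"zoom", "teams"}, set())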
File: tests/infrastructure/__init__.py
"""Infrastructure tests package."""
File: tests/infrastructure/test_converters.py
"""Tests for infrastructure converters."""
from __future__ import annotations
from noteflow.domain import entities
from noteflow.infrastructure.asr import dto
from noteflow.infrastructure.converters import AsrConverter, OrmConverter
class TestAsrConverter:
"""Tests for AsrConverter."""
def test_word_timing_to_domain_maps_field_names(self) -> None:
"""Test ASR start/end maps to domain start_time/end_time."""
asr_word = dto.WordTiming(word="hello", start=1.5, end=2.0, probability=0.95)
result = AsrConverter.word_timing_to_domain(asr_word)
assert result.word == "hello"
assert result.start_time == 1.5
assert result.end_time == 2.0
assert result.probability == 0.95
def test_word_timing_to_domain_preserves_precision(self) -> None:
"""Test timing values preserve floating point precision."""
asr_word = dto.WordTiming(
word="test",
start=0.123456789,
end=0.987654321,
probability=0.999999,
)
result = AsrConverter.word_timing_to_domain(asr_word)
assert result.start_time == 0.123456789
assert result.end_time == 0.987654321
assert result.probability == 0.999999
def test_word_timing_to_domain_returns_domain_type(self) -> None:
"""Test converter returns domain WordTiming type."""
asr_word = dto.WordTiming(word="test", start=1.0, end=2.0, probability=0.9)
result = AsrConverter.word_timing_to_domain(asr_word)
assert isinstance(result, entities.WordTiming)
def test_result_to_domain_words_converts_all(self) -> None:
"""Test batch conversion of ASR result words."""
asr_result = dto.AsrResult(
text="hello world",
start=0.0,
end=2.0,
words=(
dto.WordTiming(word="hello", start=0.0, end=1.0, probability=0.9),
dto.WordTiming(word="world", start=1.0, end=2.0, probability=0.95),
),
)
words = AsrConverter.result_to_domain_words(asr_result)
assert len(words) == 2
assert words[0].word == "hello"
assert words[0].start_time == 0.0
assert words[1].word == "world"
assert words[1].start_time == 1.0
def test_result_to_domain_words_empty(self) -> None:
"""Test conversion with empty words tuple."""
asr_result = dto.AsrResult(text="", start=0.0, end=0.0, words=())
words = AsrConverter.result_to_domain_words(asr_result)
assert words == []
class TestOrmConverterToOrmKwargs:
"""Tests for OrmConverter.word_timing_to_orm_kwargs."""
def test_converts_to_dict(self) -> None:
"""Test domain to ORM kwargs conversion."""
word = entities.WordTiming(
word="test",
start_time=1.5,
end_time=2.0,
probability=0.9,
)
result = OrmConverter.word_timing_to_orm_kwargs(word)
assert result == {
"word": "test",
"start_time": 1.5,
"end_time": 2.0,
"probability": 0.9,
}
def test_preserves_precision(self) -> None:
"""Test floating point precision in kwargs."""
word = entities.WordTiming(
word="precise",
start_time=0.123456789,
end_time=0.987654321,
probability=0.111111,
)
result = OrmConverter.word_timing_to_orm_kwargs(word)
assert result["start_time"] == 0.123456789
assert result["end_time"] == 0.987654321
assert result["probability"] == 0.111111
File: tests/integration/__init__.py
"""Integration tests using testcontainers."""
File: tests/integration/test_unit_of_work.py
"""Integration tests for SqlAlchemyUnitOfWork."""
from __future__ import annotations
from datetime import UTC, datetime
from typing import TYPE_CHECKING
import pytest
from noteflow.domain.entities import Meeting, Segment, Summary
from noteflow.domain.value_objects import MeetingState
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
@pytest.mark.integration
class TestUnitOfWork:
"""Integration tests for SqlAlchemyUnitOfWork."""
async def test_uow_context_manager(
self, session_factory: async_sessionmaker[AsyncSession]
) -> None:
"""Test UoW works as async context manager."""
async with SqlAlchemyUnitOfWork(session_factory) as uow:
assert uow.meetings is not None
assert uow.segments is not None
assert uow.summaries is not None
async def test_uow_commit(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
"""Test UoW commit persists changes."""
meeting = Meeting.create(title="Commit Test")
async with SqlAlchemyUnitOfWork(session_factory) as uow:
await uow.meetings.create(meeting)
await uow.commit()
# Verify in new UoW
async with SqlAlchemyUnitOfWork(session_factory) as uow:
retrieved = await uow.meetings.get(meeting.id)
assert retrieved is not None
assert retrieved.title == "Commit Test"
async def test_uow_rollback(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
"""Test UoW rollback discards changes."""
meeting = Meeting.create(title="Rollback Test")
async with SqlAlchemyUnitOfWork(session_factory) as uow:
await uow.meetings.create(meeting)
await uow.rollback()
# Verify not persisted
async with SqlAlchemyUnitOfWork(session_factory) as uow:
retrieved = await uow.meetings.get(meeting.id)
assert retrieved is None
async def test_uow_auto_rollback_on_exception(
self, session_factory: async_sessionmaker[AsyncSession]
) -> None:
"""Test UoW auto-rollbacks on exception."""
meeting = Meeting.create(title="Exception Test")
with pytest.raises(ValueError, match="Test exception"):
async with SqlAlchemyUnitOfWork(session_factory) as uow:
await uow.meetings.create(meeting)
raise ValueError("Test exception")
# Verify not persisted
async with SqlAlchemyUnitOfWork(session_factory) as uow:
retrieved = await uow.meetings.get(meeting.id)
assert retrieved is None
async def test_uow_transactional_consistency(
self, session_factory: async_sessionmaker[AsyncSession]
) -> None:
"""Test UoW provides transactional consistency across repos."""
meeting = Meeting.create(title="Transactional Test")
segment = Segment(
segment_id=0,
text="Hello",
start_time=0.0,
end_time=1.0,
meeting_id=meeting.id,
)
summary = Summary(
meeting_id=meeting.id,
executive_summary="Test summary",
generated_at=datetime.now(UTC),
)
# Create meeting, segment, and summary in same transaction
async with SqlAlchemyUnitOfWork(session_factory) as uow:
await uow.meetings.create(meeting)
await uow.segments.add(meeting.id, segment)
await uow.summaries.save(summary)
await uow.commit()
# Verify all persisted
async with SqlAlchemyUnitOfWork(session_factory) as uow:
m = await uow.meetings.get(meeting.id)
segs = await uow.segments.get_by_meeting(meeting.id)
s = await uow.summaries.get_by_meeting(meeting.id)
assert m is not None
assert len(segs) == 1
assert s is not None
async def test_uow_repository_caching(
self, session_factory: async_sessionmaker[AsyncSession]
) -> None:
"""Test UoW caches repository instances."""
async with SqlAlchemyUnitOfWork(session_factory) as uow:
meetings1 = uow.meetings
meetings2 = uow.meetings
assert meetings1 is meetings2
segments1 = uow.segments
segments2 = uow.segments
assert segments1 is segments2
async def test_uow_multiple_operations(
self, session_factory: async_sessionmaker[AsyncSession]
) -> None:
"""Test UoW handles multiple operations in sequence."""
meeting = Meeting.create(title="Multi-op Test")
async with SqlAlchemyUnitOfWork(session_factory) as uow:
# Create
await uow.meetings.create(meeting)
await uow.commit()
# Update
meeting.start_recording()
await uow.meetings.update(meeting)
await uow.commit()
# Add segment
segment = Segment(segment_id=0, text="Test", start_time=0.0, end_time=1.0)
await uow.segments.add(meeting.id, segment)
await uow.commit()
# Verify final state
async with SqlAlchemyUnitOfWork(session_factory) as uow:
m = await uow.meetings.get(meeting.id)
segs = await uow.segments.get_by_meeting(meeting.id)
assert m is not None
assert m.state == MeetingState.RECORDING
assert len(segs) == 1
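The context-manager contract these tests rely on can be summarized in a short illustrative sketch (the real SqlAlchemyUnitOfWork wraps an AsyncSession and lazily builds repositories; this only shows the enter/exit semantics):

class SketchUnitOfWork:
    """Illustrative only: uncommitted work is always discarded on exit."""

    def __init__(self, session_factory) -> None:
        self._session_factory = session_factory
        self._session = None

    async def __aenter__(self):
        self._session = self._session_factory()
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        # Rolling back after a commit is a no-op, so this is safe in both the
        # committed path and the exception path the tests above exercise.
        await self._session.rollback()
        await self._session.close()

    async def commit(self) -> None:
        await self._session.commit()

    async def rollback(self) -> None:
        await self._session.rollback()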
File: tests/__init__.py
"""NoteFlow test suite."""
File: tests/conftest.py
"""Global test fixtures to mock optional extra dependencies.
These stubs allow running the suite without installing heavy/optional packages
like openai/anthropic/ollama/pywinctl, while individual tests can still
override with more specific monkeypatches when needed.
"""
from __future__ import annotations
import sys
import types
from types import SimpleNamespace
import pytest
@pytest.fixture(autouse=True, scope="session")
def mock_optional_extras() -> None:
"""Install lightweight stubs for optional extra deps if absent."""
if "openai" not in sys.modules:
try:
import openai as _openai # noqa: F401
except ImportError:
def _default_create(**_: object) -> SimpleNamespace:
return SimpleNamespace(
choices=[SimpleNamespace(message=SimpleNamespace(content="{}"))],
usage=SimpleNamespace(total_tokens=0),
)
openai_module = types.ModuleType("openai")
openai_module.OpenAI = lambda **kwargs: SimpleNamespace(
chat=SimpleNamespace(completions=SimpleNamespace(create=_default_create))
)
sys.modules["openai"] = openai_module
if "anthropic" not in sys.modules:
try:
import anthropic as _anthropic # noqa: F401
except ImportError:
def _default_messages_create(**_: object) -> SimpleNamespace:
return SimpleNamespace(
content=[SimpleNamespace(text="{}")],
usage=SimpleNamespace(input_tokens=0, output_tokens=0),
)
anthropic_module = types.ModuleType("anthropic")
anthropic_module.Anthropic = lambda **kwargs: SimpleNamespace(
messages=SimpleNamespace(create=_default_messages_create)
)
sys.modules["anthropic"] = anthropic_module
if "ollama" not in sys.modules:
try:
import ollama as _ollama # noqa: F401
except ImportError:
def _default_chat(**_: object) -> dict[str, object]:
return {
"message": {
"content": '{"executive_summary": "", "key_points": [], "action_items": []}'
},
"eval_count": 0,
"prompt_eval_count": 0,
}
ollama_module = types.ModuleType("ollama")
ollama_module.Client = lambda **kwargs: SimpleNamespace(
list=lambda: {}, chat=_default_chat
)
sys.modules["ollama"] = ollama_module
# pywinctl depends on pymonctl, which may fail in headless environments
# Mock both if not already present
if "pymonctl" not in sys.modules:
try:
import pymonctl as _pymonctl # noqa: F401
except Exception:
# Mock pymonctl for headless environments (Xlib.error.DisplayNameError, etc.)
pymonctl_module = types.ModuleType("pymonctl")
pymonctl_module.getAllMonitors = lambda: []
sys.modules["pymonctl"] = pymonctl_module
if "pywinctl" not in sys.modules:
try:
import pywinctl as _pywinctl # noqa: F401
except Exception:
# ImportError: package not installed
# OSError/Xlib errors: pywinctl may fail in headless environments
pywinctl_module = types.ModuleType("pywinctl")
pywinctl_module.getActiveWindow = lambda: None
pywinctl_module.getAllWindows = lambda: []
pywinctl_module.getAllTitles = lambda: []
sys.modules["pywinctl"] = pywinctl_module
File: src/noteflow/application/services/meeting_service.py
"""Meeting application service.
Orchestrates meeting-related use cases with persistence.
"""
from __future__ import annotations
import logging
import shutil
from collections.abc import Sequence
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING
from noteflow.domain.entities import (
ActionItem,
Annotation,
KeyPoint,
Meeting,
Segment,
Summary,
WordTiming,
)
from noteflow.domain.value_objects import AnnotationId, AnnotationType
if TYPE_CHECKING:
from collections.abc import Sequence as SequenceType
from noteflow.domain.ports.unit_of_work import UnitOfWork
from noteflow.domain.value_objects import MeetingId, MeetingState
logger = logging.getLogger(__name__)
class MeetingService:
"""Application service for meeting operations.
Provides use cases for managing meetings, segments, and summaries.
All methods are async and expect a UnitOfWork to be provided.
"""
def __init__(self, uow: UnitOfWork) -> None:
"""Initialize the meeting service.
Args:
uow: Unit of work for persistence.
"""
self._uow = uow
async def create_meeting(
self,
title: str,
metadata: dict[str, str] | None = None,
) -> Meeting:
"""Create a new meeting.
Args:
title: Meeting title.
metadata: Optional metadata.
Returns:
Created meeting.
"""
meeting = Meeting.create(title=title, metadata=metadata or {})
async with self._uow:
saved = await self._uow.meetings.create(meeting)
await self._uow.commit()
return saved
async def get_meeting(self, meeting_id: MeetingId) -> Meeting | None:
"""Get a meeting by ID.
Args:
meeting_id: Meeting identifier.
Returns:
Meeting if found, None otherwise.
"""
async with self._uow:
return await self._uow.meetings.get(meeting_id)
async def list_meetings(
self,
states: list[MeetingState] | None = None,
limit: int = 100,
offset: int = 0,
sort_desc: bool = True,
) -> tuple[Sequence[Meeting], int]:
"""List meetings with optional filtering.
Args:
states: Optional list of states to filter by.
limit: Maximum number of meetings to return.
offset: Number of meetings to skip.
sort_desc: Sort by created_at descending if True.
Returns:
Tuple of (meetings list, total count).
"""
async with self._uow:
return await self._uow.meetings.list_all(
states=states,
limit=limit,
offset=offset,
sort_desc=sort_desc,
)
async def start_recording(self, meeting_id: MeetingId) -> Meeting | None:
"""Start recording a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
Updated meeting, or None if not found.
"""
async with self._uow:
meeting = await self._uow.meetings.get(meeting_id)
if meeting is None:
return None
meeting.start_recording()
await self._uow.meetings.update(meeting)
await self._uow.commit()
return meeting
async def stop_meeting(self, meeting_id: MeetingId) -> Meeting | None:
"""Stop a meeting through graceful STOPPING state.
Transitions: RECORDING -> STOPPING -> STOPPED
Args:
meeting_id: Meeting identifier.
Returns:
Updated meeting, or None if not found.
"""
async with self._uow:
meeting = await self._uow.meetings.get(meeting_id)
if meeting is None:
return None
# Graceful shutdown: RECORDING -> STOPPING -> STOPPED
meeting.begin_stopping()
meeting.stop_recording()
await self._uow.meetings.update(meeting)
await self._uow.commit()
return meeting
async def complete_meeting(self, meeting_id: MeetingId) -> Meeting | None:
"""Mark a meeting as completed.
Args:
meeting_id: Meeting identifier.
Returns:
Updated meeting, or None if not found.
"""
async with self._uow:
meeting = await self._uow.meetings.get(meeting_id)
if meeting is None:
return None
meeting.complete()
await self._uow.meetings.update(meeting)
await self._uow.commit()
return meeting
async def delete_meeting(
self,
meeting_id: MeetingId,
meetings_dir: Path | None = None,
) -> bool:
"""Delete meeting with complete cleanup.
Removes:
1. Filesystem assets (audio, manifest) if meetings_dir provided
2. Database records (cascade deletes children)
Args:
meeting_id: Meeting identifier.
meetings_dir: Base directory for meeting assets.
Returns:
True if deleted, False if not found.
"""
async with self._uow:
meeting = await self._uow.meetings.get(meeting_id)
if meeting is None:
return False
# Delete filesystem assets first (if directory provided)
if meetings_dir is not None:
meeting_dir = meetings_dir / str(meeting_id)
if meeting_dir.exists():
shutil.rmtree(meeting_dir)
logger.info(
"Deleted meeting assets at %s",
meeting_dir,
)
# Delete DB record (cascade handles children)
success = await self._uow.meetings.delete(meeting_id)
if success:
await self._uow.commit()
logger.info("Deleted meeting %s", meeting_id)
return success
async def add_segment(
self,
meeting_id: MeetingId,
segment_id: int,
text: str,
start_time: float,
end_time: float,
words: list[WordTiming] | None = None,
language: str = "en",
language_confidence: float = 0.0,
avg_logprob: float = 0.0,
no_speech_prob: float = 0.0,
) -> Segment:
"""Add a transcript segment to a meeting.
Args:
meeting_id: Meeting identifier.
segment_id: Segment sequence number.
text: Transcript text.
start_time: Start time in seconds.
end_time: End time in seconds.
words: Optional word-level timing.
language: Detected language code.
language_confidence: Language detection confidence.
avg_logprob: Average log probability.
no_speech_prob: No-speech probability.
Returns:
Added segment.
"""
segment = Segment(
segment_id=segment_id,
text=text,
start_time=start_time,
end_time=end_time,
meeting_id=meeting_id,
words=words or [],
language=language,
language_confidence=language_confidence,
avg_logprob=avg_logprob,
no_speech_prob=no_speech_prob,
)
async with self._uow:
saved = await self._uow.segments.add(meeting_id, segment)
await self._uow.commit()
return saved
async def add_segments_batch(
self,
meeting_id: MeetingId,
segments: Sequence[Segment],
) -> Sequence[Segment]:
"""Add multiple segments in batch.
Args:
meeting_id: Meeting identifier.
segments: Segments to add.
Returns:
Added segments.
"""
async with self._uow:
saved = await self._uow.segments.add_batch(meeting_id, segments)
await self._uow.commit()
return saved
async def get_segments(
self,
meeting_id: MeetingId,
include_words: bool = True,
) -> Sequence[Segment]:
"""Get all segments for a meeting.
Args:
meeting_id: Meeting identifier.
include_words: Include word-level timing.
Returns:
List of segments ordered by segment_id.
"""
async with self._uow:
return await self._uow.segments.get_by_meeting(
meeting_id,
include_words=include_words,
)
async def search_segments(
self,
query_embedding: list[float],
limit: int = 10,
meeting_id: MeetingId | None = None,
) -> Sequence[tuple[Segment, float]]:
"""Search segments by semantic similarity.
Args:
query_embedding: Query embedding vector.
limit: Maximum number of results.
meeting_id: Optional meeting to restrict search to.
Returns:
List of (segment, similarity_score) tuples.
"""
async with self._uow:
return await self._uow.segments.search_semantic(
query_embedding=query_embedding,
limit=limit,
meeting_id=meeting_id,
)
async def save_summary(
self,
meeting_id: MeetingId,
executive_summary: str,
key_points: list[KeyPoint] | None = None,
action_items: list[ActionItem] | None = None,
model_version: str = "",
) -> Summary:
"""Save or update a meeting summary.
Args:
meeting_id: Meeting identifier.
executive_summary: Executive summary text.
key_points: List of key points.
action_items: List of action items.
model_version: Model version that generated the summary.
Returns:
Saved summary.
"""
summary = Summary(
meeting_id=meeting_id,
executive_summary=executive_summary,
key_points=key_points or [],
action_items=action_items or [],
generated_at=datetime.now(UTC),
model_version=model_version,
)
async with self._uow:
saved = await self._uow.summaries.save(summary)
await self._uow.commit()
return saved
async def get_summary(self, meeting_id: MeetingId) -> Summary | None:
"""Get summary for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
Summary if exists, None otherwise.
"""
async with self._uow:
return await self._uow.summaries.get_by_meeting(meeting_id)
# Annotation methods
async def add_annotation(
self,
meeting_id: MeetingId,
annotation_type: AnnotationType,
text: str,
start_time: float,
end_time: float,
segment_ids: list[int] | None = None,
) -> Annotation:
"""Add an annotation to a meeting.
Args:
meeting_id: Meeting identifier.
annotation_type: Type of annotation.
text: Annotation text.
start_time: Start time in seconds.
end_time: End time in seconds.
segment_ids: Optional list of linked segment IDs.
Returns:
Added annotation.
"""
from uuid import uuid4
annotation = Annotation(
id=AnnotationId(uuid4()),
meeting_id=meeting_id,
annotation_type=annotation_type,
text=text,
start_time=start_time,
end_time=end_time,
segment_ids=segment_ids or [],
)
async with self._uow:
saved = await self._uow.annotations.add(annotation)
await self._uow.commit()
return saved
async def get_annotation(self, annotation_id: AnnotationId) -> Annotation | None:
"""Get an annotation by ID.
Args:
annotation_id: Annotation identifier.
Returns:
Annotation if found, None otherwise.
"""
async with self._uow:
return await self._uow.annotations.get(annotation_id)
async def get_annotations(
self,
meeting_id: MeetingId,
) -> SequenceType[Annotation]:
"""Get all annotations for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
List of annotations ordered by start_time.
"""
async with self._uow:
return await self._uow.annotations.get_by_meeting(meeting_id)
async def get_annotations_in_range(
self,
meeting_id: MeetingId,
start_time: float,
end_time: float,
) -> SequenceType[Annotation]:
"""Get annotations within a time range.
Args:
meeting_id: Meeting identifier.
start_time: Start of time range in seconds.
end_time: End of time range in seconds.
Returns:
List of annotations overlapping the time range.
"""
async with self._uow:
return await self._uow.annotations.get_by_time_range(meeting_id, start_time, end_time)
async def update_annotation(self, annotation: Annotation) -> Annotation:
"""Update an existing annotation.
Args:
annotation: Annotation with updated fields.
Returns:
Updated annotation.
Raises:
ValueError: If annotation does not exist.
"""
async with self._uow:
updated = await self._uow.annotations.update(annotation)
await self._uow.commit()
return updated
async def delete_annotation(self, annotation_id: AnnotationId) -> bool:
"""Delete an annotation.
Args:
annotation_id: Annotation identifier.
Returns:
True if deleted, False if not found.
"""
async with self._uow:
success = await self._uow.annotations.delete(annotation_id)
if success:
await self._uow.commit()
return success
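A minimal lifecycle driver for this service might look as follows (a sketch: `uow` stands in for any configured UnitOfWork implementation from domain.ports):

from noteflow.application.services.meeting_service import MeetingService

async def demo(uow) -> None:
    service = MeetingService(uow)
    meeting = await service.create_meeting("Weekly sync")
    await service.start_recording(meeting.id)
    await service.add_segment(
        meeting.id, segment_id=0, text="Hello everyone", start_time=0.0, end_time=1.5
    )
    stopped = await service.stop_meeting(meeting.id)  # RECORDING -> STOPPING -> STOPPED
    assert stopped is not None
    await service.complete_meeting(meeting.id)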
File: src/noteflow/application/services/retention_service.py
"""Service for automatic meeting retention and cleanup."""
from __future__ import annotations
import logging
from collections.abc import Callable
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from noteflow.domain.entities import Meeting
from noteflow.domain.ports.unit_of_work import UnitOfWork
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class RetentionReport:
"""Result of retention cleanup run.
Attributes:
meetings_checked: Number of meetings that matched cutoff criteria.
meetings_deleted: Number of meetings successfully deleted.
errors: List of error messages for failed deletions.
"""
meetings_checked: int
meetings_deleted: int
errors: tuple[str, ...]
class RetentionService:
"""Manage automatic deletion of expired meetings.
Finds and deletes meetings whose completion time is older than
the configured retention period.
"""
def __init__(
self,
uow_factory: Callable[[], UnitOfWork],
retention_days: int,
meetings_dir: Path | None = None,
enabled: bool = False,
) -> None:
"""Initialize retention service.
Args:
uow_factory: Factory that returns a fresh UnitOfWork instance per call.
retention_days: Days to retain completed meetings.
meetings_dir: Base directory for meeting assets.
enabled: Whether retention is enabled.
"""
self._uow_factory = uow_factory
self._retention_days = retention_days
self._meetings_dir = meetings_dir
self._enabled = enabled
@property
def is_enabled(self) -> bool:
"""Check if retention is enabled."""
return self._enabled
@property
def retention_days(self) -> int:
"""Get configured retention days."""
return self._retention_days
@property
def cutoff_date(self) -> datetime:
"""Calculate cutoff date for retention."""
return datetime.now(UTC) - timedelta(days=self._retention_days)
async def find_expired_meetings(self) -> list[Meeting]:
"""Find meetings older than retention period.
Returns:
List of meetings eligible for deletion.
"""
uow = self._uow_factory()
async with uow:
return list(await uow.meetings.find_older_than(self.cutoff_date))
async def run_cleanup(self, dry_run: bool = False) -> RetentionReport:
"""Execute retention cleanup.
Args:
dry_run: If True, report but don't delete.
Returns:
Report of cleanup results.
"""
if not self._enabled and not dry_run:
logger.info("Retention disabled, skipping cleanup")
return RetentionReport(
meetings_checked=0,
meetings_deleted=0,
errors=(),
)
cutoff = self.cutoff_date
logger.info(
"Running retention cleanup (dry_run=%s, cutoff=%s)",
dry_run,
cutoff.isoformat(),
)
expired = await self.find_expired_meetings()
deleted = 0
errors: list[str] = []
for meeting in expired:
if dry_run:
logger.info(
"Would delete expired meeting: id=%s, ended_at=%s",
meeting.id,
meeting.ended_at,
)
continue
try:
# Import here to avoid circular imports
from noteflow.application.services import MeetingService
# Use a fresh UnitOfWork instance for each deletion
meeting_svc = MeetingService(self._uow_factory())
success = await meeting_svc.delete_meeting(
meeting.id,
meetings_dir=self._meetings_dir,
)
if success:
deleted += 1
logger.info(
"Deleted expired meeting: id=%s",
meeting.id,
)
except Exception as e:
error_msg = f"{meeting.id}: {e}"
errors.append(error_msg)
logger.warning("Failed to delete meeting %s: %s", meeting.id, e)
logger.info(
"Retention cleanup complete: checked=%d, deleted=%d, errors=%d",
len(expired),
deleted,
len(errors),
)
return RetentionReport(
meetings_checked=len(expired),
meetings_deleted=deleted,
errors=tuple(errors),
)
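Typical wiring for a dry-run preview, sketched under the assumption that `uow_factory` returns a fresh UnitOfWork per call and that the meetings path is deployment-specific (the path below is hypothetical):

from pathlib import Path

from noteflow.application.services.retention_service import RetentionService

async def preview_cleanup(uow_factory) -> None:
    service = RetentionService(
        uow_factory=uow_factory,
        retention_days=30,
        meetings_dir=Path("/var/noteflow/meetings"),  # hypothetical location
        enabled=True,
    )
    report = await service.run_cleanup(dry_run=True)  # report only, delete nothing
    print(report.meetings_checked, report.meetings_deleted, len(report.errors))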
File: src/noteflow/application/services/summarization_service.py
"""Summarization orchestration service.
Coordinate provider selection, consent handling, and citation verification.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import TYPE_CHECKING
from noteflow.domain.summarization import (
CitationVerificationResult,
ProviderUnavailableError,
SummarizationRequest,
SummarizationResult,
)
if TYPE_CHECKING:
from collections.abc import Awaitable, Callable, Sequence
from noteflow.domain.entities import Segment, Summary
from noteflow.domain.summarization import CitationVerifier, SummarizerProvider
from noteflow.domain.value_objects import MeetingId
# Type alias for persistence callback
PersistCallback = Callable[[Summary], Awaitable[None]]
logger = logging.getLogger(__name__)
class SummarizationMode(Enum):
"""Available summarization modes."""
MOCK = "mock"
LOCAL = "local" # Ollama
CLOUD = "cloud" # OpenAI/Anthropic
@dataclass
class SummarizationServiceSettings:
"""Configuration for summarization service.
Attributes:
default_mode: Default summarization mode.
cloud_consent_granted: Whether user has consented to cloud processing.
fallback_to_local: Fall back to local if cloud unavailable.
verify_citations: Whether to verify citations after summarization.
filter_invalid_citations: Remove invalid citations from result.
max_key_points: Default maximum key points.
max_action_items: Default maximum action items.
"""
default_mode: SummarizationMode = SummarizationMode.LOCAL
cloud_consent_granted: bool = False
fallback_to_local: bool = True
verify_citations: bool = True
filter_invalid_citations: bool = True
max_key_points: int = 5
max_action_items: int = 10
@dataclass
class SummarizationServiceResult:
"""Result from summarization service.
Attributes:
result: The raw summarization result from the provider.
verification: Citation verification result (if verification enabled).
filtered_summary: Summary with invalid citations removed (if filtering enabled).
provider_used: Which provider was actually used.
fallback_used: Whether a fallback provider was used.
"""
result: SummarizationResult
verification: CitationVerificationResult | None = None
filtered_summary: Summary | None = None
provider_used: str = ""
fallback_used: bool = False
@property
def summary(self) -> Summary:
"""Get the best available summary (filtered if available)."""
return self.filtered_summary or self.result.summary
@property
def has_invalid_citations(self) -> bool:
"""Check if summary has invalid citations."""
return self.verification is not None and not self.verification.is_valid
@dataclass
class SummarizationService:
"""Orchestrate summarization with provider selection and citation verification.
Manages provider selection based on mode and availability, handles
cloud consent requirements, and verifies/filters citation integrity.
"""
providers: dict[SummarizationMode, SummarizerProvider] = field(default_factory=dict)
verifier: CitationVerifier | None = None
settings: SummarizationServiceSettings = field(default_factory=SummarizationServiceSettings)
on_persist: PersistCallback | None = None
def register_provider(self, mode: SummarizationMode, provider: SummarizerProvider) -> None:
"""Register a provider for a specific mode.
Args:
mode: The mode this provider handles.
provider: The provider implementation.
"""
self.providers[mode] = provider
logger.debug("Registered %s provider: %s", mode.value, provider.provider_name)
def set_verifier(self, verifier: CitationVerifier) -> None:
"""Set the citation verifier.
Args:
verifier: Citation verifier implementation.
"""
self.verifier = verifier
def get_available_modes(self) -> list[SummarizationMode]:
"""Get list of currently available summarization modes.
Returns:
List of available modes based on registered providers.
"""
available = []
for mode, provider in self.providers.items():
if mode == SummarizationMode.CLOUD:
if provider.is_available and self.settings.cloud_consent_granted:
available.append(mode)
elif provider.is_available:
available.append(mode)
return available
def is_mode_available(self, mode: SummarizationMode) -> bool:
"""Check if a specific mode is available.
Args:
mode: The mode to check.
Returns:
True if mode is available.
"""
return mode in self.get_available_modes()
def grant_cloud_consent(self) -> None:
"""Grant consent for cloud processing."""
self.settings.cloud_consent_granted = True
logger.info("Cloud consent granted")
def revoke_cloud_consent(self) -> None:
"""Revoke consent for cloud processing."""
self.settings.cloud_consent_granted = False
logger.info("Cloud consent revoked")
async def summarize(
self,
meeting_id: MeetingId,
segments: Sequence[Segment],
mode: SummarizationMode | None = None,
max_key_points: int | None = None,
max_action_items: int | None = None,
) -> SummarizationServiceResult:
"""Generate evidence-linked summary for meeting transcript.
Args:
meeting_id: The meeting ID.
segments: Transcript segments to summarize.
mode: Override default mode (None uses settings default).
max_key_points: Override default max key points.
max_action_items: Override default max action items.
Returns:
SummarizationServiceResult with summary and verification.
Raises:
SummarizationError: If summarization fails and no fallback available.
ProviderUnavailableError: If no provider is available for the mode.
"""
target_mode = mode or self.settings.default_mode
fallback_used = False
# Get provider, potentially with fallback
provider, actual_mode = self._get_provider_with_fallback(target_mode)
if actual_mode != target_mode:
fallback_used = True
logger.info(
"Falling back from %s to %s mode",
target_mode.value,
actual_mode.value,
)
# Build request
request = SummarizationRequest(
meeting_id=meeting_id,
segments=segments,
max_key_points=max_key_points or self.settings.max_key_points,
max_action_items=max_action_items or self.settings.max_action_items,
)
# Execute summarization
logger.info(
"Summarizing %d segments with %s provider",
len(segments),
provider.provider_name,
)
result = await provider.summarize(request)
# Build service result
service_result = SummarizationServiceResult(
result=result,
provider_used=provider.provider_name,
fallback_used=fallback_used,
)
# Verify citations if enabled
if self.settings.verify_citations and self.verifier is not None:
verification = self.verifier.verify_citations(result.summary, list(segments))
service_result.verification = verification
if not verification.is_valid:
logger.warning(
"Summary has %d invalid citations",
verification.invalid_count,
)
# Filter if enabled
if self.settings.filter_invalid_citations:
service_result.filtered_summary = self._filter_citations(
result.summary, list(segments)
)
# Persist summary if callback provided
if self.on_persist is not None:
await self.on_persist(service_result.summary)
logger.debug("Summary persisted for meeting %s", meeting_id)
return service_result
def _get_provider_with_fallback(
self, mode: SummarizationMode
) -> tuple[SummarizerProvider, SummarizationMode]:
"""Get provider for mode, with fallback if unavailable.
Args:
mode: Requested mode.
Returns:
Tuple of (provider, actual_mode).
Raises:
ProviderUnavailableError: If no provider available.
"""
# Check requested mode
if mode in self.providers:
provider = self.providers[mode]
# Check cloud consent
if mode == SummarizationMode.CLOUD and not self.settings.cloud_consent_granted:
logger.warning("Cloud mode requested but consent not granted")
if self.settings.fallback_to_local:
return self._get_fallback_provider(mode)
raise ProviderUnavailableError("Cloud consent not granted")
if provider.is_available:
return provider, mode
# Requested provider is missing or unavailable; fall back if allowed
if self.settings.fallback_to_local and mode != SummarizationMode.MOCK:
return self._get_fallback_provider(mode)
raise ProviderUnavailableError(f"No provider available for mode: {mode.value}")
def _get_fallback_provider(
self, original_mode: SummarizationMode
) -> tuple[SummarizerProvider, SummarizationMode]:
"""Get fallback provider when primary unavailable.
Fallback order: LOCAL -> MOCK
Args:
original_mode: The mode that was unavailable.
Returns:
Tuple of (provider, mode).
Raises:
ProviderUnavailableError: If no fallback available.
"""
fallback_order = [SummarizationMode.LOCAL, SummarizationMode.MOCK]
for fallback_mode in fallback_order:
if fallback_mode == original_mode:
continue
if fallback_mode in self.providers:
provider = self.providers[fallback_mode]
if provider.is_available:
return provider, fallback_mode
raise ProviderUnavailableError("No fallback provider available")
def _filter_citations(self, summary: Summary, segments: list[Segment]) -> Summary:
"""Filter invalid citations from summary.
Args:
summary: Summary to filter.
segments: Available segments.
Returns:
Summary with invalid citations removed.
"""
if self.verifier is None:
return summary
# Use verifier's filter method if available
if hasattr(self.verifier, "filter_invalid_citations"):
return self.verifier.filter_invalid_citations(summary, segments)
return summary
def set_default_mode(self, mode: SummarizationMode) -> None:
"""Set the default summarization mode.
Args:
mode: New default mode.
"""
self.settings.default_mode = mode
logger.info("Default summarization mode set to %s", mode.value)
def set_persist_callback(self, callback: PersistCallback | None) -> None:
"""Set callback for persisting summaries after generation.
Args:
callback: Async function that persists a Summary, or None to disable.
"""
self.on_persist = callback
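A short consumer sketch, assuming providers and a verifier have already been registered by the caller:

from noteflow.application.services.summarization_service import (
    SummarizationMode,
    SummarizationService,
)

async def summarize_meeting(service: SummarizationService, meeting_id, segments) -> None:
    result = await service.summarize(meeting_id, segments, mode=SummarizationMode.LOCAL)
    if result.fallback_used:
        print(f"fell back to {result.provider_used}")
    if result.has_invalid_citations:
        print("invalid citations detected (filtered if filtering is enabled)")
    print(result.summary.executive_summary)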
File: src/noteflow/cli/retention.py
"""CLI command for retention cleanup.
Usage:
python -m noteflow.cli.retention cleanup [--dry-run]
python -m noteflow.cli.retention status
"""
from __future__ import annotations
import argparse
import asyncio
import logging
import sys
from noteflow.application.services import RetentionService
from noteflow.config.settings import get_settings
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
async def _run_cleanup(dry_run: bool) -> int:
"""Execute retention cleanup.
Args:
dry_run: If True, report but don't delete.
Returns:
Exit code (0 for success, 1 for errors).
"""
settings = get_settings()
if not settings.retention_enabled and not dry_run:
logger.warning(
"Retention is disabled. Set NOTEFLOW_RETENTION_ENABLED=true or use --dry-run"
)
return 1
uow_factory = SqlAlchemyUnitOfWork.factory_from_settings(settings)
service = RetentionService(
uow_factory=uow_factory,
retention_days=settings.retention_days,
meetings_dir=settings.meetings_dir,
enabled=settings.retention_enabled,
)
logger.info(
"Running retention cleanup (dry_run=%s, retention_days=%d, cutoff=%s)",
dry_run,
service.retention_days,
service.cutoff_date.isoformat(),
)
report = await service.run_cleanup(dry_run=dry_run)
print("\nRetention Cleanup Report:")
print(f" Meetings checked: {report.meetings_checked}")
print(f" Meetings deleted: {report.meetings_deleted}")
if report.errors:
print(f" Errors: {len(report.errors)}")
for err in report.errors:
print(f" - {err}")
return 1
return 0
async def _show_status() -> int:
"""Show retention status and pending deletions.
Returns:
Exit code (always 0).
"""
settings = get_settings()
uow_factory = SqlAlchemyUnitOfWork.factory_from_settings(settings)
service = RetentionService(
uow_factory=uow_factory,
retention_days=settings.retention_days,
meetings_dir=settings.meetings_dir,
enabled=settings.retention_enabled,
)
expired = await service.find_expired_meetings()
print("\nRetention Status:")
print(f" Enabled: {settings.retention_enabled}")
print(f" Retention days: {settings.retention_days}")
print(f" Check interval: {settings.retention_check_interval_hours} hours")
print(f" Cutoff date: {service.cutoff_date.isoformat()}")
print(f" Meetings pending deletion: {len(expired)}")
if expired:
print("\n Pending deletions:")
for meeting in expired[:10]: # Show first 10
print(f" - {meeting.id}: {meeting.title} (ended: {meeting.ended_at})")
if len(expired) > 10:
print(f" ... and {len(expired) - 10} more")
return 0
def main() -> None:
"""Entry point for retention CLI."""
parser = argparse.ArgumentParser(
description="NoteFlow meeting retention management",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
subparsers = parser.add_subparsers(dest="command", help="Available commands")
# cleanup command
cleanup_parser = subparsers.add_parser("cleanup", help="Run retention cleanup")
cleanup_parser.add_argument(
"--dry-run",
action="store_true",
help="Report what would be deleted without deleting",
)
# status command
subparsers.add_parser("status", help="Show retention status")
args = parser.parse_args()
if not args.command:
parser.print_help()
sys.exit(1)
if args.command == "cleanup":
exit_code = asyncio.run(_run_cleanup(dry_run=args.dry_run))
elif args.command == "status":
exit_code = asyncio.run(_show_status())
else:
parser.print_help()
exit_code = 1
sys.exit(exit_code)
if __name__ == "__main__":
main()
File: src/noteflow/client/components/_async_mixin.py
"""Mixin for async operations with loading/error state management.
Provides standardized handling for UI components that perform async operations,
including loading state, error handling, and UI thread dispatch.
"""
from __future__ import annotations
from collections.abc import Awaitable, Callable
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import flet as ft
class AsyncOperationMixin[T]:
"""Mixin providing standardized async operation handling.
Manages loading state, error handling, and UI thread dispatch for
Flet components that perform async operations.
Components using this mixin must have:
- `_page: ft.Page | None` attribute for UI updates
"""
_page: ft.Page | None
async def run_async_operation(
self,
operation: Callable[[], Awaitable[T]],
on_success: Callable[[T], None],
on_error: Callable[[str], None],
set_loading: Callable[[bool], None],
) -> T | None:
"""Run async operation with standardized state management.
Handles loading state, error catching, and UI thread dispatch.
All callbacks are dispatched to the UI thread.
Args:
operation: Async callable to execute.
on_success: Callback with result on success (called on UI thread).
on_error: Callback with error message on failure (called on UI thread).
set_loading: Callback to set loading state (called on UI thread).
Returns:
Result of operation on success, None on failure.
"""
self._dispatch_ui(lambda: set_loading(True))
try:
result = await operation()
# Capture result for closure
self._dispatch_ui(lambda r=result: on_success(r)) # type: ignore[misc]
return result
except Exception as e:
error_msg = str(e)
self._dispatch_ui(lambda msg=error_msg: on_error(msg)) # type: ignore[misc]
return None
finally:
self._dispatch_ui(lambda: set_loading(False))
def _dispatch_ui(self, callback: Callable[[], None]) -> None:
"""Dispatch callback to UI thread.
Safe to call even if page is None (no-op in that case).
Args:
callback: Function to execute on UI thread.
"""
if not self._page:
return
async def _runner() -> None:
callback()
# Flet expects a coroutine function here; schedule it.
self._page.run_task(_runner)
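A minimal consumer sketch (hypothetical component; with `_page` left as None, `_dispatch_ui` is a no-op, which keeps the example runnable without a Flet page):

class MeetingLoader(AsyncOperationMixin[list[str]]):
    """Hypothetical consumer of the mixin."""

    def __init__(self) -> None:
        self._page = None  # no page: UI dispatch becomes a no-op

    async def load(self) -> list[str] | None:
        return await self.run_async_operation(
            operation=self._fetch,
            on_success=lambda items: None,
            on_error=lambda msg: None,
            set_loading=lambda busy: None,
        )

    async def _fetch(self) -> list[str]:
        return ["standup", "retro"]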
File: src/noteflow/client/components/annotation_display.py
"""Annotation display component for meeting review.
Display existing annotations during meeting review with type badges and clickable timestamps.
Reuses patterns from MeetingLibraryComponent (ListView) and SummaryPanelComponent (type badges).
"""
from __future__ import annotations
import logging
from collections.abc import Callable
from typing import TYPE_CHECKING
import flet as ft
# REUSE existing formatting utility
from noteflow.infrastructure.export._formatting import format_timestamp
if TYPE_CHECKING:
from noteflow.client.state import AppState
from noteflow.grpc.client import AnnotationInfo
logger = logging.getLogger(__name__)
# Annotation type colors (reused pattern from summary_panel.py)
ANNOTATION_TYPE_COLORS: dict[str, str] = {
"action_item": ft.Colors.GREEN_400,
"decision": ft.Colors.BLUE_400,
"note": ft.Colors.GREY_400,
"risk": ft.Colors.ORANGE_400,
}
ANNOTATION_TYPE_ICONS: dict[str, str] = {
"action_item": ft.Icons.CHECK_CIRCLE_OUTLINE,
"decision": ft.Icons.GAVEL,
"note": ft.Icons.NOTE,
"risk": ft.Icons.WARNING,
}
ANNOTATION_TYPE_LABELS: dict[str, str] = {
"action_item": "Action",
"decision": "Decision",
"note": "Note",
"risk": "Risk",
}
class AnnotationDisplayComponent:
"""Display existing annotations during meeting review.
Shows annotations sorted by start_time with type badges and clickable timestamps.
Reuses ListView pattern from MeetingLibraryComponent.
"""
def __init__(
self,
state: AppState,
on_annotation_seek: Callable[[float], None] | None = None,
) -> None:
"""Initialize annotation display.
Args:
state: Centralized application state.
on_annotation_seek: Callback when annotation is clicked (seek to timestamp).
"""
self._state = state
self._on_annotation_seek = on_annotation_seek
# UI elements
self._list_view: ft.ListView | None = None
self._header_text: ft.Text | None = None
self._container: ft.Container | None = None
# State
self._annotations: list[AnnotationInfo] = []
def build(self) -> ft.Container:
"""Build annotation display UI.
Returns:
Container with annotation list.
"""
self._header_text = ft.Text(
"Annotations (0)",
size=14,
weight=ft.FontWeight.BOLD,
)
self._list_view = ft.ListView(
spacing=5,
padding=10,
height=150,
)
self._container = ft.Container(
content=ft.Column(
[
self._header_text,
ft.Container(
content=self._list_view,
border=ft.border.all(1, ft.Colors.GREY_400),
border_radius=8,
),
],
spacing=5,
),
visible=False, # Hidden until annotations loaded
)
return self._container
def load_annotations(self, annotations: list[AnnotationInfo]) -> None:
"""Load and display annotations.
Args:
annotations: List of annotations to display.
"""
# Sort by start_time
self._annotations = sorted(annotations, key=lambda a: a.start_time)
self._state.run_on_ui_thread(self._render_annotations)
def clear(self) -> None:
"""Clear all annotations."""
self._annotations = []
if self._list_view:
self._list_view.controls.clear()
if self._header_text:
self._header_text.value = "Annotations (0)"
if self._container:
self._container.visible = False
self._state.request_update()
def _render_annotations(self) -> None:
"""Render annotation list (UI thread only)."""
if not self._list_view or not self._header_text or not self._container:
return
self._list_view.controls.clear()
for annotation in self._annotations:
self._list_view.controls.append(self._create_annotation_row(annotation))
# Update header and visibility
count = len(self._annotations)
self._header_text.value = f"Annotations ({count})"
self._container.visible = count > 0
self._state.request_update()
def _create_annotation_row(self, annotation: AnnotationInfo) -> ft.Container:
"""Create a row for an annotation.
Args:
annotation: Annotation to display.
Returns:
Container with annotation details.
"""
# Get type styling
atype = annotation.annotation_type
color = ANNOTATION_TYPE_COLORS.get(atype, ft.Colors.GREY_400)
icon = ANNOTATION_TYPE_ICONS.get(atype, ft.Icons.NOTE)
label = ANNOTATION_TYPE_LABELS.get(atype, atype.title())
# Format timestamp
time_str = format_timestamp(annotation.start_time)
# Type badge
badge = ft.Container(
content=ft.Row(
[
ft.Icon(icon, size=12, color=color),
ft.Text(label, size=10, color=color, weight=ft.FontWeight.BOLD),
],
spacing=2,
),
bgcolor=f"{color}20", # 20% opacity background
padding=ft.padding.symmetric(horizontal=6, vertical=2),
border_radius=4,
)
# Annotation text (truncated if long)
text = annotation.text
display_text = f"{text[:80]}..." if len(text) > 80 else text
row = ft.Row(
[
badge,
ft.Text(time_str, size=11, color=ft.Colors.GREY_600, width=50),
ft.Text(display_text, size=12, expand=True),
],
spacing=10,
)
return ft.Container(
content=row,
padding=8,
border_radius=4,
on_click=lambda e, a=annotation: self._on_annotation_click(a),
ink=True,
)
def _on_annotation_click(self, annotation: AnnotationInfo) -> None:
"""Handle annotation row click.
Args:
annotation: Clicked annotation.
"""
if self._on_annotation_seek:
self._on_annotation_seek(annotation.start_time)
logger.debug(
"Annotation seek: type=%s, time=%.2f",
annotation.annotation_type,
annotation.start_time,
)
File: src/noteflow/client/components/annotation_toolbar.py
"""Annotation toolbar component for adding action items, decisions, and notes.
Uses AnnotationInfo from grpc.client and NoteFlowClient.add_annotation().
Does not recreate any types - imports and uses existing ones.
"""
from __future__ import annotations
import logging
from collections.abc import Callable
from typing import TYPE_CHECKING
import flet as ft
if TYPE_CHECKING:
from noteflow.client.state import AppState
from noteflow.grpc.client import NoteFlowClient
logger = logging.getLogger(__name__)
class AnnotationToolbarComponent:
"""Toolbar for adding annotations during recording or playback.
Uses NoteFlowClient.add_annotation() to persist annotations.
"""
def __init__(
self,
state: AppState,
get_client: Callable[[], NoteFlowClient | None],
) -> None:
"""Initialize annotation toolbar.
Args:
state: Centralized application state.
get_client: Callable that returns current gRPC client or None.
"""
self._state = state
self._get_client = get_client
# UI elements
self._action_btn: ft.ElevatedButton | None = None
self._decision_btn: ft.ElevatedButton | None = None
self._note_btn: ft.ElevatedButton | None = None
self._risk_btn: ft.ElevatedButton | None = None
self._row: ft.Row | None = None
# Dialog elements
self._dialog: ft.AlertDialog | None = None
self._text_field: ft.TextField | None = None
self._current_annotation_type: str = ""
def build(self) -> ft.Row:
"""Build annotation toolbar UI.
Returns:
Row containing annotation buttons.
"""
self._action_btn = ft.ElevatedButton(
"Action Item",
icon=ft.Icons.CHECK_CIRCLE_OUTLINE,
on_click=lambda e: self._show_annotation_dialog("action_item"),
disabled=True,
)
self._decision_btn = ft.ElevatedButton(
"Decision",
icon=ft.Icons.GAVEL,
on_click=lambda e: self._show_annotation_dialog("decision"),
disabled=True,
)
self._note_btn = ft.ElevatedButton(
"Note",
icon=ft.Icons.NOTE_ADD,
on_click=lambda e: self._show_annotation_dialog("note"),
disabled=True,
)
self._risk_btn = ft.ElevatedButton(
"Risk",
icon=ft.Icons.WARNING_AMBER,
on_click=lambda e: self._show_annotation_dialog("risk"),
disabled=True,
)
self._row = ft.Row(
[self._action_btn, self._decision_btn, self._note_btn, self._risk_btn],
visible=False,
)
return self._row
def set_enabled(self, enabled: bool) -> None:
"""Enable or disable annotation buttons.
Args:
enabled: Whether buttons should be enabled.
"""
if self._action_btn:
self._action_btn.disabled = not enabled
if self._decision_btn:
self._decision_btn.disabled = not enabled
if self._note_btn:
self._note_btn.disabled = not enabled
if self._risk_btn:
self._risk_btn.disabled = not enabled
self._state.request_update()
def set_visible(self, visible: bool) -> None:
"""Set visibility of annotation toolbar.
Args:
visible: Whether toolbar should be visible.
"""
if self._row:
self._row.visible = visible
self._state.request_update()
def _show_annotation_dialog(self, annotation_type: str) -> None:
"""Show dialog for entering annotation text.
Args:
annotation_type: Type of annotation (action_item, decision, note).
"""
self._current_annotation_type = annotation_type
# Format type for display
type_display = annotation_type.replace("_", " ").title()
self._text_field = ft.TextField(
label=f"{type_display} Text",
multiline=True,
min_lines=2,
max_lines=4,
width=400,
autofocus=True,
)
self._dialog = ft.AlertDialog(
title=ft.Text(f"Add {type_display}"),
content=self._text_field,
actions=[
ft.TextButton("Cancel", on_click=self._close_dialog),
ft.ElevatedButton("Add", on_click=self._submit_annotation),
],
actions_alignment=ft.MainAxisAlignment.END,
)
# Show dialog
if self._state._page:
self._state._page.dialog = self._dialog
self._dialog.open = True
self._state.request_update()
def _close_dialog(self, e: ft.ControlEvent | None = None) -> None:
"""Close the annotation dialog."""
if self._dialog:
self._dialog.open = False
self._state.request_update()
def _submit_annotation(self, e: ft.ControlEvent) -> None:
"""Submit the annotation to the server."""
if not self._text_field:
return
text = self._text_field.value or ""
if not text.strip():
return
self._close_dialog()
# Get current timestamp
timestamp = self._get_current_timestamp()
# Submit to server
client = self._get_client()
if not client:
logger.warning("No gRPC client available for annotation")
return
meeting = self._state.current_meeting
if not meeting:
logger.warning("No current meeting for annotation")
return
try:
if annotation := client.add_annotation(
meeting_id=meeting.id,
annotation_type=self._current_annotation_type,
text=text.strip(),
start_time=timestamp,
end_time=timestamp, # Point annotation
):
self._state.annotations.append(annotation)
logger.info(
"Added annotation: %s at %.2f", self._current_annotation_type, timestamp
)
else:
logger.error("Failed to add annotation")
except Exception as exc:
logger.error("Error adding annotation: %s", exc)
def _get_current_timestamp(self) -> float:
"""Get current timestamp for annotation.
Returns timestamp from playback position (during playback) or
recording elapsed time (during recording).
Returns:
Current timestamp in seconds.
"""
# During playback, use playback position
if self._state.playback_position > 0:
return self._state.playback_position
# During recording, use elapsed seconds
return float(self._state.elapsed_seconds)
File: src/noteflow/client/components/playback_controls.py
"""Playback controls component with play/pause/stop and timeline.
Uses SoundDevicePlayback from infrastructure.audio and format_timestamp from _formatting.
Does not recreate any types - imports and uses existing ones.
"""
from __future__ import annotations
import logging
from collections.abc import Callable
from typing import TYPE_CHECKING, Final
import flet as ft
from noteflow.client.components._thread_mixin import BackgroundWorkerMixin
# REUSE existing types - do not recreate
from noteflow.infrastructure.audio import PlaybackState
from noteflow.infrastructure.export._formatting import format_timestamp
if TYPE_CHECKING:
from noteflow.client.state import AppState
logger = logging.getLogger(__name__)
POSITION_POLL_INTERVAL: Final[float] = 0.1 # 100ms for smooth timeline updates
class PlaybackControlsComponent(BackgroundWorkerMixin):
"""Audio playback controls with play/pause/stop and timeline.
Uses SoundDevicePlayback from state and format_timestamp from _formatting.
"""
def __init__(
self,
state: AppState,
on_position_change: Callable[[float], None] | None = None,
) -> None:
"""Initialize playback controls component.
Args:
state: Centralized application state.
on_position_change: Callback when playback position changes.
"""
self._state = state
self._on_position_change = on_position_change
self._init_worker()
# UI elements
self._play_btn: ft.IconButton | None = None
self._stop_btn: ft.IconButton | None = None
self._position_label: ft.Text | None = None
self._duration_label: ft.Text | None = None
self._timeline_slider: ft.Slider | None = None
self._row: ft.Row | None = None
def build(self) -> ft.Row:
"""Build playback controls UI.
Returns:
Row containing playback buttons and timeline.
"""
self._play_btn = ft.IconButton(
icon=ft.Icons.PLAY_ARROW,
icon_color=ft.Colors.GREEN,
tooltip="Play",
on_click=self._on_play_click,
disabled=True,
)
self._stop_btn = ft.IconButton(
icon=ft.Icons.STOP,
icon_color=ft.Colors.RED,
tooltip="Stop",
on_click=self._on_stop_click,
disabled=True,
)
self._position_label = ft.Text("00:00", size=12, width=50)
self._duration_label = ft.Text("00:00", size=12, width=50)
self._timeline_slider = ft.Slider(
min=0,
max=100,
value=0,
expand=True,
on_change=self._on_slider_change,
disabled=True,
)
self._row = ft.Row(
[
self._play_btn,
self._stop_btn,
self._position_label,
self._timeline_slider,
self._duration_label,
],
visible=False,
)
return self._row
def set_visible(self, visible: bool) -> None:
"""Set visibility of playback controls.
Args:
visible: Whether controls should be visible.
"""
if self._row:
self._row.visible = visible
self._state.request_update()
def load_audio(self) -> None:
"""Load session audio buffer for playback."""
buffer = self._state.session_audio_buffer
if not buffer:
logger.warning("No audio in session buffer")
return
# Play through SoundDevicePlayback
self._state.playback.play(buffer)
self._state.playback.pause() # Load but don't start
# Update UI state
duration = self._state.playback.total_duration
self._state.playback_position = 0.0
self._state.run_on_ui_thread(lambda: self._update_loaded_state(duration))
def _update_loaded_state(self, duration: float) -> None:
"""Update UI after audio is loaded (UI thread only)."""
if self._play_btn:
self._play_btn.disabled = False
if self._stop_btn:
self._stop_btn.disabled = False
if self._timeline_slider:
self._timeline_slider.disabled = False
self._timeline_slider.max = max(duration, 0.1)
self._timeline_slider.value = 0
if self._duration_label:
self._duration_label.value = format_timestamp(duration)
if self._position_label:
self._position_label.value = "00:00"
self.set_visible(True)
self._state.request_update()
def seek(self, position: float) -> None:
"""Seek to a specific position.
Args:
position: Position in seconds.
"""
if self._state.playback.seek(position):
self._state.playback_position = position
self._state.run_on_ui_thread(self._update_position_display)
def _on_play_click(self, e: ft.ControlEvent) -> None:
"""Handle play/pause button click."""
playback = self._state.playback
if playback.state == PlaybackState.PLAYING:
playback.pause()
self._stop_polling()
self._update_play_button(playing=False)
elif playback.state == PlaybackState.PAUSED:
playback.resume()
self._start_polling()
self._update_play_button(playing=True)
elif buffer := self._state.session_audio_buffer:
playback.play(buffer)
self._start_polling()
self._update_play_button(playing=True)
def _on_stop_click(self, e: ft.ControlEvent) -> None:
"""Handle stop button click."""
self._stop_polling()
self._state.playback.stop()
self._state.playback_position = 0.0
self._update_play_button(playing=False)
self._state.run_on_ui_thread(self._update_position_display)
def _on_slider_change(self, e: ft.ControlEvent) -> None:
"""Handle timeline slider change."""
if self._timeline_slider:
position = float(self._timeline_slider.value or 0)
self.seek(position)
def _update_play_button(self, *, playing: bool) -> None:
"""Update play button icon based on state."""
if self._play_btn:
if playing:
self._play_btn.icon = ft.Icons.PAUSE
self._play_btn.tooltip = "Pause"
else:
self._play_btn.icon = ft.Icons.PLAY_ARROW
self._play_btn.tooltip = "Play"
self._state.request_update()
def _start_polling(self) -> None:
"""Start position polling thread."""
self._start_worker(self._poll_loop, "PlaybackPositionPoll")
def _stop_polling(self) -> None:
"""Stop position polling thread."""
self._stop_worker()
def _poll_loop(self) -> None:
"""Background polling loop for position updates."""
while self._should_run():
playback = self._state.playback
if playback.state == PlaybackState.PLAYING:
position = playback.current_position
self._state.playback_position = position
self._state.run_on_ui_thread(self._update_position_display)
# Notify callback
if self._on_position_change:
try:
self._on_position_change(position)
except Exception as e:
logger.error("Position change callback error: %s", e)
elif playback.state == PlaybackState.STOPPED:
# Playback finished - update UI and stop polling
self._state.run_on_ui_thread(self._on_playback_finished)
break
self._wait_interval(POSITION_POLL_INTERVAL)
def _update_position_display(self) -> None:
"""Update position display elements (UI thread only)."""
position = self._state.playback_position
if self._position_label:
self._position_label.value = format_timestamp(position)
        if self._timeline_slider and not self._timeline_slider.disabled:
            # Slider is enabled only once audio has been loaded
            self._timeline_slider.value = position
self._state.request_update()
def _on_playback_finished(self) -> None:
"""Handle playback completion (UI thread only)."""
self._update_play_button(playing=False)
self._state.playback_position = 0.0
self._update_position_display()
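A hedged wiring sketch for the component above, assuming an `AppState` instance is already constructed elsewhere in the client; `attach_playback` and its print-based callback are illustrative helpers, not part of the codebase:

```python
import flet as ft
from noteflow.client.components.playback_controls import PlaybackControlsComponent
from noteflow.client.state import AppState


def attach_playback(page: ft.Page, state: AppState) -> PlaybackControlsComponent:
    """Hypothetical helper showing the component lifecycle."""
    def on_position(seconds: float) -> None:
        # Fired roughly every POSITION_POLL_INTERVAL while playing.
        print(f"playback at {seconds:.1f}s")

    controls = PlaybackControlsComponent(state, on_position_change=on_position)
    page.add(controls.build())  # the Row starts hidden and disabled
    controls.load_audio()       # no-op (with a warning) if the session buffer is empty
    return controls
```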
File: src/noteflow/client/components/recording_timer.py
"""Recording timer component with background thread.
Uses format_timestamp() from infrastructure/export/_formatting.py (not local implementation).
"""
from __future__ import annotations
import time
from typing import TYPE_CHECKING, Final
import flet as ft
from noteflow.client.components._thread_mixin import BackgroundWorkerMixin
# REUSE existing formatting utility - do not recreate
from noteflow.infrastructure.export._formatting import format_timestamp
if TYPE_CHECKING:
from noteflow.client.state import AppState
TIMER_UPDATE_INTERVAL: Final[float] = 1.0
class RecordingTimerComponent(BackgroundWorkerMixin):
"""Recording duration timer with background thread.
Uses format_timestamp() from export._formatting (not local implementation).
"""
def __init__(self, state: AppState) -> None:
"""Initialize timer component.
Args:
state: Centralized application state.
"""
self._state = state
self._init_worker()
self._dot: ft.Icon | None = None
self._label: ft.Text | None = None
self._row: ft.Row | None = None
def build(self) -> ft.Row:
"""Build timer UI elements.
Returns:
Row containing recording dot and time label.
"""
self._dot = ft.Icon(
ft.Icons.FIBER_MANUAL_RECORD,
color=ft.Colors.RED,
size=16,
)
self._label = ft.Text(
"00:00",
size=20,
weight=ft.FontWeight.BOLD,
color=ft.Colors.RED,
)
self._row = ft.Row(
controls=[self._dot, self._label],
visible=False,
)
return self._row
def start(self) -> None:
"""Start the recording timer."""
self._state.recording_start_time = time.time()
self._state.elapsed_seconds = 0
if self._row:
self._row.visible = True
if self._label:
self._label.value = "00:00"
self._start_worker(self._timer_loop, "RecordingTimer")
self._state.request_update()
def stop(self) -> None:
"""Stop the recording timer."""
self._stop_worker(timeout=2.0)
if self._row:
self._row.visible = False
self._state.recording_start_time = None
self._state.request_update()
def _timer_loop(self) -> None:
"""Background timer loop."""
while self._should_run():
if self._state.recording_start_time is not None:
self._state.elapsed_seconds = int(time.time() - self._state.recording_start_time)
self._state.run_on_ui_thread(self._update_display)
self._wait_interval(TIMER_UPDATE_INTERVAL)
def _update_display(self) -> None:
"""Update timer display (UI thread only)."""
if not self._label:
return
# REUSE existing format_timestamp from _formatting.py
self._label.value = format_timestamp(float(self._state.elapsed_seconds))
self._state.request_update()
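Both the timer above and PlaybackControlsComponent lean on the same BackgroundWorkerMixin contract: `_init_worker()` in the constructor, a loop gated on `_should_run()`, and `_wait_interval()` for pacing. A minimal sketch of that pattern follows; `TickerComponent` is hypothetical, and only the mixin methods are taken from the code above:

```python
from noteflow.client.components._thread_mixin import BackgroundWorkerMixin


class TickerComponent(BackgroundWorkerMixin):
    """Hypothetical component illustrating the worker lifecycle."""

    def __init__(self) -> None:
        self._init_worker()  # prepare worker bookkeeping before any start
        self._ticks = 0

    def start(self) -> None:
        self._start_worker(self._loop, "Ticker")  # spawn the named worker thread

    def stop(self) -> None:
        self._stop_worker(timeout=2.0)  # signal the loop to exit and wait for it

    def _loop(self) -> None:
        while self._should_run():  # becomes False once stop is requested
            self._ticks += 1
            self._wait_interval(1.0)  # pace the loop; wakes early on stop
```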
File: src/noteflow/client/components/summary_panel.py
"""Summary panel component for evidence-linked meeting summaries.
Uses existing patterns from MeetingLibraryComponent and TranscriptComponent.
Does not recreate any types - imports and uses existing domain entities.
"""
from __future__ import annotations
import logging
from collections.abc import Callable
from typing import TYPE_CHECKING
from uuid import UUID
import flet as ft
# Runtime import: MeetingId is a NewType and is called in _generate_summary,
# so it cannot live under TYPE_CHECKING.
from noteflow.domain.value_objects import MeetingId
if TYPE_CHECKING:
from noteflow.application.services import SummarizationService
from noteflow.client.state import AppState
    from noteflow.domain.entities import ActionItem, KeyPoint, Segment, Summary
logger = logging.getLogger(__name__)
# Priority color mapping
PRIORITY_COLORS: dict[int, str] = {
0: ft.Colors.GREY_400, # Unspecified
1: ft.Colors.BLUE_400, # Low
2: ft.Colors.ORANGE_400, # Medium
3: ft.Colors.RED_400, # High
}
PRIORITY_LABELS: dict[int, str] = {
0: "—",
1: "Low",
2: "Med",
3: "High",
}
class SummaryPanelComponent:
"""Summary panel with evidence-linked key points and action items.
Displays executive summary, key points with citations, and action items
with priority badges. Citation chips link back to transcript segments.
"""
def __init__(
self,
state: AppState,
get_service: Callable[[], SummarizationService | None],
on_citation_click: Callable[[int], None] | None = None,
) -> None:
"""Initialize summary panel.
Args:
state: Centralized application state.
get_service: Callable to get summarization service.
on_citation_click: Callback when citation chip is clicked (segment_id).
"""
self._state = state
self._get_service = get_service
self._on_citation_click = on_citation_click
# Uncited drafts tracking
self._show_uncited: bool = False
self._original_summary: Summary | None = None
self._filtered_summary: Summary | None = None
self._uncited_key_points: int = 0
self._uncited_action_items: int = 0
# UI references (set in build)
self._container: ft.Container | None = None
self._summary_text: ft.Text | None = None
self._key_points_list: ft.ListView | None = None
self._action_items_list: ft.ListView | None = None
self._generate_btn: ft.ElevatedButton | None = None
self._loading_indicator: ft.ProgressRing | None = None
self._error_text: ft.Text | None = None
self._uncited_toggle: ft.Switch | None = None
self._uncited_count_text: ft.Text | None = None
def build(self) -> ft.Container:
"""Build the summary panel UI.
Returns:
Container with summary panel content.
"""
# Executive summary section
self._summary_text = ft.Text(
"",
size=14,
selectable=True,
)
# Key points list with citation chips
self._key_points_list = ft.ListView(
spacing=5,
height=150,
padding=5,
)
# Action items list with priority badges
self._action_items_list = ft.ListView(
spacing=5,
height=150,
padding=5,
)
# Generate button
self._generate_btn = ft.ElevatedButton(
"Generate Summary",
icon=ft.Icons.AUTO_AWESOME,
on_click=self._on_generate_click,
disabled=True,
)
# Loading/error states
self._loading_indicator = ft.ProgressRing(
visible=False,
width=20,
height=20,
)
self._error_text = ft.Text(
"",
color=ft.Colors.RED_400,
visible=False,
size=12,
)
# Uncited drafts toggle
self._uncited_count_text = ft.Text(
"",
size=11,
color=ft.Colors.GREY_600,
visible=False,
)
self._uncited_toggle = ft.Switch(
label="Show uncited",
value=False,
on_change=self._on_uncited_toggle,
visible=False,
scale=0.8,
)
summary_container = ft.Container(
content=self._summary_text,
padding=10,
bgcolor=ft.Colors.GREY_100,
border_radius=4,
)
self._container = ft.Container(
content=ft.Column(
[
ft.Row(
[
ft.Text("Summary", size=16, weight=ft.FontWeight.BOLD),
self._generate_btn,
self._loading_indicator,
ft.Container(expand=True), # Spacer
self._uncited_count_text,
self._uncited_toggle,
],
alignment=ft.MainAxisAlignment.START,
spacing=10,
),
self._error_text,
summary_container,
ft.Text("Key Points:", size=14, weight=ft.FontWeight.BOLD),
ft.Container(
content=self._key_points_list,
border=ft.border.all(1, ft.Colors.GREY_300),
border_radius=4,
),
ft.Text("Action Items:", size=14, weight=ft.FontWeight.BOLD),
ft.Container(
content=self._action_items_list,
border=ft.border.all(1, ft.Colors.GREY_300),
border_radius=4,
),
],
spacing=10,
),
visible=False,
)
return self._container
def set_visible(self, visible: bool) -> None:
"""Set panel visibility.
Args:
visible: Whether panel should be visible.
"""
if self._container:
self._container.visible = visible
self._state.request_update()
def set_enabled(self, enabled: bool) -> None:
"""Set generate button enabled state.
Args:
enabled: Whether generate button should be enabled.
"""
if self._generate_btn:
self._generate_btn.disabled = not enabled
self._state.request_update()
def _on_generate_click(self, e: ft.ControlEvent) -> None:
"""Handle generate button click."""
if self._state._page:
self._state._page.run_task(self._generate_summary)
async def _generate_summary(self) -> None:
"""Generate summary asynchronously."""
service = self._get_service()
if not service:
self._show_error("Summarization service not available")
return
if not self._state.current_meeting:
self._show_error("No meeting selected")
return
if not self._state.transcript_segments:
self._show_error("No transcript segments to summarize")
return
# Convert TranscriptSegment to domain Segment
segments = self._convert_segments()
self._state.summary_loading = True
self._state.summary_error = None
self._update_loading_state()
# Convert meeting id string to MeetingId
try:
meeting_uuid = UUID(str(self._state.current_meeting.id))
except (AttributeError, ValueError) as exc:
self._show_error("Invalid meeting id")
logger.error("Invalid meeting id for summary: %s", exc)
self._state.summary_loading = False
self._state.run_on_ui_thread(self._update_loading_state)
return
meeting_id = MeetingId(meeting_uuid)
try:
result = await service.summarize(
meeting_id=meeting_id,
segments=segments,
)
# Track original and filtered summaries for toggle
self._original_summary = result.result.summary
self._filtered_summary = result.filtered_summary
self._state.current_summary = result.summary
# Calculate uncited counts
self._calculate_uncited_counts()
self._state.run_on_ui_thread(self._render_summary)
# Log provider info
logger.info(
"Summary generated by %s (fallback=%s)",
result.provider_used,
result.fallback_used,
)
except Exception as exc:
logger.exception("Summarization failed")
error_msg = str(exc)
self._state.summary_error = error_msg
self._state.run_on_ui_thread(lambda msg=error_msg: self._show_error(msg))
finally:
self._state.summary_loading = False
self._state.run_on_ui_thread(self._update_loading_state)
    def _convert_segments(self) -> list[Segment]:
"""Convert TranscriptSegment to domain Segment for service call.
Returns:
List of domain Segment entities.
"""
from noteflow.domain.entities import Segment
segments = []
for ts in self._state.transcript_segments:
seg = Segment(
segment_id=ts.segment_id,
text=ts.text,
start_time=ts.start_time,
end_time=ts.end_time,
language=ts.language,
)
segments.append(seg)
return segments
def _update_loading_state(self) -> None:
"""Update loading indicator visibility."""
if self._loading_indicator:
self._loading_indicator.visible = self._state.summary_loading
if self._generate_btn:
self._generate_btn.disabled = self._state.summary_loading
self._state.request_update()
def _show_error(self, message: str) -> None:
"""Show error message.
Args:
message: Error message to display.
"""
if self._error_text:
self._error_text.value = message
self._error_text.visible = True
self._state.request_update()
def _clear_error(self) -> None:
"""Clear error message."""
if self._error_text:
self._error_text.value = ""
self._error_text.visible = False
self._state.request_update()
def _render_summary(self) -> None:
"""Render summary content (UI thread only)."""
summary = self._get_display_summary()
if not summary:
return
self._clear_error()
# Update uncited toggle visibility
self._update_uncited_ui()
# Executive summary
if self._summary_text:
self._summary_text.value = summary.executive_summary or "No summary generated."
# Key points
if self._key_points_list:
self._key_points_list.controls.clear()
for idx, kp in enumerate(summary.key_points):
self._key_points_list.controls.append(self._create_key_point_row(kp, idx))
# Action items
if self._action_items_list:
self._action_items_list.controls.clear()
for idx, ai in enumerate(summary.action_items):
self._action_items_list.controls.append(self._create_action_item_row(ai, idx))
self._state.request_update()
def _create_key_point_row(self, kp: KeyPoint, index: int) -> ft.Container:
"""Create a row for a key point.
Args:
kp: Key point to display.
index: Index in the list.
Returns:
Container with key point content.
"""
# Citation chips
citation_chips = ft.Row(
[self._create_citation_chip(sid) for sid in kp.segment_ids],
spacing=4,
)
# Evidence indicator
evidence_icon = ft.Icon(
ft.Icons.CHECK_CIRCLE if kp.has_evidence() else ft.Icons.HELP_OUTLINE,
size=16,
color=ft.Colors.GREEN_400 if kp.has_evidence() else ft.Colors.GREY_400,
)
row = ft.Row(
[
ft.Text(f"{index + 1}.", size=12, color=ft.Colors.GREY_600, width=20),
evidence_icon,
ft.Text(kp.text, size=13, expand=True),
citation_chips,
],
spacing=8,
vertical_alignment=ft.CrossAxisAlignment.START,
)
return ft.Container(
content=row,
padding=ft.padding.symmetric(horizontal=8, vertical=4),
border_radius=4,
)
def _create_action_item_row(self, ai: ActionItem, index: int) -> ft.Container:
"""Create a row for an action item.
Args:
ai: Action item to display.
index: Index in the list.
Returns:
Container with action item content.
"""
# Priority badge
priority_badge = self._create_priority_badge(ai.priority)
# Assignee
assignee_text = ft.Text(
ai.assignee if ai.is_assigned() else "Unassigned",
size=11,
color=ft.Colors.BLUE_700 if ai.is_assigned() else ft.Colors.GREY_500,
italic=not ai.is_assigned(),
)
# Citation chips
citation_chips = ft.Row(
[self._create_citation_chip(sid) for sid in ai.segment_ids],
spacing=4,
)
# Evidence indicator
evidence_icon = ft.Icon(
ft.Icons.CHECK_CIRCLE if ai.has_evidence() else ft.Icons.HELP_OUTLINE,
size=16,
color=ft.Colors.GREEN_400 if ai.has_evidence() else ft.Colors.GREY_400,
)
row = ft.Row(
[
ft.Text(f"{index + 1}.", size=12, color=ft.Colors.GREY_600, width=20),
priority_badge,
evidence_icon,
ft.Column(
[
ft.Text(ai.text, size=13),
assignee_text,
],
spacing=2,
expand=True,
),
citation_chips,
],
spacing=8,
vertical_alignment=ft.CrossAxisAlignment.START,
)
return ft.Container(
content=row,
padding=ft.padding.symmetric(horizontal=8, vertical=4),
border_radius=4,
)
def _create_priority_badge(self, priority: int) -> ft.Container:
"""Create priority indicator badge.
Args:
priority: Priority level (0-3).
Returns:
Container with priority badge.
"""
return ft.Container(
content=ft.Text(
PRIORITY_LABELS.get(priority, "—"),
size=10,
color=ft.Colors.WHITE,
),
bgcolor=PRIORITY_COLORS.get(priority, ft.Colors.GREY_400),
border_radius=4,
padding=ft.padding.symmetric(horizontal=6, vertical=2),
width=35,
alignment=ft.alignment.center,
)
def _create_citation_chip(self, segment_id: int) -> ft.Container:
"""Create clickable citation chip.
Args:
segment_id: Segment ID to link to.
Returns:
Container with citation chip.
"""
return ft.Container(
content=ft.Text(
f"[#{segment_id}]",
size=11,
color=ft.Colors.BLUE_700,
),
bgcolor=ft.Colors.BLUE_50,
border_radius=4,
padding=ft.padding.symmetric(horizontal=6, vertical=2),
on_click=lambda _: self._handle_citation_click(segment_id),
ink=True,
)
def _handle_citation_click(self, segment_id: int) -> None:
"""Handle citation chip click.
Args:
segment_id: Segment ID that was clicked.
"""
if self._on_citation_click:
self._on_citation_click(segment_id)
def _calculate_uncited_counts(self) -> None:
"""Calculate number of uncited items filtered out."""
if not self._original_summary or not self._filtered_summary:
self._uncited_key_points = 0
self._uncited_action_items = 0
return
original_kp = len(self._original_summary.key_points)
filtered_kp = len(self._filtered_summary.key_points)
self._uncited_key_points = original_kp - filtered_kp
original_ai = len(self._original_summary.action_items)
filtered_ai = len(self._filtered_summary.action_items)
self._uncited_action_items = original_ai - filtered_ai
def _has_uncited_items(self) -> bool:
"""Check if any uncited items exist."""
return self._uncited_key_points > 0 or self._uncited_action_items > 0
def _on_uncited_toggle(self, e: ft.ControlEvent) -> None:
"""Handle uncited drafts toggle change."""
self._show_uncited = e.control.value
self._render_summary()
def _update_uncited_ui(self) -> None:
"""Update uncited toggle visibility and count text."""
has_uncited = self._has_uncited_items()
if self._uncited_toggle:
self._uncited_toggle.visible = has_uncited
if self._uncited_count_text:
if has_uncited:
total_uncited = self._uncited_key_points + self._uncited_action_items
self._uncited_count_text.value = f"({total_uncited} hidden)"
self._uncited_count_text.visible = not self._show_uncited
else:
self._uncited_count_text.visible = False
def _get_display_summary(self) -> Summary | None:
"""Get summary to display based on toggle state.
Returns:
Original summary if showing uncited, filtered otherwise.
"""
if self._show_uncited and self._original_summary:
return self._original_summary
return self._state.current_summary
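A minimal wiring sketch for the panel, assuming a `SummarizationService` instance is available from the app; `jump_to_segment` is a hypothetical stand-in for the transcript-scrolling callback:

```python
import flet as ft
from noteflow.client.components.summary_panel import SummaryPanelComponent
from noteflow.client.state import AppState


def build_summary_panel(state: AppState, service) -> ft.Container:
    """Hypothetical helper; `service` is a SummarizationService."""
    def jump_to_segment(segment_id: int) -> None:
        # Hypothetical: scroll the transcript view to the cited segment.
        print(f"jump to segment #{segment_id}")

    panel = SummaryPanelComponent(
        state,
        get_service=lambda: service,  # resolved lazily on each Generate click
        on_citation_click=jump_to_segment,
    )
    container = panel.build()  # builds hidden, with Generate disabled
    panel.set_visible(True)
    panel.set_enabled(True)
    return container
```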
File: src/noteflow/config/__init__.py
"""NoteFlow configuration module."""
from .constants import DEFAULT_GRPC_PORT, DEFAULT_SAMPLE_RATE, MAX_GRPC_MESSAGE_SIZE
from .settings import Settings, TriggerSettings, get_settings, get_trigger_settings
__all__ = [
"DEFAULT_GRPC_PORT",
"DEFAULT_SAMPLE_RATE",
"MAX_GRPC_MESSAGE_SIZE",
"Settings",
"TriggerSettings",
"get_settings",
"get_trigger_settings",
]
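Typical consumption of this facade, shown as a sketch; only the names re-exported above are used, since the shapes of Settings and TriggerSettings are defined elsewhere:

```python
from noteflow.config import (
    DEFAULT_GRPC_PORT,
    DEFAULT_SAMPLE_RATE,
    get_settings,
    get_trigger_settings,
)

settings = get_settings()                  # application-wide settings object
trigger_settings = get_trigger_settings()  # trigger-specific settings
print(f"defaults: port={DEFAULT_GRPC_PORT}, sample_rate={DEFAULT_SAMPLE_RATE}")
```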
File: src/noteflow/domain/entities/segment.py
"""Segment entity for transcript segments."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from noteflow.domain.value_objects import MeetingId
@dataclass
class WordTiming:
"""Word-level timing information within a segment."""
word: str
start_time: float
end_time: float
probability: float
def __post_init__(self) -> None:
"""Validate word timing."""
if self.end_time < self.start_time:
raise ValueError(
f"end_time ({self.end_time}) must be >= start_time ({self.start_time})"
)
if not 0.0 <= self.probability <= 1.0:
raise ValueError(f"probability must be between 0 and 1, got {self.probability}")
@dataclass
class Segment:
"""Transcript segment entity.
Represents a finalized segment of transcribed speech with optional
word-level timing information and language detection.
"""
segment_id: int
text: str
start_time: float
end_time: float
meeting_id: MeetingId | None = None
words: list[WordTiming] = field(default_factory=list)
language: str = "en"
language_confidence: float = 0.0
avg_logprob: float = 0.0
no_speech_prob: float = 0.0
embedding: list[float] | None = None
# Speaker diarization (populated by diarization engine)
speaker_id: str | None = None
speaker_confidence: float = 0.0
# Database primary key (set after persistence)
db_id: int | None = None
def __post_init__(self) -> None:
"""Validate segment data."""
if self.end_time < self.start_time:
raise ValueError(
f"end_time ({self.end_time}) must be >= start_time ({self.start_time})"
)
if self.segment_id < 0:
raise ValueError(f"segment_id must be non-negative, got {self.segment_id}")
@property
def duration(self) -> float:
"""Segment duration in seconds."""
return self.end_time - self.start_time
@property
def word_count(self) -> int:
"""Number of words in segment."""
return len(self.words) if self.words else len(self.text.split())
def has_embedding(self) -> bool:
"""Check if segment has a computed embedding."""
return self.embedding is not None and len(self.embedding) > 0
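A short construction sketch using only the fields defined above; the `ValueError` cases mirror the `__post_init__` checks:

```python
from noteflow.domain.entities.segment import Segment, WordTiming

seg = Segment(
    segment_id=0,
    text="hello world",
    start_time=0.0,
    end_time=1.2,
    words=[
        WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=0.98),
        WordTiming(word="world", start_time=0.6, end_time=1.2, probability=0.95),
    ],
)
assert seg.duration == 1.2      # end_time - start_time
assert seg.word_count == 2      # len(words) when word timings are present
assert not seg.has_embedding()  # embedding defaults to None

# Both dataclasses validate time ordering on construction:
try:
    WordTiming(word="bad", start_time=1.0, end_time=0.5, probability=0.9)
except ValueError as err:
    print(err)  # end_time (0.5) must be >= start_time (1.0)
```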
File: src/noteflow/domain/ports/repositories.py
"""Repository protocol interfaces for persistence."""
from __future__ import annotations
from collections.abc import Sequence
from datetime import datetime
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from noteflow.domain.entities import Annotation, Meeting, Segment, Summary
from noteflow.domain.value_objects import AnnotationId, MeetingId, MeetingState
class MeetingRepository(Protocol):
"""Repository protocol for Meeting aggregate operations."""
async def create(self, meeting: Meeting) -> Meeting:
"""Persist a new meeting.
Args:
meeting: Meeting to create.
Returns:
Created meeting with any generated fields populated.
"""
...
async def get(self, meeting_id: MeetingId) -> Meeting | None:
"""Retrieve a meeting by ID.
Args:
meeting_id: Meeting identifier.
Returns:
Meeting if found, None otherwise.
"""
...
async def update(self, meeting: Meeting) -> Meeting:
"""Update an existing meeting.
Args:
meeting: Meeting with updated fields.
Returns:
Updated meeting.
Raises:
ValueError: If meeting does not exist.
"""
...
async def delete(self, meeting_id: MeetingId) -> bool:
"""Delete a meeting and all associated data.
Args:
meeting_id: Meeting identifier.
Returns:
True if deleted, False if not found.
"""
...
async def list_all(
self,
states: list[MeetingState] | None = None,
limit: int = 100,
offset: int = 0,
sort_desc: bool = True,
) -> tuple[Sequence[Meeting], int]:
"""List meetings with optional filtering.
Args:
states: Optional list of states to filter by.
limit: Maximum number of meetings to return.
offset: Number of meetings to skip.
sort_desc: Sort by created_at descending if True.
Returns:
Tuple of (meetings list, total count matching filter).
"""
...
async def count_by_state(self, state: MeetingState) -> int:
"""Count meetings in a specific state.
Args:
state: Meeting state to count.
Returns:
Number of meetings in the specified state.
"""
...
async def find_older_than(self, cutoff: datetime) -> Sequence[Meeting]:
"""Find completed meetings older than cutoff date.
Args:
cutoff: Cutoff datetime; meetings ended before this are returned.
Returns:
Sequence of meetings with ended_at before cutoff.
"""
...
class SegmentRepository(Protocol):
"""Repository protocol for Segment operations."""
async def add(self, meeting_id: MeetingId, segment: Segment) -> Segment:
"""Add a segment to a meeting.
Args:
meeting_id: Meeting identifier.
segment: Segment to add.
Returns:
Added segment with db_id populated.
Raises:
ValueError: If meeting does not exist.
"""
...
async def add_batch(
self,
meeting_id: MeetingId,
segments: Sequence[Segment],
) -> Sequence[Segment]:
"""Add multiple segments to a meeting in batch.
Args:
meeting_id: Meeting identifier.
segments: Segments to add.
Returns:
Added segments with db_ids populated.
Raises:
ValueError: If meeting does not exist.
"""
...
async def get_by_meeting(
self,
meeting_id: MeetingId,
include_words: bool = True,
) -> Sequence[Segment]:
"""Get all segments for a meeting.
Args:
meeting_id: Meeting identifier.
include_words: Include word-level timing.
Returns:
List of segments ordered by segment_id.
"""
...
async def search_semantic(
self,
query_embedding: list[float],
limit: int = 10,
meeting_id: MeetingId | None = None,
) -> Sequence[tuple[Segment, float]]:
"""Search segments by semantic similarity.
Args:
query_embedding: Query embedding vector.
limit: Maximum number of results.
meeting_id: Optional meeting to restrict search to.
Returns:
List of (segment, similarity_score) tuples.
"""
...
async def update_embedding(
self,
segment_db_id: int,
embedding: list[float],
) -> None:
"""Update the embedding for a segment.
Args:
segment_db_id: Segment database primary key.
embedding: New embedding vector.
"""
...
class SummaryRepository(Protocol):
"""Repository protocol for Summary operations."""
async def save(self, summary: Summary) -> Summary:
"""Save or update a meeting summary.
Args:
summary: Summary to save.
Returns:
Saved summary with db_id populated.
"""
...
async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None:
"""Get summary for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
Summary if exists, None otherwise.
"""
...
async def delete_by_meeting(self, meeting_id: MeetingId) -> bool:
"""Delete summary for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
True if deleted, False if not found.
"""
...
class AnnotationRepository(Protocol):
"""Repository protocol for Annotation operations."""
async def add(self, annotation: Annotation) -> Annotation:
"""Add an annotation to a meeting.
Args:
annotation: Annotation to add.
Returns:
Added annotation with db_id populated.
Raises:
ValueError: If meeting does not exist.
"""
...
async def get(self, annotation_id: AnnotationId) -> Annotation | None:
"""Retrieve an annotation by ID.
Args:
annotation_id: Annotation identifier.
Returns:
Annotation if found, None otherwise.
"""
...
async def get_by_meeting(
self,
meeting_id: MeetingId,
) -> Sequence[Annotation]:
"""Get all annotations for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
List of annotations ordered by start_time.
"""
...
async def get_by_time_range(
self,
meeting_id: MeetingId,
start_time: float,
end_time: float,
) -> Sequence[Annotation]:
"""Get annotations within a time range.
Args:
meeting_id: Meeting identifier.
start_time: Start of time range in seconds.
end_time: End of time range in seconds.
Returns:
List of annotations overlapping the time range.
"""
...
async def update(self, annotation: Annotation) -> Annotation:
"""Update an existing annotation.
Args:
annotation: Annotation with updated fields.
Returns:
Updated annotation.
Raises:
ValueError: If annotation does not exist.
"""
...
async def delete(self, annotation_id: AnnotationId) -> bool:
"""Delete an annotation.
Args:
annotation_id: Annotation identifier.
Returns:
True if deleted, False if not found.
"""
...
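Since these are `typing.Protocol` interfaces, implementations satisfy them structurally; no inheritance is required. A minimal in-memory sketch against the smallest of them, SummaryRepository, assuming the domain Summary carries a `meeting_id` field (as its proto counterpart does); the dict-backed storage is illustrative only, not the project's persistence layer:

```python
from noteflow.domain.entities import Summary
from noteflow.domain.value_objects import MeetingId


class InMemorySummaryRepository:
    """Structurally satisfies SummaryRepository without subclassing it."""

    def __init__(self) -> None:
        self._by_meeting: dict[MeetingId, Summary] = {}

    async def save(self, summary: Summary) -> Summary:
        self._by_meeting[summary.meeting_id] = summary
        return summary

    async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None:
        return self._by_meeting.get(meeting_id)

    async def delete_by_meeting(self, meeting_id: MeetingId) -> bool:
        return self._by_meeting.pop(meeting_id, None) is not None
```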
File: src/noteflow/domain/triggers/entities.py
"""Trigger domain entities and value objects.
Defines trigger signals, decisions, and actions for meeting detection.
"""
from __future__ import annotations
import time
from dataclasses import dataclass, field
from enum import Enum
class TriggerSource(Enum):
"""Source of a trigger signal."""
AUDIO_ACTIVITY = "audio_activity"
FOREGROUND_APP = "foreground_app"
CALENDAR = "calendar" # Deferred - optional connector
class TriggerAction(Enum):
"""Action determined by trigger evaluation."""
IGNORE = "ignore" # Confidence < 0.40
NOTIFY = "notify" # Confidence 0.40-0.79
AUTO_START = "auto_start" # Confidence >= 0.80 (if enabled)
@dataclass(frozen=True)
class TriggerSignal:
"""A signal from a single trigger source.
Attributes:
source: The source that generated this signal.
weight: Confidence contribution (0.0-1.0).
app_name: For foreground app signals, the detected app name.
timestamp: When the signal was generated (monotonic time).
"""
source: TriggerSource
weight: float
app_name: str | None = None
timestamp: float = field(default_factory=time.monotonic)
def __post_init__(self) -> None:
"""Validate weight is in valid range."""
if not 0.0 <= self.weight <= 1.0:
msg = f"Weight must be 0.0-1.0, got {self.weight}"
raise ValueError(msg)
@dataclass(frozen=True)
class TriggerDecision:
"""Result of trigger evaluation.
Attributes:
action: The determined action (ignore, notify, auto_start).
confidence: Total confidence score from all signals.
signals: The signals that contributed to this decision.
timestamp: When the decision was made (monotonic time).
"""
action: TriggerAction
confidence: float
signals: tuple[TriggerSignal, ...]
timestamp: float = field(default_factory=time.monotonic)
@property
def primary_signal(self) -> TriggerSignal | None:
"""Get the signal with highest weight contribution."""
return max(self.signals, key=lambda s: s.weight) if self.signals else None
@property
def detected_app(self) -> str | None:
"""Get the detected app name from any signal if present."""
return next((signal.app_name for signal in self.signals if signal.app_name), None)
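The enum comments above pin the thresholds: ignore below 0.40, notify from 0.40, auto-start from 0.80. A hedged evaluation sketch; the real combination policy lives in the trigger service, so the simple weight sum here is illustrative:

```python
from noteflow.domain.triggers.entities import (
    TriggerAction,
    TriggerDecision,
    TriggerSignal,
    TriggerSource,
)

signals = (
    TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=0.5),
    TriggerSignal(source=TriggerSource.FOREGROUND_APP, weight=0.4, app_name="zoom"),
)

confidence = sum(s.weight for s in signals)  # 0.9 for this example
if confidence >= 0.80:
    action = TriggerAction.AUTO_START
elif confidence >= 0.40:
    action = TriggerAction.NOTIFY
else:
    action = TriggerAction.IGNORE

decision = TriggerDecision(action=action, confidence=confidence, signals=signals)
assert decision.action is TriggerAction.AUTO_START
assert decision.detected_app == "zoom"
assert decision.primary_signal is signals[0]  # highest weight wins
```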
File: src/noteflow/domain/value_objects.py
"""Domain value objects for NoteFlow."""
from __future__ import annotations
from enum import Enum, IntEnum
from typing import NewType
from uuid import UUID
# Type-safe identifiers
MeetingId = NewType("MeetingId", UUID)
AnnotationId = NewType("AnnotationId", UUID)
class AnnotationType(Enum):
"""User annotation type.
Used to categorize user-created annotations during recording.
Distinct from LLM-extracted ActionItem/KeyPoint in summaries.
"""
ACTION_ITEM = "action_item"
DECISION = "decision"
NOTE = "note"
RISK = "risk"
class MeetingState(IntEnum):
"""Meeting lifecycle state.
State transitions:
CREATED -> RECORDING -> STOPPING -> STOPPED -> COMPLETED
Any state -> ERROR (on failure)
The STOPPING state allows graceful shutdown with audio flush operations.
"""
UNSPECIFIED = 0
CREATED = 1
RECORDING = 2
STOPPED = 3
COMPLETED = 4
ERROR = 5
STOPPING = 6 # Intermediate state for graceful shutdown
@classmethod
def from_int(cls, value: int) -> MeetingState:
"""Convert integer to MeetingState.
Args:
value: Integer value.
Returns:
Corresponding MeetingState.
Raises:
ValueError: If value is not a valid state.
"""
try:
return cls(value)
except ValueError as e:
raise ValueError(f"Invalid meeting state: {value}") from e
def can_transition_to(self, target: MeetingState) -> bool:
"""Check if transition to target state is valid.
Args:
target: Target state.
Returns:
True if transition is valid.
"""
valid_transitions: dict[MeetingState, set[MeetingState]] = {
MeetingState.UNSPECIFIED: {MeetingState.CREATED},
MeetingState.CREATED: {MeetingState.RECORDING, MeetingState.ERROR},
MeetingState.RECORDING: {MeetingState.STOPPING, MeetingState.ERROR},
MeetingState.STOPPING: {MeetingState.STOPPED, MeetingState.ERROR},
MeetingState.STOPPED: {MeetingState.COMPLETED, MeetingState.ERROR},
MeetingState.COMPLETED: {MeetingState.ERROR},
MeetingState.ERROR: set(), # Terminal state
}
return target in valid_transitions.get(self, set())
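A quick sketch of the lifecycle guard in use, following the transition table above:

```python
from noteflow.domain.value_objects import MeetingState

state = MeetingState.RECORDING
assert state.can_transition_to(MeetingState.STOPPING)       # graceful-shutdown path
assert not state.can_transition_to(MeetingState.COMPLETED)  # must pass through STOPPING/STOPPED

# from_int wraps the enum lookup with a clearer error message:
assert MeetingState.from_int(2) is MeetingState.RECORDING
try:
    MeetingState.from_int(99)
except ValueError as err:
    print(err)  # Invalid meeting state: 99
```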
File: src/noteflow/grpc/proto/noteflow_pb2.pyi
from google.protobuf.internal import containers as _containers
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from collections.abc import Iterable as _Iterable, Mapping as _Mapping
from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
DESCRIPTOR: _descriptor.FileDescriptor
class UpdateType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
UPDATE_TYPE_UNSPECIFIED: _ClassVar[UpdateType]
UPDATE_TYPE_PARTIAL: _ClassVar[UpdateType]
UPDATE_TYPE_FINAL: _ClassVar[UpdateType]
UPDATE_TYPE_VAD_START: _ClassVar[UpdateType]
UPDATE_TYPE_VAD_END: _ClassVar[UpdateType]
class MeetingState(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
MEETING_STATE_UNSPECIFIED: _ClassVar[MeetingState]
MEETING_STATE_CREATED: _ClassVar[MeetingState]
MEETING_STATE_RECORDING: _ClassVar[MeetingState]
MEETING_STATE_STOPPED: _ClassVar[MeetingState]
MEETING_STATE_COMPLETED: _ClassVar[MeetingState]
MEETING_STATE_ERROR: _ClassVar[MeetingState]
class SortOrder(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
SORT_ORDER_UNSPECIFIED: _ClassVar[SortOrder]
SORT_ORDER_CREATED_DESC: _ClassVar[SortOrder]
SORT_ORDER_CREATED_ASC: _ClassVar[SortOrder]
class Priority(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
PRIORITY_UNSPECIFIED: _ClassVar[Priority]
PRIORITY_LOW: _ClassVar[Priority]
PRIORITY_MEDIUM: _ClassVar[Priority]
PRIORITY_HIGH: _ClassVar[Priority]
class AnnotationType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
ANNOTATION_TYPE_UNSPECIFIED: _ClassVar[AnnotationType]
ANNOTATION_TYPE_ACTION_ITEM: _ClassVar[AnnotationType]
ANNOTATION_TYPE_DECISION: _ClassVar[AnnotationType]
ANNOTATION_TYPE_NOTE: _ClassVar[AnnotationType]
ANNOTATION_TYPE_RISK: _ClassVar[AnnotationType]
class ExportFormat(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
EXPORT_FORMAT_UNSPECIFIED: _ClassVar[ExportFormat]
EXPORT_FORMAT_MARKDOWN: _ClassVar[ExportFormat]
EXPORT_FORMAT_HTML: _ClassVar[ExportFormat]
class JobStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
JOB_STATUS_UNSPECIFIED: _ClassVar[JobStatus]
JOB_STATUS_QUEUED: _ClassVar[JobStatus]
JOB_STATUS_RUNNING: _ClassVar[JobStatus]
JOB_STATUS_COMPLETED: _ClassVar[JobStatus]
JOB_STATUS_FAILED: _ClassVar[JobStatus]
UPDATE_TYPE_UNSPECIFIED: UpdateType
UPDATE_TYPE_PARTIAL: UpdateType
UPDATE_TYPE_FINAL: UpdateType
UPDATE_TYPE_VAD_START: UpdateType
UPDATE_TYPE_VAD_END: UpdateType
MEETING_STATE_UNSPECIFIED: MeetingState
MEETING_STATE_CREATED: MeetingState
MEETING_STATE_RECORDING: MeetingState
MEETING_STATE_STOPPED: MeetingState
MEETING_STATE_COMPLETED: MeetingState
MEETING_STATE_ERROR: MeetingState
SORT_ORDER_UNSPECIFIED: SortOrder
SORT_ORDER_CREATED_DESC: SortOrder
SORT_ORDER_CREATED_ASC: SortOrder
PRIORITY_UNSPECIFIED: Priority
PRIORITY_LOW: Priority
PRIORITY_MEDIUM: Priority
PRIORITY_HIGH: Priority
ANNOTATION_TYPE_UNSPECIFIED: AnnotationType
ANNOTATION_TYPE_ACTION_ITEM: AnnotationType
ANNOTATION_TYPE_DECISION: AnnotationType
ANNOTATION_TYPE_NOTE: AnnotationType
ANNOTATION_TYPE_RISK: AnnotationType
EXPORT_FORMAT_UNSPECIFIED: ExportFormat
EXPORT_FORMAT_MARKDOWN: ExportFormat
EXPORT_FORMAT_HTML: ExportFormat
JOB_STATUS_UNSPECIFIED: JobStatus
JOB_STATUS_QUEUED: JobStatus
JOB_STATUS_RUNNING: JobStatus
JOB_STATUS_COMPLETED: JobStatus
JOB_STATUS_FAILED: JobStatus
class AudioChunk(_message.Message):
__slots__ = ("meeting_id", "audio_data", "timestamp", "sample_rate", "channels")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
AUDIO_DATA_FIELD_NUMBER: _ClassVar[int]
TIMESTAMP_FIELD_NUMBER: _ClassVar[int]
SAMPLE_RATE_FIELD_NUMBER: _ClassVar[int]
CHANNELS_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
audio_data: bytes
timestamp: float
sample_rate: int
channels: int
def __init__(self, meeting_id: _Optional[str] = ..., audio_data: _Optional[bytes] = ..., timestamp: _Optional[float] = ..., sample_rate: _Optional[int] = ..., channels: _Optional[int] = ...) -> None: ...
class TranscriptUpdate(_message.Message):
__slots__ = ("meeting_id", "update_type", "partial_text", "segment", "server_timestamp")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
UPDATE_TYPE_FIELD_NUMBER: _ClassVar[int]
PARTIAL_TEXT_FIELD_NUMBER: _ClassVar[int]
SEGMENT_FIELD_NUMBER: _ClassVar[int]
SERVER_TIMESTAMP_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
update_type: UpdateType
partial_text: str
segment: FinalSegment
server_timestamp: float
def __init__(self, meeting_id: _Optional[str] = ..., update_type: _Optional[_Union[UpdateType, str]] = ..., partial_text: _Optional[str] = ..., segment: _Optional[_Union[FinalSegment, _Mapping]] = ..., server_timestamp: _Optional[float] = ...) -> None: ...
class FinalSegment(_message.Message):
__slots__ = ("segment_id", "text", "start_time", "end_time", "words", "language", "language_confidence", "avg_logprob", "no_speech_prob", "speaker_id", "speaker_confidence")
SEGMENT_ID_FIELD_NUMBER: _ClassVar[int]
TEXT_FIELD_NUMBER: _ClassVar[int]
START_TIME_FIELD_NUMBER: _ClassVar[int]
END_TIME_FIELD_NUMBER: _ClassVar[int]
WORDS_FIELD_NUMBER: _ClassVar[int]
LANGUAGE_FIELD_NUMBER: _ClassVar[int]
LANGUAGE_CONFIDENCE_FIELD_NUMBER: _ClassVar[int]
AVG_LOGPROB_FIELD_NUMBER: _ClassVar[int]
NO_SPEECH_PROB_FIELD_NUMBER: _ClassVar[int]
SPEAKER_ID_FIELD_NUMBER: _ClassVar[int]
SPEAKER_CONFIDENCE_FIELD_NUMBER: _ClassVar[int]
segment_id: int
text: str
start_time: float
end_time: float
words: _containers.RepeatedCompositeFieldContainer[WordTiming]
language: str
language_confidence: float
avg_logprob: float
no_speech_prob: float
speaker_id: str
speaker_confidence: float
def __init__(self, segment_id: _Optional[int] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., words: _Optional[_Iterable[_Union[WordTiming, _Mapping]]] = ..., language: _Optional[str] = ..., language_confidence: _Optional[float] = ..., avg_logprob: _Optional[float] = ..., no_speech_prob: _Optional[float] = ..., speaker_id: _Optional[str] = ..., speaker_confidence: _Optional[float] = ...) -> None: ...
class WordTiming(_message.Message):
__slots__ = ("word", "start_time", "end_time", "probability")
WORD_FIELD_NUMBER: _ClassVar[int]
START_TIME_FIELD_NUMBER: _ClassVar[int]
END_TIME_FIELD_NUMBER: _ClassVar[int]
PROBABILITY_FIELD_NUMBER: _ClassVar[int]
word: str
start_time: float
end_time: float
probability: float
def __init__(self, word: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., probability: _Optional[float] = ...) -> None: ...
class Meeting(_message.Message):
__slots__ = ("id", "title", "state", "created_at", "started_at", "ended_at", "duration_seconds", "segments", "summary", "metadata")
class MetadataEntry(_message.Message):
__slots__ = ("key", "value")
KEY_FIELD_NUMBER: _ClassVar[int]
VALUE_FIELD_NUMBER: _ClassVar[int]
key: str
value: str
def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
ID_FIELD_NUMBER: _ClassVar[int]
TITLE_FIELD_NUMBER: _ClassVar[int]
STATE_FIELD_NUMBER: _ClassVar[int]
CREATED_AT_FIELD_NUMBER: _ClassVar[int]
STARTED_AT_FIELD_NUMBER: _ClassVar[int]
ENDED_AT_FIELD_NUMBER: _ClassVar[int]
DURATION_SECONDS_FIELD_NUMBER: _ClassVar[int]
SEGMENTS_FIELD_NUMBER: _ClassVar[int]
SUMMARY_FIELD_NUMBER: _ClassVar[int]
METADATA_FIELD_NUMBER: _ClassVar[int]
id: str
title: str
state: MeetingState
created_at: float
started_at: float
ended_at: float
duration_seconds: float
segments: _containers.RepeatedCompositeFieldContainer[FinalSegment]
summary: Summary
metadata: _containers.ScalarMap[str, str]
def __init__(self, id: _Optional[str] = ..., title: _Optional[str] = ..., state: _Optional[_Union[MeetingState, str]] = ..., created_at: _Optional[float] = ..., started_at: _Optional[float] = ..., ended_at: _Optional[float] = ..., duration_seconds: _Optional[float] = ..., segments: _Optional[_Iterable[_Union[FinalSegment, _Mapping]]] = ..., summary: _Optional[_Union[Summary, _Mapping]] = ..., metadata: _Optional[_Mapping[str, str]] = ...) -> None: ...
class CreateMeetingRequest(_message.Message):
__slots__ = ("title", "metadata")
class MetadataEntry(_message.Message):
__slots__ = ("key", "value")
KEY_FIELD_NUMBER: _ClassVar[int]
VALUE_FIELD_NUMBER: _ClassVar[int]
key: str
value: str
def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
TITLE_FIELD_NUMBER: _ClassVar[int]
METADATA_FIELD_NUMBER: _ClassVar[int]
title: str
metadata: _containers.ScalarMap[str, str]
def __init__(self, title: _Optional[str] = ..., metadata: _Optional[_Mapping[str, str]] = ...) -> None: ...
class StopMeetingRequest(_message.Message):
__slots__ = ("meeting_id",)
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
def __init__(self, meeting_id: _Optional[str] = ...) -> None: ...
class ListMeetingsRequest(_message.Message):
__slots__ = ("states", "limit", "offset", "sort_order")
STATES_FIELD_NUMBER: _ClassVar[int]
LIMIT_FIELD_NUMBER: _ClassVar[int]
OFFSET_FIELD_NUMBER: _ClassVar[int]
SORT_ORDER_FIELD_NUMBER: _ClassVar[int]
states: _containers.RepeatedScalarFieldContainer[MeetingState]
limit: int
offset: int
sort_order: SortOrder
def __init__(self, states: _Optional[_Iterable[_Union[MeetingState, str]]] = ..., limit: _Optional[int] = ..., offset: _Optional[int] = ..., sort_order: _Optional[_Union[SortOrder, str]] = ...) -> None: ...
class ListMeetingsResponse(_message.Message):
__slots__ = ("meetings", "total_count")
MEETINGS_FIELD_NUMBER: _ClassVar[int]
TOTAL_COUNT_FIELD_NUMBER: _ClassVar[int]
meetings: _containers.RepeatedCompositeFieldContainer[Meeting]
total_count: int
def __init__(self, meetings: _Optional[_Iterable[_Union[Meeting, _Mapping]]] = ..., total_count: _Optional[int] = ...) -> None: ...
class GetMeetingRequest(_message.Message):
__slots__ = ("meeting_id", "include_segments", "include_summary")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
INCLUDE_SEGMENTS_FIELD_NUMBER: _ClassVar[int]
INCLUDE_SUMMARY_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
include_segments: bool
include_summary: bool
def __init__(self, meeting_id: _Optional[str] = ..., include_segments: bool = ..., include_summary: bool = ...) -> None: ...
class DeleteMeetingRequest(_message.Message):
__slots__ = ("meeting_id",)
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
def __init__(self, meeting_id: _Optional[str] = ...) -> None: ...
class DeleteMeetingResponse(_message.Message):
__slots__ = ("success",)
SUCCESS_FIELD_NUMBER: _ClassVar[int]
success: bool
def __init__(self, success: bool = ...) -> None: ...
class Summary(_message.Message):
__slots__ = ("meeting_id", "executive_summary", "key_points", "action_items", "generated_at", "model_version")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
EXECUTIVE_SUMMARY_FIELD_NUMBER: _ClassVar[int]
KEY_POINTS_FIELD_NUMBER: _ClassVar[int]
ACTION_ITEMS_FIELD_NUMBER: _ClassVar[int]
GENERATED_AT_FIELD_NUMBER: _ClassVar[int]
MODEL_VERSION_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
executive_summary: str
key_points: _containers.RepeatedCompositeFieldContainer[KeyPoint]
action_items: _containers.RepeatedCompositeFieldContainer[ActionItem]
generated_at: float
model_version: str
def __init__(self, meeting_id: _Optional[str] = ..., executive_summary: _Optional[str] = ..., key_points: _Optional[_Iterable[_Union[KeyPoint, _Mapping]]] = ..., action_items: _Optional[_Iterable[_Union[ActionItem, _Mapping]]] = ..., generated_at: _Optional[float] = ..., model_version: _Optional[str] = ...) -> None: ...
class KeyPoint(_message.Message):
__slots__ = ("text", "segment_ids", "start_time", "end_time")
TEXT_FIELD_NUMBER: _ClassVar[int]
SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int]
START_TIME_FIELD_NUMBER: _ClassVar[int]
END_TIME_FIELD_NUMBER: _ClassVar[int]
text: str
segment_ids: _containers.RepeatedScalarFieldContainer[int]
start_time: float
end_time: float
def __init__(self, text: _Optional[str] = ..., segment_ids: _Optional[_Iterable[int]] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ...) -> None: ...
class ActionItem(_message.Message):
__slots__ = ("text", "assignee", "due_date", "priority", "segment_ids")
TEXT_FIELD_NUMBER: _ClassVar[int]
ASSIGNEE_FIELD_NUMBER: _ClassVar[int]
DUE_DATE_FIELD_NUMBER: _ClassVar[int]
PRIORITY_FIELD_NUMBER: _ClassVar[int]
SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int]
text: str
assignee: str
due_date: float
priority: Priority
segment_ids: _containers.RepeatedScalarFieldContainer[int]
def __init__(self, text: _Optional[str] = ..., assignee: _Optional[str] = ..., due_date: _Optional[float] = ..., priority: _Optional[_Union[Priority, str]] = ..., segment_ids: _Optional[_Iterable[int]] = ...) -> None: ...
class GenerateSummaryRequest(_message.Message):
__slots__ = ("meeting_id", "force_regenerate")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
FORCE_REGENERATE_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
force_regenerate: bool
def __init__(self, meeting_id: _Optional[str] = ..., force_regenerate: bool = ...) -> None: ...
class ServerInfoRequest(_message.Message):
__slots__ = ()
def __init__(self) -> None: ...
class ServerInfo(_message.Message):
__slots__ = ("version", "asr_model", "asr_ready", "supported_sample_rates", "max_chunk_size", "uptime_seconds", "active_meetings", "diarization_enabled", "diarization_ready")
VERSION_FIELD_NUMBER: _ClassVar[int]
ASR_MODEL_FIELD_NUMBER: _ClassVar[int]
ASR_READY_FIELD_NUMBER: _ClassVar[int]
SUPPORTED_SAMPLE_RATES_FIELD_NUMBER: _ClassVar[int]
MAX_CHUNK_SIZE_FIELD_NUMBER: _ClassVar[int]
UPTIME_SECONDS_FIELD_NUMBER: _ClassVar[int]
ACTIVE_MEETINGS_FIELD_NUMBER: _ClassVar[int]
DIARIZATION_ENABLED_FIELD_NUMBER: _ClassVar[int]
DIARIZATION_READY_FIELD_NUMBER: _ClassVar[int]
version: str
asr_model: str
asr_ready: bool
supported_sample_rates: _containers.RepeatedScalarFieldContainer[int]
max_chunk_size: int
uptime_seconds: float
active_meetings: int
diarization_enabled: bool
diarization_ready: bool
def __init__(self, version: _Optional[str] = ..., asr_model: _Optional[str] = ..., asr_ready: bool = ..., supported_sample_rates: _Optional[_Iterable[int]] = ..., max_chunk_size: _Optional[int] = ..., uptime_seconds: _Optional[float] = ..., active_meetings: _Optional[int] = ..., diarization_enabled: bool = ..., diarization_ready: bool = ...) -> None: ...
class Annotation(_message.Message):
__slots__ = ("id", "meeting_id", "annotation_type", "text", "start_time", "end_time", "segment_ids", "created_at")
ID_FIELD_NUMBER: _ClassVar[int]
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int]
TEXT_FIELD_NUMBER: _ClassVar[int]
START_TIME_FIELD_NUMBER: _ClassVar[int]
END_TIME_FIELD_NUMBER: _ClassVar[int]
SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int]
CREATED_AT_FIELD_NUMBER: _ClassVar[int]
id: str
meeting_id: str
annotation_type: AnnotationType
text: str
start_time: float
end_time: float
segment_ids: _containers.RepeatedScalarFieldContainer[int]
created_at: float
def __init__(self, id: _Optional[str] = ..., meeting_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ..., created_at: _Optional[float] = ...) -> None: ...
class AddAnnotationRequest(_message.Message):
__slots__ = ("meeting_id", "annotation_type", "text", "start_time", "end_time", "segment_ids")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int]
TEXT_FIELD_NUMBER: _ClassVar[int]
START_TIME_FIELD_NUMBER: _ClassVar[int]
END_TIME_FIELD_NUMBER: _ClassVar[int]
SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
annotation_type: AnnotationType
text: str
start_time: float
end_time: float
segment_ids: _containers.RepeatedScalarFieldContainer[int]
def __init__(self, meeting_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ...) -> None: ...
class GetAnnotationRequest(_message.Message):
__slots__ = ("annotation_id",)
ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int]
annotation_id: str
def __init__(self, annotation_id: _Optional[str] = ...) -> None: ...
class ListAnnotationsRequest(_message.Message):
__slots__ = ("meeting_id", "start_time", "end_time")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
START_TIME_FIELD_NUMBER: _ClassVar[int]
END_TIME_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
start_time: float
end_time: float
def __init__(self, meeting_id: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ...) -> None: ...
class ListAnnotationsResponse(_message.Message):
__slots__ = ("annotations",)
ANNOTATIONS_FIELD_NUMBER: _ClassVar[int]
annotations: _containers.RepeatedCompositeFieldContainer[Annotation]
def __init__(self, annotations: _Optional[_Iterable[_Union[Annotation, _Mapping]]] = ...) -> None: ...
class UpdateAnnotationRequest(_message.Message):
__slots__ = ("annotation_id", "annotation_type", "text", "start_time", "end_time", "segment_ids")
ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int]
ANNOTATION_TYPE_FIELD_NUMBER: _ClassVar[int]
TEXT_FIELD_NUMBER: _ClassVar[int]
START_TIME_FIELD_NUMBER: _ClassVar[int]
END_TIME_FIELD_NUMBER: _ClassVar[int]
SEGMENT_IDS_FIELD_NUMBER: _ClassVar[int]
annotation_id: str
annotation_type: AnnotationType
text: str
start_time: float
end_time: float
segment_ids: _containers.RepeatedScalarFieldContainer[int]
def __init__(self, annotation_id: _Optional[str] = ..., annotation_type: _Optional[_Union[AnnotationType, str]] = ..., text: _Optional[str] = ..., start_time: _Optional[float] = ..., end_time: _Optional[float] = ..., segment_ids: _Optional[_Iterable[int]] = ...) -> None: ...
class DeleteAnnotationRequest(_message.Message):
__slots__ = ("annotation_id",)
ANNOTATION_ID_FIELD_NUMBER: _ClassVar[int]
annotation_id: str
def __init__(self, annotation_id: _Optional[str] = ...) -> None: ...
class DeleteAnnotationResponse(_message.Message):
__slots__ = ("success",)
SUCCESS_FIELD_NUMBER: _ClassVar[int]
success: bool
def __init__(self, success: bool = ...) -> None: ...
class ExportTranscriptRequest(_message.Message):
__slots__ = ("meeting_id", "format")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
FORMAT_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
format: ExportFormat
def __init__(self, meeting_id: _Optional[str] = ..., format: _Optional[_Union[ExportFormat, str]] = ...) -> None: ...
class ExportTranscriptResponse(_message.Message):
__slots__ = ("content", "format_name", "file_extension")
CONTENT_FIELD_NUMBER: _ClassVar[int]
FORMAT_NAME_FIELD_NUMBER: _ClassVar[int]
FILE_EXTENSION_FIELD_NUMBER: _ClassVar[int]
content: str
format_name: str
file_extension: str
def __init__(self, content: _Optional[str] = ..., format_name: _Optional[str] = ..., file_extension: _Optional[str] = ...) -> None: ...
class RefineSpeakerDiarizationRequest(_message.Message):
__slots__ = ("meeting_id", "num_speakers")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
NUM_SPEAKERS_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
num_speakers: int
def __init__(self, meeting_id: _Optional[str] = ..., num_speakers: _Optional[int] = ...) -> None: ...
class RefineSpeakerDiarizationResponse(_message.Message):
__slots__ = ("segments_updated", "speaker_ids", "error_message", "job_id", "status")
SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int]
SPEAKER_IDS_FIELD_NUMBER: _ClassVar[int]
ERROR_MESSAGE_FIELD_NUMBER: _ClassVar[int]
JOB_ID_FIELD_NUMBER: _ClassVar[int]
STATUS_FIELD_NUMBER: _ClassVar[int]
segments_updated: int
speaker_ids: _containers.RepeatedScalarFieldContainer[str]
error_message: str
job_id: str
status: JobStatus
    def __init__(self, segments_updated: _Optional[int] = ..., speaker_ids: _Optional[_Iterable[str]] = ..., error_message: _Optional[str] = ..., job_id: _Optional[str] = ..., status: _Optional[_Union[JobStatus, str]] = ...) -> None: ...
class RenameSpeakerRequest(_message.Message):
__slots__ = ("meeting_id", "old_speaker_id", "new_speaker_name")
MEETING_ID_FIELD_NUMBER: _ClassVar[int]
OLD_SPEAKER_ID_FIELD_NUMBER: _ClassVar[int]
NEW_SPEAKER_NAME_FIELD_NUMBER: _ClassVar[int]
meeting_id: str
old_speaker_id: str
new_speaker_name: str
def __init__(self, meeting_id: _Optional[str] = ..., old_speaker_id: _Optional[str] = ..., new_speaker_name: _Optional[str] = ...) -> None: ...
class RenameSpeakerResponse(_message.Message):
__slots__ = ("segments_updated", "success")
SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int]
SUCCESS_FIELD_NUMBER: _ClassVar[int]
segments_updated: int
success: bool
def __init__(self, segments_updated: _Optional[int] = ..., success: bool = ...) -> None: ...
class GetDiarizationJobStatusRequest(_message.Message):
__slots__ = ("job_id",)
JOB_ID_FIELD_NUMBER: _ClassVar[int]
job_id: str
def __init__(self, job_id: _Optional[str] = ...) -> None: ...
class DiarizationJobStatus(_message.Message):
__slots__ = ("job_id", "status", "segments_updated", "speaker_ids", "error_message")
JOB_ID_FIELD_NUMBER: _ClassVar[int]
STATUS_FIELD_NUMBER: _ClassVar[int]
SEGMENTS_UPDATED_FIELD_NUMBER: _ClassVar[int]
SPEAKER_IDS_FIELD_NUMBER: _ClassVar[int]
ERROR_MESSAGE_FIELD_NUMBER: _ClassVar[int]
job_id: str
status: JobStatus
segments_updated: int
speaker_ids: _containers.RepeatedScalarFieldContainer[str]
error_message: str
def __init__(self, job_id: _Optional[str] = ..., status: _Optional[JobStatus] = ..., segments_updated: _Optional[int] = ..., speaker_ids: _Optional[_Iterable[str]] = ..., error_message: _Optional[str] = ...) -> None: ...
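These stubs make the generated constructors keyword-friendly. A small sketch building a final-transcript update from the messages above; the field values are illustrative, and the serialize/parse round-trip is standard protobuf behavior:

```python
from noteflow.grpc.proto import noteflow_pb2 as pb

segment = pb.FinalSegment(
    segment_id=0,
    text="hello world",
    start_time=0.0,
    end_time=1.2,
    words=[pb.WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=0.98)],
    language="en",
)
update = pb.TranscriptUpdate(
    meeting_id="m-123",
    update_type=pb.UPDATE_TYPE_FINAL,
    segment=segment,
    server_timestamp=1700000000.0,
)
payload = update.SerializeToString()  # bytes suitable for the wire
assert pb.TranscriptUpdate.FromString(payload) == update
```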
File: src/noteflow/grpc/meeting_store.py
"""In-memory meeting storage for the NoteFlow gRPC server.
Provides thread-safe in-memory storage using domain entities directly.
Used as fallback when no database is configured.
"""
from __future__ import annotations
import threading
from typing import TYPE_CHECKING
from noteflow.domain.entities import Meeting, Segment, Summary
from noteflow.domain.value_objects import MeetingState
if TYPE_CHECKING:
from collections.abc import Sequence
from datetime import datetime
class MeetingStore:
"""Thread-safe in-memory meeting storage using domain entities."""
def __init__(self) -> None:
"""Initialize the store."""
self._meetings: dict[str, Meeting] = {}
self._lock = threading.RLock()
def create(
self,
title: str = "",
metadata: dict[str, str] | None = None,
) -> Meeting:
"""Create a new meeting.
Args:
title: Optional meeting title.
metadata: Optional metadata.
Returns:
Created meeting.
"""
meeting = Meeting.create(title=title or "Untitled Meeting", metadata=metadata or {})
with self._lock:
self._meetings[str(meeting.id)] = meeting
return meeting
def get(self, meeting_id: str) -> Meeting | None:
"""Get a meeting by ID.
Args:
meeting_id: Meeting ID string.
Returns:
Meeting or None if not found.
"""
with self._lock:
return self._meetings.get(meeting_id)
def list_all(
self,
states: Sequence[MeetingState] | None = None,
limit: int = 100,
offset: int = 0,
sort_desc: bool = True,
) -> tuple[list[Meeting], int]:
"""List meetings with optional filtering.
Args:
states: Optional list of states to filter by.
limit: Maximum number of meetings to return.
offset: Number of meetings to skip.
sort_desc: Sort by created_at descending if True.
Returns:
Tuple of (meetings list, total count).
"""
with self._lock:
meetings = list(self._meetings.values())
# Filter by state
if states:
state_set = set(states)
meetings = [m for m in meetings if m.state in state_set]
total = len(meetings)
# Sort
meetings.sort(key=lambda m: m.created_at, reverse=sort_desc)
# Paginate
meetings = meetings[offset : offset + limit]
return meetings, total
def update(self, meeting: Meeting) -> Meeting:
"""Update a meeting in the store.
Args:
meeting: Meeting with updated fields.
Returns:
Updated meeting.
"""
with self._lock:
self._meetings[str(meeting.id)] = meeting
return meeting
def add_segment(self, meeting_id: str, segment: Segment) -> Meeting | None:
"""Add a segment to a meeting.
Args:
meeting_id: Meeting ID.
segment: Segment to add.
Returns:
Updated meeting or None if not found.
"""
with self._lock:
meeting = self._meetings.get(meeting_id)
if meeting is None:
return None
meeting.add_segment(segment)
return meeting
def set_summary(self, meeting_id: str, summary: Summary) -> Meeting | None:
"""Set meeting summary.
Args:
meeting_id: Meeting ID.
summary: Summary to set.
Returns:
Updated meeting or None if not found.
"""
with self._lock:
meeting = self._meetings.get(meeting_id)
if meeting is None:
return None
meeting.summary = summary
return meeting
def update_state(self, meeting_id: str, state: MeetingState) -> bool:
"""Atomically update meeting state.
Args:
meeting_id: Meeting ID.
state: New state.
Returns:
True if updated, False if meeting not found.
"""
with self._lock:
meeting = self._meetings.get(meeting_id)
if meeting is None:
return False
meeting.state = state
return True
def update_title(self, meeting_id: str, title: str) -> bool:
"""Atomically update meeting title.
Args:
meeting_id: Meeting ID.
title: New title.
Returns:
True if updated, False if meeting not found.
"""
with self._lock:
meeting = self._meetings.get(meeting_id)
if meeting is None:
return False
meeting.title = title
return True
def update_end_time(self, meeting_id: str, end_time: datetime) -> bool:
"""Atomically update meeting end time.
Args:
meeting_id: Meeting ID.
end_time: New end time.
Returns:
True if updated, False if meeting not found.
"""
with self._lock:
meeting = self._meetings.get(meeting_id)
if meeting is None:
return False
meeting.end_time = end_time
return True
def delete(self, meeting_id: str) -> bool:
"""Delete a meeting.
Args:
meeting_id: Meeting ID.
Returns:
True if deleted, False if not found.
"""
with self._lock:
if meeting_id in self._meetings:
del self._meetings[meeting_id]
return True
return False
@property
def active_count(self) -> int:
"""Count of meetings in RECORDING or STOPPING state."""
with self._lock:
return sum(
m.state in (MeetingState.RECORDING, MeetingState.STOPPING)
for m in self._meetings.values()
)
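# A minimal usage sketch of the in-memory store (no database required);
# the names below are illustrative:
#
#   store = MeetingStore()
#   meeting = store.create(title="Standup")
#   store.update_state(str(meeting.id), MeetingState.RECORDING)
#   recording, total = store.list_all(states=[MeetingState.RECORDING])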
File: src/noteflow/grpc/server.py
"""NoteFlow gRPC server entry point (async)."""
from __future__ import annotations
import argparse
import asyncio
import logging
import signal
import time
from typing import TYPE_CHECKING, Final
import grpc.aio
from pydantic import ValidationError
from noteflow.application.services import RecoveryService
from noteflow.application.services.summarization_service import SummarizationService
from noteflow.config.settings import get_settings
from noteflow.infrastructure.asr import FasterWhisperEngine
from noteflow.infrastructure.asr.engine import VALID_MODEL_SIZES
from noteflow.infrastructure.diarization import DiarizationEngine
from noteflow.infrastructure.persistence.database import create_async_session_factory
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork
from noteflow.infrastructure.summarization import create_summarization_service
from .proto import noteflow_pb2_grpc
from .service import NoteFlowServicer
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
logger = logging.getLogger(__name__)
DEFAULT_PORT: Final[int] = 50051
DEFAULT_MODEL: Final[str] = "base"
class NoteFlowServer:
"""Async gRPC server for NoteFlow."""
def __init__(
self,
port: int = DEFAULT_PORT,
asr_model: str = DEFAULT_MODEL,
asr_device: str = "cpu",
asr_compute_type: str = "int8",
session_factory: async_sessionmaker[AsyncSession] | None = None,
summarization_service: SummarizationService | None = None,
diarization_engine: DiarizationEngine | None = None,
) -> None:
"""Initialize the server.
Args:
port: Port to listen on.
asr_model: ASR model size.
asr_device: Device for ASR ("cpu" or "cuda").
asr_compute_type: ASR compute type.
session_factory: Optional async session factory for database.
summarization_service: Optional summarization service for generating summaries.
diarization_engine: Optional diarization engine for speaker identification.
"""
self._port = port
self._asr_model = asr_model
self._asr_device = asr_device
self._asr_compute_type = asr_compute_type
self._session_factory = session_factory
self._summarization_service = summarization_service
self._diarization_engine = diarization_engine
self._server: grpc.aio.Server | None = None
self._servicer: NoteFlowServicer | None = None
async def start(self) -> None:
"""Start the async gRPC server."""
logger.info("Starting NoteFlow gRPC server (async)...")
# Create ASR engine
logger.info(
"Loading ASR model '%s' on %s (%s)...",
self._asr_model,
self._asr_device,
self._asr_compute_type,
)
start_time = time.perf_counter()
asr_engine = FasterWhisperEngine(
compute_type=self._asr_compute_type,
device=self._asr_device,
)
asr_engine.load_model(self._asr_model)
load_time = time.perf_counter() - start_time
logger.info("ASR model loaded in %.2f seconds", load_time)
# Lazy-create summarization service if not provided
if self._summarization_service is None:
self._summarization_service = create_summarization_service()
logger.info("Summarization service initialized (default factory)")
# Create servicer with session factory, summarization, and diarization
self._servicer = NoteFlowServicer(
asr_engine=asr_engine,
session_factory=self._session_factory,
summarization_service=self._summarization_service,
diarization_engine=self._diarization_engine,
)
# Create async gRPC server
self._server = grpc.aio.server(
options=[
("grpc.max_send_message_length", 100 * 1024 * 1024), # 100MB
("grpc.max_receive_message_length", 100 * 1024 * 1024),
],
)
# Register service
noteflow_pb2_grpc.add_NoteFlowServiceServicer_to_server(
self._servicer,
self._server,
)
# Bind to port
address = f"[::]:{self._port}"
self._server.add_insecure_port(address)
# Start server
await self._server.start()
logger.info("Server listening on %s", address)
async def stop(self, grace_period: float = 5.0) -> None:
"""Stop the server gracefully.
Args:
grace_period: Time to wait for in-flight RPCs.
"""
if self._server:
logger.info("Stopping server (grace period: %.1fs)...", grace_period)
await self._server.stop(grace_period)
logger.info("Server stopped")
async def wait_for_termination(self) -> None:
"""Block until server is terminated."""
if self._server:
await self._server.wait_for_termination()
async def run_server(
port: int,
asr_model: str,
asr_device: str,
asr_compute_type: str,
database_url: str | None = None,
diarization_enabled: bool = False,
diarization_hf_token: str | None = None,
diarization_device: str = "auto",
) -> None:
"""Run the async gRPC server.
Args:
port: Port to listen on.
asr_model: ASR model size.
asr_device: Device for ASR.
asr_compute_type: ASR compute type.
database_url: Optional database URL for persistence.
diarization_enabled: Whether to enable speaker diarization.
diarization_hf_token: HuggingFace token for pyannote models.
diarization_device: Device for diarization ("auto", "cpu", "cuda", "mps").
"""
# Create session factory if database URL provided
session_factory = None
if database_url:
logger.info("Connecting to database...")
session_factory = create_async_session_factory(database_url)
logger.info("Database connection pool ready")
# Run crash recovery on startup
uow = SqlAlchemyUnitOfWork(session_factory)
recovery_service = RecoveryService(uow)
recovered = await recovery_service.recover_crashed_meetings()
if recovered:
logger.warning(
"Recovered %d crashed meetings on startup",
len(recovered),
)
# Create summarization service - auto-detects LOCAL/MOCK providers
summarization_service = create_summarization_service()
logger.info("Summarization service initialized")
# Create diarization engine if enabled
diarization_engine: DiarizationEngine | None = None
if diarization_enabled:
if not diarization_hf_token:
logger.warning(
"Diarization enabled but no HuggingFace token provided. "
"Set NOTEFLOW_DIARIZATION_HF_TOKEN or --diarization-hf-token."
)
else:
logger.info("Initializing diarization engine on %s...", diarization_device)
diarization_engine = DiarizationEngine(
device=diarization_device,
hf_token=diarization_hf_token,
)
logger.info("Diarization engine initialized (models loaded on demand)")
server = NoteFlowServer(
port=port,
asr_model=asr_model,
asr_device=asr_device,
asr_compute_type=asr_compute_type,
session_factory=session_factory,
summarization_service=summarization_service,
diarization_engine=diarization_engine,
)
# Set up graceful shutdown
loop = asyncio.get_running_loop()
shutdown_event = asyncio.Event()
def signal_handler() -> None:
logger.info("Received shutdown signal...")
shutdown_event.set()
for sig in (signal.SIGINT, signal.SIGTERM):
loop.add_signal_handler(sig, signal_handler)
try:
await server.start()
print(f"\nNoteFlow server running on port {port}")
print(f"ASR model: {asr_model} ({asr_device}/{asr_compute_type})")
if database_url:
print("Database: Connected")
else:
print("Database: Not configured (in-memory mode)")
if diarization_engine:
print(f"Diarization: Enabled ({diarization_device})")
else:
print("Diarization: Disabled")
print("Press Ctrl+C to stop\n")
# Block until a shutdown signal arrives
await shutdown_event.wait()
finally:
await server.stop()
def main() -> None:
"""Entry point for NoteFlow gRPC server."""
parser = argparse.ArgumentParser(description="NoteFlow gRPC Server")
parser.add_argument(
"-p",
"--port",
type=int,
default=DEFAULT_PORT,
help=f"Port to listen on (default: {DEFAULT_PORT})",
)
parser.add_argument(
"-m",
"--model",
type=str,
default=DEFAULT_MODEL,
choices=list(VALID_MODEL_SIZES),
help=f"ASR model size (default: {DEFAULT_MODEL})",
)
parser.add_argument(
"-d",
"--device",
type=str,
default="cpu",
choices=["cpu", "cuda"],
help="ASR device (default: cpu)",
)
parser.add_argument(
"-c",
"--compute-type",
type=str,
default="int8",
choices=["int8", "float16", "float32"],
help="ASR compute type (default: int8)",
)
parser.add_argument(
"--database-url",
type=str,
default=None,
help="PostgreSQL database URL (overrides NOTEFLOW_DATABASE_URL)",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
help="Enable verbose logging",
)
parser.add_argument(
"--diarization",
action="store_true",
help="Enable speaker diarization (requires pyannote.audio)",
)
parser.add_argument(
"--diarization-hf-token",
type=str,
default=None,
help="HuggingFace token for pyannote models (overrides NOTEFLOW_DIARIZATION_HF_TOKEN)",
)
parser.add_argument(
"--diarization-device",
type=str,
default="auto",
choices=["auto", "cpu", "cuda", "mps"],
help="Device for diarization (default: auto)",
)
args = parser.parse_args()
# Configure logging
log_level = logging.DEBUG if args.verbose else logging.INFO
logging.basicConfig(
level=log_level,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
# Get settings
try:
settings = get_settings()
except (OSError, ValueError, ValidationError) as exc:
logger.warning("Failed to load settings: %s", exc)
settings = None
# Get database URL from args or settings
database_url = args.database_url
if not database_url and settings and settings.database_url:
    database_url = str(settings.database_url)
if not database_url:
logger.warning("No database URL configured, running in-memory mode")
# Get diarization config from args or settings
diarization_enabled = args.diarization
diarization_hf_token = args.diarization_hf_token
diarization_device = args.diarization_device
if settings and not diarization_enabled:
diarization_enabled = settings.diarization_enabled
if settings and not diarization_hf_token:
diarization_hf_token = settings.diarization_hf_token
if settings and diarization_device == "auto":
diarization_device = settings.diarization_device
# Run server
asyncio.run(
run_server(
port=args.port,
asr_model=args.model,
asr_device=args.device,
asr_compute_type=args.compute_type,
database_url=database_url,
diarization_enabled=diarization_enabled,
diarization_hf_token=diarization_hf_token,
diarization_device=diarization_device,
)
)
if __name__ == "__main__":
main()
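# Typical invocations, assuming the module is run directly (the flags are
# exactly those defined by the parser above):
#
#   python -m noteflow.grpc.server --port 50051 --model base
#   python -m noteflow.grpc.server --database-url postgresql+asyncpg://localhost/noteflow \
#       --diarization --diarization-hf-token <token>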
File: src/noteflow/infrastructure/asr/segmenter.py
"""Audio segmenter with VAD-driven state machine.
Manages speech segment boundaries using Voice Activity Detection.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import TYPE_CHECKING
import numpy as np
from numpy.typing import NDArray
if TYPE_CHECKING:
from collections.abc import Iterator
class SegmenterState(Enum):
"""Segmenter state machine states."""
IDLE = auto() # Waiting for speech
SPEECH = auto() # Speech detected, accumulating audio
TRAILING = auto() # Speech ended, collecting trailing audio
@dataclass
class SegmenterConfig:
"""Configuration for segmenter behavior."""
# Minimum speech duration to consider valid (seconds)
min_speech_duration: float = 0.3
# Maximum segment duration before forced split (seconds)
max_segment_duration: float = 30.0
# Trailing silence to include after speech ends (seconds)
trailing_silence: float = 0.5
# Leading audio to include before speech starts (seconds)
leading_buffer: float = 0.2
# Sample rate for audio processing
sample_rate: int = 16000
@dataclass
class AudioSegment:
"""A completed audio segment ready for transcription."""
audio: NDArray[np.float32]
start_time: float
end_time: float
@property
def duration(self) -> float:
"""Segment duration in seconds."""
return self.end_time - self.start_time
@dataclass
class Segmenter:
"""VAD-driven audio segmenter with state machine.
Accumulates audio during speech and emits complete segments
when speech ends or max duration is reached.
"""
config: SegmenterConfig = field(default_factory=SegmenterConfig)
# State machine
_state: SegmenterState = field(default=SegmenterState.IDLE, init=False)
# Timing tracking
_stream_time: float = field(default=0.0, init=False)
_speech_start_time: float = field(default=0.0, init=False)
_leading_duration: float = field(default=0.0, init=False)
# Audio buffers
_leading_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False)
_speech_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False)
_trailing_buffer: list[NDArray[np.float32]] = field(default_factory=list, init=False)
_trailing_duration: float = field(default=0.0, init=False)
@property
def state(self) -> SegmenterState:
"""Get current segmenter state."""
return self._state
def reset(self) -> None:
"""Reset segmenter to initial state."""
self._state = SegmenterState.IDLE
self._stream_time = 0.0
self._speech_start_time = 0.0
self._leading_duration = 0.0
self._leading_buffer.clear()
self._speech_buffer.clear()
self._trailing_buffer.clear()
self._trailing_duration = 0.0
def process_audio(
self,
audio: NDArray[np.float32],
is_speech: bool,
) -> Iterator[AudioSegment]:
"""Process audio chunk with VAD decision.
Args:
audio: Audio samples (float32, mono).
is_speech: VAD decision for this chunk.
Yields:
Complete AudioSegment when speech ends or max duration reached.
"""
chunk_duration = len(audio) / self.config.sample_rate
chunk_start = self._stream_time
self._stream_time += chunk_duration
if self._state == SegmenterState.IDLE:
yield from self._handle_idle(audio, is_speech, chunk_start)
elif self._state == SegmenterState.SPEECH:
yield from self._handle_speech(audio, is_speech, chunk_start, chunk_duration)
elif self._state == SegmenterState.TRAILING:
yield from self._handle_trailing(audio, is_speech, chunk_start, chunk_duration)
def flush(self) -> AudioSegment | None:
"""Flush any pending audio as a segment.
Call when stream ends to get final segment.
Returns:
Remaining audio segment if valid, None otherwise.
"""
if self._state in (SegmenterState.SPEECH, SegmenterState.TRAILING):
segment = self._emit_segment()
self._state = SegmenterState.IDLE
return segment
return None
def _handle_idle(
self,
audio: NDArray[np.float32],
is_speech: bool,
chunk_start: float,
) -> Iterator[AudioSegment]:
"""Handle audio in IDLE state."""
if is_speech:
# Speech started - transition to SPEECH state
self._state = SegmenterState.SPEECH
self._speech_start_time = chunk_start
# Capture how much pre-speech audio we are including.
leading_samples = sum(len(chunk) for chunk in self._leading_buffer)
self._leading_duration = leading_samples / self.config.sample_rate
# Include leading buffer (pre-speech audio)
self._speech_buffer = list(self._leading_buffer)
self._speech_buffer.append(audio)
self._leading_buffer.clear()
else:
# Still idle - maintain leading buffer
self._update_leading_buffer(audio)
yield from () # No segments emitted in IDLE
def _handle_speech(
self,
audio: NDArray[np.float32],
is_speech: bool,
chunk_start: float,
chunk_duration: float,
) -> Iterator[AudioSegment]:
"""Handle audio in SPEECH state."""
if is_speech:
self._speech_buffer.append(audio)
current_duration = self._stream_time - self._speech_start_time
# Check max duration limit
if current_duration >= self.config.max_segment_duration:
segment = self._emit_segment()
if segment is not None:
yield segment
# Start a fresh segment at the end of this chunk
self._speech_start_time = self._stream_time
self._leading_duration = 0.0
self._speech_buffer = []
else:
# Speech ended - transition to TRAILING
# Start trailing buffer with this silent chunk
self._state = SegmenterState.TRAILING
self._trailing_buffer = [audio]
self._trailing_duration = chunk_duration
# Check if already past trailing threshold
if self._trailing_duration >= self.config.trailing_silence:
segment = self._emit_segment()
if segment is not None:
yield segment
self._state = SegmenterState.IDLE
def _handle_trailing(
self,
audio: NDArray[np.float32],
is_speech: bool,
chunk_start: float,
chunk_duration: float,
) -> Iterator[AudioSegment]:
"""Handle audio in TRAILING state."""
if is_speech:
# Speech resumed - merge trailing back and continue
self._speech_buffer.extend(self._trailing_buffer)
self._speech_buffer.append(audio)
self._trailing_buffer.clear()
self._trailing_duration = 0.0
self._state = SegmenterState.SPEECH
else:
# Still silence - accumulate trailing
self._trailing_buffer.append(audio)
self._trailing_duration += chunk_duration
if self._trailing_duration >= self.config.trailing_silence:
# Enough trailing silence - emit segment
segment = self._emit_segment()
if segment is not None:
yield segment
self._state = SegmenterState.IDLE
def _update_leading_buffer(self, audio: NDArray[np.float32]) -> None:
"""Maintain rolling leading buffer."""
self._leading_buffer.append(audio)
# Calculate total buffer duration
total_samples = sum(len(chunk) for chunk in self._leading_buffer)
total_duration = total_samples / self.config.sample_rate
# Trim to configured leading buffer size
while total_duration > self.config.leading_buffer and self._leading_buffer:
removed = self._leading_buffer.pop(0)
total_samples -= len(removed)
total_duration = total_samples / self.config.sample_rate
def _emit_segment(self) -> AudioSegment | None:
"""Create and emit completed segment."""
# Combine speech + trailing audio
all_audio = self._speech_buffer + self._trailing_buffer
# Calculate actual start time (account for leading buffer)
actual_start = max(0.0, self._speech_start_time - self._leading_duration)
# Concatenate audio
audio = np.concatenate(all_audio) if all_audio else np.array([], dtype=np.float32)
# If we only have silence/trailing audio, don't emit a segment.
if not self._speech_buffer:
self._trailing_buffer.clear()
self._trailing_duration = 0.0
self._leading_duration = 0.0
return None
# Check minimum speech duration (excluding leading buffer)
speech_samples = sum(len(chunk) for chunk in self._speech_buffer)
speech_duration = speech_samples / self.config.sample_rate
if speech_duration < self.config.min_speech_duration:
self._speech_buffer.clear()
self._trailing_buffer.clear()
self._trailing_duration = 0.0
self._leading_duration = 0.0
return None
segment = AudioSegment(
audio=audio,
start_time=actual_start,
end_time=self._stream_time,
)
# Clear buffers
self._speech_buffer.clear()
self._trailing_buffer.clear()
self._trailing_duration = 0.0
self._leading_duration = 0.0
return segment
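# A minimal sketch of driving the segmenter with external VAD decisions,
# assuming 16 kHz mono float32 chunks (values are illustrative):
#
#   import numpy as np
#
#   segmenter = Segmenter(config=SegmenterConfig(max_segment_duration=15.0))
#   chunk = np.zeros(1600, dtype=np.float32)  # 100 ms at 16 kHz
#   for segment in segmenter.process_audio(chunk, is_speech=False):
#       print(segment.start_time, segment.duration)
#   final = segmenter.flush()  # emit any pending speech at end of stream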
File: src/noteflow/infrastructure/asr/streaming_vad.py
"""Streaming Voice Activity Detection.
Provides real-time speech detection for audio streams.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Protocol
from noteflow.infrastructure.audio import compute_rms
if TYPE_CHECKING:
import numpy as np
from numpy.typing import NDArray
class VadEngine(Protocol):
"""Protocol for VAD engine implementations."""
def process(self, audio: NDArray[np.float32]) -> bool:
"""Process audio chunk and return speech detection result.
Args:
audio: Audio samples (float32, mono).
Returns:
True if speech detected, False otherwise.
"""
...
def reset(self) -> None:
"""Reset VAD state."""
...
@dataclass
class EnergyVadConfig:
"""Configuration for energy-based VAD."""
# Speech detection threshold (RMS energy)
speech_threshold: float = 0.01
# Silence threshold (lower than speech for hysteresis)
silence_threshold: float = 0.005
# Minimum consecutive speech frames to confirm speech
min_speech_frames: int = 2
# Minimum consecutive silence frames to confirm silence
min_silence_frames: int = 3
@dataclass
class EnergyVad:
"""Simple energy-based Voice Activity Detection.
Uses RMS energy with hysteresis for robust detection.
Suitable for clean audio; use silero-vad for noisy environments.
"""
config: EnergyVadConfig = field(default_factory=EnergyVadConfig)
# Internal state
_is_speech: bool = field(default=False, init=False)
_speech_frame_count: int = field(default=0, init=False)
_silence_frame_count: int = field(default=0, init=False)
def process(self, audio: NDArray[np.float32]) -> bool:
"""Process audio chunk and detect speech.
Uses RMS energy with hysteresis to detect speech.
State transitions require consecutive frames above/below threshold.
Args:
audio: Audio samples (float32, mono, normalized to [-1, 1]).
Returns:
True if speech detected, False for silence.
"""
energy = compute_rms(audio)
if self._is_speech:
# Currently in speech - check for silence
if energy < self.config.silence_threshold:
self._silence_frame_count += 1
self._speech_frame_count = 0
if self._silence_frame_count >= self.config.min_silence_frames:
self._is_speech = False
else:
self._silence_frame_count = 0
elif energy > self.config.speech_threshold:
self._speech_frame_count += 1
self._silence_frame_count = 0
if self._speech_frame_count >= self.config.min_speech_frames:
self._is_speech = True
else:
self._speech_frame_count = 0
return self._is_speech
def reset(self) -> None:
"""Reset VAD state to initial values."""
self._is_speech = False
self._speech_frame_count = 0
self._silence_frame_count = 0
@dataclass
class StreamingVad:
"""Streaming VAD wrapper with configurable backend.
Wraps VAD engines to provide a unified streaming interface.
"""
engine: VadEngine = field(default_factory=EnergyVad)
sample_rate: int = 16000
def process_chunk(self, audio: NDArray[np.float32]) -> bool:
"""Process audio chunk through VAD engine.
Args:
audio: Audio samples (float32, mono).
Returns:
True if speech detected, False otherwise.
"""
return self.engine.process(audio)
def reset(self) -> None:
"""Reset VAD state."""
self.engine.reset()
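# A minimal sketch wiring StreamingVad into the Segmenter, assuming
# `audio_chunks` is a hypothetical iterable of float32 mono arrays:
#
#   from noteflow.infrastructure.asr.segmenter import Segmenter
#
#   vad = StreamingVad()  # defaults to EnergyVad
#   segmenter = Segmenter()
#   for chunk in audio_chunks:
#       is_speech = vad.process_chunk(chunk)
#       for segment in segmenter.process_audio(chunk, is_speech):
#           ...  # hand segment.audio to the ASR engine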
File: src/noteflow/infrastructure/audio/levels.py
"""Audio level computation implementation.
Provide RMS and dB level calculation for VU meter display.
"""
from __future__ import annotations
import math
from typing import Final
import numpy as np
from numpy.typing import NDArray
def compute_rms(frames: NDArray[np.float32]) -> float:
"""Calculate Root Mean Square of audio samples.
Args:
frames: Audio samples as float32 array.
Returns:
RMS level as float (0.0 for empty array).
"""
if len(frames) == 0:
return 0.0
# Use float64 for precision during squaring to avoid overflow
return float(np.sqrt(np.mean(frames.astype(np.float64) ** 2)))
class RmsLevelProvider:
"""RMS-based audio level provider.
Compute RMS (Root Mean Square) level from audio frames for VU meter display.
"""
# Minimum dB value to report (silence threshold)
MIN_DB: Final[float] = -60.0
def get_rms(self, frames: NDArray[np.float32]) -> float:
"""Calculate RMS level from audio frames.
Args:
frames: Audio samples as float32 array (normalized -1.0 to 1.0).
Returns:
RMS level normalized to 0.0-1.0 range.
"""
rms = compute_rms(frames)
# Clamp to 0.0-1.0 range for VU meter display
return min(1.0, max(0.0, rms))
def get_db(self, frames: NDArray[np.float32]) -> float:
"""Calculate dB level from audio frames.
Args:
frames: Audio samples as float32 array (normalized -1.0 to 1.0).
Returns:
Level in dB (MIN_DB to 0 range).
"""
rms = self.get_rms(frames)
if rms <= 0:
return self.MIN_DB
# Convert to dB: 20 * log10(rms)
db = 20.0 * math.log10(rms)
# Clamp to MIN_DB to 0 range
return max(self.MIN_DB, min(0.0, db))
def rms_to_db(self, rms: float) -> float:
"""Convert RMS value to dB.
Args:
rms: RMS level (0.0-1.0).
Returns:
Level in dB (MIN_DB to 0 range).
"""
if rms <= 0:
return self.MIN_DB
db = 20.0 * math.log10(rms)
return max(self.MIN_DB, min(0.0, db))
def db_to_rms(self, db: float) -> float:
"""Convert dB value to RMS.
Args:
db: Level in dB.
Returns:
RMS level (0.0-1.0).
"""
return 0.0 if db <= self.MIN_DB else 10.0 ** (db / 20.0)
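# A quick worked example of the conversions above (values illustrative):
#
#   provider = RmsLevelProvider()
#   provider.rms_to_db(1.0)    # 0.0 dB (full scale)
#   provider.rms_to_db(0.1)    # -20.0 dB, since 20 * log10(0.1) == -20
#   provider.rms_to_db(0.0)    # clamped to MIN_DB == -60.0
#   provider.db_to_rms(-20.0)  # 0.1 (inverse mapping)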
File: src/noteflow/infrastructure/audio/reader.py
"""Read encrypted audio from archived meetings.
Mirror of MeetingAudioWriter - reads encrypted PCM16 chunks and converts to float32.
Reuses ChunkedAssetReader from security/crypto.py for decryption.
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import TYPE_CHECKING
import numpy as np
from noteflow.infrastructure.audio.dto import TimestampedAudio
from noteflow.infrastructure.security.crypto import ChunkedAssetReader
if TYPE_CHECKING:
from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
logger = logging.getLogger(__name__)
class MeetingAudioReader:
"""Read audio chunks from encrypted meeting file.
Mirror of MeetingAudioWriter - handles manifest parsing, DEK unwrapping,
and encrypted audio decryption.
Directory structure (as created by MeetingAudioWriter):
~/.noteflow/meetings/<meeting-uuid>/
├── manifest.json # Meeting metadata + wrapped DEK
└── audio.enc # Encrypted PCM16 chunks (NFAE format)
"""
def __init__(
self,
crypto: AesGcmCryptoBox,
meetings_dir: Path,
) -> None:
"""Initialize audio reader.
Args:
crypto: CryptoBox instance for decryption and DEK unwrapping.
meetings_dir: Root directory for all meetings (e.g., ~/.noteflow/meetings).
"""
self._crypto = crypto
self._meetings_dir = meetings_dir
self._meeting_dir: Path | None = None
self._sample_rate: int = 16000
def load_meeting_audio(
self,
meeting_id: str,
) -> list[TimestampedAudio]:
"""Load all audio from an archived meeting.
Reads manifest, unwraps DEK, decrypts audio chunks, converts to float32.
Args:
meeting_id: Meeting UUID string.
Returns:
List of TimestampedAudio chunks (or empty list if not found/failed).
Raises:
FileNotFoundError: If meeting directory or audio file not found.
ValueError: If manifest is invalid or audio format unsupported.
"""
meeting_dir = self._meetings_dir / meeting_id
self._meeting_dir = meeting_dir
# Load and parse manifest
manifest_path = meeting_dir / "manifest.json"
if not manifest_path.exists():
raise FileNotFoundError(f"Manifest not found: {manifest_path}")
manifest = json.loads(manifest_path.read_text())
self._sample_rate = manifest.get("sample_rate", 16000)
wrapped_dek_hex = manifest.get("wrapped_dek")
if not wrapped_dek_hex:
raise ValueError("Manifest missing wrapped_dek")
# Unwrap DEK
wrapped_dek = bytes.fromhex(wrapped_dek_hex)
dek = self._crypto.unwrap_dek(wrapped_dek)
# Open encrypted audio file
audio_path = meeting_dir / "audio.enc"
if not audio_path.exists():
raise FileNotFoundError(f"Audio file not found: {audio_path}")
reader = ChunkedAssetReader(self._crypto)
reader.open(audio_path, dek)
try:
return self._read_all_chunks(reader)
finally:
reader.close()
def _read_all_chunks(
self,
reader: ChunkedAssetReader,
) -> list[TimestampedAudio]:
"""Read and convert all audio chunks.
Args:
reader: Open ChunkedAssetReader.
Returns:
List of TimestampedAudio chunks.
"""
chunks: list[TimestampedAudio] = []
current_time = 0.0
for chunk_bytes in reader.read_chunks():
# Convert PCM16 bytes back to int16 array
pcm16 = np.frombuffer(chunk_bytes, dtype=np.int16)
# Convert int16 to float32 in [-1.0, 1.0]; with this scale factor the
# extreme value -32768 maps just below -1.0
audio_float = pcm16.astype(np.float32) / 32767.0
# Calculate duration based on sample rate
duration = len(audio_float) / self._sample_rate
chunks.append(
TimestampedAudio(
frames=audio_float,
timestamp=current_time,
duration=duration,
)
)
current_time += duration
logger.info(
"Loaded audio: meeting_dir=%s, chunks=%d, total_duration=%.2fs",
self._meeting_dir,
len(chunks),
current_time,
)
return chunks
def get_manifest(self, meeting_id: str) -> dict[str, object] | None:
"""Get manifest metadata for a meeting.
Args:
meeting_id: Meeting UUID string.
Returns:
Manifest dict or None if not found.
"""
manifest_path = self._meetings_dir / meeting_id / "manifest.json"
if not manifest_path.exists():
return None
return dict(json.loads(manifest_path.read_text()))
def audio_exists(self, meeting_id: str) -> bool:
"""Check if audio file exists for a meeting.
Args:
meeting_id: Meeting UUID string.
Returns:
True if audio.enc exists.
"""
meeting_dir = self._meetings_dir / meeting_id
audio_path = meeting_dir / "audio.enc"
manifest_path = meeting_dir / "manifest.json"
return audio_path.exists() and manifest_path.exists()
@property
def sample_rate(self) -> int:
"""Return the sample rate from the last loaded manifest."""
return self._sample_rate
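# A minimal sketch of reading an archived meeting, assuming `crypto_box`
# is an AesGcmCryptoBox configured elsewhere:
#
#   from pathlib import Path
#
#   reader = MeetingAudioReader(crypto=crypto_box, meetings_dir=Path.home() / ".noteflow" / "meetings")
#   if reader.audio_exists(meeting_id):
#       chunks = reader.load_meeting_audio(meeting_id)
#       total_seconds = sum(chunk.duration for chunk in chunks)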
File: src/noteflow/infrastructure/export/markdown.py
"""Markdown exporter implementation.
Export meeting transcripts to Markdown format.
"""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING
from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp
if TYPE_CHECKING:
from collections.abc import Sequence
from noteflow.domain.entities.meeting import Meeting
from noteflow.domain.entities.segment import Segment
class MarkdownExporter:
"""Export meeting transcripts to Markdown format.
Produces clean, readable Markdown with meeting metadata header,
transcript sections with timestamps, and optional summary section.
"""
@property
def format_name(self) -> str:
"""Human-readable format name."""
return "Markdown"
@property
def file_extension(self) -> str:
"""File extension for Markdown."""
return ".md"
def export(
self,
meeting: Meeting,
segments: Sequence[Segment],
) -> str:
"""Export meeting transcript to Markdown.
Args:
meeting: Meeting entity with metadata.
segments: Ordered list of transcript segments.
Returns:
Markdown-formatted transcript string.
"""
lines: list[str] = [
f"# {meeting.title}",
"",
"## Meeting Info",
"",
f"- **Date:** {format_datetime(meeting.created_at)}",
]
if meeting.started_at:
lines.append(f"- **Started:** {format_datetime(meeting.started_at)}")
if meeting.ended_at:
lines.append(f"- **Ended:** {format_datetime(meeting.ended_at)}")
lines.append(f"- **Duration:** {format_timestamp(meeting.duration_seconds)}")
lines.extend((f"- **Segments:** {len(segments)}", "", "## Transcript", ""))
for segment in segments:
timestamp = format_timestamp(segment.start_time)
lines.extend((f"**[{timestamp}]** {segment.text}", ""))
# Summary section (if available)
if meeting.summary:
lines.extend(("## Summary", ""))
if meeting.summary.executive_summary:
lines.extend((meeting.summary.executive_summary, ""))
if meeting.summary.key_points:
lines.extend(("### Key Points", ""))
lines.extend(f"- {point.text}" for point in meeting.summary.key_points)
lines.append("")
if meeting.summary.action_items:
lines.extend(("### Action Items", ""))
for item in meeting.summary.action_items:
assignee = f" (@{item.assignee})" if item.assignee else ""
lines.append(f"- [ ] {item.text}{assignee}")
lines.append("")
# Footer
lines.append("---")
lines.append(f"*Exported from NoteFlow on {format_datetime(datetime.now())}*")
return "\n".join(lines)
File: src/noteflow/infrastructure/persistence/migrations/env.py
"""Alembic migration environment configuration."""
from __future__ import annotations
import asyncio
import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
from noteflow.infrastructure.persistence.models import Base
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging; this sets up the loggers.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# Import all models to ensure they're registered with Base.metadata
target_metadata = Base.metadata
if database_url := os.environ.get("NOTEFLOW_DATABASE_URL"):
# Convert postgres:// to postgresql+asyncpg://
if database_url.startswith("postgres://"):
database_url = database_url.replace("postgres://", "postgresql+asyncpg://", 1)
elif database_url.startswith("postgresql://"):
database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
config.set_main_option("sqlalchemy.url", database_url)
def include_object(
obj: object,
name: str | None,
type_: str,
reflected: bool,
compare_to: object | None,
) -> bool:
"""Filter objects for autogenerate."""
# Only include objects in the noteflow schema
if type_ == "table":
schema = getattr(obj, "schema", None)
return schema == "noteflow"
return True
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
include_schemas=True,
include_object=include_object,
version_table_schema="noteflow",
)
with context.begin_transaction():
context.run_migrations()
def do_run_migrations(connection: Connection) -> None:
"""Execute migrations with the provided connection."""
context.configure(
connection=connection,
target_metadata=target_metadata,
include_schemas=True,
include_object=include_object,
version_table_schema="noteflow",
)
with context.begin_transaction():
context.run_migrations()
async def run_async_migrations() -> None:
"""Run migrations in async mode.
Create an Engine and associate a connection with the context.
"""
connectable = async_engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
async with connectable.connect() as connection:
await connection.run_sync(do_run_migrations)
await connectable.dispose()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
asyncio.run(run_async_migrations())
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
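# Typical invocations, assuming alembic.ini points at this env.py and
# NOTEFLOW_DATABASE_URL is exported:
#
#   alembic upgrade head                          # apply all migrations
#   alembic revision --autogenerate -m "change"   # draft a new migration
#   alembic upgrade head --sql                    # offline mode: emit SQL only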
File: src/noteflow/infrastructure/persistence/repositories/annotation_repo.py
"""SQLAlchemy implementation of AnnotationRepository."""
from __future__ import annotations
from collections.abc import Sequence
from typing import TYPE_CHECKING
from uuid import UUID
from sqlalchemy import and_, delete, or_, select
from noteflow.domain.entities import Annotation
from noteflow.domain.value_objects import AnnotationId
from noteflow.infrastructure.converters import OrmConverter
from noteflow.infrastructure.persistence.models import AnnotationModel
from noteflow.infrastructure.persistence.repositories._base import BaseRepository
if TYPE_CHECKING:
from noteflow.domain.value_objects import MeetingId
class SqlAlchemyAnnotationRepository(BaseRepository):
"""SQLAlchemy implementation of AnnotationRepository."""
async def add(self, annotation: Annotation) -> Annotation:
"""Add an annotation to a meeting.
Args:
annotation: Annotation to add.
Returns:
Added annotation with db_id populated.
Raises:
ValueError: If meeting does not exist.
"""
model = AnnotationModel(
annotation_id=UUID(str(annotation.id)),
meeting_id=UUID(str(annotation.meeting_id)),
annotation_type=annotation.annotation_type.value,
text=annotation.text,
start_time=annotation.start_time,
end_time=annotation.end_time,
segment_ids=annotation.segment_ids,
created_at=annotation.created_at,
)
self._session.add(model)
await self._session.flush()
annotation.db_id = model.id
return annotation
async def get(self, annotation_id: AnnotationId) -> Annotation | None:
"""Retrieve an annotation by ID.
Args:
annotation_id: Annotation identifier.
Returns:
Annotation if found, None otherwise.
"""
stmt = select(AnnotationModel).where(
AnnotationModel.annotation_id == UUID(str(annotation_id))
)
model = await self._execute_scalar(stmt)
return None if model is None else OrmConverter.annotation_to_domain(model)
async def get_by_meeting(
self,
meeting_id: MeetingId,
) -> Sequence[Annotation]:
"""Get all annotations for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
List of annotations ordered by start_time.
"""
stmt = (
select(AnnotationModel)
.where(AnnotationModel.meeting_id == UUID(str(meeting_id)))
.order_by(AnnotationModel.start_time)
)
models = await self._execute_scalars(stmt)
return [OrmConverter.annotation_to_domain(model) for model in models]
async def get_by_time_range(
self,
meeting_id: MeetingId,
start_time: float,
end_time: float,
) -> Sequence[Annotation]:
"""Get annotations within a time range.
Args:
meeting_id: Meeting identifier.
start_time: Start of time range in seconds.
end_time: End of time range in seconds.
Returns:
List of annotations overlapping the time range.
"""
# Find annotations that overlap with the given time range
stmt = (
select(AnnotationModel)
.where(
and_(
AnnotationModel.meeting_id == UUID(str(meeting_id)),
or_(
# Annotation starts within range
and_(
AnnotationModel.start_time >= start_time,
AnnotationModel.start_time <= end_time,
),
# Annotation ends within range
and_(
AnnotationModel.end_time >= start_time,
AnnotationModel.end_time <= end_time,
),
# Annotation spans entire range
and_(
AnnotationModel.start_time <= start_time,
AnnotationModel.end_time >= end_time,
),
),
)
)
.order_by(AnnotationModel.start_time)
)
models = await self._execute_scalars(stmt)
return [OrmConverter.annotation_to_domain(model) for model in models]
async def update(self, annotation: Annotation) -> Annotation:
"""Update an existing annotation.
Args:
annotation: Annotation with updated fields.
Returns:
Updated annotation.
Raises:
ValueError: If annotation does not exist.
"""
stmt = select(AnnotationModel).where(
AnnotationModel.annotation_id == UUID(str(annotation.id))
)
model = await self._execute_scalar(stmt)
if model is None:
raise ValueError(f"Annotation {annotation.id} not found")
model.annotation_type = annotation.annotation_type.value
model.text = annotation.text
model.start_time = annotation.start_time
model.end_time = annotation.end_time
model.segment_ids = annotation.segment_ids
await self._session.flush()
return annotation
async def delete(self, annotation_id: AnnotationId) -> bool:
"""Delete an annotation.
Args:
annotation_id: Annotation identifier.
Returns:
True if deleted, False if not found.
"""
stmt = select(AnnotationModel).where(
AnnotationModel.annotation_id == UUID(str(annotation_id))
)
model = await self._execute_scalar(stmt)
if model is None:
return False
await self._session.execute(delete(AnnotationModel).where(AnnotationModel.id == model.id))
await self._session.flush()
return True
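# A minimal usage sketch, assuming the repository is constructed with an
# AsyncSession by the unit of work (construction details are illustrative):
#
#   repo = SqlAlchemyAnnotationRepository(session)
#   await repo.add(annotation)
#   hits = await repo.get_by_time_range(meeting_id, 10.0, 20.0)
#
# Note: the three-way or_() in get_by_time_range is the classic interval
# overlap test; it is equivalent to the simpler predicate
# AnnotationModel.start_time <= end_time AND AnnotationModel.end_time >= start_time.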
File: src/noteflow/infrastructure/persistence/repositories/summary_repo.py
"""SQLAlchemy implementation of SummaryRepository."""
from __future__ import annotations
from typing import TYPE_CHECKING
from uuid import UUID
from sqlalchemy import delete, select
from noteflow.domain.entities import ActionItem, KeyPoint, Summary
from noteflow.infrastructure.converters import OrmConverter
from noteflow.infrastructure.persistence.models import (
ActionItemModel,
KeyPointModel,
SummaryModel,
)
from noteflow.infrastructure.persistence.repositories._base import BaseRepository
if TYPE_CHECKING:
from noteflow.domain.value_objects import MeetingId
class SqlAlchemySummaryRepository(BaseRepository):
"""SQLAlchemy implementation of SummaryRepository."""
async def save(self, summary: Summary) -> Summary:
"""Save or update a meeting summary.
Args:
summary: Summary to save.
Returns:
Saved summary with db_id populated.
"""
# Check if summary exists for this meeting
stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(summary.meeting_id)))
result = await self._session.execute(stmt)
if existing := result.scalar_one_or_none():
# Update existing summary
existing.executive_summary = summary.executive_summary
if summary.generated_at is not None:
existing.generated_at = summary.generated_at
existing.model_version = summary.model_version
# Delete old key points and action items
await self._session.execute(
delete(KeyPointModel).where(KeyPointModel.summary_id == existing.id)
)
await self._session.execute(
delete(ActionItemModel).where(ActionItemModel.summary_id == existing.id)
)
# Add new key points
kp_models: list[tuple[KeyPointModel, KeyPoint]] = []
for kp in summary.key_points:
kp_model = KeyPointModel(
summary_id=existing.id,
text=kp.text,
start_time=kp.start_time,
end_time=kp.end_time,
segment_ids=kp.segment_ids,
)
self._session.add(kp_model)
kp_models.append((kp_model, kp))
# Add new action items
ai_models: list[tuple[ActionItemModel, ActionItem]] = []
for ai in summary.action_items:
ai_model = ActionItemModel(
summary_id=existing.id,
text=ai.text,
assignee=ai.assignee,
due_date=ai.due_date,
priority=ai.priority,
segment_ids=ai.segment_ids,
)
self._session.add(ai_model)
ai_models.append((ai_model, ai))
await self._session.flush()
for kp_model, kp in kp_models:
kp.db_id = kp_model.id
for ai_model, ai in ai_models:
ai.db_id = ai_model.id
summary.db_id = existing.id
else:
# Create new summary
model = SummaryModel(
meeting_id=UUID(str(summary.meeting_id)),
executive_summary=summary.executive_summary,
generated_at=summary.generated_at,
model_version=summary.model_version,
)
self._session.add(model)
await self._session.flush()
# Add key points
for kp in summary.key_points:
kp_model = KeyPointModel(
summary_id=model.id,
text=kp.text,
start_time=kp.start_time,
end_time=kp.end_time,
segment_ids=kp.segment_ids,
)
self._session.add(kp_model)
await self._session.flush()
kp.db_id = kp_model.id
# Add action items
for ai in summary.action_items:
ai_model = ActionItemModel(
summary_id=model.id,
text=ai.text,
assignee=ai.assignee,
due_date=ai.due_date,
priority=ai.priority,
segment_ids=ai.segment_ids,
)
self._session.add(ai_model)
await self._session.flush()
ai.db_id = ai_model.id
summary.db_id = model.id
return summary
async def get_by_meeting(self, meeting_id: MeetingId) -> Summary | None:
"""Get summary for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
Summary if exists, None otherwise.
"""
stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(meeting_id)))
model = await self._execute_scalar(stmt)
return None if model is None else OrmConverter.summary_to_domain(model, meeting_id)
async def delete_by_meeting(self, meeting_id: MeetingId) -> bool:
"""Delete summary for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
True if deleted, False if not found.
"""
stmt = select(SummaryModel).where(SummaryModel.meeting_id == UUID(str(meeting_id)))
model = await self._execute_scalar(stmt)
if model is None:
return False
await self._delete_and_flush(model)
return True
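# A minimal upsert sketch, assuming `summary` came from the summarization
# service (construction details are illustrative):
#
#   repo = SqlAlchemySummaryRepository(session)
#   saved = await repo.save(summary)  # inserts, or replaces in place
#   current = await repo.get_by_meeting(summary.meeting_id)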
File: src/noteflow/infrastructure/persistence/models.py
"""SQLAlchemy ORM models for NoteFlow."""
from __future__ import annotations
from datetime import datetime
from typing import ClassVar
from uuid import uuid4
from pgvector.sqlalchemy import Vector
from sqlalchemy import (
DateTime,
Float,
ForeignKey,
Integer,
LargeBinary,
String,
Text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
# Vector dimension for embeddings (OpenAI compatible)
EMBEDDING_DIM = 1536
class Base(DeclarativeBase):
"""Base class for all ORM models."""
pass
class MeetingModel(Base):
"""SQLAlchemy model for meetings table."""
__tablename__ = "meetings"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
title: Mapped[str] = mapped_column(String(255), nullable=False)
state: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=datetime.now,
)
started_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
ended_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
metadata_: Mapped[dict[str, str]] = mapped_column(
"metadata",
JSONB,
nullable=False,
default=dict,
)
wrapped_dek: Mapped[bytes | None] = mapped_column(
LargeBinary,
nullable=True,
)
# Relationships
segments: Mapped[list[SegmentModel]] = relationship(
"SegmentModel",
back_populates="meeting",
cascade="all, delete-orphan",
lazy="selectin",
)
summary: Mapped[SummaryModel | None] = relationship(
"SummaryModel",
back_populates="meeting",
cascade="all, delete-orphan",
uselist=False,
lazy="selectin",
)
annotations: Mapped[list[AnnotationModel]] = relationship(
"AnnotationModel",
back_populates="meeting",
cascade="all, delete-orphan",
lazy="selectin",
)
class SegmentModel(Base):
"""SQLAlchemy model for segments table."""
__tablename__ = "segments"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
meeting_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
)
segment_id: Mapped[int] = mapped_column(Integer, nullable=False)
text: Mapped[str] = mapped_column(Text, nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
language_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
avg_logprob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
no_speech_prob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
embedding: Mapped[list[float] | None] = mapped_column(
Vector(EMBEDDING_DIM),
nullable=True,
)
speaker_id: Mapped[str | None] = mapped_column(String(50), nullable=True)
speaker_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=datetime.now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="segments",
)
words: Mapped[list[WordTimingModel]] = relationship(
"WordTimingModel",
back_populates="segment",
cascade="all, delete-orphan",
lazy="selectin",
)
class WordTimingModel(Base):
"""SQLAlchemy model for word_timings table."""
__tablename__ = "word_timings"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
segment_pk: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.segments.id", ondelete="CASCADE"),
nullable=False,
)
word: Mapped[str] = mapped_column(String(255), nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
probability: Mapped[float] = mapped_column(Float, nullable=False)
# Relationships
segment: Mapped[SegmentModel] = relationship(
"SegmentModel",
back_populates="words",
)
class SummaryModel(Base):
"""SQLAlchemy model for summaries table."""
__tablename__ = "summaries"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
meeting_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
unique=True,
)
executive_summary: Mapped[str | None] = mapped_column(Text, nullable=True)
generated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=datetime.now,
)
model_version: Mapped[str | None] = mapped_column(String(50), nullable=True)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="summary",
)
key_points: Mapped[list[KeyPointModel]] = relationship(
"KeyPointModel",
back_populates="summary",
cascade="all, delete-orphan",
lazy="selectin",
)
action_items: Mapped[list[ActionItemModel]] = relationship(
"ActionItemModel",
back_populates="summary",
cascade="all, delete-orphan",
lazy="selectin",
)
class KeyPointModel(Base):
"""SQLAlchemy model for key_points table."""
__tablename__ = "key_points"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
summary_id: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
)
text: Mapped[str] = mapped_column(Text, nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
# Relationships
summary: Mapped[SummaryModel] = relationship(
"SummaryModel",
back_populates="key_points",
)
class ActionItemModel(Base):
"""SQLAlchemy model for action_items table."""
__tablename__ = "action_items"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
summary_id: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
)
text: Mapped[str] = mapped_column(Text, nullable=False)
assignee: Mapped[str] = mapped_column(String(255), nullable=False, default="")
due_date: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
# Relationships
summary: Mapped[SummaryModel] = relationship(
"SummaryModel",
back_populates="action_items",
)
class AnnotationModel(Base):
"""SQLAlchemy model for annotations table.
User-created annotations during recording. Distinct from LLM-extracted
ActionItem/KeyPoint which belong to Summary. Annotations belong directly
to Meeting and are created in real-time.
"""
__tablename__ = "annotations"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
annotation_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
nullable=False,
unique=True,
default=uuid4,
)
meeting_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
)
annotation_type: Mapped[str] = mapped_column(String(50), nullable=False)
text: Mapped[str] = mapped_column(Text, nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=datetime.now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="annotations",
)
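# A minimal sketch of creating these tables for tests, assuming an async
# engine and that the "noteflow" schema and pgvector extension already
# exist (production uses the Alembic migrations instead):
#
#   from sqlalchemy.ext.asyncio import create_async_engine
#
#   engine = create_async_engine("postgresql+asyncpg://localhost/noteflow")
#   async with engine.begin() as conn:
#       await conn.run_sync(Base.metadata.create_all)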
File: src/noteflow/infrastructure/summarization/__init__.py
"""Summarization infrastructure module.
Provides summarization provider implementations and citation verification.
"""
from noteflow.infrastructure.summarization.citation_verifier import (
SegmentCitationVerifier,
)
from noteflow.infrastructure.summarization.cloud_provider import (
CloudBackend,
CloudSummarizer,
)
from noteflow.infrastructure.summarization.factory import create_summarization_service
from noteflow.infrastructure.summarization.mock_provider import MockSummarizer
from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer
__all__ = [
"CloudBackend",
"CloudSummarizer",
"MockSummarizer",
"OllamaSummarizer",
"SegmentCitationVerifier",
"create_summarization_service",
]
File: src/noteflow/infrastructure/summarization/_parsing.py
"""Shared parsing utilities for summarization providers."""
from __future__ import annotations
import json
from datetime import UTC, datetime
from typing import TYPE_CHECKING
from noteflow.domain.entities import ActionItem, KeyPoint, Summary
from noteflow.domain.summarization import InvalidResponseError
if TYPE_CHECKING:
from noteflow.domain.summarization import SummarizationRequest
# System prompt for structured summarization
SYSTEM_PROMPT = """You are a meeting summarization assistant. Analyze the transcript and produce structured output.
OUTPUT FORMAT (JSON):
{
"executive_summary": "2-3 sentence high-level overview",
"key_points": [
{"text": "Key insight or decision", "segment_ids": [0, 1]}
],
"action_items": [
{"text": "Action to take", "assignee": "Person name or empty string", "priority": 0, "segment_ids": [2]}
]
}
RULES:
1. Each key_point and action_item MUST have at least one segment_id referencing the source
2. segment_ids are integers matching the [N] markers in the transcript
3. priority: 0=unspecified, 1=low, 2=medium, 3=high
4. Only extract action items that clearly indicate tasks to be done
5. Output ONLY valid JSON, no markdown or explanation"""
def build_transcript_prompt(request: SummarizationRequest) -> str:
"""Build transcript prompt with segment markers.
Args:
request: Summarization request with segments.
Returns:
Formatted prompt string with transcript and constraints.
"""
lines = [f"[{seg.segment_id}] {seg.text}" for seg in request.segments]
constraints = ""
if request.segments:
valid_ids = ", ".join(str(seg.segment_id) for seg in request.segments)
constraints = (
"\n\nCONSTRAINTS:\n"
f"- Maximum {request.max_key_points} key points\n"
f"- Maximum {request.max_action_items} action items\n"
f"- Valid segment_ids: {valid_ids}"
)
return f"TRANSCRIPT:\n{chr(10).join(lines)}{constraints}"
def parse_llm_response(response_text: str, request: SummarizationRequest) -> Summary:
"""Parse JSON response into Summary entity.
Args:
response_text: Raw JSON response from LLM.
request: Original request for validation context.
Returns:
Summary entity with parsed data.
Raises:
InvalidResponseError: If JSON is malformed.
"""
# Strip markdown code fences if present
text = response_text.strip()
if text.startswith("```"):
lines = text.split("\n")
if lines[0].startswith("```"):
lines = lines[1:]
if lines and lines[-1].strip() == "```":
lines = lines[:-1]
text = "\n".join(lines)
try:
data = json.loads(text)
except json.JSONDecodeError as e:
raise InvalidResponseError(f"Invalid JSON response: {e}") from e
valid_ids = {seg.segment_id for seg in request.segments}
# Parse key points
key_points: list[KeyPoint] = []
for kp_data in data.get("key_points", [])[: request.max_key_points]:
seg_ids = [sid for sid in kp_data.get("segment_ids", []) if sid in valid_ids]
start_time = 0.0
end_time = 0.0
if seg_ids and (refs := [s for s in request.segments if s.segment_id in seg_ids]):
start_time = min(s.start_time for s in refs)
end_time = max(s.end_time for s in refs)
key_points.append(
KeyPoint(
text=str(kp_data.get("text", "")),
segment_ids=seg_ids,
start_time=start_time,
end_time=end_time,
)
)
# Parse action items
action_items: list[ActionItem] = []
for ai_data in data.get("action_items", [])[: request.max_action_items]:
seg_ids = [sid for sid in ai_data.get("segment_ids", []) if sid in valid_ids]
priority = ai_data.get("priority", 0)
if not isinstance(priority, int) or priority not in range(4):
priority = 0
action_items.append(
ActionItem(
text=str(ai_data.get("text", "")),
assignee=str(ai_data.get("assignee", "")),
priority=priority,
segment_ids=seg_ids,
)
)
return Summary(
meeting_id=request.meeting_id,
executive_summary=str(data.get("executive_summary", "")),
key_points=key_points,
action_items=action_items,
generated_at=datetime.now(UTC),
)
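To make the fence-stripping and citation filtering above concrete, here is a minimal sketch. The `SummarizationRequest` constructor shape is an assumption inferred from the fields this module reads (`meeting_id`, `segments`, `max_key_points`, `max_action_items`); `Segment` and `MeetingId` are constructed as in the tests further down in this dump.

```python
from uuid import uuid4

from noteflow.domain.entities import Segment
from noteflow.domain.summarization import SummarizationRequest
from noteflow.domain.value_objects import MeetingId
from noteflow.infrastructure.summarization._parsing import parse_llm_response

request = SummarizationRequest(  # assumed constructor shape
    meeting_id=MeetingId(uuid4()),
    segments=[Segment(segment_id=0, text="We will ship v1 on Friday.", start_time=0.0, end_time=2.0)],
    max_key_points=5,
    max_action_items=5,
)
raw = (
    "```json\n"
    '{"executive_summary": "Release planning.",'
    ' "key_points": [{"text": "Ship v1 on Friday", "segment_ids": [0, 99]}],'
    ' "action_items": []}\n'
    "```"
)
summary = parse_llm_response(raw, request)
assert summary.key_points[0].segment_ids == [0]  # the unknown segment id 99 is dropped
```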
File: src/noteflow/infrastructure/summarization/cloud_provider.py
"""Cloud summarization provider for OpenAI/Anthropic APIs."""
from __future__ import annotations
import asyncio
import os
import time
from datetime import UTC, datetime
from enum import Enum
from typing import TYPE_CHECKING, cast
from noteflow.domain.entities import Summary
from noteflow.domain.summarization import (
InvalidResponseError,
ProviderUnavailableError,
SummarizationRequest,
SummarizationResult,
SummarizationTimeoutError,
)
from noteflow.infrastructure.summarization._parsing import (
SYSTEM_PROMPT,
build_transcript_prompt,
parse_llm_response,
)
if TYPE_CHECKING:
import anthropic
import openai
class CloudBackend(Enum):
"""Supported cloud LLM backends."""
OPENAI = "openai"
ANTHROPIC = "anthropic"
class CloudSummarizer:
"""Cloud-based LLM summarizer using OpenAI or Anthropic.
Requires explicit user consent as data is sent to external services.
"""
def __init__(
self,
backend: CloudBackend = CloudBackend.OPENAI,
api_key: str | None = None,
model: str | None = None,
timeout_seconds: float = 60.0,
base_url: str | None = None,
) -> None:
"""Initialize cloud summarizer.
Args:
backend: Cloud provider backend (OpenAI or Anthropic).
api_key: API key (defaults to env var if not provided).
model: Model name (defaults per backend if not provided).
timeout_seconds: Request timeout in seconds.
base_url: Optional base URL (OpenAI only; defaults to OpenAI API).
"""
self._backend = backend
self._api_key = api_key
self._timeout = timeout_seconds
self._client: openai.OpenAI | anthropic.Anthropic | None = None
        # Only used for OpenAI
        if base_url is not None:
            self._openai_base_url = base_url
        elif backend == CloudBackend.OPENAI:
            self._openai_base_url = os.environ.get("OPENAI_BASE_URL")
        else:
            self._openai_base_url = None
# Set default models per backend
if model is None:
self._model = (
"gpt-4o-mini" if backend == CloudBackend.OPENAI else "claude-3-haiku-20240307"
)
else:
self._model = model
def _get_openai_client(self) -> openai.OpenAI:
"""Get or create OpenAI client."""
if self._client is None:
try:
import openai
self._client = openai.OpenAI(
api_key=self._api_key,
timeout=self._timeout,
base_url=self._openai_base_url,
)
except ImportError as e:
raise ProviderUnavailableError(
"openai package not installed. Install with: pip install openai"
) from e
        # String form: the local `import openai` above only runs on the first call,
        # so referencing the name here would raise UnboundLocalError on later calls.
        return cast("openai.OpenAI", self._client)
def _get_anthropic_client(self) -> anthropic.Anthropic:
"""Get or create Anthropic client."""
if self._client is None:
try:
import anthropic
self._client = anthropic.Anthropic(api_key=self._api_key, timeout=self._timeout)
except ImportError as e:
raise ProviderUnavailableError(
"anthropic package not installed. Install with: pip install anthropic"
) from e
        # String form: the local `import anthropic` above only runs on the first call.
        return cast("anthropic.Anthropic", self._client)
@property
def provider_name(self) -> str:
"""Provider identifier."""
return self._backend.value
@property
def is_available(self) -> bool:
"""Check if cloud provider is configured with an API key."""
import os
if self._api_key:
return True
# Check environment variables
if self._backend == CloudBackend.OPENAI:
return bool(os.environ.get("OPENAI_API_KEY"))
return bool(os.environ.get("ANTHROPIC_API_KEY"))
@property
def requires_cloud_consent(self) -> bool:
"""Cloud providers require explicit user consent."""
return True
async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
"""Generate evidence-linked summary using cloud LLM.
Args:
request: Summarization request with segments.
Returns:
SummarizationResult with generated summary.
Raises:
ProviderUnavailableError: If provider not configured.
SummarizationTimeoutError: If request times out.
InvalidResponseError: If response cannot be parsed.
"""
start = time.monotonic()
# Handle empty segments
if not request.segments:
return SummarizationResult(
summary=Summary(
meeting_id=request.meeting_id,
executive_summary="No transcript segments to summarize.",
key_points=[],
action_items=[],
generated_at=datetime.now(UTC),
model_version=self._model,
),
model_name=self._model,
provider_name=self.provider_name,
tokens_used=None,
latency_ms=0.0,
)
user_prompt = build_transcript_prompt(request)
if self._backend == CloudBackend.OPENAI:
content, tokens_used = await asyncio.to_thread(self._call_openai, user_prompt)
else:
content, tokens_used = await asyncio.to_thread(self._call_anthropic, user_prompt)
# Parse into Summary
summary = parse_llm_response(content, request)
summary = Summary(
meeting_id=summary.meeting_id,
executive_summary=summary.executive_summary,
key_points=summary.key_points,
action_items=summary.action_items,
generated_at=summary.generated_at,
model_version=self._model,
)
elapsed_ms = (time.monotonic() - start) * 1000
return SummarizationResult(
summary=summary,
model_name=self._model,
provider_name=self.provider_name,
tokens_used=tokens_used,
latency_ms=elapsed_ms,
)
def _call_openai(self, user_prompt: str) -> tuple[str, int | None]:
"""Call OpenAI API and return (content, tokens_used)."""
try:
client = self._get_openai_client()
except ProviderUnavailableError:
raise
try:
response = client.chat.completions.create(
model=self._model,
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_prompt},
],
temperature=0.3,
response_format={"type": "json_object"},
)
except TimeoutError as e:
raise SummarizationTimeoutError(f"OpenAI request timed out: {e}") from e
except Exception as e:
err_str = str(e).lower()
if "api key" in err_str or "authentication" in err_str:
raise ProviderUnavailableError(f"OpenAI authentication failed: {e}") from e
if "rate limit" in err_str:
raise SummarizationTimeoutError(f"OpenAI rate limited: {e}") from e
raise InvalidResponseError(f"OpenAI error: {e}") from e
content = response.choices[0].message.content or ""
if not content:
raise InvalidResponseError("Empty response from OpenAI")
tokens_used = response.usage.total_tokens if response.usage else None
return content, tokens_used
def _call_anthropic(self, user_prompt: str) -> tuple[str, int | None]:
"""Call Anthropic API and return (content, tokens_used)."""
try:
client = self._get_anthropic_client()
except ProviderUnavailableError:
raise
try:
response = client.messages.create(
model=self._model,
max_tokens=4096,
system=SYSTEM_PROMPT,
messages=[{"role": "user", "content": user_prompt}],
)
except TimeoutError as e:
raise SummarizationTimeoutError(f"Anthropic request timed out: {e}") from e
except Exception as e:
err_str = str(e).lower()
if "api key" in err_str or "authentication" in err_str:
raise ProviderUnavailableError(f"Anthropic authentication failed: {e}") from e
if "rate limit" in err_str:
raise SummarizationTimeoutError(f"Anthropic rate limited: {e}") from e
raise InvalidResponseError(f"Anthropic error: {e}") from e
content = "".join(block.text for block in response.content if hasattr(block, "text"))
if not content:
raise InvalidResponseError("Empty response from Anthropic")
tokens_used = None
if hasattr(response, "usage"):
tokens_used = response.usage.input_tokens + response.usage.output_tokens
return content, tokens_used
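A hedged usage sketch for `CloudSummarizer`, reusing the hypothetical `request` from the `_parsing.py` sketch above; it assumes `OPENAI_API_KEY` is exported (or an explicit `api_key=` is passed).

```python
import asyncio

from noteflow.infrastructure.summarization.cloud_provider import CloudBackend, CloudSummarizer

async def main() -> None:
    summarizer = CloudSummarizer(backend=CloudBackend.OPENAI, model="gpt-4o-mini")
    if not summarizer.is_available:  # falls back to checking OPENAI_API_KEY in the environment
        raise SystemExit("Set OPENAI_API_KEY (or pass api_key=...) first")
    result = await summarizer.summarize(request)  # `request` as in the _parsing.py sketch
    print(result.summary.executive_summary, f"{result.latency_ms:.0f} ms")

asyncio.run(main())
```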
File: src/noteflow/infrastructure/summarization/mock_provider.py
"""Mock summarization provider for testing."""
from __future__ import annotations
import time
from datetime import UTC, datetime
from noteflow.domain.entities import ActionItem, KeyPoint, Summary
from noteflow.domain.summarization import (
SummarizationRequest,
SummarizationResult,
)
class MockSummarizer:
"""Deterministic mock summarizer for testing.
Generates predictable summaries based on input segments without
requiring an actual LLM. Useful for unit tests and development.
"""
def __init__(self, latency_ms: float = 10.0) -> None:
"""Initialize mock summarizer.
Args:
latency_ms: Simulated latency in milliseconds.
"""
self._latency_ms = latency_ms
@property
def provider_name(self) -> str:
"""Provider identifier."""
return "mock"
@property
def is_available(self) -> bool:
"""Mock provider is always available."""
return True
@property
def requires_cloud_consent(self) -> bool:
"""Mock provider does not send data externally."""
return False
async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
"""Generate deterministic mock summary.
Creates key points and action items based on segment content,
with proper evidence linking to segment_ids.
Args:
request: Summarization request with segments.
Returns:
SummarizationResult with mock summary.
"""
start = time.monotonic()
# Generate executive summary
segment_count = request.segment_count
total_duration = request.total_duration
executive_summary = (
f"Meeting with {segment_count} segments spanning {total_duration:.1f} seconds."
)
# Generate key points from segments (up to max_key_points)
key_points: list[KeyPoint] = []
for i, segment in enumerate(request.segments[: request.max_key_points]):
# Truncate text for key point
text = f"{segment.text[:100]}..." if len(segment.text) > 100 else segment.text
key_points.append(
KeyPoint(
text=f"Point {i + 1}: {text}",
segment_ids=[segment.segment_id],
start_time=segment.start_time,
end_time=segment.end_time,
)
)
# Generate action items from segments containing action words
action_items: list[ActionItem] = []
action_keywords = {"todo", "action", "will", "should", "must", "need to"}
for segment in request.segments:
text_lower = segment.text.lower()
if any(kw in text_lower for kw in action_keywords):
if len(action_items) >= request.max_action_items:
break
action_items.append(
ActionItem(
text=f"Action: {segment.text[:80]}",
assignee="", # Mock doesn't extract assignees
segment_ids=[segment.segment_id],
)
)
summary = Summary(
meeting_id=request.meeting_id,
executive_summary=executive_summary,
key_points=key_points,
action_items=action_items,
generated_at=datetime.now(UTC),
model_version="mock-1.0",
)
elapsed = (time.monotonic() - start) * 1000 + self._latency_ms
return SummarizationResult(
summary=summary,
model_name="mock-1.0",
provider_name=self.provider_name,
tokens_used=None,
latency_ms=elapsed,
)
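The mock pairs naturally with unit tests. A minimal sketch, again reusing the hypothetical `request` from the `_parsing.py` sketch (whose single segment contains the keyword "will", so the mock emits an action item):

```python
import asyncio

from noteflow.infrastructure.summarization.mock_provider import MockSummarizer

result = asyncio.run(MockSummarizer(latency_ms=5.0).summarize(request))
assert result.provider_name == "mock"
assert result.summary.key_points and result.summary.action_items
```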
File: src/noteflow/infrastructure/summarization/ollama_provider.py
"""Ollama summarization provider for local LLM inference."""
from __future__ import annotations
import asyncio
import os
import time
from datetime import UTC, datetime
from typing import TYPE_CHECKING
from noteflow.domain.entities import Summary
from noteflow.domain.summarization import (
InvalidResponseError,
ProviderUnavailableError,
SummarizationRequest,
SummarizationResult,
SummarizationTimeoutError,
)
from noteflow.infrastructure.summarization._parsing import (
SYSTEM_PROMPT,
build_transcript_prompt,
parse_llm_response,
)
if TYPE_CHECKING:
import ollama
class OllamaSummarizer:
"""Ollama-based local LLM summarizer.
Uses a local Ollama server for privacy-preserving summarization.
No data is sent to external cloud services.
"""
def __init__(
self,
model: str | None = None,
host: str | None = None,
timeout_seconds: float = 120.0,
) -> None:
"""Initialize Ollama summarizer.
Args:
model: Ollama model name (e.g., 'llama3.2', 'mistral').
host: Ollama server URL.
timeout_seconds: Request timeout in seconds.
"""
self._model = model or os.environ.get("OLLAMA_MODEL", "llama3.2")
self._host = host or os.environ.get("OLLAMA_HOST", "http://localhost:11434")
self._timeout = timeout_seconds
self._client: ollama.Client | None = None
def _get_client(self) -> ollama.Client:
"""Lazy-load Ollama client."""
if self._client is None:
try:
import ollama
self._client = ollama.Client(host=self._host)
except ImportError as e:
raise ProviderUnavailableError(
"ollama package not installed. Install with: pip install ollama"
) from e
return self._client
@property
def provider_name(self) -> str:
"""Provider identifier."""
return "ollama"
@property
def is_available(self) -> bool:
"""Check if Ollama server is reachable."""
try:
client = self._get_client()
# Try to list models to verify connectivity
client.list()
return True
except (ConnectionError, TimeoutError, RuntimeError, OSError):
return False
@property
def requires_cloud_consent(self) -> bool:
"""Ollama runs locally, no cloud consent required."""
return False
async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
"""Generate evidence-linked summary using Ollama.
Args:
request: Summarization request with segments.
Returns:
SummarizationResult with generated summary.
Raises:
ProviderUnavailableError: If Ollama is not accessible.
SummarizationTimeoutError: If request times out.
InvalidResponseError: If response cannot be parsed.
"""
start = time.monotonic()
# Handle empty segments
if not request.segments:
return SummarizationResult(
summary=Summary(
meeting_id=request.meeting_id,
executive_summary="No transcript segments to summarize.",
key_points=[],
action_items=[],
generated_at=datetime.now(UTC),
model_version=self._model,
),
model_name=self._model,
provider_name=self.provider_name,
tokens_used=None,
latency_ms=0.0,
)
try:
client = self._get_client()
except ProviderUnavailableError:
raise
user_prompt = build_transcript_prompt(request)
try:
# Offload blocking call to a worker thread to avoid blocking the event loop
response = await asyncio.to_thread(
client.chat,
model=self._model,
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_prompt},
],
options={"temperature": 0.3},
format="json",
)
except TimeoutError as e:
raise SummarizationTimeoutError(f"Ollama request timed out: {e}") from e
except Exception as e:
err_str = str(e).lower()
if "connection" in err_str or "refused" in err_str:
raise ProviderUnavailableError(f"Cannot connect to Ollama: {e}") from e
raise InvalidResponseError(f"Ollama error: {e}") from e
# Extract response text
content = response.get("message", {}).get("content", "")
if not content:
raise InvalidResponseError("Empty response from Ollama")
# Parse into Summary
summary = parse_llm_response(content, request)
summary = Summary(
meeting_id=summary.meeting_id,
executive_summary=summary.executive_summary,
key_points=summary.key_points,
action_items=summary.action_items,
generated_at=summary.generated_at,
model_version=self._model,
)
elapsed_ms = (time.monotonic() - start) * 1000
# Extract token usage if available
tokens_used = None
if "eval_count" in response:
tokens_used = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
return SummarizationResult(
summary=summary,
model_name=self._model,
provider_name=self.provider_name,
tokens_used=tokens_used,
latency_ms=elapsed_ms,
)
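A sketch of local inference with `OllamaSummarizer`, assuming an Ollama server on the default host and, once more, the hypothetical `request` from the `_parsing.py` sketch:

```python
import asyncio

from noteflow.domain.summarization import ProviderUnavailableError
from noteflow.infrastructure.summarization.ollama_provider import OllamaSummarizer

summarizer = OllamaSummarizer(model="llama3.2", host="http://localhost:11434")
try:
    result = asyncio.run(summarizer.summarize(request))
    print(result.summary.executive_summary, result.tokens_used)
except ProviderUnavailableError:
    print("Ollama is not reachable; start the server with `ollama serve`")
```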
File: src/noteflow/infrastructure/triggers/audio_activity.py
"""Audio activity signal provider.
Detects sustained audio activity using an existing RmsLevelProvider.
"""
from __future__ import annotations
import threading
import time
from collections import deque
from dataclasses import dataclass
from typing import TYPE_CHECKING
from noteflow.domain.triggers.entities import TriggerSignal, TriggerSource
if TYPE_CHECKING:
import numpy as np
from numpy.typing import NDArray
from noteflow.infrastructure.audio import RmsLevelProvider
@dataclass
class AudioActivitySettings:
"""Configuration for audio activity detection.
Attributes:
enabled: Whether audio activity detection is enabled.
threshold_db: Minimum dB level to consider as activity (default -40 dB).
window_seconds: Time window for sustained activity detection.
min_active_ratio: Minimum ratio of active samples in window (0.0-1.0).
min_samples: Minimum samples required before evaluation.
max_history: Maximum samples retained in history.
weight: Confidence weight contributed by this provider.
"""
enabled: bool
threshold_db: float
window_seconds: float
min_active_ratio: float
min_samples: int
max_history: int
weight: float
def __post_init__(self) -> None:
if self.min_samples > self.max_history:
msg = "min_samples must be <= max_history"
raise ValueError(msg)
class AudioActivityProvider:
"""Detect sustained audio activity using existing RmsLevelProvider.
Reuses RmsLevelProvider from infrastructure/audio for dB calculation.
Tracks activity history over a sliding window and generates signals
when sustained speech activity is detected.
"""
def __init__(
self,
level_provider: RmsLevelProvider,
settings: AudioActivitySettings,
) -> None:
"""Initialize audio activity provider.
Args:
level_provider: Existing RmsLevelProvider instance to reuse.
settings: Configuration settings for audio activity detection.
"""
self._level_provider = level_provider
self._settings = settings
self._history: deque[tuple[float, bool]] = deque(maxlen=self._settings.max_history)
self._lock = threading.Lock()
@property
def source(self) -> TriggerSource:
"""Get the source type for this provider."""
return TriggerSource.AUDIO_ACTIVITY
@property
def max_weight(self) -> float:
"""Get the maximum weight this provider can contribute."""
return self._settings.weight
def update(self, frames: NDArray[np.float32], timestamp: float) -> None:
"""Update activity history with new audio frames.
Call this from the audio capture callback to feed new samples.
Args:
frames: Audio samples as float32 array.
timestamp: Monotonic timestamp of the audio chunk.
"""
if not self._settings.enabled:
return
db = self._level_provider.get_db(frames)
is_active = db >= self._settings.threshold_db
with self._lock:
self._history.append((timestamp, is_active))
def get_signal(self) -> TriggerSignal | None:
"""Get current signal if sustained activity detected.
Returns:
TriggerSignal if activity ratio exceeds threshold, None otherwise.
"""
if not self._settings.enabled:
return None
# Need minimum samples before we can evaluate
with self._lock:
history = list(self._history)
if len(history) < self._settings.min_samples:
return None
# Prune old samples outside window
now = time.monotonic()
cutoff = now - self._settings.window_seconds
recent = [(ts, active) for ts, active in history if ts >= cutoff]
if len(recent) < self._settings.min_samples:
return None
# Calculate activity ratio
        active_count = sum(1 for _, active in recent if active)
ratio = active_count / len(recent)
if ratio < self._settings.min_active_ratio:
return None
return TriggerSignal(source=self.source, weight=self.max_weight)
def is_enabled(self) -> bool:
"""Check if this provider is enabled."""
return self._settings.enabled
def clear_history(self) -> None:
"""Clear activity history. Useful when recording starts."""
with self._lock:
self._history.clear()
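Wiring the provider into a capture loop, sketched with synthetic noise. All `AudioActivitySettings` fields come from the dataclass above, and the timestamps are monotonic as `update()` expects:

```python
import time

import numpy as np

from noteflow.infrastructure.audio import RmsLevelProvider
from noteflow.infrastructure.triggers.audio_activity import (
    AudioActivityProvider,
    AudioActivitySettings,
)

settings = AudioActivitySettings(
    enabled=True,
    threshold_db=-40.0,  # speech-ish levels sit well above this
    window_seconds=5.0,
    min_active_ratio=0.6,
    min_samples=10,
    max_history=100,
    weight=0.3,
)
provider = AudioActivityProvider(RmsLevelProvider(), settings)

rng = np.random.default_rng(0)
for _ in range(20):  # stand-in for the audio capture callback
    chunk = (rng.standard_normal(1600) * 0.1).astype(np.float32)  # ~-20 dB noise
    provider.update(chunk, timestamp=time.monotonic())

signal = provider.get_signal()  # TriggerSignal with weight 0.3 once activity is sustained
assert signal is not None
```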
File: src/noteflow/infrastructure/__init__.py
"""NoteFlow infrastructure layer.
Contains implementations of ports and adapters for external systems:
- asr: Speech-to-text transcription (faster-whisper)
- diarization: Speaker diarization (pyannote.audio + diart)
- persistence: Database access (SQLAlchemy + PostgreSQL)
- security: Encryption and key management (AES-GCM + OS keychain)
"""
File: tests/application/test_export_service.py
"""Tests for ExportService application service."""
from __future__ import annotations
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
import pytest
from noteflow.application.services.export_service import ExportService
from noteflow.domain.entities import Meeting, Segment
from noteflow.domain.value_objects import MeetingId
def _uow_with_meeting(meeting: Meeting | None, segments: list[Segment] | None = None) -> MagicMock:
"""Build a minimal async UnitOfWork double."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.meetings = MagicMock(get=AsyncMock(return_value=meeting))
uow.segments = MagicMock(get_by_meeting=AsyncMock(return_value=segments or []))
return uow
@pytest.mark.asyncio
async def test_export_transcript_meeting_not_found() -> None:
"""export_transcript should raise when meeting is missing."""
meeting_id = MeetingId(uuid4())
service = ExportService(_uow_with_meeting(meeting=None))
with pytest.raises(ValueError, match="not found"):
await service.export_transcript(meeting_id)
@pytest.mark.asyncio
async def test_export_to_file_infers_format_and_writes(tmp_path: Path) -> None:
"""export_to_file infers markdown from extension and writes content."""
meeting = Meeting.create(title="Demo")
segments = [
Segment(
segment_id=0,
text="Hello world",
start_time=0.0,
end_time=1.0,
meeting_id=meeting.id,
)
]
uow = _uow_with_meeting(meeting, segments)
service = ExportService(uow)
output = await service.export_to_file(meeting.id, tmp_path / "export.markdown")
assert output.suffix == ".md"
assert output.exists()
content = output.read_text(encoding="utf-8")
assert "Hello world" in content
def test_infer_format_rejects_unknown_extension() -> None:
"""_infer_format_from_extension should raise for unknown suffix."""
service = ExportService(_uow_with_meeting(None))
with pytest.raises(ValueError, match="Cannot infer format"):
service._infer_format_from_extension(".txt") # type: ignore[arg-type]
def test_get_exporter_raises_for_unknown_format() -> None:
"""_get_exporter should guard against unsupported enums."""
service = ExportService(_uow_with_meeting(None))
class FakeFormat:
HTML = "html"
with pytest.raises(ValueError, match="Unsupported"):
service._get_exporter(FakeFormat.HTML) # type: ignore[arg-type]
def test_get_supported_formats_returns_names_and_extensions() -> None:
"""get_supported_formats should expose format metadata."""
service = ExportService(_uow_with_meeting(None))
formats = {name.lower(): ext for name, ext in service.get_supported_formats()}
assert formats["markdown"] == ".md"
assert formats["html"] == ".html"
File: tests/application/test_retention_service.py
"""Tests for RetentionService."""
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
import pytest
from noteflow.application.services.retention_service import RetentionReport, RetentionService
from noteflow.domain.entities import Meeting
def _create_meeting(ended_at: datetime | None = None) -> Meeting:
"""Create a test meeting with optional ended_at."""
meeting = Meeting.create(title="Test Meeting")
if ended_at:
meeting._ended_at = ended_at
return meeting
class TestRetentionServiceProperties:
"""Tests for RetentionService properties."""
def test_is_enabled_reflects_init(self) -> None:
"""is_enabled should reflect constructor parameter."""
uow = MagicMock()
def factory() -> MagicMock:
return uow
enabled_service = RetentionService(factory, retention_days=30, enabled=True)
disabled_service = RetentionService(factory, retention_days=30, enabled=False)
assert enabled_service.is_enabled is True
assert disabled_service.is_enabled is False
def test_retention_days_property(self) -> None:
"""retention_days should return configured value."""
uow = MagicMock()
service = RetentionService(lambda: uow, retention_days=45)
assert service.retention_days == 45
def test_cutoff_date_calculation(self) -> None:
"""cutoff_date should be retention_days in the past."""
uow = MagicMock()
service = RetentionService(lambda: uow, retention_days=30)
cutoff = service.cutoff_date
expected = datetime.now(UTC) - timedelta(days=30)
# Allow 1 second tolerance
assert abs((cutoff - expected).total_seconds()) < 1
class TestRetentionServiceFindExpired:
"""Tests for find_expired_meetings method."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.meetings = MagicMock()
return uow
@pytest.mark.asyncio
async def test_find_expired_returns_meetings(self, mock_uow: MagicMock) -> None:
"""find_expired_meetings should return meetings from repository."""
old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100))
mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting])
service = RetentionService(lambda: mock_uow, retention_days=30)
result = await service.find_expired_meetings()
assert len(result) == 1
mock_uow.meetings.find_older_than.assert_awaited_once()
@pytest.mark.asyncio
async def test_find_expired_returns_empty_list(self, mock_uow: MagicMock) -> None:
"""find_expired_meetings should return empty list when none found."""
mock_uow.meetings.find_older_than = AsyncMock(return_value=[])
service = RetentionService(lambda: mock_uow, retention_days=30)
result = await service.find_expired_meetings()
assert result == []
class TestRetentionServiceRunCleanup:
"""Tests for run_cleanup method."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.meetings = MagicMock()
uow.commit = AsyncMock()
return uow
@pytest.mark.asyncio
async def test_run_cleanup_disabled_returns_empty_report(self, mock_uow: MagicMock) -> None:
"""run_cleanup should return empty report when disabled."""
service = RetentionService(lambda: mock_uow, retention_days=30, enabled=False)
report = await service.run_cleanup()
assert report.meetings_checked == 0
assert report.meetings_deleted == 0
assert report.errors == ()
@pytest.mark.asyncio
async def test_run_cleanup_dry_run_does_not_delete(self, mock_uow: MagicMock) -> None:
"""run_cleanup with dry_run should not delete meetings."""
old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100))
mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting])
service = RetentionService(lambda: mock_uow, retention_days=30, enabled=False)
report = await service.run_cleanup(dry_run=True)
# Should report meeting was checked but not deleted
assert report.meetings_checked == 1
assert report.meetings_deleted == 0
assert report.errors == ()
@pytest.mark.asyncio
async def test_run_cleanup_deletes_expired_meetings(
self, mock_uow: MagicMock, tmp_path: Path
) -> None:
"""run_cleanup should delete expired meetings when enabled."""
old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100))
mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting])
mock_uow.meetings.get = AsyncMock(return_value=old_meeting)
mock_uow.meetings.delete = AsyncMock(return_value=True)
service = RetentionService(
lambda: mock_uow,
retention_days=30,
meetings_dir=tmp_path,
enabled=True,
)
report = await service.run_cleanup()
assert report.meetings_checked == 1
assert report.meetings_deleted == 1
assert report.errors == ()
@pytest.mark.asyncio
async def test_run_cleanup_handles_errors_gracefully(self, mock_uow: MagicMock) -> None:
"""run_cleanup should capture errors without failing."""
old_meeting = _create_meeting(ended_at=datetime.now(UTC) - timedelta(days=100))
mock_uow.meetings.find_older_than = AsyncMock(return_value=[old_meeting])
mock_uow.meetings.get = AsyncMock(side_effect=RuntimeError("DB error"))
service = RetentionService(lambda: mock_uow, retention_days=30, enabled=True)
report = await service.run_cleanup()
assert report.meetings_checked == 1
assert report.meetings_deleted == 0
assert len(report.errors) == 1
assert "DB error" in report.errors[0]
class TestRetentionReport:
"""Tests for RetentionReport dataclass."""
def test_retention_report_is_immutable(self) -> None:
"""RetentionReport should be frozen."""
report = RetentionReport(
meetings_checked=5,
meetings_deleted=3,
errors=("error1",),
)
with pytest.raises(AttributeError):
report.meetings_checked = 10 # type: ignore[misc]
def test_retention_report_stores_values(self) -> None:
"""RetentionReport should store all values correctly."""
report = RetentionReport(
meetings_checked=10,
meetings_deleted=8,
errors=("err1", "err2"),
)
assert report.meetings_checked == 10
assert report.meetings_deleted == 8
assert report.errors == ("err1", "err2")
File: tests/domain/test_meeting.py
"""Tests for Meeting entity."""
from __future__ import annotations
from datetime import datetime, timedelta
import pytest
from noteflow.domain.entities.meeting import Meeting
from noteflow.domain.entities.segment import Segment
from noteflow.domain.entities.summary import Summary
from noteflow.domain.value_objects import MeetingState
class TestMeetingCreation:
"""Tests for Meeting creation methods."""
def test_create_with_default_title(self) -> None:
"""Test factory method generates default title."""
meeting = Meeting.create()
assert meeting.title.startswith("Meeting ")
assert meeting.state == MeetingState.CREATED
assert meeting.started_at is None
assert meeting.ended_at is None
assert meeting.segments == []
assert meeting.summary is None
def test_create_with_custom_title(self) -> None:
"""Test factory method accepts custom title."""
meeting = Meeting.create(title="Team Standup")
assert meeting.title == "Team Standup"
def test_create_with_metadata(self) -> None:
"""Test factory method accepts metadata."""
metadata = {"project": "NoteFlow", "team": "Engineering"}
meeting = Meeting.create(title="Sprint Planning", metadata=metadata)
assert meeting.metadata == metadata
def test_from_uuid_str(self) -> None:
"""Test creation from existing UUID string."""
uuid_str = "12345678-1234-5678-1234-567812345678"
meeting = Meeting.from_uuid_str(
uuid_str=uuid_str,
title="Restored Meeting",
state=MeetingState.STOPPED,
)
assert str(meeting.id) == uuid_str
assert meeting.title == "Restored Meeting"
assert meeting.state == MeetingState.STOPPED
class TestMeetingStateTransitions:
"""Tests for Meeting state machine transitions."""
def test_start_recording_from_created(self) -> None:
"""Test starting recording from CREATED state."""
meeting = Meeting.create()
meeting.start_recording()
assert meeting.state == MeetingState.RECORDING
assert meeting.started_at is not None
def test_start_recording_invalid_state_raises(self) -> None:
"""Test starting recording from invalid state raises."""
meeting = Meeting.create()
meeting.start_recording()
meeting.begin_stopping()
meeting.stop_recording()
with pytest.raises(ValueError, match="Cannot start recording"):
meeting.start_recording()
def test_begin_stopping_from_recording(self) -> None:
"""Test transitioning to STOPPING from RECORDING state."""
meeting = Meeting.create()
meeting.start_recording()
meeting.begin_stopping()
assert meeting.state == MeetingState.STOPPING
def test_begin_stopping_invalid_state_raises(self) -> None:
"""Test begin_stopping from invalid state raises."""
meeting = Meeting.create()
with pytest.raises(ValueError, match="Cannot begin stopping"):
meeting.begin_stopping()
def test_stop_recording_from_stopping(self) -> None:
"""Test stopping recording from STOPPING state."""
meeting = Meeting.create()
meeting.start_recording()
meeting.begin_stopping()
meeting.stop_recording()
assert meeting.state == MeetingState.STOPPED
assert meeting.ended_at is not None
def test_stop_recording_from_recording_raises(self) -> None:
"""Test stopping recording directly from RECORDING raises.
Must go through STOPPING state for graceful shutdown.
"""
meeting = Meeting.create()
meeting.start_recording()
with pytest.raises(ValueError, match="Cannot stop recording"):
meeting.stop_recording()
def test_stop_recording_from_created_raises(self) -> None:
"""Test stopping recording from CREATED state raises."""
meeting = Meeting.create()
with pytest.raises(ValueError, match="Cannot stop recording"):
meeting.stop_recording()
def test_complete_from_stopped(self) -> None:
"""Test completing meeting from STOPPED state."""
meeting = Meeting.create()
meeting.start_recording()
meeting.begin_stopping()
meeting.stop_recording()
meeting.complete()
assert meeting.state == MeetingState.COMPLETED
def test_complete_invalid_state_raises(self) -> None:
"""Test completing from invalid state raises."""
meeting = Meeting.create()
with pytest.raises(ValueError, match="Cannot complete"):
meeting.complete()
def test_mark_error(self) -> None:
"""Test marking meeting as error state."""
meeting = Meeting.create()
meeting.mark_error()
assert meeting.state == MeetingState.ERROR
def test_stopping_to_recording_invalid(self) -> None:
"""Test cannot transition from STOPPING back to RECORDING."""
meeting = Meeting.create()
meeting.start_recording()
meeting.begin_stopping()
with pytest.raises(ValueError, match="Cannot start recording"):
meeting.start_recording()
class TestMeetingSegments:
"""Tests for Meeting segment management."""
def test_add_segment(self) -> None:
"""Test adding a segment to meeting."""
meeting = Meeting.create()
segment = Segment(segment_id=0, text="Hello world", start_time=0.0, end_time=1.0)
meeting.add_segment(segment)
assert meeting.segment_count == 1
assert meeting.segments[0] == segment
def test_next_segment_id_empty(self) -> None:
"""Test next segment ID when no segments exist."""
meeting = Meeting.create()
assert meeting.next_segment_id == 0
def test_next_segment_id_with_segments(self) -> None:
"""Test next segment ID increments correctly."""
meeting = Meeting.create()
meeting.add_segment(Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0))
meeting.add_segment(Segment(segment_id=1, text="Second", start_time=1.0, end_time=2.0))
assert meeting.next_segment_id == 2
def test_next_segment_id_non_contiguous(self) -> None:
"""Test next segment ID uses max + 1 for non-contiguous IDs."""
meeting = Meeting.create()
meeting.add_segment(Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0))
meeting.add_segment(Segment(segment_id=5, text="Sixth", start_time=1.0, end_time=2.0))
assert meeting.next_segment_id == 6
def test_full_transcript(self) -> None:
"""Test concatenating all segment text."""
meeting = Meeting.create()
meeting.add_segment(Segment(segment_id=0, text="Hello", start_time=0.0, end_time=1.0))
meeting.add_segment(Segment(segment_id=1, text="world", start_time=1.0, end_time=2.0))
assert meeting.full_transcript == "Hello world"
def test_full_transcript_empty(self) -> None:
"""Test full_transcript is empty when there are no segments."""
meeting = Meeting.create()
assert meeting.full_transcript == ""
class TestMeetingProperties:
"""Tests for Meeting computed properties."""
def test_duration_seconds_not_started(self) -> None:
"""Test duration is 0 when not started."""
meeting = Meeting.create()
assert meeting.duration_seconds == 0.0
def test_duration_seconds_with_times(self) -> None:
"""Test duration calculation with start and end times."""
meeting = Meeting.create()
meeting.started_at = datetime(2024, 1, 1, 10, 0, 0)
meeting.ended_at = datetime(2024, 1, 1, 10, 30, 0)
assert meeting.duration_seconds == 1800.0
def test_duration_seconds_in_progress(self) -> None:
"""Test duration is > 0 when started but not ended."""
meeting = Meeting.create()
meeting.started_at = datetime.now() - timedelta(seconds=5)
assert meeting.duration_seconds >= 5.0
def test_is_active_created(self) -> None:
"""Test is_active returns True for CREATED state."""
meeting = Meeting.create()
assert meeting.is_active() is True
def test_is_active_recording(self) -> None:
"""Test is_active returns True for RECORDING state."""
meeting = Meeting.create()
meeting.start_recording()
assert meeting.is_active() is True
def test_is_active_stopping(self) -> None:
"""Test is_active returns False for STOPPING state."""
meeting = Meeting.create()
meeting.start_recording()
meeting.begin_stopping()
assert meeting.is_active() is False
def test_is_active_stopped(self) -> None:
"""Test is_active returns False for STOPPED state."""
meeting = Meeting.create()
meeting.start_recording()
meeting.begin_stopping()
meeting.stop_recording()
assert meeting.is_active() is False
def test_has_summary_false(self) -> None:
"""Test has_summary returns False when no summary."""
meeting = Meeting.create()
assert meeting.has_summary() is False
def test_has_summary_true(self) -> None:
"""Test has_summary returns True when summary set."""
meeting = Meeting.create()
summary = Summary(meeting_id=meeting.id)
meeting.set_summary(summary)
assert meeting.has_summary() is True
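The happy-path lifecycle these tests encode, as one linear sketch:

```python
from noteflow.domain.entities.meeting import Meeting
from noteflow.domain.value_objects import MeetingState

# CREATED -> RECORDING -> STOPPING -> STOPPED -> COMPLETED
meeting = Meeting.create(title="Standup")
meeting.start_recording()
meeting.begin_stopping()  # calling stop_recording() straight from RECORDING raises ValueError
meeting.stop_recording()
meeting.complete()
assert meeting.state == MeetingState.COMPLETED
```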
File: tests/domain/test_triggers.py
"""Tests for trigger domain entities."""
from __future__ import annotations
import pytest
from noteflow.domain.triggers import TriggerAction, TriggerDecision, TriggerSignal, TriggerSource
def test_trigger_signal_weight_bounds() -> None:
"""TriggerSignal enforces weight bounds."""
with pytest.raises(ValueError, match=r"Weight must be 0\.0-1\.0"):
TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=-0.1)
with pytest.raises(ValueError, match=r"Weight must be 0\.0-1\.0"):
TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=1.1)
signal = TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=0.5)
assert signal.weight == 0.5
def test_trigger_decision_primary_signal_and_detected_app() -> None:
"""TriggerDecision exposes primary signal and detected app."""
audio = TriggerSignal(source=TriggerSource.AUDIO_ACTIVITY, weight=0.2)
foreground = TriggerSignal(
source=TriggerSource.FOREGROUND_APP,
weight=0.4,
app_name="Zoom Meeting",
)
decision = TriggerDecision(
action=TriggerAction.NOTIFY,
confidence=0.6,
signals=(audio, foreground),
)
assert decision.primary_signal == foreground
assert decision.detected_app == "Zoom Meeting"
empty = TriggerDecision(action=TriggerAction.IGNORE, confidence=0.0, signals=())
assert empty.primary_signal is None
assert empty.detected_app is None
File: tests/grpc/test_generate_summary.py
"""Tests for GenerateSummary RPC fallback behavior."""
from __future__ import annotations
import pytest
from noteflow.domain.entities import Segment
from noteflow.domain.summarization import ProviderUnavailableError
from noteflow.grpc.proto import noteflow_pb2
from noteflow.grpc.service import NoteFlowServicer
class _DummyContext:
"""Minimal gRPC context that raises if abort is invoked."""
async def abort(self, code, details): # type: ignore[override]
raise AssertionError(f"abort called: {code} - {details}")
@pytest.mark.asyncio
async def test_generate_summary_uses_placeholder_when_service_missing() -> None:
"""Ensure RPC returns a placeholder when no summarization service is configured."""
servicer = NoteFlowServicer()
store = servicer._get_memory_store()
meeting = store.create("Test Meeting")
store.add_segment(
str(meeting.id),
Segment(segment_id=0, text="Hello world", start_time=0.0, end_time=1.0, language="en"),
)
response = await servicer.GenerateSummary(
noteflow_pb2.GenerateSummaryRequest(meeting_id=str(meeting.id)),
_DummyContext(),
)
assert response.executive_summary != ""
assert response.model_version == "placeholder-v0"
retrieved_meeting = store.get(str(meeting.id))
assert retrieved_meeting is not None, "Meeting should exist after creation"
assert retrieved_meeting.summary is not None
class _FailingSummarizationService:
"""Summarization service that always reports provider unavailability."""
async def summarize(self, meeting_id, segments): # type: ignore[override]
raise ProviderUnavailableError("LLM unavailable")
@pytest.mark.asyncio
async def test_generate_summary_falls_back_when_provider_unavailable() -> None:
"""Provider errors should fall back to placeholder instead of failing the RPC."""
servicer = NoteFlowServicer(summarization_service=_FailingSummarizationService())
store = servicer._get_memory_store()
meeting = store.create("Test Meeting")
store.add_segment(
str(meeting.id),
Segment(segment_id=1, text="Action item noted", start_time=0.0, end_time=2.0, language="en"),
)
response = await servicer.GenerateSummary(
noteflow_pb2.GenerateSummaryRequest(meeting_id=str(meeting.id)),
_DummyContext(),
)
assert response.executive_summary != ""
assert response.model_version == "placeholder-v0"
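The fallback contract these tests assert can be restated as a standalone pattern. This is a sketch of the behavior only, not the servicer's actual implementation:

```python
from noteflow.domain.summarization import ProviderUnavailableError

async def summarize_or_placeholder(service, meeting_id, segments, placeholder):
    """Mirror the asserted behavior: a missing or unavailable provider
    degrades to a placeholder summary instead of failing the RPC."""
    if service is None:
        return placeholder
    try:
        return await service.summarize(meeting_id, segments)
    except ProviderUnavailableError:
        return placeholder
```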
File: tests/infrastructure/asr/test_dto.py
"""Tests for ASR DTO validation and properties."""
from __future__ import annotations
from dataclasses import FrozenInstanceError
import pytest
from noteflow.infrastructure.asr.dto import (
AsrResult,
PartialUpdate,
VadEvent,
VadEventType,
WordTiming,
)
class TestWordTimingDto:
"""Tests for WordTiming DTO."""
def test_word_timing_valid(self) -> None:
word = WordTiming(word="hello", start=0.0, end=0.5, probability=0.75)
assert word.word == "hello"
assert word.start == 0.0
assert word.end == 0.5
assert word.probability == 0.75
def test_word_timing_invalid_times_raises(self) -> None:
with pytest.raises(ValueError, match=r"Word end .* < start"):
WordTiming(word="bad", start=1.0, end=0.5, probability=0.5)
@pytest.mark.parametrize("prob", [-0.1, 1.1])
def test_word_timing_invalid_probability_raises(self, prob: float) -> None:
with pytest.raises(ValueError, match=r"Probability must be 0\.0-1\.0"):
WordTiming(word="bad", start=0.0, end=0.1, probability=prob)
def test_word_timing_frozen(self) -> None:
word = WordTiming(word="hello", start=0.0, end=0.5, probability=0.9)
with pytest.raises(FrozenInstanceError):
word.word = "mutate" # type: ignore[misc]
class TestAsrResultDto:
"""Tests for AsrResult DTO."""
def test_asr_result_duration(self) -> None:
result = AsrResult(text="hello", start=1.0, end=3.5)
assert result.duration == 2.5
def test_asr_result_invalid_times_raises(self) -> None:
with pytest.raises(ValueError, match=r"Segment end .* < start"):
AsrResult(text="bad", start=2.0, end=1.0)
class TestPartialUpdateDto:
"""Tests for PartialUpdate DTO."""
def test_partial_update_invalid_times_raises(self) -> None:
with pytest.raises(ValueError, match=r"Partial end .* < start"):
PartialUpdate(text="partial", start=2.0, end=1.0)
class TestVadEventDto:
"""Tests for VadEvent DTO."""
def test_vad_event_invalid_timestamp_raises(self) -> None:
with pytest.raises(ValueError, match="Timestamp must be non-negative"):
VadEvent(event_type=VadEventType.SPEECH_START, timestamp=-1.0)
@pytest.mark.parametrize("confidence", [-0.1, 1.1])
def test_vad_event_invalid_confidence_raises(self, confidence: float) -> None:
with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"):
VadEvent(event_type=VadEventType.SPEECH_END, timestamp=0.5, confidence=confidence)
File: tests/infrastructure/asr/test_segmenter.py
"""Tests for Segmenter state machine."""
from __future__ import annotations
import numpy as np
import pytest
from noteflow.infrastructure.asr.segmenter import (
AudioSegment,
Segmenter,
SegmenterConfig,
SegmenterState,
)
class TestSegmenterInitialization:
"""Tests for Segmenter initialization."""
def test_default_config(self) -> None:
"""Segmenter uses default config when not provided."""
segmenter = Segmenter()
assert segmenter.config.sample_rate == 16000
assert segmenter.config.min_speech_duration == 0.3
def test_custom_config(self) -> None:
"""Segmenter accepts custom configuration."""
config = SegmenterConfig(sample_rate=44100, max_segment_duration=60.0)
segmenter = Segmenter(config=config)
assert segmenter.config.sample_rate == 44100
assert segmenter.config.max_segment_duration == 60.0
def test_initial_state_is_idle(self) -> None:
"""Segmenter starts in IDLE state."""
segmenter = Segmenter()
assert segmenter.state == SegmenterState.IDLE
class TestSegmenterStateTransitions:
"""Tests for Segmenter state machine transitions."""
@pytest.fixture
def segmenter(self) -> Segmenter:
"""Create segmenter with test-friendly config."""
return Segmenter(
config=SegmenterConfig(
sample_rate=16000,
trailing_silence=0.1,
leading_buffer=0.1,
min_speech_duration=0.1,
)
)
@staticmethod
def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray:
"""Create test audio of specified duration."""
return np.zeros(int(duration * sample_rate), dtype=np.float32)
def test_idle_to_speech_on_voice_detected(self, segmenter: Segmenter) -> None:
"""Transition from IDLE to SPEECH when voice detected."""
audio = self.make_audio(0.1)
list(segmenter.process_audio(audio, is_speech=True))
assert segmenter.state == SegmenterState.SPEECH
def test_idle_stays_idle_on_silence(self, segmenter: Segmenter) -> None:
"""Stay in IDLE state when no speech detected."""
audio = self.make_audio(0.1)
list(segmenter.process_audio(audio, is_speech=False))
assert segmenter.state == SegmenterState.IDLE
def test_speech_to_trailing_on_silence(self, segmenter: Segmenter) -> None:
"""Transition from SPEECH to TRAILING when speech ends."""
speech_audio = self.make_audio(0.1)
short_silence = self.make_audio(0.05) # Less than trailing_silence threshold
list(segmenter.process_audio(speech_audio, is_speech=True))
list(segmenter.process_audio(short_silence, is_speech=False))
assert segmenter.state == SegmenterState.TRAILING
def test_trailing_to_idle_after_silence_threshold(self, segmenter: Segmenter) -> None:
"""Transition from TRAILING to IDLE after enough silence."""
audio = self.make_audio(0.1)
list(segmenter.process_audio(audio, is_speech=True))
list(segmenter.process_audio(audio, is_speech=False))
list(segmenter.process_audio(audio, is_speech=False))
assert segmenter.state == SegmenterState.IDLE
def test_trailing_to_speech_if_voice_resumes(self, segmenter: Segmenter) -> None:
"""Transition from TRAILING back to SPEECH if voice resumes."""
audio = self.make_audio(0.05)
list(segmenter.process_audio(audio, is_speech=True))
list(segmenter.process_audio(audio, is_speech=False))
assert segmenter.state == SegmenterState.TRAILING
list(segmenter.process_audio(audio, is_speech=True))
assert segmenter.state == SegmenterState.SPEECH
class TestSegmenterEmission:
"""Tests for segment emission behavior."""
@pytest.fixture
def segmenter(self) -> Segmenter:
"""Create segmenter with test-friendly config."""
return Segmenter(
config=SegmenterConfig(
sample_rate=16000,
trailing_silence=0.1,
leading_buffer=0.1,
min_speech_duration=0.0,
)
)
@staticmethod
def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray:
"""Create test audio of specified duration."""
return np.ones(int(duration * sample_rate), dtype=np.float32)
def test_emits_segment_after_trailing_silence(self, segmenter: Segmenter) -> None:
"""Emit segment when trailing silence threshold is reached."""
audio = self.make_audio(0.2)
segments_speech = list(segmenter.process_audio(audio, is_speech=True))
segments_silence = list(segmenter.process_audio(audio, is_speech=False))
assert not segments_speech
assert len(segments_silence) == 1
assert isinstance(segments_silence[0], AudioSegment)
def test_emitted_segment_has_correct_timing(self, segmenter: Segmenter) -> None:
"""Emitted segment has correct start and end times."""
audio = self.make_audio(0.2)
list(segmenter.process_audio(audio, is_speech=True))
segments = list(segmenter.process_audio(audio, is_speech=False))
segment = segments[0]
assert segment.start_time >= 0.0
assert segment.end_time > segment.start_time
assert segment.duration > 0
def test_emitted_segment_contains_audio(self, segmenter: Segmenter) -> None:
"""Emitted segment contains concatenated audio."""
audio = self.make_audio(0.2)
list(segmenter.process_audio(audio, is_speech=True))
segments = list(segmenter.process_audio(audio, is_speech=False))
assert len(segments[0].audio) > 0
def test_emits_on_max_duration(self) -> None:
"""Force emit segment when max duration is reached."""
segmenter = Segmenter(
config=SegmenterConfig(
sample_rate=16000,
max_segment_duration=0.3,
)
)
audio = self.make_audio(0.2)
segments_1 = list(segmenter.process_audio(audio, is_speech=True))
segments_2 = list(segmenter.process_audio(audio, is_speech=True))
assert not segments_1
assert len(segments_2) == 1
def test_min_speech_duration_filters_short_segments(self) -> None:
"""Segments shorter than min_speech_duration should be ignored."""
segmenter = Segmenter(
config=SegmenterConfig(
sample_rate=16000,
min_speech_duration=0.5,
trailing_silence=0.1,
)
)
short_speech = self.make_audio(0.1)
silence = self.make_audio(0.1)
list(segmenter.process_audio(short_speech, is_speech=True))
emitted = list(segmenter.process_audio(silence, is_speech=False))
assert not emitted
class TestSegmenterFlush:
"""Tests for flush behavior."""
@pytest.fixture
def segmenter(self) -> Segmenter:
"""Create segmenter with test-friendly config."""
return Segmenter(
config=SegmenterConfig(
sample_rate=16000,
trailing_silence=0.5,
min_speech_duration=0.0,
)
)
@staticmethod
def make_audio(duration: float, sample_rate: int = 16000) -> np.ndarray:
"""Create test audio of specified duration."""
return np.ones(int(duration * sample_rate), dtype=np.float32)
def test_flush_returns_none_when_idle(self, segmenter: Segmenter) -> None:
"""Flush returns None when no pending audio."""
result = segmenter.flush()
assert result is None
def test_flush_returns_segment_when_in_speech(self, segmenter: Segmenter) -> None:
"""Flush returns pending segment when in SPEECH state."""
audio = self.make_audio(0.2)
list(segmenter.process_audio(audio, is_speech=True))
result = segmenter.flush()
assert result is not None
assert isinstance(result, AudioSegment)
def test_flush_returns_segment_when_in_trailing(self, segmenter: Segmenter) -> None:
"""Flush returns pending segment when in TRAILING state."""
audio = self.make_audio(0.1)
list(segmenter.process_audio(audio, is_speech=True))
list(segmenter.process_audio(audio, is_speech=False))
assert segmenter.state == SegmenterState.TRAILING
result = segmenter.flush()
assert result is not None
assert isinstance(result, AudioSegment)
def test_flush_resets_to_idle(self, segmenter: Segmenter) -> None:
"""Flush resets state to IDLE."""
audio = self.make_audio(0.2)
list(segmenter.process_audio(audio, is_speech=True))
segmenter.flush()
assert segmenter.state == SegmenterState.IDLE
class TestSegmenterReset:
"""Tests for reset behavior."""
def test_reset_clears_state(self) -> None:
"""Reset returns segmenter to initial state."""
segmenter = Segmenter()
audio = np.ones(1600, dtype=np.float32)
list(segmenter.process_audio(audio, is_speech=True))
assert segmenter.state == SegmenterState.SPEECH
segmenter.reset()
assert segmenter.state == SegmenterState.IDLE
class TestAudioSegmentDataclass:
"""Tests for AudioSegment dataclass."""
def test_duration_property(self) -> None:
"""Duration property calculates correctly."""
segment = AudioSegment(
audio=np.zeros(1600, dtype=np.float32),
start_time=1.0,
end_time=2.5,
)
assert segment.duration == 1.5
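Condensing the emission tests into one driving loop (same config values as the emission fixture):

```python
import numpy as np

from noteflow.infrastructure.asr.segmenter import Segmenter, SegmenterConfig

segmenter = Segmenter(
    config=SegmenterConfig(
        sample_rate=16000,
        trailing_silence=0.1,
        leading_buffer=0.1,
        min_speech_duration=0.0,
    )
)
speech = np.ones(3200, dtype=np.float32)    # 0.2 s of "speech" at 16 kHz
silence = np.zeros(3200, dtype=np.float32)  # 0.2 s of silence

emitted = []
for chunk, is_speech in [(speech, True), (silence, False)]:
    emitted.extend(segmenter.process_audio(chunk, is_speech=is_speech))

assert len(emitted) == 1 and emitted[0].duration > 0
```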
File: tests/infrastructure/asr/test_streaming_vad.py
"""Tests for StreamingVad and EnergyVad."""
from __future__ import annotations
import numpy as np
from noteflow.infrastructure.asr.streaming_vad import (
EnergyVad,
EnergyVadConfig,
StreamingVad,
)
class TestEnergyVadBasics:
"""Basic tests for EnergyVad."""
def test_default_config(self) -> None:
"""EnergyVad uses default config when not provided."""
vad = EnergyVad()
assert vad.config.speech_threshold == 0.01
assert vad.config.silence_threshold == 0.005
def test_custom_config(self) -> None:
"""EnergyVad accepts custom configuration."""
config = EnergyVadConfig(speech_threshold=0.02, min_speech_frames=5)
vad = EnergyVad(config=config)
assert vad.config.speech_threshold == 0.02
assert vad.config.min_speech_frames == 5
def test_initial_state_is_silence(self) -> None:
"""EnergyVad starts in silence state."""
vad = EnergyVad()
assert vad._is_speech is False
class TestEnergyVadDetection:
"""Tests for EnergyVad speech detection."""
def test_detects_silence_for_zeros(self) -> None:
"""Silent audio detected as non-speech."""
vad = EnergyVad()
audio = np.zeros(1600, dtype=np.float32)
result = vad.process(audio)
assert result is False
def test_detects_speech_for_high_energy(self) -> None:
"""High energy audio eventually detected as speech."""
vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=2))
# Audio with energy above threshold
audio = np.ones(1600, dtype=np.float32) * 0.1
vad.process(audio)
result = vad.process(audio)
assert result is True
def test_speech_requires_consecutive_frames(self) -> None:
"""Speech detection requires min_speech_frames consecutive frames."""
vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=3))
audio = np.ones(1600, dtype=np.float32) * 0.1
assert vad.process(audio) is False
assert vad.process(audio) is False
assert vad.process(audio) is True
def test_silence_after_speech_requires_frames(self) -> None:
"""Transition to silence requires min_silence_frames."""
config = EnergyVadConfig(min_speech_frames=1, min_silence_frames=2)
vad = EnergyVad(config=config)
speech = np.ones(1600, dtype=np.float32) * 0.1
silence = np.zeros(1600, dtype=np.float32)
vad.process(speech)
assert vad._is_speech is True
vad.process(silence)
assert vad._is_speech is True
vad.process(silence)
assert vad._is_speech is False
def test_hysteresis_prevents_chatter(self) -> None:
"""Hysteresis prevents rapid speech/silence toggling."""
config = EnergyVadConfig(
speech_threshold=0.01,
silence_threshold=0.005,
min_speech_frames=1,
min_silence_frames=1,
)
vad = EnergyVad(config=config)
# Just above speech threshold -> speech
high = np.ones(1600, dtype=np.float32) * 0.015
vad.process(high)
assert vad._is_speech is True
# Between thresholds (below speech, above silence) -> stays speech
mid = np.ones(1600, dtype=np.float32) * 0.007
vad.process(mid)
assert vad._is_speech is True
# Below silence threshold -> silence
low = np.ones(1600, dtype=np.float32) * 0.003
vad.process(low)
assert vad._is_speech is False
class TestEnergyVadReset:
"""Tests for EnergyVad reset behavior."""
def test_reset_clears_state(self) -> None:
"""Reset returns VAD to initial state."""
vad = EnergyVad(config=EnergyVadConfig(min_speech_frames=1))
audio = np.ones(1600, dtype=np.float32) * 0.1
vad.process(audio)
vad.reset()
assert vad._is_speech is False
assert vad._speech_frame_count == 0
assert vad._silence_frame_count == 0
class TestStreamingVad:
"""Tests for StreamingVad wrapper."""
def test_default_engine_is_energy_vad(self) -> None:
"""StreamingVad uses EnergyVad by default."""
vad = StreamingVad()
assert isinstance(vad.engine, EnergyVad)
def test_process_chunk_delegates_to_engine(self) -> None:
"""process_chunk delegates to underlying engine."""
vad = StreamingVad()
silence = np.zeros(1600, dtype=np.float32)
result = vad.process_chunk(silence)
assert result is False
def test_reset_delegates_to_engine(self) -> None:
"""reset delegates to underlying engine."""
vad = StreamingVad()
speech = np.ones(1600, dtype=np.float32) * 0.1
vad.process_chunk(speech)
vad.process_chunk(speech)
vad.reset()
assert vad.engine._is_speech is False
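How the two pieces compose in a capture callback, sketched using only `process_chunk` and `process_audio` as exercised above:

```python
import numpy as np

from noteflow.infrastructure.asr.segmenter import Segmenter
from noteflow.infrastructure.asr.streaming_vad import StreamingVad

vad = StreamingVad()
segmenter = Segmenter()

def on_audio_chunk(chunk: np.ndarray) -> None:
    """Feed one capture callback's worth of float32 samples through VAD + segmenter."""
    is_speech = vad.process_chunk(chunk)
    for segment in segmenter.process_audio(chunk, is_speech=is_speech):
        print(f"segment {segment.start_time:.2f}s-{segment.end_time:.2f}s ({segment.duration:.2f}s)")
```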
File: tests/infrastructure/audio/test_levels.py
"""Tests for RmsLevelProvider and compute_rms."""
from __future__ import annotations
import math
from typing import TYPE_CHECKING
import numpy as np
import pytest
from noteflow.infrastructure.audio import RmsLevelProvider, compute_rms
if TYPE_CHECKING:
from numpy.typing import NDArray
class TestComputeRms:
"""Tests for compute_rms function."""
def test_empty_array_returns_zero(self) -> None:
"""RMS of empty array is zero."""
frames = np.array([], dtype=np.float32)
assert compute_rms(frames) == 0.0
def test_zeros_returns_zero(self) -> None:
"""RMS of zeros is zero."""
frames = np.zeros(100, dtype=np.float32)
assert compute_rms(frames) == 0.0
def test_ones_returns_one(self) -> None:
"""RMS of all ones is one."""
frames = np.ones(100, dtype=np.float32)
assert compute_rms(frames) == 1.0
def test_half_amplitude_returns_half(self) -> None:
"""RMS of constant 0.5 is 0.5."""
frames = np.full(100, 0.5, dtype=np.float32)
assert compute_rms(frames) == 0.5
def test_sine_wave_returns_sqrt_half(self) -> None:
"""RMS of sine wave is approximately 1/sqrt(2)."""
t = np.linspace(0, 2 * np.pi, 1000, dtype=np.float32)
frames = np.sin(t).astype(np.float32)
result = compute_rms(frames)
assert 0.7 < result < 0.72 # ~0.707
class TestRmsLevelProvider:
"""Tests for RmsLevelProvider class."""
@pytest.fixture
def provider(self) -> RmsLevelProvider:
"""Create RmsLevelProvider instance."""
return RmsLevelProvider()
def test_get_rms_empty_array_returns_zero(self, provider: RmsLevelProvider) -> None:
"""Test RMS of empty array is zero."""
frames = np.array([], dtype=np.float32)
assert provider.get_rms(frames) == 0.0
def test_get_rms_silence_returns_zero(
self, provider: RmsLevelProvider, silence_audio: NDArray[np.float32]
) -> None:
"""Test RMS of silence is zero."""
assert provider.get_rms(silence_audio) == 0.0
def test_get_rms_full_scale_returns_one(
self, provider: RmsLevelProvider, full_scale_audio: NDArray[np.float32]
) -> None:
"""Test RMS of full scale signal is one."""
assert provider.get_rms(full_scale_audio) == 1.0
def test_get_rms_half_scale_returns_half(
self, provider: RmsLevelProvider, half_scale_audio: NDArray[np.float32]
) -> None:
"""Test RMS of half scale signal is 0.5."""
assert provider.get_rms(half_scale_audio) == 0.5
def test_get_rms_normalized_range(self, provider: RmsLevelProvider) -> None:
"""Test RMS is always in 0.0-1.0 range."""
# Test with values > 1.0 (should clamp)
frames = np.full(100, 2.0, dtype=np.float32)
rms = provider.get_rms(frames)
assert 0.0 <= rms <= 1.0
def test_get_db_silence_returns_min_db(
self, provider: RmsLevelProvider, silence_audio: NDArray[np.float32]
) -> None:
"""Test dB of silence returns MIN_DB."""
assert provider.get_db(silence_audio) == provider.MIN_DB
def test_get_db_full_scale_returns_zero(
self, provider: RmsLevelProvider, full_scale_audio: NDArray[np.float32]
) -> None:
"""Test dB of full scale signal is 0 dB."""
assert provider.get_db(full_scale_audio) == 0.0
def test_get_db_half_scale_is_negative_six(
self, provider: RmsLevelProvider, half_scale_audio: NDArray[np.float32]
) -> None:
"""Test dB of half scale is approximately -6 dB."""
db = provider.get_db(half_scale_audio)
# -6.02 dB for half amplitude
assert -7.0 < db < -5.0
def test_rms_to_db_zero_returns_min_db(self, provider: RmsLevelProvider) -> None:
"""Test rms_to_db(0) returns MIN_DB."""
assert provider.rms_to_db(0.0) == provider.MIN_DB
def test_rms_to_db_one_returns_zero(self, provider: RmsLevelProvider) -> None:
"""Test rms_to_db(1.0) returns 0 dB."""
assert provider.rms_to_db(1.0) == 0.0
def test_db_to_rms_min_db_returns_zero(self, provider: RmsLevelProvider) -> None:
"""Test db_to_rms(MIN_DB) returns 0."""
assert provider.db_to_rms(provider.MIN_DB) == 0.0
def test_db_to_rms_zero_returns_one(self, provider: RmsLevelProvider) -> None:
"""Test db_to_rms(0) returns 1.0."""
assert provider.db_to_rms(0.0) == 1.0
@pytest.mark.parametrize("rms", [0.1, 0.25, 0.5, 0.75, 1.0])
def test_rms_db_roundtrip(self, provider: RmsLevelProvider, rms: float) -> None:
"""Test RMS -> dB -> RMS roundtrip preserves value."""
db = provider.rms_to_db(rms)
recovered = provider.db_to_rms(db)
assert math.isclose(recovered, rms, rel_tol=1e-9)
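The roundtrip tests above pin down the standard amplitude-to-decibel mapping. A worked sketch, assuming the provider uses the usual 20 * log10 amplitude convention and clamps at its MIN_DB floor (the -60.0 value here is illustrative; the real constant is RmsLevelProvider.MIN_DB):
import math
MIN_DB = -60.0  # illustrative floor; the real value is RmsLevelProvider.MIN_DB
def rms_to_db(rms: float) -> float:
    # 20 * log10 maps amplitude ratios to decibels: 1.0 -> 0 dB, 0.5 -> ~-6.02 dB.
    if rms <= 0.0:
        return MIN_DB
    return max(20.0 * math.log10(rms), MIN_DB)
def db_to_rms(db: float) -> float:
    # Inverse mapping; anything at or below MIN_DB maps back to silence (0.0).
    if db <= MIN_DB:
        return 0.0
    return 10.0 ** (db / 20.0)
For example, rms_to_db(0.5) = 20 * log10(0.5) ≈ -6.02, which is why the half-scale test accepts the -7.0..-5.0 range, and db_to_rms(rms_to_db(0.1)) recovers 0.1 exactly, matching the parametrized roundtrip.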
File: tests/infrastructure/security/test_keystore.py
"""Tests for KeyringKeyStore and InMemoryKeyStore."""
from __future__ import annotations
import types
from typing import Any
import pytest
from noteflow.infrastructure.security import keystore
def _install_fake_keyring(monkeypatch: pytest.MonkeyPatch) -> dict[tuple[str, str], str]:
"""Install a fake keyring backend backed by a dictionary."""
storage: dict[tuple[str, str], str] = {}
class DummyErrors:
class KeyringError(Exception): ...
class PasswordDeleteError(KeyringError): ...
def get_password(service: str, key: str) -> str | None:
return storage.get((service, key))
def set_password(service: str, key: str, value: str) -> None:
storage[(service, key)] = value
def delete_password(service: str, key: str) -> None:
storage.pop((service, key), None)
monkeypatch.setattr(
keystore,
"keyring",
types.SimpleNamespace(
get_password=get_password,
set_password=set_password,
delete_password=delete_password,
errors=DummyErrors,
),
)
return storage
def test_get_or_create_master_key_creates_and_reuses(monkeypatch: pytest.MonkeyPatch) -> None:
"""Master key should be created once and then reused."""
storage = _install_fake_keyring(monkeypatch)
ks = keystore.KeyringKeyStore(service_name="svc", key_name="key")
first = ks.get_or_create_master_key()
second = ks.get_or_create_master_key()
assert len(first) == keystore.KEY_SIZE
assert first == second
assert ("svc", "key") in storage
def test_get_or_create_master_key_wraps_keyring_errors(monkeypatch: pytest.MonkeyPatch) -> None:
"""Keyring errors should surface as RuntimeError."""
class DummyErrors:
class KeyringError(Exception): ...
def raise_error(*_: Any, **__: Any) -> None:
raise DummyErrors.KeyringError("unavailable")
monkeypatch.setattr(
keystore,
"keyring",
types.SimpleNamespace(
get_password=raise_error,
set_password=raise_error,
errors=DummyErrors,
delete_password=raise_error,
),
)
ks = keystore.KeyringKeyStore()
with pytest.raises(RuntimeError, match="Keyring unavailable"):
ks.get_or_create_master_key()
def test_delete_master_key_handles_missing(monkeypatch: pytest.MonkeyPatch) -> None:
"""delete_master_key should swallow missing-key errors."""
storage = _install_fake_keyring(monkeypatch)
class DummyErrors:
class KeyringError(Exception): ...
class PasswordDeleteError(KeyringError): ...
# Reinstall with errors that raise on delete to exercise branch
def delete_password(*_: Any, **__: Any) -> None:
raise DummyErrors.PasswordDeleteError("not found")
monkeypatch.setattr(
keystore,
"keyring",
types.SimpleNamespace(
get_password=lambda s, k: storage.get((s, k)),
set_password=lambda s, k, v: storage.setdefault((s, k), v),
delete_password=delete_password,
errors=DummyErrors,
),
)
ks = keystore.KeyringKeyStore()
# Should not raise even when delete_password errors
ks.delete_master_key()
def test_has_master_key_false_on_errors(monkeypatch: pytest.MonkeyPatch) -> None:
"""has_master_key should return False when keyring raises."""
class DummyErrors:
class KeyringError(Exception): ...
def raise_error(*_: Any, **__: Any) -> None:
raise DummyErrors.KeyringError("oops")
monkeypatch.setattr(
keystore,
"keyring",
types.SimpleNamespace(
get_password=raise_error,
errors=DummyErrors,
delete_password=lambda *a, **k: None,
set_password=lambda *a, **k: None,
),
)
ks = keystore.KeyringKeyStore()
assert ks.has_master_key() is False
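For context, a hedged sketch of the get-or-create flow these tests exercise. Only KEY_SIZE and the three keyring calls are taken from the tests; the random source and string encoding are assumptions about the implementation, not its actual code.
import base64
import os
KEY_SIZE = 32  # assumed value; the real constant is keystore.KEY_SIZE
def get_or_create_master_key(keyring_mod, service: str, key: str) -> bytes:
    # Reuse an existing key if the backend already holds one.
    stored = keyring_mod.get_password(service, key)
    if stored is not None:
        return base64.b64decode(stored)
    # Otherwise generate and persist a new key (encoding is an assumption).
    new_key = os.urandom(KEY_SIZE)
    keyring_mod.set_password(service, key, base64.b64encode(new_key).decode("ascii"))
    return new_key
Run against the dictionary-backed fake from _install_fake_keyring, two calls return the same KEY_SIZE-byte value, which is exactly what test_get_or_create_master_key_creates_and_reuses asserts.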
File: tests/integration/test_trigger_settings.py
"""Integration tests for trigger and retention settings loading."""
from __future__ import annotations
import pytest
from noteflow.config.settings import Settings, get_settings, get_trigger_settings
pytestmark = pytest.mark.integration
@pytest.fixture(autouse=True)
def _clear_settings_cache() -> None:
    """Clear cached settings so each test reads a fresh environment."""
    get_trigger_settings.cache_clear()
    get_settings.cache_clear()
def test_trigger_settings_env_parsing(monkeypatch: pytest.MonkeyPatch) -> None:
"""TriggerSettings should parse CSV lists from environment variables."""
monkeypatch.setenv("NOTEFLOW_TRIGGER_MEETING_APPS", "zoom, teams")
monkeypatch.setenv("NOTEFLOW_TRIGGER_SUPPRESSED_APPS", "spotify")
monkeypatch.setenv("NOTEFLOW_TRIGGER_AUDIO_MIN_SAMPLES", "5")
monkeypatch.setenv("NOTEFLOW_TRIGGER_POLL_INTERVAL_SECONDS", "1.5")
settings = get_trigger_settings()
assert settings.trigger_meeting_apps == ["zoom", "teams"]
assert settings.trigger_suppressed_apps == ["spotify"]
assert settings.trigger_audio_min_samples == 5
assert settings.trigger_poll_interval_seconds == pytest.approx(1.5)
class TestRetentionSettings:
"""Tests for retention settings."""
def test_retention_defaults(self) -> None:
"""Retention settings should have correct defaults."""
# Access via class to check field defaults without loading from env
assert Settings.model_fields["retention_enabled"].default is False
assert Settings.model_fields["retention_days"].default == 90
assert Settings.model_fields["retention_check_interval_hours"].default == 24
def test_retention_env_parsing(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""Retention settings should parse from environment variables."""
monkeypatch.setenv("NOTEFLOW_DATABASE_URL", "postgresql+asyncpg://user:pass@localhost/db")
monkeypatch.setenv("NOTEFLOW_RETENTION_ENABLED", "true")
monkeypatch.setenv("NOTEFLOW_RETENTION_DAYS", "30")
monkeypatch.setenv("NOTEFLOW_RETENTION_CHECK_INTERVAL_HOURS", "12")
settings = get_settings()
assert settings.retention_enabled is True
assert settings.retention_days == 30
assert settings.retention_check_interval_hours == 12
def test_retention_days_validation(self) -> None:
"""Retention days should be validated within range."""
from pydantic import ValidationError
# ge=1, le=3650
with pytest.raises(ValidationError):
Settings.model_validate(
{"database_url": "postgresql+asyncpg://x:x@x/x", "retention_days": 0}
)
with pytest.raises(ValidationError):
Settings.model_validate(
{"database_url": "postgresql+asyncpg://x:x@x/x", "retention_days": 4000}
)
def test_retention_check_interval_validation(self) -> None:
"""Retention check interval should be validated within range."""
from pydantic import ValidationError
# ge=1, le=168
with pytest.raises(ValidationError):
Settings.model_validate(
{
"database_url": "postgresql+asyncpg://x:x@x/x",
"retention_check_interval_hours": 0,
}
)
with pytest.raises(ValidationError):
Settings.model_validate(
{
"database_url": "postgresql+asyncpg://x:x@x/x",
"retention_check_interval_hours": 200,
}
)
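Outside pytest, the same variables can be set on the process environment before loading settings. A minimal sketch (values illustrative; the cache_clear call mirrors the autouse fixture above, since get_settings caches its result):
import os
from noteflow.config.settings import get_settings
os.environ["NOTEFLOW_DATABASE_URL"] = "postgresql+asyncpg://user:pass@localhost/db"
os.environ["NOTEFLOW_RETENTION_ENABLED"] = "true"
os.environ["NOTEFLOW_RETENTION_DAYS"] = "30"
get_settings.cache_clear()  # settings are cached; clear before re-reading the env
settings = get_settings()
assert settings.retention_days == 30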
File: src/noteflow/application/services/__init__.py
"""Application services for NoteFlow use cases."""
from noteflow.application.services.export_service import ExportFormat, ExportService
from noteflow.application.services.meeting_service import MeetingService
from noteflow.application.services.recovery_service import RecoveryService
from noteflow.application.services.retention_service import RetentionReport, RetentionService
from noteflow.application.services.summarization_service import (
SummarizationMode,
SummarizationService,
SummarizationServiceResult,
SummarizationServiceSettings,
)
from noteflow.application.services.trigger_service import TriggerService, TriggerServiceSettings
__all__ = [
"ExportFormat",
"ExportService",
"MeetingService",
"RecoveryService",
"RetentionReport",
"RetentionService",
"SummarizationMode",
"SummarizationService",
"SummarizationServiceResult",
"SummarizationServiceSettings",
"TriggerService",
"TriggerServiceSettings",
]
File: src/noteflow/client/state.py
"""Centralized application state for NoteFlow client.
Composes existing types from grpc.client and infrastructure.audio.
Does not recreate any dataclasses - imports and uses existing ones.
"""
from __future__ import annotations
import logging
from collections.abc import Callable
from dataclasses import dataclass, field
import flet as ft
# REUSE existing types - do not recreate
from noteflow.domain.entities import Summary
from noteflow.domain.triggers import TriggerDecision
from noteflow.grpc.client import AnnotationInfo, MeetingInfo, ServerInfo, TranscriptSegment
from noteflow.infrastructure.audio import (
RmsLevelProvider,
SoundDevicePlayback,
TimestampedAudio,
)
logger = logging.getLogger(__name__)
# Callback type aliases (follow NoteFlowClient pattern from grpc/client.py)
OnTranscriptCallback = Callable[[TranscriptSegment], None]
OnConnectionCallback = Callable[[bool, str], None]
@dataclass
class AppState:
"""Centralized application state for NoteFlow client.
Composes existing types from grpc.client and infrastructure.audio.
All state is centralized here for component access.
"""
# Connection state
server_address: str = "localhost:50051"
connected: bool = False
server_info: ServerInfo | None = None # REUSE existing type
# Recording state
recording: bool = False
current_meeting: MeetingInfo | None = None # REUSE existing type
recording_start_time: float | None = None
elapsed_seconds: int = 0
# Audio state (REUSE existing RmsLevelProvider)
level_provider: RmsLevelProvider = field(default_factory=RmsLevelProvider)
current_db_level: float = -60.0
# Transcript state (REUSE existing TranscriptSegment)
transcript_segments: list[TranscriptSegment] = field(default_factory=list)
current_partial_text: str = "" # Live partial transcript (not yet final)
# Playback state (REUSE existing SoundDevicePlayback)
playback: SoundDevicePlayback = field(default_factory=SoundDevicePlayback)
playback_position: float = 0.0
session_audio_buffer: list[TimestampedAudio] = field(default_factory=list)
# Transcript sync state
highlighted_segment_index: int | None = None
# Annotations state (REUSE existing AnnotationInfo)
annotations: list[AnnotationInfo] = field(default_factory=list)
# Meeting library state (REUSE existing MeetingInfo)
meetings: list[MeetingInfo] = field(default_factory=list)
selected_meeting: MeetingInfo | None = None
# Trigger state (REUSE existing TriggerDecision)
trigger_enabled: bool = True
trigger_pending: bool = False # True when prompt is shown
trigger_decision: TriggerDecision | None = None # Last trigger decision
# Summary state (REUSE existing Summary entity)
current_summary: Summary | None = None
summary_loading: bool = False
summary_error: str | None = None
# UI page reference (private)
_page: ft.Page | None = field(default=None, repr=False)
def set_page(self, page: ft.Page) -> None:
"""Set page reference for thread-safe updates.
Args:
page: Flet page instance.
"""
self._page = page
def request_update(self) -> None:
"""Request UI update from any thread.
Safe to call from background threads.
"""
if self._page:
self._page.update()
def run_on_ui_thread(self, callback: Callable[[], None]) -> None:
"""Schedule callback on the UI event loop safely.
Follows NoteFlowClient callback pattern with error handling.
Args:
callback: Function to execute on the UI event loop.
"""
if not self._page:
return
try:
if hasattr(self._page, "run_task"):
async def _run() -> None:
callback()
self._page.run_task(_run)
else:
self._page.run_thread(callback)
except Exception as e:
logger.error("UI thread callback error: %s", e)
def clear_transcript(self) -> None:
"""Clear all transcript segments and partial text."""
self.transcript_segments.clear()
self.current_partial_text = ""
def reset_recording_state(self) -> None:
"""Reset recording-related state."""
self.recording = False
self.current_meeting = None
self.recording_start_time = None
self.elapsed_seconds = 0
def clear_session_audio(self) -> None:
"""Clear session audio buffer and reset playback state."""
self.session_audio_buffer.clear()
self.playback_position = 0.0
def find_segment_at_position(self, position: float) -> int | None:
"""Find segment index containing the given position using binary search.
Args:
position: Time in seconds.
Returns:
Index of segment containing position, or None if not found.
"""
segments = self.transcript_segments
if not segments:
return None
left, right = 0, len(segments) - 1
while left <= right:
mid = (left + right) // 2
segment = segments[mid]
if segment.start_time <= position <= segment.end_time:
return mid
if position < segment.start_time:
right = mid - 1
else:
left = mid + 1
return None
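The binary search in find_segment_at_position only reads start_time and end_time, so a lightweight stand-in is enough to illustrate it. A sketch using a hypothetical stub type (real entries are TranscriptSegment instances from noteflow.grpc.client):
from dataclasses import dataclass
from noteflow.client.state import AppState
@dataclass
class _StubSegment:
    # Hypothetical stand-in exposing just the two fields the search reads.
    start_time: float
    end_time: float
state = AppState()
state.transcript_segments = [  # type: ignore[list-item]
    _StubSegment(0.0, 1.5),
    _StubSegment(1.5, 3.0),
    _StubSegment(3.0, 4.2),
]
assert state.find_segment_at_position(2.0) == 1  # inside the second segment
assert state.find_segment_at_position(9.0) is None  # past the last segment
Because segments are appended in time order as finals arrive, the sorted-input assumption behind the binary search holds for live transcripts.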
File: src/noteflow/grpc/proto/noteflow_pb2_grpc.py
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings
import noteflow_pb2 as noteflow__pb2
GRPC_VERSION = grpc.__version__
_version_not_supported = False
GRPC_GENERATED_VERSION = '1.76.0'
try:
from grpc._utilities import first_version_is_lower
_version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
_version_not_supported = True
if _version_not_supported:
raise RuntimeError(
f'The grpc package installed is at version {GRPC_VERSION}, but the generated code in noteflow_pb2_grpc.py depends on'
+ f' grpcio>={GRPC_GENERATED_VERSION}.'
+ f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
+ f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
)
class NoteFlowServiceStub(object):
"""=============================================================================
Core Service
=============================================================================
"""
def __init__(self, channel):
"""Constructor.
Args:
channel: A grpc.Channel.
"""
self.StreamTranscription = channel.stream_stream(
'/noteflow.NoteFlowService/StreamTranscription',
request_serializer=noteflow__pb2.AudioChunk.SerializeToString,
response_deserializer=noteflow__pb2.TranscriptUpdate.FromString,
_registered_method=True)
self.CreateMeeting = channel.unary_unary(
'/noteflow.NoteFlowService/CreateMeeting',
request_serializer=noteflow__pb2.CreateMeetingRequest.SerializeToString,
response_deserializer=noteflow__pb2.Meeting.FromString,
_registered_method=True)
self.StopMeeting = channel.unary_unary(
'/noteflow.NoteFlowService/StopMeeting',
request_serializer=noteflow__pb2.StopMeetingRequest.SerializeToString,
response_deserializer=noteflow__pb2.Meeting.FromString,
_registered_method=True)
self.ListMeetings = channel.unary_unary(
'/noteflow.NoteFlowService/ListMeetings',
request_serializer=noteflow__pb2.ListMeetingsRequest.SerializeToString,
response_deserializer=noteflow__pb2.ListMeetingsResponse.FromString,
_registered_method=True)
self.GetMeeting = channel.unary_unary(
'/noteflow.NoteFlowService/GetMeeting',
request_serializer=noteflow__pb2.GetMeetingRequest.SerializeToString,
response_deserializer=noteflow__pb2.Meeting.FromString,
_registered_method=True)
self.DeleteMeeting = channel.unary_unary(
'/noteflow.NoteFlowService/DeleteMeeting',
request_serializer=noteflow__pb2.DeleteMeetingRequest.SerializeToString,
response_deserializer=noteflow__pb2.DeleteMeetingResponse.FromString,
_registered_method=True)
self.GenerateSummary = channel.unary_unary(
'/noteflow.NoteFlowService/GenerateSummary',
request_serializer=noteflow__pb2.GenerateSummaryRequest.SerializeToString,
response_deserializer=noteflow__pb2.Summary.FromString,
_registered_method=True)
self.AddAnnotation = channel.unary_unary(
'/noteflow.NoteFlowService/AddAnnotation',
request_serializer=noteflow__pb2.AddAnnotationRequest.SerializeToString,
response_deserializer=noteflow__pb2.Annotation.FromString,
_registered_method=True)
self.GetAnnotation = channel.unary_unary(
'/noteflow.NoteFlowService/GetAnnotation',
request_serializer=noteflow__pb2.GetAnnotationRequest.SerializeToString,
response_deserializer=noteflow__pb2.Annotation.FromString,
_registered_method=True)
self.ListAnnotations = channel.unary_unary(
'/noteflow.NoteFlowService/ListAnnotations',
request_serializer=noteflow__pb2.ListAnnotationsRequest.SerializeToString,
response_deserializer=noteflow__pb2.ListAnnotationsResponse.FromString,
_registered_method=True)
self.UpdateAnnotation = channel.unary_unary(
'/noteflow.NoteFlowService/UpdateAnnotation',
request_serializer=noteflow__pb2.UpdateAnnotationRequest.SerializeToString,
response_deserializer=noteflow__pb2.Annotation.FromString,
_registered_method=True)
self.DeleteAnnotation = channel.unary_unary(
'/noteflow.NoteFlowService/DeleteAnnotation',
request_serializer=noteflow__pb2.DeleteAnnotationRequest.SerializeToString,
response_deserializer=noteflow__pb2.DeleteAnnotationResponse.FromString,
_registered_method=True)
self.ExportTranscript = channel.unary_unary(
'/noteflow.NoteFlowService/ExportTranscript',
request_serializer=noteflow__pb2.ExportTranscriptRequest.SerializeToString,
response_deserializer=noteflow__pb2.ExportTranscriptResponse.FromString,
_registered_method=True)
self.RefineSpeakerDiarization = channel.unary_unary(
'/noteflow.NoteFlowService/RefineSpeakerDiarization',
request_serializer=noteflow__pb2.RefineSpeakerDiarizationRequest.SerializeToString,
response_deserializer=noteflow__pb2.RefineSpeakerDiarizationResponse.FromString,
_registered_method=True)
self.RenameSpeaker = channel.unary_unary(
'/noteflow.NoteFlowService/RenameSpeaker',
request_serializer=noteflow__pb2.RenameSpeakerRequest.SerializeToString,
response_deserializer=noteflow__pb2.RenameSpeakerResponse.FromString,
_registered_method=True)
self.GetDiarizationJobStatus = channel.unary_unary(
'/noteflow.NoteFlowService/GetDiarizationJobStatus',
request_serializer=noteflow__pb2.GetDiarizationJobStatusRequest.SerializeToString,
response_deserializer=noteflow__pb2.DiarizationJobStatus.FromString,
_registered_method=True)
self.GetServerInfo = channel.unary_unary(
'/noteflow.NoteFlowService/GetServerInfo',
request_serializer=noteflow__pb2.ServerInfoRequest.SerializeToString,
response_deserializer=noteflow__pb2.ServerInfo.FromString,
_registered_method=True)
class NoteFlowServiceServicer(object):
"""=============================================================================
Core Service
=============================================================================
"""
def StreamTranscription(self, request_iterator, context):
"""Bidirectional streaming: client sends audio chunks, server returns transcripts
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def CreateMeeting(self, request, context):
"""Meeting lifecycle management
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def StopMeeting(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def ListMeetings(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def GetMeeting(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def DeleteMeeting(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def GenerateSummary(self, request, context):
"""Summary generation
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def AddAnnotation(self, request, context):
"""Annotation management
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def GetAnnotation(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def ListAnnotations(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def UpdateAnnotation(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def DeleteAnnotation(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def ExportTranscript(self, request, context):
"""Export functionality
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def RefineSpeakerDiarization(self, request, context):
"""Speaker diarization
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def RenameSpeaker(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def GetDiarizationJobStatus(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def GetServerInfo(self, request, context):
"""Server health and capabilities
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def add_NoteFlowServiceServicer_to_server(servicer, server):
rpc_method_handlers = {
'StreamTranscription': grpc.stream_stream_rpc_method_handler(
servicer.StreamTranscription,
request_deserializer=noteflow__pb2.AudioChunk.FromString,
response_serializer=noteflow__pb2.TranscriptUpdate.SerializeToString,
),
'CreateMeeting': grpc.unary_unary_rpc_method_handler(
servicer.CreateMeeting,
request_deserializer=noteflow__pb2.CreateMeetingRequest.FromString,
response_serializer=noteflow__pb2.Meeting.SerializeToString,
),
'StopMeeting': grpc.unary_unary_rpc_method_handler(
servicer.StopMeeting,
request_deserializer=noteflow__pb2.StopMeetingRequest.FromString,
response_serializer=noteflow__pb2.Meeting.SerializeToString,
),
'ListMeetings': grpc.unary_unary_rpc_method_handler(
servicer.ListMeetings,
request_deserializer=noteflow__pb2.ListMeetingsRequest.FromString,
response_serializer=noteflow__pb2.ListMeetingsResponse.SerializeToString,
),
'GetMeeting': grpc.unary_unary_rpc_method_handler(
servicer.GetMeeting,
request_deserializer=noteflow__pb2.GetMeetingRequest.FromString,
response_serializer=noteflow__pb2.Meeting.SerializeToString,
),
'DeleteMeeting': grpc.unary_unary_rpc_method_handler(
servicer.DeleteMeeting,
request_deserializer=noteflow__pb2.DeleteMeetingRequest.FromString,
response_serializer=noteflow__pb2.DeleteMeetingResponse.SerializeToString,
),
'GenerateSummary': grpc.unary_unary_rpc_method_handler(
servicer.GenerateSummary,
request_deserializer=noteflow__pb2.GenerateSummaryRequest.FromString,
response_serializer=noteflow__pb2.Summary.SerializeToString,
),
'AddAnnotation': grpc.unary_unary_rpc_method_handler(
servicer.AddAnnotation,
request_deserializer=noteflow__pb2.AddAnnotationRequest.FromString,
response_serializer=noteflow__pb2.Annotation.SerializeToString,
),
'GetAnnotation': grpc.unary_unary_rpc_method_handler(
servicer.GetAnnotation,
request_deserializer=noteflow__pb2.GetAnnotationRequest.FromString,
response_serializer=noteflow__pb2.Annotation.SerializeToString,
),
'ListAnnotations': grpc.unary_unary_rpc_method_handler(
servicer.ListAnnotations,
request_deserializer=noteflow__pb2.ListAnnotationsRequest.FromString,
response_serializer=noteflow__pb2.ListAnnotationsResponse.SerializeToString,
),
'UpdateAnnotation': grpc.unary_unary_rpc_method_handler(
servicer.UpdateAnnotation,
request_deserializer=noteflow__pb2.UpdateAnnotationRequest.FromString,
response_serializer=noteflow__pb2.Annotation.SerializeToString,
),
'DeleteAnnotation': grpc.unary_unary_rpc_method_handler(
servicer.DeleteAnnotation,
request_deserializer=noteflow__pb2.DeleteAnnotationRequest.FromString,
response_serializer=noteflow__pb2.DeleteAnnotationResponse.SerializeToString,
),
'ExportTranscript': grpc.unary_unary_rpc_method_handler(
servicer.ExportTranscript,
request_deserializer=noteflow__pb2.ExportTranscriptRequest.FromString,
response_serializer=noteflow__pb2.ExportTranscriptResponse.SerializeToString,
),
'RefineSpeakerDiarization': grpc.unary_unary_rpc_method_handler(
servicer.RefineSpeakerDiarization,
request_deserializer=noteflow__pb2.RefineSpeakerDiarizationRequest.FromString,
response_serializer=noteflow__pb2.RefineSpeakerDiarizationResponse.SerializeToString,
),
'RenameSpeaker': grpc.unary_unary_rpc_method_handler(
servicer.RenameSpeaker,
request_deserializer=noteflow__pb2.RenameSpeakerRequest.FromString,
response_serializer=noteflow__pb2.RenameSpeakerResponse.SerializeToString,
),
'GetDiarizationJobStatus': grpc.unary_unary_rpc_method_handler(
servicer.GetDiarizationJobStatus,
request_deserializer=noteflow__pb2.GetDiarizationJobStatusRequest.FromString,
response_serializer=noteflow__pb2.DiarizationJobStatus.SerializeToString,
),
'GetServerInfo': grpc.unary_unary_rpc_method_handler(
servicer.GetServerInfo,
request_deserializer=noteflow__pb2.ServerInfoRequest.FromString,
response_serializer=noteflow__pb2.ServerInfo.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'noteflow.NoteFlowService', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
server.add_registered_method_handlers('noteflow.NoteFlowService', rpc_method_handlers)
# This class is part of an EXPERIMENTAL API.
class NoteFlowService(object):
"""=============================================================================
Core Service
=============================================================================
"""
@staticmethod
def StreamTranscription(request_iterator,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.stream_stream(
request_iterator,
target,
'/noteflow.NoteFlowService/StreamTranscription',
noteflow__pb2.AudioChunk.SerializeToString,
noteflow__pb2.TranscriptUpdate.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def CreateMeeting(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/CreateMeeting',
noteflow__pb2.CreateMeetingRequest.SerializeToString,
noteflow__pb2.Meeting.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def StopMeeting(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/StopMeeting',
noteflow__pb2.StopMeetingRequest.SerializeToString,
noteflow__pb2.Meeting.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def ListMeetings(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/ListMeetings',
noteflow__pb2.ListMeetingsRequest.SerializeToString,
noteflow__pb2.ListMeetingsResponse.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def GetMeeting(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/GetMeeting',
noteflow__pb2.GetMeetingRequest.SerializeToString,
noteflow__pb2.Meeting.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def DeleteMeeting(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/DeleteMeeting',
noteflow__pb2.DeleteMeetingRequest.SerializeToString,
noteflow__pb2.DeleteMeetingResponse.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def GenerateSummary(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/GenerateSummary',
noteflow__pb2.GenerateSummaryRequest.SerializeToString,
noteflow__pb2.Summary.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def AddAnnotation(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/AddAnnotation',
noteflow__pb2.AddAnnotationRequest.SerializeToString,
noteflow__pb2.Annotation.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def GetAnnotation(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/GetAnnotation',
noteflow__pb2.GetAnnotationRequest.SerializeToString,
noteflow__pb2.Annotation.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def ListAnnotations(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/ListAnnotations',
noteflow__pb2.ListAnnotationsRequest.SerializeToString,
noteflow__pb2.ListAnnotationsResponse.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def UpdateAnnotation(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/UpdateAnnotation',
noteflow__pb2.UpdateAnnotationRequest.SerializeToString,
noteflow__pb2.Annotation.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def DeleteAnnotation(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/DeleteAnnotation',
noteflow__pb2.DeleteAnnotationRequest.SerializeToString,
noteflow__pb2.DeleteAnnotationResponse.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def ExportTranscript(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/ExportTranscript',
noteflow__pb2.ExportTranscriptRequest.SerializeToString,
noteflow__pb2.ExportTranscriptResponse.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def RefineSpeakerDiarization(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/RefineSpeakerDiarization',
noteflow__pb2.RefineSpeakerDiarizationRequest.SerializeToString,
noteflow__pb2.RefineSpeakerDiarizationResponse.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def RenameSpeaker(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/RenameSpeaker',
noteflow__pb2.RenameSpeakerRequest.SerializeToString,
noteflow__pb2.RenameSpeakerResponse.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def GetDiarizationJobStatus(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/GetDiarizationJobStatus',
noteflow__pb2.GetDiarizationJobStatusRequest.SerializeToString,
noteflow__pb2.DiarizationJobStatus.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
@staticmethod
def GetServerInfo(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(
request,
target,
'/noteflow.NoteFlowService/GetServerInfo',
noteflow__pb2.ServerInfoRequest.SerializeToString,
noteflow__pb2.ServerInfo.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
_registered_method=True)
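The generated stub is consumed like any grpc-python stub. A minimal sketch (the server address is illustrative; the flat noteflow_pb2 import matches the generated module above):
import grpc
import noteflow_pb2
import noteflow_pb2_grpc
# Open a plaintext channel and issue a simple unary call.
channel = grpc.insecure_channel("localhost:50051")
stub = noteflow_pb2_grpc.NoteFlowServiceStub(channel)
info = stub.GetServerInfo(noteflow_pb2.ServerInfoRequest())
print(info.version, info.asr_ready)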
File: src/noteflow/grpc/proto/noteflow_pb2.py
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
# source: noteflow.proto
# Protobuf Python Version: 6.31.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import runtime_version as _runtime_version
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
_runtime_version.ValidateProtobufRuntimeVersion(
_runtime_version.Domain.PUBLIC,
6,
31,
1,
'',
'noteflow.proto'
)
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0enoteflow.proto\x12\x08noteflow\"n\n\nAudioChunk\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\naudio_data\x18\x02 \x01(\x0c\x12\x11\n\ttimestamp\x18\x03 \x01(\x01\x12\x13\n\x0bsample_rate\x18\x04 \x01(\x05\x12\x10\n\x08\x63hannels\x18\x05 \x01(\x05\"\xaa\x01\n\x10TranscriptUpdate\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12)\n\x0bupdate_type\x18\x02 \x01(\x0e\x32\x14.noteflow.UpdateType\x12\x14\n\x0cpartial_text\x18\x03 \x01(\t\x12\'\n\x07segment\x18\x04 \x01(\x0b\x32\x16.noteflow.FinalSegment\x12\x18\n\x10server_timestamp\x18\x05 \x01(\x01\"\x87\x02\n\x0c\x46inalSegment\x12\x12\n\nsegment_id\x18\x01 \x01(\x05\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\x12#\n\x05words\x18\x05 \x03(\x0b\x32\x14.noteflow.WordTiming\x12\x10\n\x08language\x18\x06 \x01(\t\x12\x1b\n\x13language_confidence\x18\x07 \x01(\x02\x12\x13\n\x0b\x61vg_logprob\x18\x08 \x01(\x02\x12\x16\n\x0eno_speech_prob\x18\t \x01(\x02\x12\x12\n\nspeaker_id\x18\n \x01(\t\x12\x1a\n\x12speaker_confidence\x18\x0b \x01(\x02\"U\n\nWordTiming\x12\x0c\n\x04word\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\x12\x13\n\x0bprobability\x18\x04 \x01(\x02\"\xd1\x02\n\x07Meeting\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12%\n\x05state\x18\x03 \x01(\x0e\x32\x16.noteflow.MeetingState\x12\x12\n\ncreated_at\x18\x04 \x01(\x01\x12\x12\n\nstarted_at\x18\x05 \x01(\x01\x12\x10\n\x08\x65nded_at\x18\x06 \x01(\x01\x12\x18\n\x10\x64uration_seconds\x18\x07 \x01(\x01\x12(\n\x08segments\x18\x08 \x03(\x0b\x32\x16.noteflow.FinalSegment\x12\"\n\x07summary\x18\t \x01(\x0b\x32\x11.noteflow.Summary\x12\x31\n\x08metadata\x18\n \x03(\x0b\x32\x1f.noteflow.Meeting.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x96\x01\n\x14\x43reateMeetingRequest\x12\r\n\x05title\x18\x01 \x01(\t\x12>\n\x08metadata\x18\x02 \x03(\x0b\x32,.noteflow.CreateMeetingRequest.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"(\n\x12StopMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"\x85\x01\n\x13ListMeetingsRequest\x12&\n\x06states\x18\x01 \x03(\x0e\x32\x16.noteflow.MeetingState\x12\r\n\x05limit\x18\x02 \x01(\x05\x12\x0e\n\x06offset\x18\x03 \x01(\x05\x12\'\n\nsort_order\x18\x04 \x01(\x0e\x32\x13.noteflow.SortOrder\"P\n\x14ListMeetingsResponse\x12#\n\x08meetings\x18\x01 \x03(\x0b\x32\x11.noteflow.Meeting\x12\x13\n\x0btotal_count\x18\x02 \x01(\x05\"Z\n\x11GetMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10include_segments\x18\x02 \x01(\x08\x12\x17\n\x0finclude_summary\x18\x03 \x01(\x08\"*\n\x14\x44\x65leteMeetingRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\"(\n\x15\x44\x65leteMeetingResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\xb9\x01\n\x07Summary\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x19\n\x11\x65xecutive_summary\x18\x02 \x01(\t\x12&\n\nkey_points\x18\x03 \x03(\x0b\x32\x12.noteflow.KeyPoint\x12*\n\x0c\x61\x63tion_items\x18\x04 \x03(\x0b\x32\x14.noteflow.ActionItem\x12\x14\n\x0cgenerated_at\x18\x05 \x01(\x01\x12\x15\n\rmodel_version\x18\x06 \x01(\t\"S\n\x08KeyPoint\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x13\n\x0bsegment_ids\x18\x02 \x03(\x05\x12\x12\n\nstart_time\x18\x03 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x04 \x01(\x01\"y\n\nActionItem\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x10\n\x08\x61ssignee\x18\x02 \x01(\t\x12\x10\n\x08\x64ue_date\x18\x03 \x01(\x01\x12$\n\x08priority\x18\x04 \x01(\x0e\x32\x12.noteflow.Priority\x12\x13\n\x0bsegment_ids\x18\x05 \x03(\x05\"F\n\x16GenerateSummaryRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x18\n\x10\x66orce_regenerate\x18\x02 \x01(\x08\"\x13\n\x11ServerInfoRequest\"\xe4\x01\n\nServerInfo\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\x11\n\tasr_model\x18\x02 \x01(\t\x12\x11\n\tasr_ready\x18\x03 \x01(\x08\x12\x1e\n\x16supported_sample_rates\x18\x04 \x03(\x05\x12\x16\n\x0emax_chunk_size\x18\x05 \x01(\x05\x12\x16\n\x0euptime_seconds\x18\x06 \x01(\x01\x12\x17\n\x0f\x61\x63tive_meetings\x18\x07 \x01(\x05\x12\x1b\n\x13\x64iarization_enabled\x18\x08 \x01(\x08\x12\x19\n\x11\x64iarization_ready\x18\t \x01(\x08\"\xbc\x01\n\nAnnotation\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\nmeeting_id\x18\x02 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x03 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x12\n\nstart_time\x18\x05 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x06 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x07 \x03(\x05\x12\x12\n\ncreated_at\x18\x08 \x01(\x01\"\xa6\x01\n\x14\x41\x64\x64\x41nnotationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"-\n\x14GetAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"R\n\x16ListAnnotationsRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x12\n\nstart_time\x18\x02 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x03 \x01(\x01\"D\n\x17ListAnnotationsResponse\x12)\n\x0b\x61nnotations\x18\x01 \x03(\x0b\x32\x14.noteflow.Annotation\"\xac\x01\n\x17UpdateAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\x12\x31\n\x0f\x61nnotation_type\x18\x02 \x01(\x0e\x32\x18.noteflow.AnnotationType\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x12\n\nstart_time\x18\x04 \x01(\x01\x12\x10\n\x08\x65nd_time\x18\x05 \x01(\x01\x12\x13\n\x0bsegment_ids\x18\x06 \x03(\x05\"0\n\x17\x44\x65leteAnnotationRequest\x12\x15\n\rannotation_id\x18\x01 \x01(\t\"+\n\x18\x44\x65leteAnnotationResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"U\n\x17\x45xportTranscriptRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12&\n\x06\x66ormat\x18\x02 \x01(\x0e\x32\x16.noteflow.ExportFormat\"X\n\x18\x45xportTranscriptResponse\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x13\n\x0b\x66ormat_name\x18\x02 \x01(\t\x12\x16\n\x0e\x66ile_extension\x18\x03 \x01(\t\"K\n\x1fRefineSpeakerDiarizationRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x14\n\x0cnum_speakers\x18\x02 \x01(\x05\"\x9d\x01\n RefineSpeakerDiarizationResponse\x12\x18\n\x10segments_updated\x18\x01 \x01(\x05\x12\x13\n\x0bspeaker_ids\x18\x02 \x03(\t\x12\x15\n\rerror_message\x18\x03 \x01(\t\x12\x0e\n\x06job_id\x18\x04 \x01(\t\x12#\n\x06status\x18\x05 \x01(\x0e\x32\x13.noteflow.JobStatus\"\\\n\x14RenameSpeakerRequest\x12\x12\n\nmeeting_id\x18\x01 \x01(\t\x12\x16\n\x0eold_speaker_id\x18\x02 \x01(\t\x12\x18\n\x10new_speaker_name\x18\x03 \x01(\t\"B\n\x15RenameSpeakerResponse\x12\x18\n\x10segments_updated\x18\x01 \x01(\x05\x12\x0f\n\x07success\x18\x02 \x01(\x08\"0\n\x1eGetDiarizationJobStatusRequest\x12\x0e\n\x06job_id\x18\x01 \x01(\t\"\x91\x01\n\x14\x44iarizationJobStatus\x12\x0e\n\x06job_id\x18\x01 \x01(\t\x12#\n\x06status\x18\x02 \x01(\x0e\x32\x13.noteflow.JobStatus\x12\x18\n\x10segments_updated\x18\x03 \x01(\x05\x12\x13\n\x0bspeaker_ids\x18\x04 \x03(\t\x12\x15\n\rerror_message\x18\x05 \x01(\t*\x8d\x01\n\nUpdateType\x12\x1b\n\x17UPDATE_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13UPDATE_TYPE_PARTIAL\x10\x01\x12\x15\n\x11UPDATE_TYPE_FINAL\x10\x02\x12\x19\n\x15UPDATE_TYPE_VAD_START\x10\x03\x12\x17\n\x13UPDATE_TYPE_VAD_END\x10\x04*\xb6\x01\n\x0cMeetingState\x12\x1d\n\x19MEETING_STATE_UNSPECIFIED\x10\x00\x12\x19\n\x15MEETING_STATE_CREATED\x10\x01\x12\x1b\n\x17MEETING_STATE_RECORDING\x10\x02\x12\x19\n\x15MEETING_STATE_STOPPED\x10\x03\x12\x1b\n\x17MEETING_STATE_COMPLETED\x10\x04\x12\x17\n\x13MEETING_STATE_ERROR\x10\x05*`\n\tSortOrder\x12\x1a\n\x16SORT_ORDER_UNSPECIFIED\x10\x00\x12\x1b\n\x17SORT_ORDER_CREATED_DESC\x10\x01\x12\x1a\n\x16SORT_ORDER_CREATED_ASC\x10\x02*^\n\x08Priority\x12\x18\n\x14PRIORITY_UNSPECIFIED\x10\x00\x12\x10\n\x0cPRIORITY_LOW\x10\x01\x12\x13\n\x0fPRIORITY_MEDIUM\x10\x02\x12\x11\n\rPRIORITY_HIGH\x10\x03*\xa4\x01\n\x0e\x41nnotationType\x12\x1f\n\x1b\x41NNOTATION_TYPE_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x41NNOTATION_TYPE_ACTION_ITEM\x10\x01\x12\x1c\n\x18\x41NNOTATION_TYPE_DECISION\x10\x02\x12\x18\n\x14\x41NNOTATION_TYPE_NOTE\x10\x03\x12\x18\n\x14\x41NNOTATION_TYPE_RISK\x10\x04*a\n\x0c\x45xportFormat\x12\x1d\n\x19\x45XPORT_FORMAT_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x45XPORT_FORMAT_MARKDOWN\x10\x01\x12\x16\n\x12\x45XPORT_FORMAT_HTML\x10\x02*\x87\x01\n\tJobStatus\x12\x1a\n\x16JOB_STATUS_UNSPECIFIED\x10\x00\x12\x15\n\x11JOB_STATUS_QUEUED\x10\x01\x12\x16\n\x12JOB_STATUS_RUNNING\x10\x02\x12\x18\n\x14JOB_STATUS_COMPLETED\x10\x03\x12\x15\n\x11JOB_STATUS_FAILED\x10\x04\x32\xe0\n\n\x0fNoteFlowService\x12K\n\x13StreamTranscription\x12\x14.noteflow.AudioChunk\x1a\x1a.noteflow.TranscriptUpdate(\x01\x30\x01\x12\x42\n\rCreateMeeting\x12\x1e.noteflow.CreateMeetingRequest\x1a\x11.noteflow.Meeting\x12>\n\x0bStopMeeting\x12\x1c.noteflow.StopMeetingRequest\x1a\x11.noteflow.Meeting\x12M\n\x0cListMeetings\x12\x1d.noteflow.ListMeetingsRequest\x1a\x1e.noteflow.ListMeetingsResponse\x12<\n\nGetMeeting\x12\x1b.noteflow.GetMeetingRequest\x1a\x11.noteflow.Meeting\x12P\n\rDeleteMeeting\x12\x1e.noteflow.DeleteMeetingRequest\x1a\x1f.noteflow.DeleteMeetingResponse\x12\x46\n\x0fGenerateSummary\x12 .noteflow.GenerateSummaryRequest\x1a\x11.noteflow.Summary\x12\x45\n\rAddAnnotation\x12\x1e.noteflow.AddAnnotationRequest\x1a\x14.noteflow.Annotation\x12\x45\n\rGetAnnotation\x12\x1e.noteflow.GetAnnotationRequest\x1a\x14.noteflow.Annotation\x12V\n\x0fListAnnotations\x12 .noteflow.ListAnnotationsRequest\x1a!.noteflow.ListAnnotationsResponse\x12K\n\x10UpdateAnnotation\x12!.noteflow.UpdateAnnotationRequest\x1a\x14.noteflow.Annotation\x12Y\n\x10\x44\x65leteAnnotation\x12!.noteflow.DeleteAnnotationRequest\x1a\".noteflow.DeleteAnnotationResponse\x12Y\n\x10\x45xportTranscript\x12!.noteflow.ExportTranscriptRequest\x1a\".noteflow.ExportTranscriptResponse\x12q\n\x18RefineSpeakerDiarization\x12).noteflow.RefineSpeakerDiarizationRequest\x1a*.noteflow.RefineSpeakerDiarizationResponse\x12P\n\rRenameSpeaker\x12\x1e.noteflow.RenameSpeakerRequest\x1a\x1f.noteflow.RenameSpeakerResponse\x12\x63\n\x17GetDiarizationJobStatus\x12(.noteflow.GetDiarizationJobStatusRequest\x1a\x1e.noteflow.DiarizationJobStatus\x12\x42\n\rGetServerInfo\x12\x1b.noteflow.ServerInfoRequest\x1a\x14.noteflow.ServerInfob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'noteflow_pb2', _globals)
if not _descriptor._USE_C_DESCRIPTORS:
DESCRIPTOR._loaded_options = None
_globals['_MEETING_METADATAENTRY']._loaded_options = None
_globals['_MEETING_METADATAENTRY']._serialized_options = b'8\001'
_globals['_CREATEMEETINGREQUEST_METADATAENTRY']._loaded_options = None
_globals['_CREATEMEETINGREQUEST_METADATAENTRY']._serialized_options = b'8\001'
_globals['_UPDATETYPE']._serialized_start=3923
_globals['_UPDATETYPE']._serialized_end=4064
_globals['_MEETINGSTATE']._serialized_start=4067
_globals['_MEETINGSTATE']._serialized_end=4249
_globals['_SORTORDER']._serialized_start=4251
_globals['_SORTORDER']._serialized_end=4347
_globals['_PRIORITY']._serialized_start=4349
_globals['_PRIORITY']._serialized_end=4443
_globals['_ANNOTATIONTYPE']._serialized_start=4446
_globals['_ANNOTATIONTYPE']._serialized_end=4610
_globals['_EXPORTFORMAT']._serialized_start=4612
_globals['_EXPORTFORMAT']._serialized_end=4709
_globals['_JOBSTATUS']._serialized_start=4712
_globals['_JOBSTATUS']._serialized_end=4847
_globals['_AUDIOCHUNK']._serialized_start=28
_globals['_AUDIOCHUNK']._serialized_end=138
_globals['_TRANSCRIPTUPDATE']._serialized_start=141
_globals['_TRANSCRIPTUPDATE']._serialized_end=311
_globals['_FINALSEGMENT']._serialized_start=314
_globals['_FINALSEGMENT']._serialized_end=577
_globals['_WORDTIMING']._serialized_start=579
_globals['_WORDTIMING']._serialized_end=664
_globals['_MEETING']._serialized_start=667
_globals['_MEETING']._serialized_end=1004
_globals['_MEETING_METADATAENTRY']._serialized_start=957
_globals['_MEETING_METADATAENTRY']._serialized_end=1004
_globals['_CREATEMEETINGREQUEST']._serialized_start=1007
_globals['_CREATEMEETINGREQUEST']._serialized_end=1157
_globals['_CREATEMEETINGREQUEST_METADATAENTRY']._serialized_start=957
_globals['_CREATEMEETINGREQUEST_METADATAENTRY']._serialized_end=1004
_globals['_STOPMEETINGREQUEST']._serialized_start=1159
_globals['_STOPMEETINGREQUEST']._serialized_end=1199
_globals['_LISTMEETINGSREQUEST']._serialized_start=1202
_globals['_LISTMEETINGSREQUEST']._serialized_end=1335
_globals['_LISTMEETINGSRESPONSE']._serialized_start=1337
_globals['_LISTMEETINGSRESPONSE']._serialized_end=1417
_globals['_GETMEETINGREQUEST']._serialized_start=1419
_globals['_GETMEETINGREQUEST']._serialized_end=1509
_globals['_DELETEMEETINGREQUEST']._serialized_start=1511
_globals['_DELETEMEETINGREQUEST']._serialized_end=1553
_globals['_DELETEMEETINGRESPONSE']._serialized_start=1555
_globals['_DELETEMEETINGRESPONSE']._serialized_end=1595
_globals['_SUMMARY']._serialized_start=1598
_globals['_SUMMARY']._serialized_end=1783
_globals['_KEYPOINT']._serialized_start=1785
_globals['_KEYPOINT']._serialized_end=1868
_globals['_ACTIONITEM']._serialized_start=1870
_globals['_ACTIONITEM']._serialized_end=1991
_globals['_GENERATESUMMARYREQUEST']._serialized_start=1993
_globals['_GENERATESUMMARYREQUEST']._serialized_end=2063
_globals['_SERVERINFOREQUEST']._serialized_start=2065
_globals['_SERVERINFOREQUEST']._serialized_end=2084
_globals['_SERVERINFO']._serialized_start=2087
_globals['_SERVERINFO']._serialized_end=2315
_globals['_ANNOTATION']._serialized_start=2318
_globals['_ANNOTATION']._serialized_end=2506
_globals['_ADDANNOTATIONREQUEST']._serialized_start=2509
_globals['_ADDANNOTATIONREQUEST']._serialized_end=2675
_globals['_GETANNOTATIONREQUEST']._serialized_start=2677
_globals['_GETANNOTATIONREQUEST']._serialized_end=2722
_globals['_LISTANNOTATIONSREQUEST']._serialized_start=2724
_globals['_LISTANNOTATIONSREQUEST']._serialized_end=2806
_globals['_LISTANNOTATIONSRESPONSE']._serialized_start=2808
_globals['_LISTANNOTATIONSRESPONSE']._serialized_end=2876
_globals['_UPDATEANNOTATIONREQUEST']._serialized_start=2879
_globals['_UPDATEANNOTATIONREQUEST']._serialized_end=3051
_globals['_DELETEANNOTATIONREQUEST']._serialized_start=3053
_globals['_DELETEANNOTATIONREQUEST']._serialized_end=3101
_globals['_DELETEANNOTATIONRESPONSE']._serialized_start=3103
_globals['_DELETEANNOTATIONRESPONSE']._serialized_end=3146
_globals['_EXPORTTRANSCRIPTREQUEST']._serialized_start=3148
_globals['_EXPORTTRANSCRIPTREQUEST']._serialized_end=3233
_globals['_EXPORTTRANSCRIPTRESPONSE']._serialized_start=3235
_globals['_EXPORTTRANSCRIPTRESPONSE']._serialized_end=3323
_globals['_REFINESPEAKERDIARIZATIONREQUEST']._serialized_start=3325
_globals['_REFINESPEAKERDIARIZATIONREQUEST']._serialized_end=3400
_globals['_REFINESPEAKERDIARIZATIONRESPONSE']._serialized_start=3403
_globals['_REFINESPEAKERDIARIZATIONRESPONSE']._serialized_end=3560
_globals['_RENAMESPEAKERREQUEST']._serialized_start=3562
_globals['_RENAMESPEAKERREQUEST']._serialized_end=3654
_globals['_RENAMESPEAKERRESPONSE']._serialized_start=3656
_globals['_RENAMESPEAKERRESPONSE']._serialized_end=3722
_globals['_GETDIARIZATIONJOBSTATUSREQUEST']._serialized_start=3724
_globals['_GETDIARIZATIONJOBSTATUSREQUEST']._serialized_end=3772
_globals['_DIARIZATIONJOBSTATUS']._serialized_start=3775
_globals['_DIARIZATIONJOBSTATUS']._serialized_end=3920
_globals['_NOTEFLOWSERVICE']._serialized_start=4850
_globals['_NOTEFLOWSERVICE']._serialized_end=6226
# @@protoc_insertion_point(module_scope)
File: src/noteflow/grpc/proto/noteflow.proto
// NoteFlow gRPC Service Definition
// Provides real-time ASR streaming and meeting management
syntax = "proto3";
package noteflow;
// =============================================================================
// Core Service
// =============================================================================
service NoteFlowService {
// Bidirectional streaming: client sends audio chunks, server returns transcripts
rpc StreamTranscription(stream AudioChunk) returns (stream TranscriptUpdate);
// Meeting lifecycle management
rpc CreateMeeting(CreateMeetingRequest) returns (Meeting);
rpc StopMeeting(StopMeetingRequest) returns (Meeting);
rpc ListMeetings(ListMeetingsRequest) returns (ListMeetingsResponse);
rpc GetMeeting(GetMeetingRequest) returns (Meeting);
rpc DeleteMeeting(DeleteMeetingRequest) returns (DeleteMeetingResponse);
// Summary generation
rpc GenerateSummary(GenerateSummaryRequest) returns (Summary);
// Annotation management
rpc AddAnnotation(AddAnnotationRequest) returns (Annotation);
rpc GetAnnotation(GetAnnotationRequest) returns (Annotation);
rpc ListAnnotations(ListAnnotationsRequest) returns (ListAnnotationsResponse);
rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation);
rpc DeleteAnnotation(DeleteAnnotationRequest) returns (DeleteAnnotationResponse);
// Export functionality
rpc ExportTranscript(ExportTranscriptRequest) returns (ExportTranscriptResponse);
// Speaker diarization
rpc RefineSpeakerDiarization(RefineSpeakerDiarizationRequest) returns (RefineSpeakerDiarizationResponse);
rpc RenameSpeaker(RenameSpeakerRequest) returns (RenameSpeakerResponse);
rpc GetDiarizationJobStatus(GetDiarizationJobStatusRequest) returns (DiarizationJobStatus);
// Server health and capabilities
rpc GetServerInfo(ServerInfoRequest) returns (ServerInfo);
}
// =============================================================================
// Audio Streaming Messages
// =============================================================================
message AudioChunk {
// Meeting ID this audio belongs to
string meeting_id = 1;
// Raw audio data (float32, mono, 16kHz expected)
bytes audio_data = 2;
// Timestamp when audio was captured (monotonic, seconds)
double timestamp = 3;
// Sample rate in Hz (default 16000)
int32 sample_rate = 4;
// Number of channels (default 1 for mono)
int32 channels = 5;
}
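// Illustrative client-side packing (not part of the schema): a mono float32
// numpy buffer maps onto this message roughly as
//   chunk.audio_data = samples.astype(np.float32).tobytes()
//   chunk.sample_rate = 16000
//   chunk.channels = 1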
message TranscriptUpdate {
// Meeting ID this transcript belongs to
string meeting_id = 1;
// Type of update
UpdateType update_type = 2;
// For partial updates - tentative transcript text
string partial_text = 3;
// For final segments - confirmed transcript
FinalSegment segment = 4;
// Server-side processing timestamp
double server_timestamp = 5;
}
enum UpdateType {
UPDATE_TYPE_UNSPECIFIED = 0;
UPDATE_TYPE_PARTIAL = 1; // Tentative, may change
UPDATE_TYPE_FINAL = 2; // Confirmed segment
UPDATE_TYPE_VAD_START = 3; // Voice activity started
UPDATE_TYPE_VAD_END = 4; // Voice activity ended
}
message FinalSegment {
// Segment ID (sequential within meeting)
int32 segment_id = 1;
// Transcript text
string text = 2;
// Start time relative to meeting start (seconds)
double start_time = 3;
// End time relative to meeting start (seconds)
double end_time = 4;
// Word-level timestamps
repeated WordTiming words = 5;
// Detected language
string language = 6;
// Language detection confidence (0.0-1.0)
float language_confidence = 7;
// Average log probability (quality indicator)
float avg_logprob = 8;
// Probability that segment contains no speech
float no_speech_prob = 9;
// Speaker identification (from diarization)
string speaker_id = 10;
// Speaker assignment confidence (0.0-1.0)
float speaker_confidence = 11;
}
message WordTiming {
string word = 1;
double start_time = 2;
double end_time = 3;
float probability = 4;
}
// =============================================================================
// Meeting Management Messages
// =============================================================================
message Meeting {
// Unique meeting identifier
string id = 1;
// User-provided title
string title = 2;
// Meeting state
MeetingState state = 3;
// Creation timestamp (Unix epoch seconds)
double created_at = 4;
// Start timestamp (when recording began)
double started_at = 5;
// End timestamp (when recording stopped)
double ended_at = 6;
// Duration in seconds
double duration_seconds = 7;
// Full transcript segments
repeated FinalSegment segments = 8;
// Generated summary (if available)
Summary summary = 9;
// Metadata
map<string, string> metadata = 10;
}
enum MeetingState {
MEETING_STATE_UNSPECIFIED = 0;
MEETING_STATE_CREATED = 1; // Created but not started
MEETING_STATE_RECORDING = 2; // Actively recording
MEETING_STATE_STOPPED = 3; // Recording stopped, processing may continue
MEETING_STATE_COMPLETED = 4; // All processing complete
MEETING_STATE_ERROR = 5; // Error occurred
}
message CreateMeetingRequest {
// Optional title (generated if not provided)
string title = 1;
// Optional metadata
map<string, string> metadata = 2;
}
message StopMeetingRequest {
string meeting_id = 1;
}
message ListMeetingsRequest {
// Optional filter by state
repeated MeetingState states = 1;
// Pagination
int32 limit = 2;
int32 offset = 3;
// Sort order
SortOrder sort_order = 4;
}
enum SortOrder {
SORT_ORDER_UNSPECIFIED = 0;
SORT_ORDER_CREATED_DESC = 1; // Newest first (default)
SORT_ORDER_CREATED_ASC = 2; // Oldest first
}
message ListMeetingsResponse {
repeated Meeting meetings = 1;
int32 total_count = 2;
}
message GetMeetingRequest {
string meeting_id = 1;
// Whether to include full transcript segments
bool include_segments = 2;
// Whether to include summary
bool include_summary = 3;
}
message DeleteMeetingRequest {
string meeting_id = 1;
}
message DeleteMeetingResponse {
bool success = 1;
}
// =============================================================================
// Summary Messages
// =============================================================================
message Summary {
// Meeting this summary belongs to
string meeting_id = 1;
// Executive summary (2-3 sentences)
string executive_summary = 2;
// Key points / highlights
repeated KeyPoint key_points = 3;
// Action items extracted
repeated ActionItem action_items = 4;
// Generated timestamp
double generated_at = 5;
// Model/version used for generation
string model_version = 6;
}
message KeyPoint {
// The key point text
string text = 1;
// Segment IDs that support this point (evidence linking)
repeated int32 segment_ids = 2;
// Timestamp range this point covers
double start_time = 3;
double end_time = 4;
}
message ActionItem {
// Action item text
string text = 1;
// Assigned to (if mentioned)
string assignee = 2;
// Due date (if mentioned, Unix epoch)
double due_date = 3;
// Priority level
Priority priority = 4;
// Segment IDs that mention this action
repeated int32 segment_ids = 5;
}
enum Priority {
PRIORITY_UNSPECIFIED = 0;
PRIORITY_LOW = 1;
PRIORITY_MEDIUM = 2;
PRIORITY_HIGH = 3;
}
message GenerateSummaryRequest {
string meeting_id = 1;
// Force regeneration even if summary exists
bool force_regenerate = 2;
}
// =============================================================================
// Server Info Messages
// =============================================================================
message ServerInfoRequest {}
message ServerInfo {
// Server version
string version = 1;
// ASR model loaded
string asr_model = 2;
// Whether ASR is ready
bool asr_ready = 3;
// Supported sample rates
repeated int32 supported_sample_rates = 4;
// Maximum audio chunk size in bytes
int32 max_chunk_size = 5;
// Server uptime in seconds
double uptime_seconds = 6;
// Number of active meetings
int32 active_meetings = 7;
// Whether diarization is enabled
bool diarization_enabled = 8;
// Whether diarization models are ready
bool diarization_ready = 9;
}
// =============================================================================
// Annotation Messages
// =============================================================================
enum AnnotationType {
ANNOTATION_TYPE_UNSPECIFIED = 0;
ANNOTATION_TYPE_ACTION_ITEM = 1;
ANNOTATION_TYPE_DECISION = 2;
ANNOTATION_TYPE_NOTE = 3;
ANNOTATION_TYPE_RISK = 4;
}
message Annotation {
// Unique annotation identifier
string id = 1;
// Meeting this annotation belongs to
string meeting_id = 2;
// Type of annotation
AnnotationType annotation_type = 3;
// Annotation text
string text = 4;
// Start time relative to meeting start (seconds)
double start_time = 5;
// End time relative to meeting start (seconds)
double end_time = 6;
// Linked segment IDs (evidence linking)
repeated int32 segment_ids = 7;
// Creation timestamp (Unix epoch seconds)
double created_at = 8;
}
message AddAnnotationRequest {
// Meeting ID to add annotation to
string meeting_id = 1;
// Type of annotation
AnnotationType annotation_type = 2;
// Annotation text
string text = 3;
// Start time relative to meeting start (seconds)
double start_time = 4;
// End time relative to meeting start (seconds)
double end_time = 5;
// Optional linked segment IDs
repeated int32 segment_ids = 6;
}
message GetAnnotationRequest {
string annotation_id = 1;
}
message ListAnnotationsRequest {
// Meeting ID to list annotations for
string meeting_id = 1;
// Optional time range filter
double start_time = 2;
double end_time = 3;
}
message ListAnnotationsResponse {
repeated Annotation annotations = 1;
}
message UpdateAnnotationRequest {
// Annotation ID to update
string annotation_id = 1;
// Updated type (optional, keeps existing if not set)
AnnotationType annotation_type = 2;
// Updated text (optional, keeps existing if empty)
string text = 3;
// Updated start time (optional, keeps existing if 0)
double start_time = 4;
// Updated end time (optional, keeps existing if 0)
double end_time = 5;
// Updated segment IDs (replaces existing)
repeated int32 segment_ids = 6;
}
message DeleteAnnotationRequest {
string annotation_id = 1;
}
message DeleteAnnotationResponse {
bool success = 1;
}
// =============================================================================
// Export Messages
// =============================================================================
enum ExportFormat {
EXPORT_FORMAT_UNSPECIFIED = 0;
EXPORT_FORMAT_MARKDOWN = 1;
EXPORT_FORMAT_HTML = 2;
}
enum JobStatus {
JOB_STATUS_UNSPECIFIED = 0;
JOB_STATUS_QUEUED = 1;
JOB_STATUS_RUNNING = 2;
JOB_STATUS_COMPLETED = 3;
JOB_STATUS_FAILED = 4;
}
message ExportTranscriptRequest {
// Meeting ID to export
string meeting_id = 1;
// Export format
ExportFormat format = 2;
}
message ExportTranscriptResponse {
// Exported content
string content = 1;
// Format name
string format_name = 2;
// Suggested file extension
string file_extension = 3;
}
// =============================================================================
// Speaker Diarization Messages
// =============================================================================
message RefineSpeakerDiarizationRequest {
// Meeting ID to run diarization on
string meeting_id = 1;
// Optional known number of speakers (auto-detect if not set or 0)
int32 num_speakers = 2;
}
message RefineSpeakerDiarizationResponse {
// Number of segments updated with speaker labels
int32 segments_updated = 1;
// Distinct speaker IDs found
repeated string speaker_ids = 2;
// Error message if diarization failed
string error_message = 3;
// Background job identifier (empty if request failed)
string job_id = 4;
// Current job status
JobStatus status = 5;
}
message RenameSpeakerRequest {
// Meeting ID
string meeting_id = 1;
// Original speaker ID (e.g., "SPEAKER_00")
string old_speaker_id = 2;
// New speaker name (e.g., "Alice")
string new_speaker_name = 3;
}
message RenameSpeakerResponse {
// Number of segments updated
int32 segments_updated = 1;
// Success flag
bool success = 2;
}
message GetDiarizationJobStatusRequest {
// Job ID returned by RefineSpeakerDiarization
string job_id = 1;
}
message DiarizationJobStatus {
// Job ID
string job_id = 1;
// Current status
JobStatus status = 2;
// Number of segments updated (when completed)
int32 segments_updated = 3;
// Distinct speaker IDs found (when completed)
repeated string speaker_ids = 4;
// Error message if failed
string error_message = 5;
}
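Read end to end, the definitions above imply a simple client flow: create a meeting, stream `AudioChunk`s while consuming `TranscriptUpdate`s, stop the meeting, then kick off diarization and poll its job. The sketch below walks that flow against the generated Python stubs; the server address and the audio source are assumptions, and error handling is omitted.

```python
# Minimal sketch of the client flow, assuming a server at localhost:50051
# and some audio source; the empty generator is a stand-in for real capture.
import time

import grpc

from noteflow.grpc.proto import noteflow_pb2, noteflow_pb2_grpc

channel = grpc.insecure_channel("localhost:50051")
stub = noteflow_pb2_grpc.NoteFlowServiceStub(channel)

meeting = stub.CreateMeeting(noteflow_pb2.CreateMeetingRequest(title="Standup"))

def chunks():
    # Real code would yield 16 kHz mono float32 frames from an audio callback.
    for raw in ():  # placeholder: no audio in this sketch
        yield noteflow_pb2.AudioChunk(
            meeting_id=meeting.id,
            audio_data=raw,
            timestamp=time.monotonic(),
            sample_rate=16000,
            channels=1,
        )

# Bidirectional stream: partial text may be revised; final segments are stable.
for update in stub.StreamTranscription(chunks()):
    if update.update_type == noteflow_pb2.UPDATE_TYPE_FINAL:
        print(update.segment.text)

stub.StopMeeting(noteflow_pb2.StopMeetingRequest(meeting_id=meeting.id))

# Diarization runs as a background job: poll with the returned job_id.
job = stub.RefineSpeakerDiarization(
    noteflow_pb2.RefineSpeakerDiarizationRequest(meeting_id=meeting.id)
)
while True:
    status = stub.GetDiarizationJobStatus(
        noteflow_pb2.GetDiarizationJobStatusRequest(job_id=job.job_id)
    )
    if status.status in (
        noteflow_pb2.JOB_STATUS_COMPLETED,
        noteflow_pb2.JOB_STATUS_FAILED,
    ):
        break
    time.sleep(1.0)
```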
## File: src/noteflow/infrastructure/audio/__init__.py
"""Audio infrastructure module.
Provide audio capture, level metering, buffering, playback, and encrypted storage.
"""
from noteflow.infrastructure.audio.capture import SoundDeviceCapture
from noteflow.infrastructure.audio.dto import (
AudioDeviceInfo,
AudioFrameCallback,
TimestampedAudio,
)
from noteflow.infrastructure.audio.levels import RmsLevelProvider, compute_rms
from noteflow.infrastructure.audio.playback import PlaybackState, SoundDevicePlayback
from noteflow.infrastructure.audio.protocols import (
AudioCapture,
AudioLevelProvider,
AudioPlayback,
RingBuffer,
)
from noteflow.infrastructure.audio.reader import MeetingAudioReader
from noteflow.infrastructure.audio.ring_buffer import TimestampedRingBuffer
from noteflow.infrastructure.audio.writer import MeetingAudioWriter
__all__ = [
"AudioCapture",
"AudioDeviceInfo",
"AudioFrameCallback",
"AudioLevelProvider",
"AudioPlayback",
"MeetingAudioReader",
"MeetingAudioWriter",
"PlaybackState",
"RingBuffer",
"RmsLevelProvider",
"SoundDeviceCapture",
"SoundDevicePlayback",
"TimestampedAudio",
"TimestampedRingBuffer",
"compute_rms",
]
## File: src/noteflow/infrastructure/converters/orm_converters.py
"""Convert between ORM models and domain entities."""
from __future__ import annotations
from typing import TYPE_CHECKING
from noteflow.domain.entities import (
ActionItem,
Annotation,
KeyPoint,
Meeting,
Segment,
Summary,
)
from noteflow.domain.entities import (
WordTiming as DomainWordTiming,
)
from noteflow.domain.value_objects import (
AnnotationId,
AnnotationType,
MeetingId,
MeetingState,
)
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models import (
ActionItemModel,
AnnotationModel,
KeyPointModel,
MeetingModel,
SegmentModel,
SummaryModel,
WordTimingModel,
)
class OrmConverter:
"""Convert between ORM models and domain entities."""
# --- WordTiming ---
@staticmethod
def word_timing_to_domain(model: WordTimingModel) -> DomainWordTiming:
"""Convert ORM WordTiming model to domain entity.
Args:
model: SQLAlchemy WordTimingModel instance.
Returns:
Domain WordTiming entity.
Raises:
ValueError: If timing validation fails during entity construction.
"""
return DomainWordTiming(
word=model.word,
start_time=model.start_time,
end_time=model.end_time,
probability=model.probability,
)
@staticmethod
def word_timing_to_orm_kwargs(word: DomainWordTiming) -> dict[str, str | float]:
"""Convert domain WordTiming to ORM model kwargs.
Return a dict of kwargs rather than instantiating WordTimingModel directly
to avoid circular imports and allow the repository to handle ORM construction.
Args:
word: Domain WordTiming entity.
Returns:
Dict with word, start_time, end_time, probability for ORM construction.
"""
return {
"word": word.word,
"start_time": word.start_time,
"end_time": word.end_time,
"probability": word.probability,
}
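# Example call site (see SqlAlchemySegmentRepository.add):
#     word_model = WordTimingModel(**OrmConverter.word_timing_to_orm_kwargs(word))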
# --- Meeting ---
@staticmethod
def meeting_to_domain(model: MeetingModel) -> Meeting:
"""Convert ORM Meeting model to domain entity.
Args:
model: SQLAlchemy MeetingModel instance.
Returns:
Domain Meeting entity.
"""
return Meeting(
id=MeetingId(model.id),
title=model.title,
state=MeetingState(model.state),
created_at=model.created_at,
started_at=model.started_at,
ended_at=model.ended_at,
metadata=model.metadata_,
wrapped_dek=model.wrapped_dek,
)
# --- Segment ---
@staticmethod
def segment_to_domain(model: SegmentModel, include_words: bool = True) -> Segment:
"""Convert ORM Segment model to domain entity.
Args:
model: SQLAlchemy SegmentModel instance.
include_words: Whether to include word-level timing.
Returns:
Domain Segment entity.
"""
words: list[DomainWordTiming] = []
if include_words:
words = [OrmConverter.word_timing_to_domain(w) for w in model.words]
embedding = list(model.embedding) if model.embedding is not None else None
return Segment(
segment_id=model.segment_id,
text=model.text,
start_time=model.start_time,
end_time=model.end_time,
meeting_id=MeetingId(model.meeting_id),
words=words,
language=model.language,
language_confidence=model.language_confidence,
avg_logprob=model.avg_logprob,
no_speech_prob=model.no_speech_prob,
embedding=embedding,
speaker_id=model.speaker_id,
speaker_confidence=model.speaker_confidence,
db_id=model.id,
)
# --- Annotation ---
@staticmethod
def annotation_to_domain(model: AnnotationModel) -> Annotation:
"""Convert ORM Annotation model to domain entity.
Args:
model: SQLAlchemy AnnotationModel instance.
Returns:
Domain Annotation entity.
"""
return Annotation(
id=AnnotationId(model.annotation_id),
meeting_id=MeetingId(model.meeting_id),
annotation_type=AnnotationType(model.annotation_type),
text=model.text,
start_time=model.start_time,
end_time=model.end_time,
segment_ids=model.segment_ids,
created_at=model.created_at,
db_id=model.id,
)
# --- Summary ---
@staticmethod
def key_point_to_domain(model: KeyPointModel) -> KeyPoint:
"""Convert ORM KeyPoint model to domain entity.
Args:
model: SQLAlchemy KeyPointModel instance.
Returns:
Domain KeyPoint entity.
"""
return KeyPoint(
text=model.text,
segment_ids=model.segment_ids,
start_time=model.start_time,
end_time=model.end_time,
db_id=model.id,
)
@staticmethod
def action_item_to_domain(model: ActionItemModel) -> ActionItem:
"""Convert ORM ActionItem model to domain entity.
Args:
model: SQLAlchemy ActionItemModel instance.
Returns:
Domain ActionItem entity.
"""
return ActionItem(
text=model.text,
assignee=model.assignee,
due_date=model.due_date,
priority=model.priority,
segment_ids=model.segment_ids,
db_id=model.id,
)
@staticmethod
def summary_to_domain(model: SummaryModel, meeting_id: MeetingId) -> Summary:
"""Convert ORM Summary model to domain entity.
Args:
model: SQLAlchemy SummaryModel instance.
meeting_id: Meeting identifier (passed for type safety).
Returns:
Domain Summary entity.
"""
return Summary(
meeting_id=meeting_id,
executive_summary=model.executive_summary or "",
key_points=[OrmConverter.key_point_to_domain(kp) for kp in model.key_points],
action_items=[OrmConverter.action_item_to_domain(ai) for ai in model.action_items],
generated_at=model.generated_at,
model_version=model.model_version or "",
db_id=model.id,
)
## File: src/noteflow/infrastructure/export/html.py
"""HTML exporter implementation.
Export meeting transcripts to HTML format.
"""
from __future__ import annotations
import html
from datetime import datetime
from typing import TYPE_CHECKING
from noteflow.infrastructure.export._formatting import format_datetime, format_timestamp
if TYPE_CHECKING:
from collections.abc import Sequence
from noteflow.domain.entities.meeting import Meeting
from noteflow.domain.entities.segment import Segment
def _escape(text: str) -> str:
"""Escape HTML special characters.
Args:
text: Raw text to escape.
Returns:
HTML-safe text.
"""
return html.escape(text)
# HTML template with embedded CSS for print-friendly output
_HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 0 auto;
padding: 2rem;
color: #333;
}}
h1 {{ color: #1a1a1a; border-bottom: 2px solid #e0e0e0; padding-bottom: 0.5rem; }}
h2 {{ color: #2c2c2c; margin-top: 2rem; }}
h3 {{ color: #444; }}
.metadata {{ background: #f5f5f5; padding: 1rem; border-radius: 8px; margin-bottom: 2rem; }}
.metadata dt {{ font-weight: bold; display: inline; }}
.metadata dd {{ display: inline; margin: 0 1rem 0 0.5rem; }}
.transcript {{ margin: 1rem 0; }}
.segment {{ margin-bottom: 1rem; padding: 0.5rem; border-left: 3px solid #e0e0e0; }}
.segment:hover {{ background: #f9f9f9; }}
.timestamp {{ color: #666; font-size: 0.9em; font-weight: bold; margin-right: 0.5rem; }}
.summary {{ background: #f0f7ff; padding: 1rem; border-radius: 8px; margin-top: 2rem; }}
.key-points li, .action-items li {{ margin-bottom: 0.5rem; }}
.action-items li {{ list-style-type: none; }}
.action-items li::before {{ content: '☐ '; }}
.assignee {{ color: #0066cc; font-size: 0.9em; }}
footer {{ margin-top: 3rem; padding-top: 1rem; border-top: 1px solid #e0e0e0; color: #888; font-size: 0.9em; }}
@media print {{
body {{ max-width: none; padding: 1cm; }}
.segment:hover {{ background: none; }}
}}
</style>
</head>
<body>
{content}
</body>
</html>"""
class HtmlExporter:
"""Export meeting transcripts to HTML format.
Produces clean, print-friendly HTML with embedded CSS styling,
meeting metadata, transcript with timestamps, and optional summary.
"""
@property
def format_name(self) -> str:
"""Human-readable format name."""
return "HTML"
@property
def file_extension(self) -> str:
"""File extension for HTML."""
return ".html"
def export(
self,
meeting: Meeting,
segments: Sequence[Segment],
) -> str:
"""Export meeting transcript to HTML.
Args:
meeting: Meeting entity with metadata.
segments: Ordered list of transcript segments.
Returns:
HTML-formatted transcript string.
"""
content_parts: list[str] = [
f"<h1>{_escape(meeting.title)}</h1>",
'<div class="metadata">',
"<dl>",
]
content_parts.append(
f"<dt>Date:</dt><dd>{_escape(format_datetime(meeting.created_at))}</dd>"
)
if meeting.started_at:
content_parts.append(
f"<dt>Started:</dt><dd>{_escape(format_datetime(meeting.started_at))}</dd>"
)
if meeting.ended_at:
content_parts.append(
f"<dt>Ended:</dt><dd>{_escape(format_datetime(meeting.ended_at))}</dd>"
)
content_parts.append(
f"<dt>Duration:</dt><dd>{format_timestamp(meeting.duration_seconds)}</dd>"
)
content_parts.extend(
(
f"<dt>Segments:</dt><dd>{len(segments)}</dd>",
"</dl>",
"</div>",
"<h2>Transcript</h2>",
'<div class="transcript">',
)
)
for segment in segments:
timestamp = format_timestamp(segment.start_time)
content_parts.append('<div class="segment">')
content_parts.append(f'<span class="timestamp">[{timestamp}]</span>')
content_parts.extend((f"<span>{_escape(segment.text)}</span>", "</div>"))
content_parts.append("</div>")
# Summary section (if available)
if meeting.summary:
content_parts.extend(('<div class="summary">', "<h2>Summary</h2>"))
if meeting.summary.executive_summary:
content_parts.append(f"<p>{_escape(meeting.summary.executive_summary)}</p>")
if meeting.summary.key_points:
content_parts.extend(("<h3>Key Points</h3>", '<ul class="key-points">'))
content_parts.extend(
f"<li>{_escape(point.text)}</li>" for point in meeting.summary.key_points
)
content_parts.append("</ul>")
if meeting.summary.action_items:
content_parts.extend(("<h3>Action Items</h3>", '<ul class="action-items">'))
for item in meeting.summary.action_items:
assignee = (
f' <span class="assignee">@{_escape(item.assignee)}</span>'
if item.assignee
else ""
)
content_parts.append(f"<li>{_escape(item.text)}{assignee}</li>")
content_parts.append("</ul>")
content_parts.append("</div>")
# Footer
content_parts.append("<footer>")
content_parts.extend(
(
f"Exported from NoteFlow on {_escape(format_datetime(datetime.now()))}",
"</footer>",
)
)
content = "\n".join(content_parts)
return _HTML_TEMPLATE.format(title=_escape(meeting.title), content=content)
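For reference, a minimal sketch of driving the exporter directly; in the real flow the export service resolves the meeting and its segments through the unit of work, and the output path below is illustrative.

```python
# Hypothetical standalone use of HtmlExporter; inputs come from persistence
# in production, and the output location is an arbitrary choice.
from pathlib import Path

from noteflow.infrastructure.export.html import HtmlExporter

def write_html(meeting, segments, out_dir: Path) -> Path:
    exporter = HtmlExporter()
    out_path = out_dir / f"{meeting.id}{exporter.file_extension}"
    out_path.write_text(exporter.export(meeting, segments), encoding="utf-8")
    return out_path
```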
## File: src/noteflow/infrastructure/persistence/repositories/meeting_repo.py
"""SQLAlchemy implementation of MeetingRepository."""
from __future__ import annotations
from collections.abc import Sequence
from datetime import datetime
from uuid import UUID
from sqlalchemy import func, select
from noteflow.domain.entities import Meeting
from noteflow.domain.value_objects import MeetingId, MeetingState
from noteflow.infrastructure.converters import OrmConverter
from noteflow.infrastructure.persistence.models import MeetingModel
from noteflow.infrastructure.persistence.repositories._base import BaseRepository
class SqlAlchemyMeetingRepository(BaseRepository):
"""SQLAlchemy implementation of MeetingRepository."""
async def create(self, meeting: Meeting) -> Meeting:
"""Persist a new meeting.
Args:
meeting: Meeting to create.
Returns:
Created meeting.
"""
model = MeetingModel(
id=UUID(str(meeting.id)),
title=meeting.title,
state=int(meeting.state),
created_at=meeting.created_at,
started_at=meeting.started_at,
ended_at=meeting.ended_at,
metadata_=meeting.metadata,
wrapped_dek=meeting.wrapped_dek,
)
self._session.add(model)
await self._session.flush()
return meeting
async def get(self, meeting_id: MeetingId) -> Meeting | None:
"""Retrieve a meeting by ID.
Args:
meeting_id: Meeting identifier.
Returns:
Meeting if found, None otherwise.
"""
stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting_id)))
model = await self._execute_scalar(stmt)
return None if model is None else OrmConverter.meeting_to_domain(model)
async def update(self, meeting: Meeting) -> Meeting:
"""Update an existing meeting.
Args:
meeting: Meeting with updated fields.
Returns:
Updated meeting.
Raises:
ValueError: If meeting does not exist.
"""
stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting.id)))
model = await self._execute_scalar(stmt)
if model is None:
raise ValueError(f"Meeting {meeting.id} not found")
model.title = meeting.title
model.state = int(meeting.state)
model.started_at = meeting.started_at
model.ended_at = meeting.ended_at
model.metadata_ = meeting.metadata
model.wrapped_dek = meeting.wrapped_dek
await self._session.flush()
return meeting
async def delete(self, meeting_id: MeetingId) -> bool:
"""Delete a meeting and all associated data.
Args:
meeting_id: Meeting identifier.
Returns:
True if deleted, False if not found.
"""
stmt = select(MeetingModel).where(MeetingModel.id == UUID(str(meeting_id)))
model = await self._execute_scalar(stmt)
if model is None:
return False
await self._delete_and_flush(model)
return True
async def list_all(
self,
states: list[MeetingState] | None = None,
limit: int = 100,
offset: int = 0,
sort_desc: bool = True,
) -> tuple[Sequence[Meeting], int]:
"""List meetings with optional filtering.
Args:
states: Optional list of states to filter by.
limit: Maximum number of meetings to return.
offset: Number of meetings to skip.
sort_desc: Sort by created_at descending if True.
Returns:
Tuple of (meetings list, total count matching filter).
"""
# Build base query
stmt = select(MeetingModel)
# Filter by states
if states:
state_values = [int(s) for s in states]
stmt = stmt.where(MeetingModel.state.in_(state_values))
# Count total
count_stmt = select(func.count()).select_from(stmt.subquery())
total_result = await self._session.execute(count_stmt)
total = total_result.scalar() or 0
# Sort and paginate
order_col = MeetingModel.created_at.desc() if sort_desc else MeetingModel.created_at.asc()
stmt = stmt.order_by(order_col).offset(offset).limit(limit)
result = await self._session.execute(stmt)
models = result.scalars().all()
meetings = [OrmConverter.meeting_to_domain(m) for m in models]
return meetings, total
async def count_by_state(self, state: MeetingState) -> int:
"""Count meetings in a specific state.
Args:
state: Meeting state to count.
Returns:
Number of meetings in the specified state.
"""
stmt = (
select(func.count()).select_from(MeetingModel).where(MeetingModel.state == int(state))
)
result = await self._session.execute(stmt)
return result.scalar() or 0
async def find_older_than(self, cutoff: datetime) -> Sequence[Meeting]:
"""Find completed meetings older than cutoff date.
Args:
cutoff: Cutoff datetime; meetings ended before this are returned.
Returns:
Sequence of meetings with ended_at before cutoff.
"""
# Only consider completed meetings (have ended_at set)
stmt = (
select(MeetingModel)
.where(MeetingModel.ended_at.isnot(None))
.where(MeetingModel.ended_at < cutoff)
.order_by(MeetingModel.ended_at.asc())
)
result = await self._session.execute(stmt)
models = result.scalars().all()
return [OrmConverter.meeting_to_domain(m) for m in models]
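`find_older_than` exists to feed retention cleanup. A hedged sketch of that loop follows; the 30-day window is illustrative, and `uow_factory` is assumed to be the callable produced by `SqlAlchemyUnitOfWork.factory_from_settings`.

```python
# Sketch: purge completed meetings older than a retention window.
# The window is illustrative; the real policy lives in the retention service.
from datetime import UTC, datetime, timedelta

async def purge_old_meetings(uow_factory, days: int = 30) -> int:
    cutoff = datetime.now(UTC) - timedelta(days=days)
    deleted = 0
    async with uow_factory() as uow:
        for meeting in await uow.meetings.find_older_than(cutoff):
            if await uow.meetings.delete(meeting.id):
                deleted += 1
        await uow.commit()  # the UoW never commits implicitly
    return deleted
```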
## File: src/noteflow/infrastructure/persistence/repositories/segment_repo.py
"""SQLAlchemy implementation of SegmentRepository."""
from __future__ import annotations
from collections.abc import Sequence
from uuid import UUID
from sqlalchemy import func, select
from noteflow.domain.entities import Segment
from noteflow.domain.value_objects import MeetingId
from noteflow.infrastructure.converters import OrmConverter
from noteflow.infrastructure.persistence.models import SegmentModel, WordTimingModel
from noteflow.infrastructure.persistence.repositories._base import BaseRepository
class SqlAlchemySegmentRepository(BaseRepository):
"""SQLAlchemy implementation of SegmentRepository."""
async def add(self, meeting_id: MeetingId, segment: Segment) -> Segment:
"""Add a segment to a meeting.
Args:
meeting_id: Meeting identifier.
segment: Segment to add.
Returns:
Added segment with db_id populated.
"""
model = SegmentModel(
meeting_id=UUID(str(meeting_id)),
segment_id=segment.segment_id,
text=segment.text,
start_time=segment.start_time,
end_time=segment.end_time,
language=segment.language,
language_confidence=segment.language_confidence,
avg_logprob=segment.avg_logprob,
no_speech_prob=segment.no_speech_prob,
embedding=segment.embedding,
speaker_id=segment.speaker_id,
speaker_confidence=segment.speaker_confidence,
)
# Add word timings
for word in segment.words:
word_kwargs = OrmConverter.word_timing_to_orm_kwargs(word)
word_model = WordTimingModel(**word_kwargs)
model.words.append(word_model)
self._session.add(model)
await self._session.flush()
# Update segment with db_id
segment.db_id = model.id
segment.meeting_id = meeting_id
return segment
async def add_batch(
self,
meeting_id: MeetingId,
segments: Sequence[Segment],
) -> Sequence[Segment]:
"""Add multiple segments to a meeting in batch.
Args:
meeting_id: Meeting identifier.
segments: Segments to add.
Returns:
Added segments with db_ids populated.
"""
result_segments: list[Segment] = []
for segment in segments:
added = await self.add(meeting_id, segment)
result_segments.append(added)
return result_segments
async def get_by_meeting(
self,
meeting_id: MeetingId,
include_words: bool = True,
) -> Sequence[Segment]:
"""Get all segments for a meeting.
Args:
meeting_id: Meeting identifier.
include_words: Include word-level timing.
Returns:
List of segments ordered by segment_id.
"""
stmt = (
select(SegmentModel)
.where(SegmentModel.meeting_id == UUID(str(meeting_id)))
.order_by(SegmentModel.segment_id)
)
models = await self._execute_scalars(stmt)
return [OrmConverter.segment_to_domain(m, include_words) for m in models]
async def search_semantic(
self,
query_embedding: list[float],
limit: int = 10,
meeting_id: MeetingId | None = None,
) -> Sequence[tuple[Segment, float]]:
"""Search segments by semantic similarity.
Args:
query_embedding: Query embedding vector.
limit: Maximum number of results.
meeting_id: Optional meeting to restrict search to.
Returns:
List of (segment, similarity_score) tuples.
"""
# Build query with cosine similarity
similarity = SegmentModel.embedding.cosine_distance(query_embedding)
stmt = select(SegmentModel, similarity.label("distance")).where(
SegmentModel.embedding.is_not(None)
)
if meeting_id:
stmt = stmt.where(SegmentModel.meeting_id == UUID(str(meeting_id)))
stmt = stmt.order_by(similarity).limit(limit)
result = await self._session.execute(stmt)
rows = result.all()
results: list[tuple[Segment, float]] = []
for row in rows:
model = row[0]
distance = row[1]
# Convert distance to similarity (1 - distance for cosine)
similarity_score = 1.0 - float(distance)
segment = OrmConverter.segment_to_domain(model, include_words=False)
results.append((segment, similarity_score))
return results
async def update_embedding(
self,
segment_db_id: int,
embedding: list[float],
) -> None:
"""Update the embedding for a segment.
Args:
segment_db_id: Segment database primary key.
embedding: New embedding vector.
"""
stmt = select(SegmentModel).where(SegmentModel.id == segment_db_id)
result = await self._session.execute(stmt)
if model := result.scalar_one_or_none():
model.embedding = embedding
await self._session.flush()
async def update_speaker(
self,
segment_db_id: int,
speaker_id: str | None,
speaker_confidence: float,
) -> None:
"""Update speaker diarization fields for a segment.
Args:
segment_db_id: Segment database primary key.
speaker_id: Speaker identifier from diarization.
speaker_confidence: Confidence of speaker assignment (0.0-1.0).
"""
stmt = select(SegmentModel).where(SegmentModel.id == segment_db_id)
result = await self._session.execute(stmt)
if model := result.scalar_one_or_none():
model.speaker_id = speaker_id
model.speaker_confidence = speaker_confidence
await self._session.flush()
async def get_next_segment_id(self, meeting_id: MeetingId) -> int:
"""Get the next segment_id for a meeting.
Args:
meeting_id: Meeting identifier.
Returns:
Next segment_id (max + 1), or 0 if no segments exist.
"""
stmt = select(func.max(SegmentModel.segment_id)).where(
SegmentModel.meeting_id == UUID(str(meeting_id))
)
result = await self._session.execute(stmt)
max_segment_id = result.scalar_one_or_none()
return 0 if max_segment_id is None else int(max_segment_id) + 1
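A short sketch of consuming `search_semantic`; the `embed` callable stands in for whatever encoder produced the stored `SegmentModel.embedding` vectors and is not defined here.

```python
# Sketch: semantic search over stored segments. embed() is a placeholder for
# the embedding model that produced the stored vectors.
async def find_similar(uow, query: str, embed) -> None:
    query_vec = embed(query)  # list[float], same dimensionality as stored vectors
    for segment, score in await uow.segments.search_semantic(query_vec, limit=5):
        # score is 1 - cosine distance, so higher means more similar
        print(f"{score:.2f} [{segment.start_time:.1f}s] {segment.text}")
```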
## File: src/noteflow/infrastructure/persistence/unit_of_work.py
"""SQLAlchemy implementation of Unit of Work pattern."""
from __future__ import annotations
from collections.abc import Callable
from typing import Self
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from noteflow.config.settings import Settings
from noteflow.infrastructure.persistence.database import (
create_async_engine,
get_async_session_factory,
)
from .repositories import (
SqlAlchemyAnnotationRepository,
SqlAlchemyMeetingRepository,
SqlAlchemySegmentRepository,
SqlAlchemySummaryRepository,
)
class SqlAlchemyUnitOfWork:
"""SQLAlchemy implementation of Unit of Work.
Provides transactional consistency across repositories.
Use as an async context manager for automatic commit/rollback.
Example:
async with SqlAlchemyUnitOfWork(session_factory) as uow:
meeting = await uow.meetings.get(meeting_id)
await uow.segments.add(meeting_id, segment)
await uow.commit()
"""
def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
"""Initialize unit of work with session factory.
Args:
session_factory: Factory for creating async sessions.
"""
self._session_factory = session_factory
self._session: AsyncSession | None = None
self._annotations_repo: SqlAlchemyAnnotationRepository | None = None
self._meetings_repo: SqlAlchemyMeetingRepository | None = None
self._segments_repo: SqlAlchemySegmentRepository | None = None
self._summaries_repo: SqlAlchemySummaryRepository | None = None
# --- Constructors -------------------------------------------------
@classmethod
def from_settings(cls, settings: Settings) -> SqlAlchemyUnitOfWork:
"""Create a unit of work from application settings.
Builds an async engine and session factory using configured database
settings (URL, pool size, echo), then returns a new unit of work
instance bound to that factory.
"""
engine = create_async_engine(settings)
session_factory = get_async_session_factory(engine)
return cls(session_factory)
@classmethod
def factory_from_settings(cls, settings: Settings) -> Callable[[], SqlAlchemyUnitOfWork]:
"""Create a reusable factory that yields fresh UoW instances.
The factory reuses a shared async session factory (and engine) while
returning a new `SqlAlchemyUnitOfWork` object each time. Useful when
callers need independent UoW instances for sequential operations
(e.g., retention cleanup) to avoid re-entrancy issues.
"""
engine = create_async_engine(settings)
session_factory = get_async_session_factory(engine)
def _factory() -> SqlAlchemyUnitOfWork:
return cls(session_factory)
return _factory
@property
def annotations(self) -> SqlAlchemyAnnotationRepository:
"""Get annotations repository."""
if self._annotations_repo is None:
raise RuntimeError("UnitOfWork not in context")
return self._annotations_repo
@property
def meetings(self) -> SqlAlchemyMeetingRepository:
"""Get meetings repository."""
if self._meetings_repo is None:
raise RuntimeError("UnitOfWork not in context")
return self._meetings_repo
@property
def segments(self) -> SqlAlchemySegmentRepository:
"""Get segments repository."""
if self._segments_repo is None:
raise RuntimeError("UnitOfWork not in context")
return self._segments_repo
@property
def summaries(self) -> SqlAlchemySummaryRepository:
"""Get summaries repository."""
if self._summaries_repo is None:
raise RuntimeError("UnitOfWork not in context")
return self._summaries_repo
async def __aenter__(self) -> Self:
"""Enter the unit of work context.
Creates session and caches repository instances.
Returns:
Self for use in async with statement.
"""
self._session = self._session_factory()
self._annotations_repo = SqlAlchemyAnnotationRepository(self._session)
self._meetings_repo = SqlAlchemyMeetingRepository(self._session)
self._segments_repo = SqlAlchemySegmentRepository(self._session)
self._summaries_repo = SqlAlchemySummaryRepository(self._session)
return self
async def __aexit__(
self,
exc_type: type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: object,
) -> None:
"""Exit the unit of work context.
Rolls back on exception, then closes the session and clears cached
repositories; commits must be issued explicitly before exit.
Args:
exc_type: Exception type if raised.
exc_val: Exception value if raised.
exc_tb: Exception traceback if raised.
"""
if self._session is None:
return
if exc_type is not None:
await self.rollback()
await self._session.close()
self._session = None
self._annotations_repo = None
self._meetings_repo = None
self._segments_repo = None
self._summaries_repo = None
async def commit(self) -> None:
"""Commit the current transaction."""
if self._session is None:
raise RuntimeError("UnitOfWork not in context")
await self._session.commit()
async def rollback(self) -> None:
"""Rollback the current transaction."""
if self._session is None:
raise RuntimeError("UnitOfWork not in context")
await self._session.rollback()
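To make the two constructors concrete: `from_settings` suits a single long-lived unit of work, while `factory_from_settings` hands out a fresh instance (and session) per operation over a shared engine. A minimal sketch, assuming default `Settings` resolve a reachable database:

```python
# Sketch: one UnitOfWork (and session) per operation, sharing one engine.
from noteflow.config.settings import Settings
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork

settings = Settings()  # assumed to resolve a reachable database
new_uow = SqlAlchemyUnitOfWork.factory_from_settings(settings)

async def retitle(meeting_id, title: str) -> None:
    async with new_uow() as uow:
        if meeting := await uow.meetings.get(meeting_id):
            meeting.title = title
            await uow.meetings.update(meeting)
        await uow.commit()  # __aexit__ only rolls back; it never commits
```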
## File: tests/application/test_meeting_service.py
"""Tests for MeetingService application service."""
from __future__ import annotations
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
import pytest
from noteflow.application.services.meeting_service import MeetingService
from noteflow.domain.entities import Annotation, Meeting, Segment, Summary
from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState
if TYPE_CHECKING:
from collections.abc import Sequence
class TestMeetingServiceCreation:
"""Tests for meeting creation operations."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.rollback = AsyncMock()
uow.meetings = MagicMock()
uow.segments = MagicMock()
uow.summaries = MagicMock()
return uow
async def test_create_meeting_success(self, mock_uow: MagicMock) -> None:
"""Test successful meeting creation."""
created_meeting = Meeting.create(title="Test Meeting")
mock_uow.meetings.create = AsyncMock(return_value=created_meeting)
service = MeetingService(mock_uow)
result = await service.create_meeting(title="Test Meeting")
assert result.title == "Test Meeting"
mock_uow.meetings.create.assert_called_once()
mock_uow.commit.assert_called_once()
async def test_create_meeting_with_metadata(self, mock_uow: MagicMock) -> None:
"""Test meeting creation with metadata."""
metadata = {"project": "NoteFlow"}
created_meeting = Meeting.create(title="Test", metadata=metadata)
mock_uow.meetings.create = AsyncMock(return_value=created_meeting)
service = MeetingService(mock_uow)
result = await service.create_meeting(title="Test", metadata=metadata)
assert result.metadata == metadata
class TestMeetingServiceRetrieval:
"""Tests for meeting retrieval operations."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.meetings = MagicMock()
uow.segments = MagicMock()
uow.summaries = MagicMock()
return uow
async def test_get_meeting_found(self, mock_uow: MagicMock) -> None:
"""Test retrieving existing meeting."""
meeting_id = MeetingId(uuid4())
expected_meeting = Meeting.create(title="Found")
mock_uow.meetings.get = AsyncMock(return_value=expected_meeting)
service = MeetingService(mock_uow)
result = await service.get_meeting(meeting_id)
assert result is not None
assert result.title == "Found"
async def test_get_meeting_not_found(self, mock_uow: MagicMock) -> None:
"""Test retrieving non-existent meeting."""
meeting_id = MeetingId(uuid4())
mock_uow.meetings.get = AsyncMock(return_value=None)
service = MeetingService(mock_uow)
result = await service.get_meeting(meeting_id)
assert result is None
async def test_list_meetings(self, mock_uow: MagicMock) -> None:
"""Test listing meetings with pagination."""
meetings: Sequence[Meeting] = [
Meeting.create(title="Meeting 1"),
Meeting.create(title="Meeting 2"),
]
mock_uow.meetings.list_all = AsyncMock(return_value=(meetings, 10))
service = MeetingService(mock_uow)
result, total = await service.list_meetings(limit=2, offset=0)
assert len(result) == 2
assert total == 10
mock_uow.meetings.list_all.assert_called_once_with(
states=None, limit=2, offset=0, sort_desc=True
)
class TestMeetingServiceStateTransitions:
"""Tests for meeting state transition operations."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.meetings = MagicMock()
return uow
async def test_start_recording_success(self, mock_uow: MagicMock) -> None:
"""Test starting recording on existing meeting."""
meeting = Meeting.create(title="Test")
meeting_id = meeting.id
mock_uow.meetings.get = AsyncMock(return_value=meeting)
mock_uow.meetings.update = AsyncMock(return_value=meeting)
service = MeetingService(mock_uow)
result = await service.start_recording(meeting_id)
assert result is not None
assert result.state == MeetingState.RECORDING
mock_uow.commit.assert_called_once()
async def test_start_recording_invalid_state_raises(self, mock_uow: MagicMock) -> None:
"""Test start_recording propagates invalid transition errors."""
meeting = Meeting.create(title="Test")
meeting.start_recording()
mock_uow.meetings.get = AsyncMock(return_value=meeting)
service = MeetingService(mock_uow)
with pytest.raises(ValueError, match="Cannot start recording"):
await service.start_recording(meeting.id)
mock_uow.commit.assert_not_called()
async def test_start_recording_not_found(self, mock_uow: MagicMock) -> None:
"""Test starting recording on non-existent meeting."""
meeting_id = MeetingId(uuid4())
mock_uow.meetings.get = AsyncMock(return_value=None)
service = MeetingService(mock_uow)
result = await service.start_recording(meeting_id)
assert result is None
mock_uow.commit.assert_not_called()
async def test_stop_meeting_success(self, mock_uow: MagicMock) -> None:
"""Test stopping recording on meeting."""
meeting = Meeting.create(title="Test")
meeting.start_recording() # Move to RECORDING state
meeting_id = meeting.id
mock_uow.meetings.get = AsyncMock(return_value=meeting)
mock_uow.meetings.update = AsyncMock(return_value=meeting)
service = MeetingService(mock_uow)
result = await service.stop_meeting(meeting_id)
assert result is not None
assert result.state == MeetingState.STOPPED
mock_uow.commit.assert_called_once()
async def test_stop_meeting_invalid_state_raises(self, mock_uow: MagicMock) -> None:
"""Test stop_meeting raises when not in RECORDING state."""
meeting = Meeting.create(title="Test")
mock_uow.meetings.get = AsyncMock(return_value=meeting)
service = MeetingService(mock_uow)
with pytest.raises(ValueError, match="Cannot begin stopping"):
await service.stop_meeting(meeting.id)
mock_uow.commit.assert_not_called()
async def test_complete_meeting_success(self, mock_uow: MagicMock) -> None:
"""Test completing a stopped meeting."""
meeting = Meeting.create(title="Test")
meeting.start_recording()
meeting.begin_stopping()
meeting.stop_recording() # Move to STOPPED state (via STOPPING)
meeting_id = meeting.id
mock_uow.meetings.get = AsyncMock(return_value=meeting)
mock_uow.meetings.update = AsyncMock(return_value=meeting)
service = MeetingService(mock_uow)
result = await service.complete_meeting(meeting_id)
assert result is not None
assert result.state == MeetingState.COMPLETED
mock_uow.commit.assert_called_once()
async def test_complete_meeting_invalid_state_raises(self, mock_uow: MagicMock) -> None:
"""Test complete_meeting raises from invalid state."""
meeting = Meeting.create(title="Test")
mock_uow.meetings.get = AsyncMock(return_value=meeting)
service = MeetingService(mock_uow)
with pytest.raises(ValueError, match="Cannot complete"):
await service.complete_meeting(meeting.id)
mock_uow.commit.assert_not_called()
class TestMeetingServiceDeletion:
"""Tests for meeting deletion operations."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.meetings = MagicMock()
return uow
async def test_delete_meeting_success(self, mock_uow: MagicMock) -> None:
"""Test successful meeting deletion."""
meeting_id = MeetingId(uuid4())
mock_meeting = Meeting.create(title="Test Meeting")
mock_uow.meetings.get = AsyncMock(return_value=mock_meeting)
mock_uow.meetings.delete = AsyncMock(return_value=True)
service = MeetingService(mock_uow)
result = await service.delete_meeting(meeting_id)
assert result is True
mock_uow.commit.assert_called_once()
async def test_delete_meeting_not_found(self, mock_uow: MagicMock) -> None:
"""Test deleting non-existent meeting returns False."""
meeting_id = MeetingId(uuid4())
mock_uow.meetings.get = AsyncMock(return_value=None)
mock_uow.meetings.delete = AsyncMock(return_value=False)
service = MeetingService(mock_uow)
result = await service.delete_meeting(meeting_id)
assert result is False
mock_uow.meetings.delete.assert_not_called()
mock_uow.commit.assert_not_called()
async def test_delete_meeting_removes_filesystem_assets(
self, mock_uow: MagicMock, tmp_path: Path
) -> None:
"""Test deletion removes filesystem assets when directory provided."""
meeting_id = MeetingId(uuid4())
mock_meeting = Meeting.create(title="Test Meeting")
mock_uow.meetings.get = AsyncMock(return_value=mock_meeting)
mock_uow.meetings.delete = AsyncMock(return_value=True)
# Create meeting directory with test files
meeting_dir = tmp_path / str(meeting_id)
meeting_dir.mkdir()
(meeting_dir / "audio.wav").touch()
(meeting_dir / "manifest.json").touch()
service = MeetingService(mock_uow)
result = await service.delete_meeting(meeting_id, meetings_dir=tmp_path)
assert result is True
assert not meeting_dir.exists()
async def test_delete_meeting_handles_missing_assets(
self, mock_uow: MagicMock, tmp_path: Path
) -> None:
"""Test deletion succeeds even when assets directory doesn't exist."""
meeting_id = MeetingId(uuid4())
mock_meeting = Meeting.create(title="Test Meeting")
mock_uow.meetings.get = AsyncMock(return_value=mock_meeting)
mock_uow.meetings.delete = AsyncMock(return_value=True)
# Don't create the meeting directory
service = MeetingService(mock_uow)
result = await service.delete_meeting(meeting_id, meetings_dir=tmp_path)
assert result is True
mock_uow.commit.assert_called_once()
async def test_delete_meeting_without_dir_only_deletes_db(self, mock_uow: MagicMock) -> None:
"""Test deletion without meetings_dir only deletes database records."""
meeting_id = MeetingId(uuid4())
mock_meeting = Meeting.create(title="Test Meeting")
mock_uow.meetings.get = AsyncMock(return_value=mock_meeting)
mock_uow.meetings.delete = AsyncMock(return_value=True)
service = MeetingService(mock_uow)
result = await service.delete_meeting(meeting_id)
assert result is True
mock_uow.meetings.delete.assert_called_once_with(meeting_id)
mock_uow.commit.assert_called_once()
class TestMeetingServiceSegments:
"""Tests for segment operations."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.segments = MagicMock()
return uow
async def test_add_segment_success(self, mock_uow: MagicMock) -> None:
"""Test adding a segment to meeting."""
meeting_id = MeetingId(uuid4())
segment = Segment(
segment_id=0, text="Hello", start_time=0.0, end_time=1.0, meeting_id=meeting_id
)
mock_uow.segments.add = AsyncMock(return_value=segment)
service = MeetingService(mock_uow)
result = await service.add_segment(
meeting_id=meeting_id,
segment_id=0,
text="Hello",
start_time=0.0,
end_time=1.0,
)
assert result.text == "Hello"
mock_uow.segments.add.assert_called_once()
mock_uow.commit.assert_called_once()
async def test_get_segments(self, mock_uow: MagicMock) -> None:
"""Test retrieving segments for meeting."""
meeting_id = MeetingId(uuid4())
segments: Sequence[Segment] = [
Segment(segment_id=0, text="First", start_time=0.0, end_time=1.0),
Segment(segment_id=1, text="Second", start_time=1.0, end_time=2.0),
]
mock_uow.segments.get_by_meeting = AsyncMock(return_value=segments)
service = MeetingService(mock_uow)
result = await service.get_segments(meeting_id)
assert len(result) == 2
mock_uow.segments.get_by_meeting.assert_called_once_with(meeting_id, include_words=True)
async def test_add_segments_batch(self, mock_uow: MagicMock) -> None:
"""Test batch adding segments commits once."""
meeting_id = MeetingId(uuid4())
segments = [
Segment(segment_id=0, text="A", start_time=0.0, end_time=1.0),
Segment(segment_id=1, text="B", start_time=1.0, end_time=2.0),
]
mock_uow.segments.add_batch = AsyncMock(return_value=segments)
service = MeetingService(mock_uow)
result = await service.add_segments_batch(meeting_id=meeting_id, segments=segments)
assert len(result) == 2
mock_uow.segments.add_batch.assert_called_once_with(meeting_id, segments)
mock_uow.commit.assert_called_once()
class TestMeetingServiceSummaries:
"""Tests for summary operations."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.summaries = MagicMock()
return uow
async def test_save_summary_success(self, mock_uow: MagicMock) -> None:
"""Test saving a meeting summary."""
meeting_id = MeetingId(uuid4())
summary = Summary(
meeting_id=meeting_id,
executive_summary="Test summary",
generated_at=datetime.now(UTC),
model_version="test-v1",
)
mock_uow.summaries.save = AsyncMock(return_value=summary)
service = MeetingService(mock_uow)
result = await service.save_summary(
meeting_id=meeting_id,
executive_summary="Test summary",
model_version="test-v1",
)
assert result.executive_summary == "Test summary"
mock_uow.summaries.save.assert_called_once()
mock_uow.commit.assert_called_once()
async def test_get_summary_found(self, mock_uow: MagicMock) -> None:
"""Test retrieving existing summary."""
meeting_id = MeetingId(uuid4())
summary = Summary(meeting_id=meeting_id, executive_summary="Found")
mock_uow.summaries.get_by_meeting = AsyncMock(return_value=summary)
service = MeetingService(mock_uow)
result = await service.get_summary(meeting_id)
assert result is not None
assert result.executive_summary == "Found"
async def test_get_summary_not_found(self, mock_uow: MagicMock) -> None:
"""Test retrieving non-existent summary."""
meeting_id = MeetingId(uuid4())
mock_uow.summaries.get_by_meeting = AsyncMock(return_value=None)
service = MeetingService(mock_uow)
result = await service.get_summary(meeting_id)
assert result is None
class TestMeetingServiceSearch:
"""Tests for semantic search operations."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.segments = MagicMock()
return uow
async def test_search_segments_delegates(self, mock_uow: MagicMock) -> None:
"""Test search_segments delegates to repository."""
meeting_id = MeetingId(uuid4())
segment = Segment(segment_id=0, text="A", start_time=0.0, end_time=1.0)
mock_uow.segments.search_semantic = AsyncMock(return_value=[(segment, 0.9)])
service = MeetingService(mock_uow)
result = await service.search_segments(query_embedding=[0.1], meeting_id=meeting_id)
assert len(result) == 1
mock_uow.segments.search_semantic.assert_called_once_with(
query_embedding=[0.1], limit=10, meeting_id=meeting_id
)
class TestMeetingServiceAnnotations:
"""Tests for annotation operations."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.annotations = MagicMock()
return uow
async def test_add_annotation_success(self, mock_uow: MagicMock) -> None:
"""Test adding an annotation commits and returns saved entity."""
meeting_id = MeetingId(uuid4())
mock_uow.annotations.add = AsyncMock()
service = MeetingService(mock_uow)
await service.add_annotation(
meeting_id=meeting_id,
annotation_type=AnnotationType.NOTE,
text="Note",
start_time=0.0,
end_time=1.0,
)
mock_uow.annotations.add.assert_called_once()
mock_uow.commit.assert_called_once()
async def test_get_annotations_in_range(self, mock_uow: MagicMock) -> None:
"""Test get_annotations_in_range delegates to repository."""
meeting_id = MeetingId(uuid4())
mock_uow.annotations.get_by_time_range = AsyncMock(return_value=[])
service = MeetingService(mock_uow)
await service.get_annotations_in_range(meeting_id, start_time=1.0, end_time=2.0)
mock_uow.annotations.get_by_time_range.assert_called_once_with(meeting_id, 1.0, 2.0)
async def test_update_annotation_not_found_raises(self, mock_uow: MagicMock) -> None:
"""Test update_annotation propagates repository errors."""
meeting_id = MeetingId(uuid4())
annotation = Annotation(
id=AnnotationId(uuid4()),
meeting_id=meeting_id,
annotation_type=AnnotationType.NOTE,
text="Note",
start_time=0.0,
end_time=1.0,
)
mock_uow.annotations.update = AsyncMock(side_effect=ValueError("Annotation not found"))
service = MeetingService(mock_uow)
with pytest.raises(ValueError, match="Annotation not found"):
await service.update_annotation(annotation)
mock_uow.commit.assert_not_called()
async def test_delete_annotation_not_found(self, mock_uow: MagicMock) -> None:
"""Test delete_annotation returns False when missing."""
annotation_id = AnnotationId(uuid4())
mock_uow.annotations.delete = AsyncMock(return_value=False)
service = MeetingService(mock_uow)
result = await service.delete_annotation(annotation_id)
assert result is False
mock_uow.commit.assert_not_called()
class TestMeetingServiceAdditionalBranches:
"""Additional branch coverage for MeetingService."""
@pytest.fixture
def mock_uow(self) -> MagicMock:
"""Create a mock UnitOfWork with all repos."""
uow = MagicMock()
uow.__aenter__ = AsyncMock(return_value=uow)
uow.__aexit__ = AsyncMock(return_value=None)
uow.commit = AsyncMock()
uow.meetings = MagicMock()
uow.segments = MagicMock()
uow.summaries = MagicMock()
uow.annotations = MagicMock()
return uow
async def test_stop_meeting_not_found(self, mock_uow: MagicMock) -> None:
"""stop_meeting should return None when meeting is missing."""
mock_uow.meetings.get = AsyncMock(return_value=None)
service = MeetingService(mock_uow)
result = await service.stop_meeting(MeetingId(uuid4()))
assert result is None
mock_uow.commit.assert_not_called()
async def test_complete_meeting_not_found(self, mock_uow: MagicMock) -> None:
"""complete_meeting should return None when meeting is missing."""
mock_uow.meetings.get = AsyncMock(return_value=None)
service = MeetingService(mock_uow)
result = await service.complete_meeting(MeetingId(uuid4()))
assert result is None
mock_uow.commit.assert_not_called()
async def test_get_annotation_delegates_repository(self, mock_uow: MagicMock) -> None:
"""get_annotation should delegate to repository."""
annotation = Annotation(
id=AnnotationId(uuid4()),
meeting_id=MeetingId(uuid4()),
annotation_type=AnnotationType.NOTE,
text="note",
start_time=0.0,
end_time=1.0,
)
mock_uow.annotations.get = AsyncMock(return_value=annotation)
service = MeetingService(mock_uow)
result = await service.get_annotation(annotation.id)
assert result is annotation
mock_uow.annotations.get.assert_called_once_with(annotation.id)
async def test_get_annotations_delegates_repository(self, mock_uow: MagicMock) -> None:
"""get_annotations should delegate to repository."""
meeting_id = MeetingId(uuid4())
mock_uow.annotations.get_by_meeting = AsyncMock(return_value=[])
service = MeetingService(mock_uow)
await service.get_annotations(meeting_id)
mock_uow.annotations.get_by_meeting.assert_called_once_with(meeting_id)
async def test_delete_annotation_success_commits(self, mock_uow: MagicMock) -> None:
"""delete_annotation should commit on success."""
annotation_id = AnnotationId(uuid4())
mock_uow.annotations.delete = AsyncMock(return_value=True)
service = MeetingService(mock_uow)
result = await service.delete_annotation(annotation_id)
assert result is True
mock_uow.commit.assert_called_once()
## File: tests/application/test_summarization_service.py
"""Tests for summarization service."""
from __future__ import annotations
from datetime import UTC, datetime
from uuid import uuid4
import pytest
from noteflow.application.services import (
SummarizationMode,
SummarizationService,
SummarizationServiceSettings,
)
from noteflow.domain.entities import KeyPoint, Segment, Summary
from noteflow.domain.summarization import (
CitationVerificationResult,
ProviderUnavailableError,
SummarizationRequest,
SummarizationResult,
)
from noteflow.domain.value_objects import MeetingId
def _segment(segment_id: int, text: str = "Test") -> Segment:
"""Create a test segment."""
return Segment(
segment_id=segment_id,
text=text,
start_time=segment_id * 5.0,
end_time=(segment_id + 1) * 5.0,
)
class MockProvider:
"""Mock summarizer provider for testing."""
def __init__(
self,
name: str = "mock",
available: bool = True,
requires_consent: bool = False,
) -> None:
self._name = name
self._available = available
self._requires_consent = requires_consent
self.call_count = 0
@property
def provider_name(self) -> str:
return self._name
@property
def is_available(self) -> bool:
return self._available
@property
def requires_cloud_consent(self) -> bool:
return self._requires_consent
async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
self.call_count += 1
summary = Summary(
meeting_id=request.meeting_id,
executive_summary=f"Summary from {self._name}",
key_points=[KeyPoint(text=f"Point from {self._name}", segment_ids=[0])],
generated_at=datetime.now(UTC),
)
return SummarizationResult(
summary=summary,
model_name=f"{self._name}-model",
provider_name=self._name,
)
class MockVerifier:
"""Mock citation verifier for testing."""
def __init__(self, is_valid: bool = True) -> None:
self._is_valid = is_valid
self.verify_call_count = 0
self.filter_call_count = 0
def verify_citations(
self, summary: Summary, segments: list[Segment]
) -> CitationVerificationResult:
self.verify_call_count += 1
if self._is_valid:
return CitationVerificationResult(is_valid=True)
return CitationVerificationResult(
is_valid=False,
invalid_key_point_indices=(0,),
missing_segment_ids=(99,),
)
def filter_invalid_citations(self, summary: Summary, segments: list[Segment]) -> Summary:
self.filter_call_count += 1
# Return summary with empty segment_ids for key points
return Summary(
meeting_id=summary.meeting_id,
executive_summary=summary.executive_summary,
key_points=[KeyPoint(text=kp.text, segment_ids=[]) for kp in summary.key_points],
action_items=[],
generated_at=summary.generated_at,
)
class TestSummarizationServiceConfiguration:
"""Tests for SummarizationService configuration."""
def test_register_provider(self) -> None:
"""Provider should be registered for mode."""
service = SummarizationService()
provider = MockProvider()
service.register_provider(SummarizationMode.LOCAL, provider)
assert SummarizationMode.LOCAL in service.providers
def test_set_verifier(self) -> None:
"""Verifier should be set."""
service = SummarizationService()
verifier = MockVerifier()
service.set_verifier(verifier)
assert service.verifier is verifier
def test_get_available_modes_with_local(self) -> None:
"""Available modes should include local when provider is available."""
service = SummarizationService()
service.register_provider(SummarizationMode.LOCAL, MockProvider())
available = service.get_available_modes()
assert SummarizationMode.LOCAL in available
def test_get_available_modes_excludes_unavailable(self) -> None:
"""Unavailable providers should not be in available modes."""
service = SummarizationService()
service.register_provider(SummarizationMode.LOCAL, MockProvider(available=False))
available = service.get_available_modes()
assert SummarizationMode.LOCAL not in available
def test_cloud_requires_consent(self) -> None:
"""Cloud mode should require consent to be available."""
service = SummarizationService()
service.register_provider(
SummarizationMode.CLOUD,
MockProvider(name="cloud", requires_consent=True),
)
available_without_consent = service.get_available_modes()
service.grant_cloud_consent()
available_with_consent = service.get_available_modes()
assert SummarizationMode.CLOUD not in available_without_consent
assert SummarizationMode.CLOUD in available_with_consent
def test_revoke_cloud_consent(self) -> None:
"""Revoking consent should remove cloud from available modes."""
service = SummarizationService()
service.register_provider(
SummarizationMode.CLOUD,
MockProvider(name="cloud", requires_consent=True),
)
service.grant_cloud_consent()
service.revoke_cloud_consent()
available = service.get_available_modes()
assert SummarizationMode.CLOUD not in available
class TestSummarizationServiceSummarize:
"""Tests for SummarizationService.summarize method."""
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Create test meeting ID."""
return MeetingId(uuid4())
@pytest.mark.asyncio
async def test_summarize_uses_default_mode(self, meeting_id: MeetingId) -> None:
"""Summarize should use default mode when not specified."""
provider = MockProvider()
service = SummarizationService(
settings=SummarizationServiceSettings(default_mode=SummarizationMode.LOCAL)
)
service.register_provider(SummarizationMode.LOCAL, provider)
segments = [_segment(0)]
result = await service.summarize(meeting_id, segments)
assert result.provider_used == "mock"
assert provider.call_count == 1
@pytest.mark.asyncio
async def test_summarize_uses_specified_mode(self, meeting_id: MeetingId) -> None:
"""Summarize should use specified mode."""
local_provider = MockProvider(name="local")
mock_provider = MockProvider(name="mock")
service = SummarizationService()
service.register_provider(SummarizationMode.LOCAL, local_provider)
service.register_provider(SummarizationMode.MOCK, mock_provider)
segments = [_segment(0)]
result = await service.summarize(meeting_id, segments, mode=SummarizationMode.MOCK)
assert result.provider_used == "mock"
assert mock_provider.call_count == 1
assert local_provider.call_count == 0
@pytest.mark.asyncio
async def test_summarize_falls_back_on_unavailable(self, meeting_id: MeetingId) -> None:
"""Should fall back to available provider when primary unavailable."""
unavailable = MockProvider(name="cloud", available=False)
fallback = MockProvider(name="local")
service = SummarizationService(
settings=SummarizationServiceSettings(
fallback_to_local=True,
cloud_consent_granted=True,
)
)
service.register_provider(SummarizationMode.CLOUD, unavailable)
service.register_provider(SummarizationMode.LOCAL, fallback)
segments = [_segment(0)]
result = await service.summarize(meeting_id, segments, mode=SummarizationMode.CLOUD)
assert result.provider_used == "local"
assert result.fallback_used is True
@pytest.mark.asyncio
async def test_summarize_raises_when_no_fallback(self, meeting_id: MeetingId) -> None:
"""Should raise error when no fallback available."""
unavailable = MockProvider(name="local", available=False)
service = SummarizationService(
settings=SummarizationServiceSettings(fallback_to_local=False)
)
service.register_provider(SummarizationMode.LOCAL, unavailable)
segments = [_segment(0)]
with pytest.raises(ProviderUnavailableError):
await service.summarize(meeting_id, segments, mode=SummarizationMode.LOCAL)
@pytest.mark.asyncio
async def test_summarize_verifies_citations(self, meeting_id: MeetingId) -> None:
"""Citations should be verified when enabled."""
provider = MockProvider()
verifier = MockVerifier(is_valid=True)
service = SummarizationService(settings=SummarizationServiceSettings(verify_citations=True))
service.register_provider(SummarizationMode.LOCAL, provider)
service.set_verifier(verifier)
segments = [_segment(0)]
result = await service.summarize(meeting_id, segments)
assert verifier.verify_call_count == 1
assert result.verification is not None
assert result.verification.is_valid is True
@pytest.mark.asyncio
async def test_summarize_filters_invalid_citations(self, meeting_id: MeetingId) -> None:
"""Invalid citations should be filtered when enabled."""
provider = MockProvider()
verifier = MockVerifier(is_valid=False)
service = SummarizationService(
settings=SummarizationServiceSettings(
verify_citations=True,
filter_invalid_citations=True,
)
)
service.register_provider(SummarizationMode.LOCAL, provider)
service.set_verifier(verifier)
segments = [_segment(0)]
result = await service.summarize(meeting_id, segments)
assert verifier.filter_call_count == 1
assert result.filtered_summary is not None
assert result.has_invalid_citations is True
@pytest.mark.asyncio
async def test_summarize_passes_max_limits(self, meeting_id: MeetingId) -> None:
"""Max limits should be passed to provider."""
captured_request: SummarizationRequest | None = None
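        # Subclass that records the request it receives before delegating to the
        # mock provider, so the max_* limits can be asserted afterwards.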
class CapturingProvider(MockProvider):
async def summarize(self, request: SummarizationRequest) -> SummarizationResult:
nonlocal captured_request
captured_request = request
return await super().summarize(request)
provider = CapturingProvider()
service = SummarizationService()
service.register_provider(SummarizationMode.LOCAL, provider)
segments = [_segment(0)]
await service.summarize(meeting_id, segments, max_key_points=3, max_action_items=5)
assert captured_request is not None
assert captured_request.max_key_points == 3
assert captured_request.max_action_items == 5
@pytest.mark.asyncio
async def test_summarize_requires_cloud_consent(self, meeting_id: MeetingId) -> None:
"""Cloud mode should require consent."""
cloud = MockProvider(name="cloud", requires_consent=True)
fallback = MockProvider(name="local")
service = SummarizationService(
settings=SummarizationServiceSettings(
cloud_consent_granted=False, fallback_to_local=True
)
)
service.register_provider(SummarizationMode.CLOUD, cloud)
service.register_provider(SummarizationMode.LOCAL, fallback)
segments = [_segment(0)]
result = await service.summarize(meeting_id, segments, mode=SummarizationMode.CLOUD)
assert result.provider_used == "local"
assert result.fallback_used is True
assert cloud.call_count == 0
@pytest.mark.asyncio
async def test_summarize_calls_persist_callback(self, meeting_id: MeetingId) -> None:
"""Persist callback should be called with final summary."""
persisted: list[Summary] = []
async def mock_persist(summary: Summary) -> None:
persisted.append(summary)
provider = MockProvider()
service = SummarizationService(on_persist=mock_persist)
service.register_provider(SummarizationMode.LOCAL, provider)
segments = [_segment(0)]
await service.summarize(meeting_id, segments)
assert len(persisted) == 1
assert persisted[0].meeting_id == meeting_id
@pytest.mark.asyncio
async def test_summarize_persist_callback_receives_filtered_summary(
self, meeting_id: MeetingId
) -> None:
"""Persist callback should receive filtered summary when available."""
persisted: list[Summary] = []
async def mock_persist(summary: Summary) -> None:
persisted.append(summary)
provider = MockProvider()
verifier = MockVerifier(is_valid=False)
service = SummarizationService(
settings=SummarizationServiceSettings(
verify_citations=True,
filter_invalid_citations=True,
),
on_persist=mock_persist,
)
service.register_provider(SummarizationMode.LOCAL, provider)
service.set_verifier(verifier)
segments = [_segment(0)]
result = await service.summarize(meeting_id, segments)
assert len(persisted) == 1
# Should persist the filtered summary, not original
assert persisted[0] is result.filtered_summary
class TestSummarizationServiceResult:
"""Tests for SummarizationServiceResult."""
def test_summary_returns_filtered_when_available(self) -> None:
"""summary property should return filtered_summary if available."""
from noteflow.application.services import SummarizationServiceResult
original = Summary(
meeting_id=MeetingId(uuid4()),
executive_summary="Original",
key_points=[KeyPoint(text="Point", segment_ids=[99])],
)
filtered = Summary(
meeting_id=original.meeting_id,
executive_summary="Original",
key_points=[KeyPoint(text="Point", segment_ids=[])],
)
result = SummarizationServiceResult(
result=SummarizationResult(
summary=original,
model_name="test",
provider_name="test",
),
filtered_summary=filtered,
)
assert result.summary is filtered
def test_summary_returns_original_when_no_filter(self) -> None:
"""summary property should return original when no filter applied."""
from noteflow.application.services import SummarizationServiceResult
original = Summary(
meeting_id=MeetingId(uuid4()),
executive_summary="Original",
key_points=[],
)
result = SummarizationServiceResult(
result=SummarizationResult(
summary=original,
model_name="test",
provider_name="test",
),
)
assert result.summary is original
def test_has_invalid_citations_true(self) -> None:
"""has_invalid_citations should be True when verification fails."""
from noteflow.application.services import SummarizationServiceResult
result = SummarizationServiceResult(
result=SummarizationResult(
summary=Summary(
meeting_id=MeetingId(uuid4()),
executive_summary="Test",
key_points=[],
),
model_name="test",
provider_name="test",
),
verification=CitationVerificationResult(is_valid=False, invalid_key_point_indices=(0,)),
)
assert result.has_invalid_citations is True
def test_has_invalid_citations_false_when_valid(self) -> None:
"""has_invalid_citations should be False when verification passes."""
from noteflow.application.services import SummarizationServiceResult
result = SummarizationServiceResult(
result=SummarizationResult(
summary=Summary(
meeting_id=MeetingId(uuid4()),
executive_summary="Test",
key_points=[],
),
model_name="test",
provider_name="test",
),
verification=CitationVerificationResult(is_valid=True),
)
assert result.has_invalid_citations is False
class TestSummarizationServiceAdditionalBranches:
"""Additional branch and utility coverage."""
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Create test meeting ID."""
return MeetingId(uuid4())
def test_is_mode_available_false_when_not_registered(self) -> None:
"""is_mode_available should respect registered providers."""
service = SummarizationService()
assert service.is_mode_available(SummarizationMode.LOCAL) is False
@pytest.mark.asyncio
async def test_cloud_without_consent_and_no_fallback_raises(
self, meeting_id: MeetingId
) -> None:
"""Requesting cloud without consent should raise when fallback disabled."""
provider = MockProvider(name="cloud", available=True)
service = SummarizationService(
providers={SummarizationMode.CLOUD: provider},
settings=SummarizationServiceSettings(
default_mode=SummarizationMode.CLOUD,
cloud_consent_granted=False,
fallback_to_local=False,
),
)
with pytest.raises(ProviderUnavailableError):
await service.summarize(meeting_id, [_segment(0)], mode=SummarizationMode.CLOUD)
@pytest.mark.asyncio
async def test_no_fallback_provider_available_raises(self, meeting_id: MeetingId) -> None:
"""When no fallback provider exists, provider selection should fail."""
unavailable = MockProvider(name="cloud", available=False)
service = SummarizationService(
providers={SummarizationMode.CLOUD: unavailable},
settings=SummarizationServiceSettings(fallback_to_local=True),
)
with pytest.raises(ProviderUnavailableError):
await service.summarize(meeting_id, [_segment(0)], mode=SummarizationMode.CLOUD)
def test_filter_citations_returns_summary_when_no_verifier(self) -> None:
"""_filter_citations should return original summary when verifier is absent."""
summary = Summary(
meeting_id=MeetingId(uuid4()),
executive_summary="Exec",
generated_at=datetime.now(UTC),
)
service = SummarizationService()
result = service._filter_citations(summary, [])
assert result is summary
def test_set_default_mode_updates_settings(self) -> None:
"""set_default_mode should update default mode."""
service = SummarizationService()
service.set_default_mode(SummarizationMode.MOCK)
assert service.settings.default_mode == SummarizationMode.MOCK
def test_set_persist_callback_updates_callback(self) -> None:
"""set_persist_callback should update on_persist field."""
async def callback(summary: Summary) -> None:
pass
service = SummarizationService()
assert service.on_persist is None
service.set_persist_callback(callback)
assert service.on_persist is callback
service.set_persist_callback(None)
assert service.on_persist is None
File: tests/application/test_trigger_service.py
"""Tests for TriggerService application logic."""
from __future__ import annotations
import time
from dataclasses import dataclass
import pytest
from noteflow.application.services.trigger_service import (
TriggerService,
TriggerServiceSettings,
)
from noteflow.domain.triggers import TriggerAction, TriggerSignal, TriggerSource
@dataclass
class FakeProvider:
"""Simple signal provider for testing."""
signal: TriggerSignal | None
enabled: bool = True
calls: int = 0
@property
def source(self) -> TriggerSource:
return TriggerSource.AUDIO_ACTIVITY
@property
def max_weight(self) -> float:
return 1.0
def is_enabled(self) -> bool:
return self.enabled
def get_signal(self) -> TriggerSignal | None:
self.calls += 1
return self.signal
def _settings(
*,
enabled: bool = True,
auto_start: bool = False,
rate_limit_seconds: int = 60,
snooze_seconds: int = 30,
threshold_ignore: float = 0.2,
threshold_auto: float = 0.8,
) -> TriggerServiceSettings:
return TriggerServiceSettings(
enabled=enabled,
auto_start_enabled=auto_start,
rate_limit_seconds=rate_limit_seconds,
snooze_seconds=snooze_seconds,
threshold_ignore=threshold_ignore,
threshold_auto_start=threshold_auto,
)
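# Threshold semantics exercised by the tests below: confidence below
# threshold_ignore yields IGNORE, confidence at or above threshold_auto_start
# yields AUTO_START when auto-start is enabled (NOTIFY otherwise), and anything
# in between yields NOTIFY.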
def test_trigger_service_disabled_skips_providers() -> None:
"""Disabled trigger service should ignore without evaluating providers."""
provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5))
service = TriggerService([provider], settings=_settings(enabled=False))
decision = service.evaluate()
assert decision.action == TriggerAction.IGNORE
assert decision.confidence == 0.0
assert decision.signals == ()
assert provider.calls == 0
def test_trigger_service_snooze_ignores_signals(monkeypatch: pytest.MonkeyPatch) -> None:
"""Snoozed trigger service ignores signals until snooze expires."""
provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5))
service = TriggerService([provider], settings=_settings())
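    # Pin time.monotonic so the snooze window is deterministic: snooze at t=100
    # for 20s, check at t=110 (still snoozed) and t=130 (expired).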
monkeypatch.setattr(time, "monotonic", lambda: 100.0)
service.snooze(seconds=20)
monkeypatch.setattr(time, "monotonic", lambda: 110.0)
decision = service.evaluate()
assert decision.action == TriggerAction.IGNORE
monkeypatch.setattr(time, "monotonic", lambda: 130.0)
decision = service.evaluate()
assert decision.action == TriggerAction.NOTIFY
def test_trigger_service_rate_limit(monkeypatch: pytest.MonkeyPatch) -> None:
"""TriggerService enforces rate limit between prompts."""
provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.5))
service = TriggerService([provider], settings=_settings(rate_limit_seconds=60))
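    # Timeline: prompt at t=100; t=120 falls inside the 60s rate limit; t=200
    # is past it.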
monkeypatch.setattr(time, "monotonic", lambda: 100.0)
first = service.evaluate()
assert first.action == TriggerAction.NOTIFY
monkeypatch.setattr(time, "monotonic", lambda: 120.0)
second = service.evaluate()
assert second.action == TriggerAction.IGNORE
monkeypatch.setattr(time, "monotonic", lambda: 200.0)
third = service.evaluate()
assert third.action == TriggerAction.NOTIFY
def test_trigger_service_auto_start(monkeypatch: pytest.MonkeyPatch) -> None:
"""Auto-start fires when confidence passes threshold and auto-start is enabled."""
provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9))
service = TriggerService([provider], settings=_settings(auto_start=True, threshold_auto=0.8))
monkeypatch.setattr(time, "monotonic", lambda: 100.0)
decision = service.evaluate()
assert decision.action == TriggerAction.AUTO_START
def test_trigger_service_auto_start_disabled_notifies(monkeypatch: pytest.MonkeyPatch) -> None:
"""High confidence should still notify when auto-start is disabled."""
provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9))
service = TriggerService([provider], settings=_settings(auto_start=False, threshold_auto=0.8))
monkeypatch.setattr(time, "monotonic", lambda: 100.0)
decision = service.evaluate()
assert decision.action == TriggerAction.NOTIFY
def test_trigger_service_below_ignore_threshold(monkeypatch: pytest.MonkeyPatch) -> None:
"""Signals below ignore threshold should be ignored."""
provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.1))
service = TriggerService([provider], settings=_settings(threshold_ignore=0.2))
monkeypatch.setattr(time, "monotonic", lambda: 100.0)
decision = service.evaluate()
assert decision.action == TriggerAction.IGNORE
def test_trigger_service_threshold_validation() -> None:
"""Invalid threshold ordering should raise."""
with pytest.raises(ValueError, match="threshold_auto_start"):
TriggerServiceSettings(
enabled=True,
auto_start_enabled=False,
rate_limit_seconds=10,
snooze_seconds=5,
threshold_ignore=0.9,
threshold_auto_start=0.2,
)
def test_trigger_service_skips_disabled_providers() -> None:
"""Disabled providers should be skipped when evaluating."""
enabled_signal = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.3))
disabled_signal = FakeProvider(
signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.7), enabled=False
)
service = TriggerService([enabled_signal, disabled_signal], settings=_settings())
decision = service.evaluate()
assert decision.confidence == pytest.approx(0.3)
assert enabled_signal.calls == 1
assert disabled_signal.calls == 0
def test_trigger_service_snooze_state_properties(monkeypatch: pytest.MonkeyPatch) -> None:
"""is_snoozed and remaining seconds should reflect snooze window."""
service = TriggerService([], settings=_settings())
monkeypatch.setattr(time, "monotonic", lambda: 50.0)
service.snooze(seconds=10)
monkeypatch.setattr(time, "monotonic", lambda: 55.0)
assert service.is_snoozed is True
assert service.snooze_remaining_seconds == pytest.approx(5.0)
service.clear_snooze()
assert service.is_snoozed is False
assert service.snooze_remaining_seconds == 0.0
def test_trigger_service_rate_limit_with_existing_prompt(monkeypatch: pytest.MonkeyPatch) -> None:
"""Existing prompt time inside rate limit should short-circuit to IGNORE."""
provider = FakeProvider(signal=TriggerSignal(TriggerSource.AUDIO_ACTIVITY, weight=0.9))
service = TriggerService([provider], settings=_settings(rate_limit_seconds=30))
monkeypatch.setattr(time, "monotonic", lambda: 100.0)
service._last_prompt = 90.0 # Pretend we prompted 10s ago
decision = service.evaluate()
assert decision.action == TriggerAction.IGNORE
assert service.is_enabled is True
def test_trigger_service_enable_toggles() -> None:
"""set_enabled and set_auto_start should update settings."""
service = TriggerService([], settings=_settings(enabled=True, auto_start=False))
service.set_enabled(False)
assert service.is_enabled is False
service.set_auto_start(True)
assert service._settings.auto_start_enabled is True
File: tests/client/test_async_mixin.py
"""Tests for AsyncOperationMixin."""
from __future__ import annotations
import asyncio
from unittest.mock import AsyncMock, MagicMock
import pytest
from noteflow.client.components._async_mixin import AsyncOperationMixin
class ConcreteComponent(AsyncOperationMixin[str]):
"""Concrete implementation for testing."""
def __init__(self, page: MagicMock | None = None) -> None:
self._page = page
class TestAsyncOperationMixin:
"""Tests for AsyncOperationMixin."""
@pytest.fixture
def mock_page(self) -> MagicMock:
"""Create mock Flet page."""
page = MagicMock()
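        # Emulate flet's page.run_task: schedule the coroutine on the running
        # loop, or run it to completion when no loop is active (sync tests).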
def _run_task(fn):
try:
loop = asyncio.get_running_loop()
return loop.create_task(fn())
except RuntimeError:
# No running loop (sync tests); run immediately
return asyncio.run(fn())
page.run_task = MagicMock(side_effect=_run_task)
return page
@pytest.fixture
def component(self, mock_page: MagicMock) -> ConcreteComponent:
"""Create component with mock page."""
return ConcreteComponent(page=mock_page)
@pytest.mark.asyncio
async def test_run_async_operation_success_calls_callbacks(
self, component: ConcreteComponent
) -> None:
"""Successful operation calls on_success and set_loading."""
operation = AsyncMock(return_value="result")
on_success = MagicMock()
on_error = MagicMock()
set_loading = MagicMock()
result = await component.run_async_operation(
operation=operation,
on_success=on_success,
on_error=on_error,
set_loading=set_loading,
)
await asyncio.sleep(0)
assert result == "result"
operation.assert_awaited_once()
on_success.assert_called_once_with("result")
on_error.assert_not_called()
# Loading: True then False
assert set_loading.call_count == 2
set_loading.assert_any_call(True)
set_loading.assert_any_call(False)
@pytest.mark.asyncio
async def test_run_async_operation_error_calls_on_error(
self, component: ConcreteComponent
) -> None:
"""Failed operation calls on_error and returns None."""
operation = AsyncMock(side_effect=ValueError("test error"))
on_success = MagicMock()
on_error = MagicMock()
set_loading = MagicMock()
result = await component.run_async_operation(
operation=operation,
on_success=on_success,
on_error=on_error,
set_loading=set_loading,
)
await asyncio.sleep(0)
assert result is None
on_success.assert_not_called()
on_error.assert_called_once_with("test error")
# Loading: True then False (finally block)
assert set_loading.call_count == 2
@pytest.mark.asyncio
async def test_run_async_operation_always_clears_loading(
self, component: ConcreteComponent
) -> None:
"""Loading state always cleared in finally block."""
operation = AsyncMock(side_effect=RuntimeError("boom"))
set_loading = MagicMock()
await component.run_async_operation(
operation=operation,
on_success=MagicMock(),
on_error=MagicMock(),
set_loading=set_loading,
)
await asyncio.sleep(0)
# Final call should be set_loading(False)
assert set_loading.call_args_list[-1][0][0] is False
def test_dispatch_ui_no_page_is_noop(self) -> None:
"""Dispatch with no page does nothing."""
component = ConcreteComponent(page=None)
callback = MagicMock()
# Should not raise
component._dispatch_ui(callback)
callback.assert_not_called()
def test_dispatch_ui_with_page_calls_run_task(
self, component: ConcreteComponent, mock_page: MagicMock
) -> None:
"""Dispatch with page calls page.run_task."""
callback = MagicMock()
component._dispatch_ui(callback)
mock_page.run_task.assert_called_once()
callback.assert_called_once()
File: tests/client/test_summary_panel.py
"""Tests for SummaryPanelComponent."""
from __future__ import annotations
from dataclasses import dataclass, field
from unittest.mock import Mock
from uuid import uuid4
import flet as ft
import pytest
from noteflow.client.components.summary_panel import (
PRIORITY_COLORS,
PRIORITY_LABELS,
SummaryPanelComponent,
)
from noteflow.domain.entities import ActionItem, KeyPoint, Summary
from noteflow.domain.value_objects import MeetingId
@dataclass
class MockAppState:
"""Minimal mock AppState for testing."""
transcript_segments: list = field(default_factory=list)
current_meeting: Mock | None = None
current_summary: Summary | None = None
summary_loading: bool = False
summary_error: str | None = None
_page: Mock | None = None
def request_update(self) -> None:
"""No-op for tests."""
def run_on_ui_thread(self, callback) -> None:
"""Execute callback immediately for tests."""
        if callable(callback):
            callback()
def _create_mock_state() -> MockAppState:
"""Create mock AppState with meeting."""
state = MockAppState()
state.current_meeting = Mock()
state.current_meeting.id = str(uuid4())
return state
def _create_summary(
key_points: list[KeyPoint] | None = None,
action_items: list[ActionItem] | None = None,
) -> Summary:
"""Create test Summary."""
return Summary(
meeting_id=MeetingId(uuid4()),
executive_summary="Test executive summary.",
key_points=key_points or [],
action_items=action_items or [],
)
class TestSummaryPanelBuild:
"""Tests for SummaryPanelComponent.build()."""
def test_build_returns_container(self) -> None:
"""build() should return ft.Container."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
result = panel.build()
assert isinstance(result, ft.Container)
def test_build_initially_hidden(self) -> None:
"""Panel should be hidden by default."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
container = panel.build()
assert container.visible is False
def test_build_creates_ui_elements(self) -> None:
"""build() should create all UI elements."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
assert panel._summary_text is not None
assert panel._key_points_list is not None
assert panel._action_items_list is not None
assert panel._generate_btn is not None
assert panel._loading_indicator is not None
assert panel._error_text is not None
class TestSummaryPanelVisibility:
"""Tests for visibility control."""
def test_set_visible_shows_panel(self) -> None:
"""set_visible(True) should show panel."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel.set_visible(True)
assert panel._container is not None
assert panel._container.visible is True
def test_set_visible_hides_panel(self) -> None:
"""set_visible(False) should hide panel."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel.set_visible(True)
panel.set_visible(False)
assert panel._container is not None
assert panel._container.visible is False
class TestSummaryPanelEnabled:
"""Tests for enabled state control."""
def test_set_enabled_enables_button(self) -> None:
"""set_enabled(True) should enable generate button."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel.set_enabled(True)
assert panel._generate_btn is not None
assert panel._generate_btn.disabled is False
def test_set_enabled_disables_button(self) -> None:
"""set_enabled(False) should disable generate button."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel.set_enabled(True)
panel.set_enabled(False)
assert panel._generate_btn is not None
assert panel._generate_btn.disabled is True
class TestSummaryPanelRender:
"""Tests for rendering summary content."""
def test_render_summary_shows_executive_summary(self) -> None:
"""_render_summary should display executive summary text."""
state = _create_mock_state()
state.current_summary = _create_summary()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._render_summary()
assert panel._summary_text is not None
assert panel._summary_text.value == "Test executive summary."
def test_render_summary_populates_key_points(self) -> None:
"""_render_summary should populate key points list."""
state = _create_mock_state()
state.current_summary = _create_summary(
key_points=[
KeyPoint(text="Point 1", segment_ids=[0]),
KeyPoint(text="Point 2", segment_ids=[1]),
]
)
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._render_summary()
assert panel._key_points_list is not None
assert len(panel._key_points_list.controls) == 2
def test_render_summary_populates_action_items(self) -> None:
"""_render_summary should populate action items list."""
state = _create_mock_state()
state.current_summary = _create_summary(
action_items=[
ActionItem(text="Action 1", segment_ids=[0], priority=1),
ActionItem(text="Action 2", segment_ids=[1], priority=2, assignee="Alice"),
]
)
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._render_summary()
assert panel._action_items_list is not None
assert len(panel._action_items_list.controls) == 2
class TestCitationChips:
"""Tests for citation chip functionality."""
def test_create_citation_chip_returns_container(self) -> None:
"""_create_citation_chip should return Container."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
chip = panel._create_citation_chip(5)
assert isinstance(chip, ft.Container)
def test_citation_chip_has_correct_label(self) -> None:
"""Citation chip should display [#N] format."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
chip = panel._create_citation_chip(42)
text = chip.content
assert isinstance(text, ft.Text)
assert text.value == "[#42]"
def test_citation_chip_click_calls_callback(self) -> None:
"""Clicking citation chip should call on_citation_click."""
clicked_ids: list[int] = []
state = _create_mock_state()
panel = SummaryPanelComponent(
state,
get_service=lambda: None,
on_citation_click=lambda sid: clicked_ids.append(sid),
)
panel._handle_citation_click(7)
assert clicked_ids == [7]
def test_citation_click_no_callback_is_noop(self) -> None:
"""Citation click with no callback should not raise."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None, on_citation_click=None)
panel._handle_citation_click(5) # Should not raise
class TestPriorityBadge:
"""Tests for priority badge functionality."""
@pytest.mark.parametrize(
("priority", "expected_label"),
[
(0, "—"),
(1, "Low"),
(2, "Med"),
(3, "High"),
],
)
def test_priority_badge_labels(self, priority: int, expected_label: str) -> None:
"""Priority badge should show correct label."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
badge = panel._create_priority_badge(priority)
text = badge.content
assert isinstance(text, ft.Text)
assert text.value == expected_label
@pytest.mark.parametrize(
("priority", "expected_color"),
[
(0, ft.Colors.GREY_400),
(1, ft.Colors.BLUE_400),
(2, ft.Colors.ORANGE_400),
(3, ft.Colors.RED_400),
],
)
def test_priority_badge_colors(self, priority: int, expected_color: str) -> None:
"""Priority badge should have correct background color."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
badge = panel._create_priority_badge(priority)
assert badge.bgcolor == expected_color
class TestLoadingAndError:
"""Tests for loading and error states."""
def test_update_loading_state_shows_indicator(self) -> None:
"""Loading indicator should be visible when loading."""
state = _create_mock_state()
state.summary_loading = True
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._update_loading_state()
assert panel._loading_indicator is not None
assert panel._generate_btn is not None
assert panel._loading_indicator.visible is True
assert panel._generate_btn.disabled is True
def test_update_loading_state_hides_indicator(self) -> None:
"""Loading indicator should be hidden when not loading."""
state = _create_mock_state()
state.summary_loading = False
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
assert panel._loading_indicator is not None
panel._loading_indicator.visible = True
panel._update_loading_state()
assert not panel._loading_indicator.visible
def test_show_error_displays_message(self) -> None:
"""_show_error should display error message."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._show_error("Test error message")
assert panel._error_text is not None
assert panel._error_text.value == "Test error message"
assert panel._error_text.visible is True
def test_clear_error_hides_message(self) -> None:
"""_clear_error should hide error message."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._show_error("Error")
panel._clear_error()
assert panel._error_text is not None
assert panel._error_text.value == ""
assert panel._error_text.visible is False
class TestPriorityConstants:
"""Tests for priority constant values."""
def test_priority_colors_has_all_levels(self) -> None:
"""PRIORITY_COLORS should have entries for all priority levels."""
assert 0 in PRIORITY_COLORS
assert 1 in PRIORITY_COLORS
assert 2 in PRIORITY_COLORS
assert 3 in PRIORITY_COLORS
def test_priority_labels_has_all_levels(self) -> None:
"""PRIORITY_LABELS should have entries for all priority levels."""
assert 0 in PRIORITY_LABELS
assert 1 in PRIORITY_LABELS
assert 2 in PRIORITY_LABELS
assert 3 in PRIORITY_LABELS
class TestUncitedDraftsToggle:
"""Tests for uncited drafts toggle functionality."""
def test_build_creates_toggle_ui(self) -> None:
"""build() should create uncited toggle and count text."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
assert panel._uncited_toggle is not None
assert panel._uncited_count_text is not None
def test_toggle_initially_hidden(self) -> None:
"""Uncited toggle should be hidden by default."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
assert panel._uncited_toggle is not None
assert panel._uncited_toggle.visible is False
def test_calculate_uncited_counts_with_no_summaries(self) -> None:
"""Uncited counts should be zero when no summaries."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel._calculate_uncited_counts()
assert panel._uncited_key_points == 0
assert panel._uncited_action_items == 0
def test_calculate_uncited_counts_with_filtered_items(self) -> None:
"""Uncited counts should reflect difference between original and filtered."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
# Original has 3 key points
panel._original_summary = _create_summary(
key_points=[
KeyPoint(text="Point 1", segment_ids=[0]),
KeyPoint(text="Point 2", segment_ids=[1]),
KeyPoint(text="Point 3", segment_ids=[]), # uncited
],
action_items=[
ActionItem(text="Action 1", segment_ids=[0]),
ActionItem(text="Action 2", segment_ids=[]), # uncited
],
)
# Filtered has 2 key points (1 filtered out)
panel._filtered_summary = _create_summary(
key_points=[
KeyPoint(text="Point 1", segment_ids=[0]),
KeyPoint(text="Point 2", segment_ids=[1]),
],
action_items=[
ActionItem(text="Action 1", segment_ids=[0]),
],
)
panel._calculate_uncited_counts()
assert panel._uncited_key_points == 1
assert panel._uncited_action_items == 1
def test_has_uncited_items_true_when_filtered(self) -> None:
"""_has_uncited_items should return True when items filtered."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel._uncited_key_points = 2
panel._uncited_action_items = 0
assert panel._has_uncited_items() is True
def test_has_uncited_items_false_when_none_filtered(self) -> None:
"""_has_uncited_items should return False when nothing filtered."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel._uncited_key_points = 0
panel._uncited_action_items = 0
assert panel._has_uncited_items() is False
def test_update_uncited_ui_shows_toggle_when_uncited(self) -> None:
"""Toggle should be visible when uncited items exist."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._uncited_key_points = 2
panel._uncited_action_items = 1
panel._update_uncited_ui()
assert panel._uncited_toggle is not None
assert panel._uncited_toggle.visible is True
def test_update_uncited_ui_hides_toggle_when_no_uncited(self) -> None:
"""Toggle should be hidden when no uncited items."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._uncited_key_points = 0
panel._uncited_action_items = 0
panel._update_uncited_ui()
assert panel._uncited_toggle is not None
assert panel._uncited_toggle.visible is False
def test_update_uncited_ui_shows_count_text(self) -> None:
"""Count text should show total uncited when toggle is off."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._uncited_key_points = 2
panel._uncited_action_items = 3
panel._show_uncited = False
panel._update_uncited_ui()
assert panel._uncited_count_text is not None
assert panel._uncited_count_text.visible is True
assert panel._uncited_count_text.value == "(5 hidden)"
def test_update_uncited_ui_hides_count_when_showing_uncited(self) -> None:
"""Count text should be hidden when showing uncited items."""
state = _create_mock_state()
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._uncited_key_points = 2
panel._uncited_action_items = 0
panel._show_uncited = True
panel._update_uncited_ui()
assert panel._uncited_count_text is not None
assert panel._uncited_count_text.visible is False
def test_get_display_summary_returns_original_when_toggled(self) -> None:
"""_get_display_summary should return original when showing uncited."""
state = _create_mock_state()
original = _create_summary(key_points=[KeyPoint(text="Original", segment_ids=[])])
filtered = _create_summary(key_points=[])
state.current_summary = filtered
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel._original_summary = original
panel._filtered_summary = filtered
panel._show_uncited = True
result = panel._get_display_summary()
assert result is original
def test_get_display_summary_returns_current_when_not_toggled(self) -> None:
"""_get_display_summary should return current_summary when toggle off."""
state = _create_mock_state()
original = _create_summary(key_points=[KeyPoint(text="Original", segment_ids=[])])
filtered = _create_summary(key_points=[])
state.current_summary = filtered
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel._original_summary = original
panel._filtered_summary = filtered
panel._show_uncited = False
result = panel._get_display_summary()
assert result is filtered
def test_render_summary_switches_on_toggle(self) -> None:
"""Rendering should switch content based on toggle state."""
state = _create_mock_state()
original = _create_summary(
key_points=[
KeyPoint(text="Point 1", segment_ids=[0]),
KeyPoint(text="Uncited", segment_ids=[]),
]
)
filtered = _create_summary(key_points=[KeyPoint(text="Point 1", segment_ids=[0])])
state.current_summary = filtered
panel = SummaryPanelComponent(state, get_service=lambda: None)
panel.build()
panel._original_summary = original
panel._filtered_summary = filtered
panel._uncited_key_points = 1
# First render with toggle off
panel._show_uncited = False
panel._render_summary()
assert panel._key_points_list is not None
assert len(panel._key_points_list.controls) == 1
# Toggle on and re-render
panel._show_uncited = True
panel._render_summary()
assert len(panel._key_points_list.controls) == 2
File: tests/client/test_transcript_component.py
"""Tests for TranscriptComponent including partial rendering."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
from unittest.mock import MagicMock
import flet as ft
if TYPE_CHECKING:
from collections.abc import Callable
from noteflow.client.components.transcript import TranscriptComponent
@dataclass
class MockTranscriptSegment:
"""Mock TranscriptSegment for testing."""
text: str
start_time: float
end_time: float
is_final: bool = True
speaker_id: str = ""
speaker_confidence: float = 0.0
@dataclass
class MockServerInfo:
"""Mock ServerInfo for testing."""
version: str = "1.0.0"
asr_model: str = "base"
asr_ready: bool = True
active_meetings: int = 0
@dataclass
class MockAppState:
"""Minimal mock AppState for testing transcript component."""
transcript_segments: list[MockTranscriptSegment] = field(default_factory=list)
current_partial_text: str = ""
_page: MagicMock | None = None
def request_update(self) -> None:
"""No-op for tests."""
def run_on_ui_thread(self, callback: Callable[[], None]) -> None:
"""Execute callback immediately for tests."""
callback()
def clear_transcript(self) -> None:
"""Clear transcript segments and partial text."""
self.transcript_segments.clear()
self.current_partial_text = ""
class TestTranscriptComponentBuild:
"""Tests for TranscriptComponent.build()."""
def test_build_returns_column(self) -> None:
"""build() should return ft.Column."""
state = MockAppState()
component = TranscriptComponent(state)
result = component.build()
assert isinstance(result, ft.Column)
def test_build_creates_search_field(self) -> None:
"""build() should create search field."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
assert component._search_field is not None
assert isinstance(component._search_field, ft.TextField)
def test_build_creates_list_view(self) -> None:
"""build() should create ListView."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
assert component._list_view is not None
assert isinstance(component._list_view, ft.ListView)
class TestTranscriptPartialRendering:
"""Tests for partial transcript rendering."""
def test_add_partial_segment_updates_state(self) -> None:
"""Adding partial segment should update state partial text."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
partial = MockTranscriptSegment(
text="Hello, I am speaking...",
start_time=0.0,
end_time=1.0,
is_final=False,
)
component.add_segment(partial)
assert state.current_partial_text == "Hello, I am speaking..."
def test_add_partial_creates_partial_row(self) -> None:
"""Adding partial segment should create partial row in ListView."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
partial = MockTranscriptSegment(
text="Speaking now...",
start_time=0.0,
end_time=1.0,
is_final=False,
)
component.add_segment(partial)
assert component._partial_row is not None
assert component._list_view is not None
assert component._partial_row in component._list_view.controls
def test_partial_row_has_live_indicator(self) -> None:
"""Partial row should contain [LIVE] indicator."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
partial = MockTranscriptSegment(
text="Testing...",
start_time=0.0,
end_time=1.0,
is_final=False,
)
component.add_segment(partial)
# Check that partial row content contains LIVE indicator
assert component._partial_row is not None
partial_content = component._partial_row.content
assert isinstance(partial_content, ft.Row)
# First element should be the LIVE text
live_text = partial_content.controls[0]
assert isinstance(live_text, ft.Text)
assert live_text.value is not None
assert "[LIVE]" in live_text.value
def test_partial_row_has_italic_styling(self) -> None:
"""Partial row text should be italicized."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
partial = MockTranscriptSegment(
text="Testing...",
start_time=0.0,
end_time=1.0,
is_final=False,
)
component.add_segment(partial)
assert component._partial_row is not None
partial_content = component._partial_row.content
assert isinstance(partial_content, ft.Row)
text_element = partial_content.controls[1]
assert isinstance(text_element, ft.Text)
assert text_element.italic is True
def test_partial_row_updated_on_new_partial(self) -> None:
"""Subsequent partials should update existing row, not create new."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
# First partial
component.add_segment(
MockTranscriptSegment(text="First", start_time=0.0, end_time=1.0, is_final=False)
)
first_row = component._partial_row
assert component._list_view is not None
initial_count = len(component._list_view.controls)
# Second partial
component.add_segment(
MockTranscriptSegment(text="Second", start_time=1.0, end_time=2.0, is_final=False)
)
# Should update same row, not add new
assert component._partial_row is first_row
assert component._list_view is not None
assert len(component._list_view.controls) == initial_count
class TestTranscriptFinalSegment:
"""Tests for final segment handling."""
def test_add_final_segment_clears_partial_text(self) -> None:
"""Adding final segment should clear partial text state."""
state = MockAppState()
state.current_partial_text = "Partial text..."
component = TranscriptComponent(state)
component.build()
final = MockTranscriptSegment(
text="Final transcript.",
start_time=0.0,
end_time=2.0,
is_final=True,
)
component.add_segment(final)
assert not state.current_partial_text
def test_add_final_removes_partial_row(self) -> None:
"""Adding final segment should remove partial row."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
# Add partial first
partial = MockTranscriptSegment(
text="Speaking...",
start_time=0.0,
end_time=1.0,
is_final=False,
)
component.add_segment(partial)
assert component._partial_row is not None
# Add final
final = MockTranscriptSegment(
text="Final text.",
start_time=0.0,
end_time=2.0,
is_final=True,
)
component.add_segment(final)
# Partial row should be removed
assert component._partial_row is None
def test_add_final_appends_to_segments(self) -> None:
"""Adding final segment should append to state transcript_segments."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
final = MockTranscriptSegment(
text="Final text.",
start_time=0.0,
end_time=2.0,
is_final=True,
)
component.add_segment(final)
assert len(state.transcript_segments) == 1
assert state.transcript_segments[0].text == "Final text."
class TestTranscriptClear:
"""Tests for transcript clearing."""
def test_clear_removes_partial_row(self) -> None:
"""clear() should remove partial row."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
# Add partial
partial = MockTranscriptSegment(
text="Partial...",
start_time=0.0,
end_time=1.0,
is_final=False,
)
component.add_segment(partial)
component.clear()
assert component._partial_row is None
def test_clear_empties_list_view(self) -> None:
"""clear() should empty ListView controls."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
# Add some segments
component.add_segment(
MockTranscriptSegment(text="First", start_time=0.0, end_time=1.0, is_final=True)
)
component.add_segment(
MockTranscriptSegment(text="Second", start_time=1.0, end_time=2.0, is_final=True)
)
component.clear()
assert component._list_view is not None
assert len(component._list_view.controls) == 0
def test_clear_clears_search_field(self) -> None:
"""clear() should clear search field."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
assert component._search_field is not None
component._search_field.value = "test query"
component.clear()
assert component._search_field is not None
assert not component._search_field.value
class TestTranscriptSearch:
"""Tests for transcript search functionality."""
def test_search_filters_segments(self) -> None:
"""Search should filter visible segments."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
# Add segments to state
state.transcript_segments = [
MockTranscriptSegment(text="Hello world", start_time=0.0, end_time=1.0),
MockTranscriptSegment(text="Goodbye world", start_time=1.0, end_time=2.0),
MockTranscriptSegment(text="Something else", start_time=2.0, end_time=3.0),
]
# Simulate search
component._search_query = "world"
component._rerender_all_segments()
# Should only show segments containing "world"
visible_count = sum(row is not None for row in component._segment_rows)
assert visible_count == 2
def test_search_is_case_insensitive(self) -> None:
"""Search should be case-insensitive."""
state = MockAppState()
component = TranscriptComponent(state)
component.build()
state.transcript_segments = [
MockTranscriptSegment(text="Hello WORLD", start_time=0.0, end_time=1.0),
MockTranscriptSegment(text="something else", start_time=1.0, end_time=2.0),
]
component._search_query = "world"
component._rerender_all_segments()
visible_count = sum(row is not None for row in component._segment_rows)
assert visible_count == 1
class TestTranscriptSegmentClick:
"""Tests for segment click handling."""
def test_click_callback_receives_segment_index(self) -> None:
"""Clicking segment should call callback with segment index."""
clicked_indices: list[int] = []
state = MockAppState()
component = TranscriptComponent(
state,
on_segment_click=lambda idx: clicked_indices.append(idx),
)
component.build()
component._handle_click(5)
assert clicked_indices == [5]
def test_click_without_callback_is_noop(self) -> None:
"""Click without callback should not raise."""
state = MockAppState()
component = TranscriptComponent(state, on_segment_click=None)
component.build()
component._handle_click(3) # Should not raise
File: tests/infrastructure/asr/test_engine.py
"""Tests for FasterWhisperEngine behavior without loading models."""
from __future__ import annotations
import sys
import types
import numpy as np
import pytest
from noteflow.infrastructure.asr.engine import FasterWhisperEngine
class TestFasterWhisperEngine:
"""Tests for FasterWhisperEngine."""
def test_transcribe_without_load_raises(self) -> None:
"""Calling transcribe before load_model raises RuntimeError."""
engine = FasterWhisperEngine()
audio = np.zeros(1600, dtype=np.float32)
with pytest.raises(RuntimeError, match="Model not loaded"):
list(engine.transcribe(audio))
def test_load_invalid_model_size_raises(self) -> None:
"""Invalid model size raises ValueError when faster-whisper is available."""
pytest.importorskip("faster_whisper")
engine = FasterWhisperEngine()
with pytest.raises(ValueError, match="Invalid model size"):
engine.load_model(model_size="not-a-model")
def test_load_model_with_stub_sets_state(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""load_model should set model and size when stubbed module is present."""
class DummyModel:
def __init__(
self, model_size: str, device: str, compute_type: str, num_workers: int
) -> None:
self.args = (model_size, device, compute_type, num_workers)
fake_module = types.SimpleNamespace(WhisperModel=DummyModel)
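        # Registering the stub in sys.modules means any `import faster_whisper`
        # performed inside load_model resolves to the dummy module.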
monkeypatch.setitem(sys.modules, "faster_whisper", fake_module)
engine = FasterWhisperEngine(compute_type="float32", device="cpu", num_workers=2)
engine.load_model(model_size="base")
assert engine.is_loaded is True
assert engine.model_size == "base"
assert engine._model.args == ("base", "cpu", "float32", 2) # type: ignore[attr-defined]
def test_load_model_wraps_errors(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""load_model should surface model construction errors as RuntimeError."""
class FailingModel:
def __init__(self, *_: object, **__: object) -> None:
raise ValueError("boom")
fake_module = types.SimpleNamespace(WhisperModel=FailingModel)
monkeypatch.setitem(sys.modules, "faster_whisper", fake_module)
engine = FasterWhisperEngine()
with pytest.raises(RuntimeError, match="Failed to load model"):
engine.load_model(model_size="base")
def test_transcribe_with_stubbed_model(self) -> None:
"""transcribe should yield AsrResult objects when model is preset."""
engine = FasterWhisperEngine()
class DummyWord:
def __init__(self) -> None:
self.word = "hi"
self.start = 0.0
self.end = 0.5
self.probability = 0.9
class DummySegment:
def __init__(self) -> None:
self.text = " hi "
self.start = 0.0
self.end = 1.0
self.words = [DummyWord()]
self.avg_logprob = -0.1
self.no_speech_prob = 0.01
class DummyInfo:
language = "en"
language_probability = 0.95
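        # Mirror faster-whisper's transcribe contract: an iterable of segments
        # plus an info object carrying language metadata.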
class DummyModel:
def transcribe(self, audio: np.ndarray, **_: object):
return [DummySegment()], DummyInfo()
engine._model = DummyModel()
engine._model_size = "base"
audio = np.zeros(1600, dtype=np.float32)
results = list(engine.transcribe(audio))
assert len(results) == 1
first = results[0]
assert first.text == "hi"
assert first.words[0].word == "hi"
assert engine.is_loaded is True
engine.unload()
assert engine.is_loaded is False
File: tests/infrastructure/audio/test_capture.py
"""Tests for SoundDeviceCapture."""
from __future__ import annotations
from types import SimpleNamespace
from typing import TYPE_CHECKING
import numpy as np
import pytest
from noteflow.infrastructure.audio import SoundDeviceCapture
if TYPE_CHECKING:
    from numpy.typing import NDArray
class TestSoundDeviceCapture:
"""Tests for SoundDeviceCapture class."""
@pytest.fixture
def capture(self) -> SoundDeviceCapture:
"""Create SoundDeviceCapture instance."""
return SoundDeviceCapture()
def test_init_defaults(self, capture: SoundDeviceCapture) -> None:
"""Test capture initializes with correct defaults."""
assert capture.sample_rate == 16000
assert capture.channels == 1
assert capture.current_device_id is None
def test_is_capturing_initially_false(self, capture: SoundDeviceCapture) -> None:
"""Test is_capturing returns False when not started."""
assert capture.is_capturing() is False
def test_list_devices_returns_list(self, capture: SoundDeviceCapture) -> None:
"""Test list_devices returns a list (may be empty in CI)."""
devices = capture.list_devices()
assert isinstance(devices, list)
def test_get_default_device_returns_device_or_none(self, capture: SoundDeviceCapture) -> None:
"""Test get_default_device returns device info or None."""
device = capture.get_default_device()
# May be None in CI environments without audio
if device is not None:
assert device.device_id >= 0
assert isinstance(device.name, str)
assert device.channels > 0
def test_stop_when_not_capturing_is_safe(self, capture: SoundDeviceCapture) -> None:
"""Test stop() is safe to call when not capturing."""
# Should not raise
capture.stop()
assert capture.is_capturing() is False
def test_start_when_already_capturing_raises(self, capture: SoundDeviceCapture) -> None:
"""Test start() raises if already capturing.
Note: This test may be skipped in CI without audio devices.
"""
devices = capture.list_devices()
if not devices:
pytest.skip("No audio devices available")
def dummy_callback(frames: NDArray[np.float32], timestamp: float) -> None:
pass
try:
capture.start(
device_id=None,
on_frames=dummy_callback,
sample_rate=16000,
channels=1,
)
# Second start should raise
with pytest.raises(RuntimeError, match="Already capturing"):
capture.start(
device_id=None,
on_frames=dummy_callback,
)
finally:
capture.stop()
def test_properties_after_start(self, capture: SoundDeviceCapture) -> None:
"""Test properties reflect configured values after start.
Note: This test may be skipped in CI without audio devices.
"""
devices = capture.list_devices()
if not devices:
pytest.skip("No audio devices available")
def dummy_callback(frames: NDArray[np.float32], timestamp: float) -> None:
pass
try:
capture.start(
device_id=None,
on_frames=dummy_callback,
sample_rate=44100,
channels=1,
)
assert capture.sample_rate == 44100
assert capture.channels == 1
assert capture.is_capturing() is True
finally:
capture.stop()
def test_start_with_stubbed_stream_invokes_callback(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""start should configure and invoke callback when stream is stubbed."""
captured: list[np.ndarray] = []
class DummyStream:
def __init__(self, *, callback, **_: object) -> None:
self.callback = callback
self.active = False
def start(self) -> None:
self.active = True
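                # Mimic sounddevice's stream callback signature:
                # (indata, frames, time_info, status).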
data = np.zeros((4, 1), dtype=np.float32)
self.callback(data, len(data), None, 0)
def stop(self) -> None:
self.active = False
def close(self) -> None:
self.active = False
monkeypatch.setattr(
"noteflow.infrastructure.audio.capture.sd.InputStream",
DummyStream,
)
monkeypatch.setattr(
"noteflow.infrastructure.audio.capture.sd.PortAudioError",
RuntimeError,
)
monkeypatch.setattr(
"noteflow.infrastructure.audio.capture.sd.CallbackFlags",
int,
)
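        # Provide a minimal device table and default-device tuple so device
        # resolution inside start() succeeds without real hardware.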
monkeypatch.setattr(
"noteflow.infrastructure.audio.capture.sd.query_devices",
lambda: [{"name": "Mic", "max_input_channels": 1, "default_samplerate": 16000}],
)
monkeypatch.setattr(
"noteflow.infrastructure.audio.capture.sd.default",
SimpleNamespace(device=(0, 1)),
)
def on_frames(frames: NDArray[np.float32], timestamp: float) -> None: # type: ignore[name-defined]
captured.append(frames)
assert isinstance(timestamp, float)
capture = SoundDeviceCapture()
capture.start(device_id=None, on_frames=on_frames, sample_rate=16000, channels=1)
assert captured, "callback should have been invoked"
assert capture.is_capturing() is True
capture.stop()
assert capture.is_capturing() is False
def test_start_wraps_portaudio_error(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""PortAudio errors should be converted to RuntimeError."""
class DummyError(Exception): ...
def failing_stream(**_: object) -> None:
raise DummyError("boom")
monkeypatch.setattr("noteflow.infrastructure.audio.capture.sd.InputStream", failing_stream)
monkeypatch.setattr("noteflow.infrastructure.audio.capture.sd.PortAudioError", DummyError)
capture = SoundDeviceCapture()
with pytest.raises(RuntimeError, match="Failed to start audio capture"):
capture.start(device_id=None, on_frames=lambda *_: None)
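# Minimal usage sketch of the API exercised above; it assumes a host with a
# working input device and is never executed by the suite.
def _example_capture_usage() -> None:  # pragma: no cover - illustrative only
    """Hypothetical usage sketch mirroring the tests above."""
    import time
    def on_frames(frames: NDArray[np.float32], timestamp: float) -> None:
        print(f"{timestamp:.3f}s peak={np.abs(frames).max():.3f}")
    capture = SoundDeviceCapture()
    if capture.list_devices():  # guard for CI hosts without audio
        capture.start(device_id=None, on_frames=on_frames, sample_rate=16000, channels=1)
        time.sleep(1.0)
        capture.stop()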
## File: tests/infrastructure/summarization/test_cloud_provider.py
"""Tests for cloud summarization provider."""
from __future__ import annotations
import json
import sys
import types
from typing import Any
from uuid import uuid4
import pytest
from noteflow.domain.entities import Segment
from noteflow.domain.summarization import (
InvalidResponseError,
ProviderUnavailableError,
SummarizationRequest,
)
from noteflow.domain.value_objects import MeetingId
from noteflow.infrastructure.summarization import CloudBackend
def _segment(
segment_id: int,
text: str,
start: float = 0.0,
end: float = 5.0,
) -> Segment:
"""Create a test segment."""
return Segment(
segment_id=segment_id,
text=text,
start_time=start,
end_time=end,
)
def _valid_json_response(
summary: str = "Test summary.",
key_points: list[dict[str, Any]] | None = None,
action_items: list[dict[str, Any]] | None = None,
) -> str:
"""Build a valid JSON response string."""
return json.dumps(
{
"executive_summary": summary,
"key_points": key_points or [],
"action_items": action_items or [],
}
)
class TestCloudSummarizerProperties:
"""Tests for CloudSummarizer properties."""
def test_provider_name_openai(self) -> None:
"""Provider name should be 'openai' for OpenAI backend."""
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(backend=CloudBackend.OPENAI)
assert summarizer.provider_name == "openai"
def test_provider_name_anthropic(self) -> None:
"""Provider name should be 'anthropic' for Anthropic backend."""
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC)
assert summarizer.provider_name == "anthropic"
def test_requires_cloud_consent_true(self) -> None:
"""Cloud providers should require consent."""
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer()
assert summarizer.requires_cloud_consent is True
def test_is_available_with_api_key(self) -> None:
"""is_available should be True when API key is provided."""
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="test-key")
assert summarizer.is_available is True
def test_is_available_without_api_key(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""is_available should be False without API key or env var."""
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer()
assert summarizer.is_available is False
def test_is_available_with_openai_env_var(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""is_available should be True with OPENAI_API_KEY env var."""
monkeypatch.setenv("OPENAI_API_KEY", "sk-test")
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(backend=CloudBackend.OPENAI)
assert summarizer.is_available is True
def test_is_available_with_anthropic_env_var(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""is_available should be True with ANTHROPIC_API_KEY env var."""
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test")
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC)
assert summarizer.is_available is True
def test_default_model_openai(self) -> None:
"""Default model for OpenAI should be gpt-4o-mini."""
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(backend=CloudBackend.OPENAI)
assert summarizer._model == "gpt-4o-mini"
def test_default_model_anthropic(self) -> None:
"""Default model for Anthropic should be claude-3-haiku."""
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(backend=CloudBackend.ANTHROPIC)
assert summarizer._model == "claude-3-haiku-20240307"
def test_custom_model(self) -> None:
"""Custom model should override default."""
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(model="gpt-4-turbo")
assert summarizer._model == "gpt-4-turbo"
def test_openai_base_url_is_passed(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""OPENAI_BASE_URL should be forwarded to the client when provided."""
captured: dict[str, Any] = {}
def fake_openai_client(**kwargs: Any) -> types.SimpleNamespace:
captured.update(kwargs)
return types.SimpleNamespace(
chat=types.SimpleNamespace(
completions=types.SimpleNamespace(
create=lambda **_: types.SimpleNamespace(
choices=[
types.SimpleNamespace(
message=types.SimpleNamespace(content=_valid_json_response())
)
],
usage=None,
)
)
)
)
mock_module = types.ModuleType("openai")
mock_module.OpenAI = fake_openai_client
monkeypatch.setitem(sys.modules, "openai", mock_module)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(
api_key="key", backend=CloudBackend.OPENAI, base_url="https://custom"
)
# Trigger client creation
_ = summarizer._get_openai_client()
assert captured.get("base_url") == "https://custom"
class TestCloudSummarizerOpenAI:
"""Tests for CloudSummarizer with OpenAI backend."""
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Create test meeting ID."""
return MeetingId(uuid4())
@pytest.fixture
def mock_openai(self, monkeypatch: pytest.MonkeyPatch) -> types.ModuleType:
"""Mock openai module."""
def create_response(content: str, tokens: int = 100) -> types.SimpleNamespace:
"""Create mock OpenAI response."""
return types.SimpleNamespace(
choices=[types.SimpleNamespace(message=types.SimpleNamespace(content=content))],
usage=types.SimpleNamespace(total_tokens=tokens),
)
mock_client = types.SimpleNamespace(
chat=types.SimpleNamespace(
completions=types.SimpleNamespace(
create=lambda **_: create_response(_valid_json_response())
)
)
)
mock_module = types.ModuleType("openai")
mock_module.OpenAI = lambda **_: mock_client
monkeypatch.setitem(sys.modules, "openai", mock_module)
return mock_module
@pytest.mark.asyncio
async def test_summarize_empty_segments(
self, meeting_id: MeetingId, mock_openai: types.ModuleType
) -> None:
"""Empty segments should return empty summary."""
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="test-key")
request = SummarizationRequest(meeting_id=meeting_id, segments=[])
result = await summarizer.summarize(request)
assert result.summary.key_points == []
assert result.summary.action_items == []
@pytest.mark.asyncio
async def test_summarize_returns_result(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Summarize should return SummarizationResult."""
response_content = _valid_json_response(
summary="Project meeting summary.",
key_points=[{"text": "Key point", "segment_ids": [0]}],
action_items=[{"text": "Action", "assignee": "Bob", "priority": 1, "segment_ids": [1]}],
)
def create_response(**_: Any) -> types.SimpleNamespace:
return types.SimpleNamespace(
choices=[
types.SimpleNamespace(message=types.SimpleNamespace(content=response_content))
],
usage=types.SimpleNamespace(total_tokens=150),
)
mock_client = types.SimpleNamespace(
chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response))
)
mock_module = types.ModuleType("openai")
mock_module.OpenAI = lambda **_: mock_client
monkeypatch.setitem(sys.modules, "openai", mock_module)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.OPENAI)
segments = [_segment(0, "Key point"), _segment(1, "Action item")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert result.provider_name == "openai"
assert result.summary.executive_summary == "Project meeting summary."
assert result.tokens_used == 150
@pytest.mark.asyncio
async def test_raises_unavailable_on_auth_error(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Should raise ProviderUnavailableError on auth failure."""
def raise_auth_error(**_: Any) -> None:
raise ValueError("Invalid API key provided")
mock_client = types.SimpleNamespace(
chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=raise_auth_error))
)
mock_module = types.ModuleType("openai")
mock_module.OpenAI = lambda **_: mock_client
monkeypatch.setitem(sys.modules, "openai", mock_module)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="bad-key")
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
with pytest.raises(ProviderUnavailableError, match="authentication failed"):
await summarizer.summarize(request)
@pytest.mark.asyncio
async def test_raises_invalid_response_on_empty_content(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Should raise InvalidResponseError on empty response."""
def create_empty_response(**_: Any) -> types.SimpleNamespace:
return types.SimpleNamespace(
choices=[types.SimpleNamespace(message=types.SimpleNamespace(content=""))],
usage=None,
)
mock_client = types.SimpleNamespace(
chat=types.SimpleNamespace(
completions=types.SimpleNamespace(create=create_empty_response)
)
)
mock_module = types.ModuleType("openai")
mock_module.OpenAI = lambda **_: mock_client
monkeypatch.setitem(sys.modules, "openai", mock_module)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="test-key")
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
with pytest.raises(InvalidResponseError, match="Empty response"):
await summarizer.summarize(request)
class TestCloudSummarizerAnthropic:
"""Tests for CloudSummarizer with Anthropic backend."""
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Create test meeting ID."""
return MeetingId(uuid4())
@pytest.mark.asyncio
async def test_summarize_returns_result(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Summarize should return SummarizationResult."""
response_content = _valid_json_response(
summary="Anthropic summary.",
key_points=[{"text": "Point", "segment_ids": [0]}],
)
def create_response(**_: Any) -> types.SimpleNamespace:
return types.SimpleNamespace(
content=[types.SimpleNamespace(text=response_content)],
usage=types.SimpleNamespace(input_tokens=50, output_tokens=100),
)
mock_client = types.SimpleNamespace(messages=types.SimpleNamespace(create=create_response))
mock_module = types.ModuleType("anthropic")
mock_module.Anthropic = lambda **_: mock_client
monkeypatch.setitem(sys.modules, "anthropic", mock_module)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.ANTHROPIC)
segments = [_segment(0, "Test point")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert result.provider_name == "anthropic"
assert result.summary.executive_summary == "Anthropic summary."
assert result.tokens_used == 150
@pytest.mark.asyncio
async def test_raises_unavailable_when_package_missing(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Should raise ProviderUnavailableError when package not installed."""
monkeypatch.delitem(sys.modules, "anthropic", raising=False)
import builtins
original_import = builtins.__import__
def mock_import(name: str, *args: Any, **kwargs: Any) -> Any:
if name == "anthropic":
raise ImportError("No module named 'anthropic'")
return original_import(name, *args, **kwargs)
monkeypatch.setattr(builtins, "__import__", mock_import)
from noteflow.infrastructure.summarization import cloud_provider
summarizer = cloud_provider.CloudSummarizer(
api_key="test-key", backend=CloudBackend.ANTHROPIC
)
summarizer._client = None
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
with pytest.raises(ProviderUnavailableError, match="anthropic package"):
await summarizer.summarize(request)
@pytest.mark.asyncio
async def test_raises_invalid_response_on_empty_content(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Should raise InvalidResponseError on empty response."""
def create_empty_response(**_: Any) -> types.SimpleNamespace:
return types.SimpleNamespace(
content=[],
usage=types.SimpleNamespace(input_tokens=10, output_tokens=0),
)
mock_client = types.SimpleNamespace(
messages=types.SimpleNamespace(create=create_empty_response)
)
mock_module = types.ModuleType("anthropic")
mock_module.Anthropic = lambda **_: mock_client
monkeypatch.setitem(sys.modules, "anthropic", mock_module)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="test-key", backend=CloudBackend.ANTHROPIC)
segments = [_segment(0, "Test")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
with pytest.raises(InvalidResponseError, match="Empty response"):
await summarizer.summarize(request)
class TestCloudSummarizerFiltering:
"""Tests for response filtering in CloudSummarizer."""
@pytest.fixture
def meeting_id(self) -> MeetingId:
"""Create test meeting ID."""
return MeetingId(uuid4())
@pytest.mark.asyncio
async def test_filters_invalid_segment_ids(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Invalid segment_ids should be filtered from response."""
response_content = _valid_json_response(
summary="Test",
key_points=[{"text": "Point", "segment_ids": [0, 99, 100]}],
)
def create_response(**_: Any) -> types.SimpleNamespace:
return types.SimpleNamespace(
choices=[
types.SimpleNamespace(message=types.SimpleNamespace(content=response_content))
],
usage=None,
)
mock_client = types.SimpleNamespace(
chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response))
)
mock_module = types.ModuleType("openai")
mock_module.OpenAI = lambda **_: mock_client
monkeypatch.setitem(sys.modules, "openai", mock_module)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="test-key")
segments = [_segment(0, "Only valid segment")]
request = SummarizationRequest(meeting_id=meeting_id, segments=segments)
result = await summarizer.summarize(request)
assert result.summary.key_points[0].segment_ids == [0]
@pytest.mark.asyncio
async def test_respects_max_limits(
self, meeting_id: MeetingId, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Max limits should truncate response items."""
response_content = _valid_json_response(
summary="Test",
key_points=[{"text": f"Point {i}", "segment_ids": [0]} for i in range(10)],
action_items=[{"text": f"Action {i}", "segment_ids": [0]} for i in range(10)],
)
def create_response(**_: Any) -> types.SimpleNamespace:
return types.SimpleNamespace(
choices=[
types.SimpleNamespace(message=types.SimpleNamespace(content=response_content))
],
usage=None,
)
mock_client = types.SimpleNamespace(
chat=types.SimpleNamespace(completions=types.SimpleNamespace(create=create_response))
)
mock_module = types.ModuleType("openai")
mock_module.OpenAI = lambda **_: mock_client
monkeypatch.setitem(sys.modules, "openai", mock_module)
from noteflow.infrastructure.summarization import CloudSummarizer
summarizer = CloudSummarizer(api_key="test-key")
segments = [_segment(0, "Test")]
request = SummarizationRequest(
meeting_id=meeting_id,
segments=segments,
max_key_points=2,
max_action_items=3,
)
result = await summarizer.summarize(request)
assert len(result.summary.key_points) == 2
assert len(result.summary.action_items) == 3
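# Minimal end-to-end sketch of the summarizer API exercised above; it assumes
# real provider credentials (e.g. OPENAI_API_KEY) and is never executed by the
# suite, which stubs the provider modules instead.
async def _example_summarize_usage() -> None:  # pragma: no cover - illustrative only
    """Hypothetical usage sketch of CloudSummarizer against a live provider."""
    from noteflow.infrastructure.summarization import CloudSummarizer
    summarizer = CloudSummarizer(backend=CloudBackend.OPENAI)
    if not summarizer.is_available:
        return
    request = SummarizationRequest(
        meeting_id=MeetingId(uuid4()),
        segments=[_segment(0, "We agreed to ship the beta on Friday.")],
    )
    result = await summarizer.summarize(request)
    print(result.summary.executive_summary, result.tokens_used)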
## File: tests/integration/conftest.py
"""Pytest fixtures for integration tests."""
from __future__ import annotations
import time
from collections.abc import AsyncGenerator
from importlib import import_module
from typing import TYPE_CHECKING
from urllib.parse import quote
import pytest
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
if TYPE_CHECKING:
from collections.abc import Self
from noteflow.infrastructure.persistence.models import Base
class PgTestContainer:
"""Minimal Postgres testcontainer wrapper with custom readiness wait."""
def __init__(
self,
image: str = "pgvector/pgvector:pg16",
username: str = "test",
password: str = "test",
dbname: str = "noteflow_test",
port: int = 5432,
) -> None:
self.username = username
self.password = password
self.dbname = dbname
self.port = port
container_module = import_module("testcontainers.core.container")
docker_container_cls = container_module.DockerContainer
self._container = (
docker_container_cls(image)
.with_env("POSTGRES_USER", username)
.with_env("POSTGRES_PASSWORD", password)
.with_env("POSTGRES_DB", dbname)
.with_exposed_ports(port)
)
def start(self) -> Self:
"""Start the container."""
self._container.start()
self._wait_until_ready()
return self
def stop(self) -> None:
"""Stop the container."""
self._container.stop()
def get_connection_url(self) -> str:
"""Return a SQLAlchemy-style connection URL."""
host = self._container.get_container_host_ip()
port = self._container._get_exposed_port(self.port)
quoted_password = quote(self.password, safe=" +")
return f"postgresql+psycopg2://{self.username}:{quoted_password}@{host}:{port}/{self.dbname}"
def _wait_until_ready(self, timeout: float = 30.0, interval: float = 0.5) -> None:
"""Wait for Postgres to accept connections by running a simple query."""
start_time = time.time()
escaped_password = self.password.replace("'", "'\"'\"'")
cmd = [
"sh",
"-c",
(
f"PGPASSWORD='{escaped_password}' "
f"psql --username {self.username} --dbname {self.dbname} --host 127.0.0.1 "
"-c 'select 1;'"
),
]
last_error: str | None = None
while True:
result = self._container.exec(cmd)
if result.exit_code == 0:
return
if result.output:
last_error = result.output.decode(errors="ignore")
if time.time() - start_time > timeout:
raise TimeoutError(
"Postgres container did not become ready in time"
+ (f": {last_error}" if last_error else "")
)
time.sleep(interval)
# Container reference and URL stored at module level for reuse across the session
_container: PgTestContainer | None = None
_database_url: str | None = None
def get_or_create_container() -> tuple[PgTestContainer, str]:
"""Get or create the PostgreSQL container."""
global _container, _database_url
if _container is None:
container = PgTestContainer().start()
_container = container
url = container.get_connection_url()
_database_url = url.replace("postgresql+psycopg2://", "postgresql+asyncpg://")
assert _container is not None, "Container should be initialized"
assert _database_url is not None, "Database URL should be initialized"
return _container, _database_url
@pytest.fixture
async def session_factory() -> AsyncGenerator[async_sessionmaker[AsyncSession], None]:
"""Create a session factory and initialize the database schema."""
_, database_url = get_or_create_container()
engine = create_async_engine(database_url, echo=False)
async with engine.begin() as conn:
# Create pgvector extension and schema
await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE"))
await conn.execute(text("CREATE SCHEMA noteflow"))
# Create all tables
await conn.run_sync(Base.metadata.create_all)
yield async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
autocommit=False,
autoflush=False,
)
# Cleanup - drop schema to reset for next test
async with engine.begin() as conn:
await conn.execute(text("DROP SCHEMA IF EXISTS noteflow CASCADE"))
await engine.dispose()
@pytest.fixture
async def session(
session_factory: async_sessionmaker[AsyncSession],
) -> AsyncGenerator[AsyncSession, None]:
"""Provide a database session for each test."""
async with session_factory() as session:
yield session
# Rollback any uncommitted changes
await session.rollback()
def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
"""Cleanup container after all tests complete."""
global _container
if _container is not None:
_container.stop()
_container = None
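# Illustrative consumer of the fixtures above (not collected by pytest, since
# the name does not match test_*): each test gets a fresh schema and an
# auto-rollback session.
async def _example_session_usage(session: AsyncSession) -> None:  # pragma: no cover
    """Hypothetical sketch of a test body consuming the `session` fixture."""
    result = await session.execute(text("SELECT 1"))
    assert result.scalar_one() == 1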
## File: src/noteflow/client/components/transcript.py
"""Transcript display component with click-to-seek and highlighting.
Uses TranscriptSegment from grpc.client and format_timestamp from _formatting.
Does not recreate any types - imports and uses existing ones.
"""
from __future__ import annotations
import hashlib
from collections.abc import Callable
from typing import TYPE_CHECKING
import flet as ft
# REUSE existing formatting - do not recreate
from noteflow.infrastructure.export._formatting import format_timestamp
if TYPE_CHECKING:
from noteflow.client.state import AppState
# REUSE existing types - do not recreate
from noteflow.grpc.client import ServerInfo, TranscriptSegment
class TranscriptComponent:
"""Transcript segment display with click-to-seek, highlighting, and search.
Uses TranscriptSegment from grpc.client and format_timestamp from _formatting.
"""
def __init__(
self,
state: AppState,
on_segment_click: Callable[[int], None] | None = None,
) -> None:
"""Initialize transcript component.
Args:
state: Centralized application state.
on_segment_click: Callback when segment clicked (receives segment index).
"""
self._state = state
self._on_segment_click = on_segment_click
self._list_view: ft.ListView | None = None
self._segment_rows: list[ft.Container | None] = [] # Track rows for highlighting
self._search_field: ft.TextField | None = None
self._search_query: str = ""
self._partial_row: ft.Container | None = None # Live partial at bottom
def build(self) -> ft.Column:
"""Build transcript list view with search.
Returns:
Column with search field and bordered ListView.
"""
self._search_field = ft.TextField(
label="Search transcript",
prefix_icon=ft.Icons.SEARCH,
on_change=self._on_search_change,
dense=True,
height=40,
)
self._list_view = ft.ListView(
spacing=10,
padding=10,
auto_scroll=False, # We control scrolling for sync
height=260,
)
self._segment_rows.clear()
return ft.Column(
[
self._search_field,
ft.Container(
content=self._list_view,
border=ft.border.all(1, ft.Colors.GREY_400),
border_radius=8,
),
],
spacing=5,
)
def add_segment(self, segment: TranscriptSegment) -> None:
"""Add transcript segment to display.
For final segments, adds to transcript list.
For partials, updates the live partial row at bottom.
Args:
segment: Transcript segment from server.
"""
if segment.is_final:
# Clear partial text when we get a final
self._state.current_partial_text = ""
self._state.transcript_segments.append(segment)
self._state.run_on_ui_thread(lambda: self._render_final_segment(segment))
else:
# Update partial text
self._state.current_partial_text = segment.text
self._state.run_on_ui_thread(lambda: self._render_partial(segment.text))
def display_server_info(self, info: ServerInfo) -> None:
"""Display server info in transcript area.
Args:
info: Server info from connection.
"""
self._state.run_on_ui_thread(lambda: self._render_server_info(info))
def clear(self) -> None:
"""Clear all transcript segments and partials."""
self._state.clear_transcript()
self._segment_rows.clear()
self._partial_row = None
self._search_query = ""
if self._search_field:
self._search_field.value = ""
if self._list_view:
self._list_view.controls.clear()
self._state.request_update()
def _on_search_change(self, e: ft.ControlEvent) -> None:
"""Handle search field change.
Args:
e: Control event with new search value.
"""
self._search_query = (e.control.value or "").lower()
self._rerender_all_segments()
def _rerender_all_segments(self) -> None:
"""Re-render all segments with current search filter."""
if not self._list_view:
return
self._list_view.controls.clear()
self._segment_rows.clear()
for idx, segment in enumerate(self._state.transcript_segments):
# Filter by search query
if self._search_query and self._search_query not in segment.text.lower():
# Add placeholder to maintain index alignment
self._segment_rows.append(None)
continue
# Use original index for click handling
container = self._create_segment_row(segment, idx)
self._segment_rows.append(container)
self._list_view.controls.append(container)
self._state.request_update()
def _render_final_segment(self, segment: TranscriptSegment) -> None:
"""Render final segment with click handler (UI thread only).
Args:
segment: Transcript segment to render.
"""
if not self._list_view:
return
# Remove partial row if present (final replaces partial)
if self._partial_row and self._partial_row in self._list_view.controls:
self._list_view.controls.remove(self._partial_row)
self._partial_row = None
# Use the actual index from state (segments are appended before rendering)
segment_index = len(self._state.transcript_segments) - 1
# Filter by search query during live rendering
if self._search_query and self._search_query not in segment.text.lower():
self._segment_rows.append(None)
return
container = self._create_segment_row(segment, segment_index)
self._segment_rows.append(container)
self._list_view.controls.append(container)
self._state.request_update()
def _render_partial(self, text: str) -> None:
"""Render or update the partial text row at the bottom (UI thread only).
Args:
text: Partial transcript text.
"""
if not self._list_view or not text:
return
# Create or update partial row
partial_content = ft.Row(
[
ft.Text("[LIVE]", size=11, color=ft.Colors.BLUE_400, width=120, italic=True),
ft.Text(
text,
size=14,
color=ft.Colors.GREY_500,
weight=ft.FontWeight.W_300,
italic=True,
expand=True,
),
]
)
if self._partial_row:
# Update existing row
self._partial_row.content = partial_content
else:
# Create new row
self._partial_row = ft.Container(
content=partial_content,
padding=5,
border_radius=4,
bgcolor=ft.Colors.BLUE_50,
)
self._list_view.controls.append(self._partial_row)
self._state.request_update()
def _create_segment_row(self, segment: TranscriptSegment, segment_index: int) -> ft.Container:
"""Create a segment row container.
Args:
segment: Transcript segment to render.
segment_index: Index for click handling.
Returns:
Container with segment content.
"""
# REUSE existing format_timestamp from _formatting.py
# Format as time range for transcript display
time_str = (
f"[{format_timestamp(segment.start_time)} - {format_timestamp(segment.end_time)}]"
)
# Style based on finality
color = ft.Colors.BLACK if segment.is_final else ft.Colors.GREY_600
weight = ft.FontWeight.NORMAL if segment.is_final else ft.FontWeight.W_300
# Build row content with optional speaker label
row_controls: list[ft.Control] = [
ft.Text(time_str, size=11, color=ft.Colors.GREY_500, width=120),
]
# Add speaker label if present
if segment.speaker_id:
speaker_color = self._get_speaker_color(segment.speaker_id)
row_controls.append(
ft.Container(
content=ft.Text(
segment.speaker_id,
size=10,
color=ft.Colors.WHITE,
weight=ft.FontWeight.BOLD,
),
bgcolor=speaker_color,
border_radius=10,
padding=ft.padding.symmetric(horizontal=6, vertical=2),
margin=ft.margin.only(right=8),
)
)
row_controls.append(
ft.Text(
segment.text,
size=14,
color=color,
weight=weight,
expand=True,
)
)
row = ft.Row(row_controls)
# Wrap in container for click handling and highlighting
return ft.Container(
content=row,
padding=5,
border_radius=4,
on_click=lambda e, idx=segment_index: self._handle_click(idx),
ink=True,
)
def _get_speaker_color(self, speaker_id: str) -> str:
"""Get consistent color for a speaker.
Args:
speaker_id: Speaker identifier.
Returns:
Color string for the speaker label.
"""
# Use hash to get consistent color index
colors = [
ft.Colors.BLUE_400,
ft.Colors.GREEN_400,
ft.Colors.PURPLE_400,
ft.Colors.ORANGE_400,
ft.Colors.TEAL_400,
ft.Colors.PINK_400,
ft.Colors.INDIGO_400,
ft.Colors.AMBER_600,
]
digest = hashlib.md5(speaker_id.encode("utf-8")).hexdigest()
return colors[int(digest, 16) % len(colors)]
def _handle_click(self, segment_index: int) -> None:
"""Handle segment row click.
Args:
segment_index: Index of clicked segment.
"""
if self._on_segment_click:
self._on_segment_click(segment_index)
def _render_server_info(self, info: ServerInfo) -> None:
"""Render server info (UI thread only).
Args:
info: Server info to display.
"""
if not self._list_view:
return
asr_status = "ready" if info.asr_ready else "not ready"
info_text = (
f"Connected to server v{info.version} | "
f"ASR: {info.asr_model} ({asr_status}) | "
f"Active meetings: {info.active_meetings}"
)
self._list_view.controls.append(
ft.Text(
info_text,
size=12,
color=ft.Colors.GREEN_700,
italic=True,
)
)
self._state.request_update()
def update_highlight(self, highlighted_index: int | None) -> None:
"""Update visual highlight on segments.
Args:
highlighted_index: Index of segment to highlight, or None to clear.
"""
for idx, container in enumerate(self._segment_rows):
if container is None:
continue
if idx == highlighted_index:
container.bgcolor = ft.Colors.YELLOW_100
container.border = ft.border.all(1, ft.Colors.YELLOW_700)
else:
container.bgcolor = None
container.border = None
# Scroll to highlighted segment
if highlighted_index is not None:
self._scroll_to_segment(highlighted_index)
self._state.request_update()
def _scroll_to_segment(self, segment_index: int) -> None:
"""Scroll ListView to show specified segment.
Args:
segment_index: Index of segment to scroll to.
"""
if not self._list_view or segment_index >= len(self._segment_rows):
return
container = self._segment_rows[segment_index]
if container is None:
return
# Estimate row height for scroll calculation
estimated_row_height = 50
offset = segment_index * estimated_row_height
self._list_view.scroll_to(offset=offset, duration=200)
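# Wiring sketch (hypothetical names): a parent view would typically connect the
# click callback to playback seeking, e.g.:
#
#     def on_segment_click(index: int) -> None:
#         segment = app_state.transcript_segments[index]
#         playback.seek_to(segment.start_time)
#
#     transcript = TranscriptComponent(app_state, on_segment_click=on_segment_click)
#     page.add(transcript.build())
#
# `app_state`, `playback.seek_to`, and `page` are assumptions for illustration,
# not names defined in this module.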
## File: src/noteflow/config/settings.py
"""NoteFlow application settings using Pydantic settings."""
from __future__ import annotations
import json
from functools import lru_cache
from pathlib import Path
from typing import Annotated, cast
from pydantic import Field, PostgresDsn, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
def _default_meetings_dir() -> Path:
"""Return default meetings directory path."""
return Path.home() / ".noteflow" / "meetings"
class TriggerSettings(BaseSettings):
"""Client trigger settings loaded from environment variables."""
model_config = SettingsConfigDict(
env_prefix="NOTEFLOW_",
env_file=".env",
env_file_encoding="utf-8",
enable_decoding=False,
extra="ignore",
)
# Trigger settings (client-side)
trigger_enabled: Annotated[
bool,
Field(default=False, description="Enable smart recording triggers (opt-in)"),
]
trigger_auto_start: Annotated[
bool,
Field(default=False, description="Auto-start recording on high confidence"),
]
trigger_rate_limit_minutes: Annotated[
int,
Field(default=10, ge=1, le=60, description="Minimum minutes between trigger prompts"),
]
trigger_snooze_minutes: Annotated[
int,
Field(default=30, ge=5, le=480, description="Default snooze duration in minutes"),
]
trigger_poll_interval_seconds: Annotated[
float,
Field(default=2.0, ge=0.5, le=30.0, description="Trigger polling interval in seconds"),
]
trigger_confidence_ignore: Annotated[
float,
Field(default=0.40, ge=0.0, le=1.0, description="Confidence below which to ignore"),
]
trigger_confidence_auto: Annotated[
float,
Field(default=0.80, ge=0.0, le=1.0, description="Confidence to auto-start recording"),
]
# App audio trigger tuning (system output from whitelisted apps)
trigger_audio_enabled: Annotated[
bool,
Field(default=True, description="Enable app audio activity detection"),
]
trigger_audio_threshold_db: Annotated[
float,
Field(default=-40.0, ge=-60.0, le=0.0, description="Audio activity threshold in dB"),
]
trigger_audio_window_seconds: Annotated[
float,
Field(default=5.0, ge=1.0, le=30.0, description="Audio activity window in seconds"),
]
trigger_audio_min_active_ratio: Annotated[
float,
Field(default=0.6, ge=0.0, le=1.0, description="Minimum active ratio in window"),
]
trigger_audio_min_samples: Annotated[
int,
Field(default=10, ge=1, le=200, description="Minimum samples before evaluating audio"),
]
trigger_audio_max_history: Annotated[
int,
Field(default=50, ge=10, le=1000, description="Max audio activity samples to retain"),
]
# Calendar trigger tuning (optional integration)
trigger_calendar_enabled: Annotated[
bool,
Field(default=False, description="Enable calendar-based trigger detection"),
]
trigger_calendar_lookahead_minutes: Annotated[
int,
Field(default=5, ge=0, le=60, description="Minutes before event start to trigger"),
]
trigger_calendar_lookbehind_minutes: Annotated[
int,
Field(default=5, ge=0, le=60, description="Minutes after event start to keep triggering"),
]
trigger_calendar_events: Annotated[
list[dict[str, object]],
Field(
default_factory=list,
description="Calendar events as JSON list of {start, end, title}",
),
]
# Foreground app trigger tuning
trigger_foreground_enabled: Annotated[
bool,
Field(default=True, description="Enable foreground app detection"),
]
trigger_meeting_apps: Annotated[
list[str],
Field(
default_factory=lambda: [
"zoom",
"teams",
"microsoft teams",
"meet",
"google meet",
"slack",
"webex",
"discord",
"skype",
"gotomeeting",
"facetime",
"webinar",
"ringcentral",
],
description="Meeting app name substrings to detect",
),
]
trigger_suppressed_apps: Annotated[
list[str],
Field(default_factory=list, description="Meeting app substrings to ignore"),
]
# Signal weights
trigger_weight_audio: Annotated[
float,
Field(default=0.30, ge=0.0, le=1.0, description="Audio signal confidence weight"),
]
trigger_weight_foreground: Annotated[
float,
Field(
default=0.40,
ge=0.0,
le=1.0,
description="Foreground app signal confidence weight",
),
]
trigger_weight_calendar: Annotated[
float,
Field(default=0.30, ge=0.0, le=1.0, description="Calendar signal confidence weight"),
]
@field_validator("trigger_meeting_apps", "trigger_suppressed_apps", mode="before")
@classmethod
def _parse_csv_list(cls, value: object) -> list[str]:
if not isinstance(value, str):
return [] if value is None else list(value)
stripped = value.strip()
if stripped.startswith("[") and stripped.endswith("]"):
try:
parsed = json.loads(stripped)
except json.JSONDecodeError:
parsed = None
if isinstance(parsed, list):
return [str(item).strip() for item in parsed if str(item).strip()]
return [item.strip() for item in value.split(",") if item.strip()]
@field_validator("trigger_calendar_events", mode="before")
@classmethod
def _parse_calendar_events(cls, value: object) -> list[dict[str, object]]:
if value is None:
return []
if isinstance(value, str):
stripped = value.strip()
if not stripped:
return []
try:
parsed = json.loads(stripped)
except json.JSONDecodeError:
return []
if isinstance(parsed, list):
return [item for item in parsed if isinstance(item, dict)]
return [parsed] if isinstance(parsed, dict) else []
if isinstance(value, dict):
return [value]
if isinstance(value, list):
return [item for item in value if isinstance(item, dict)]
return []
class Settings(TriggerSettings):
"""Application settings loaded from environment variables.
Environment variables:
NOTEFLOW_DATABASE_URL: PostgreSQL connection URL
            Example: postgresql+asyncpg://user:pass@host:5432/dbname?options=-csearch_path%3Dnoteflow
NOTEFLOW_DB_POOL_SIZE: Connection pool size (default: 5)
NOTEFLOW_DB_ECHO: Echo SQL statements (default: False)
NOTEFLOW_ASR_MODEL_SIZE: Whisper model size (default: base)
NOTEFLOW_ASR_DEVICE: ASR device (default: cpu)
NOTEFLOW_ASR_COMPUTE_TYPE: ASR compute type (default: int8)
NOTEFLOW_MEETINGS_DIR: Directory for meeting audio storage (default: ~/.noteflow/meetings)
NOTEFLOW_RETENTION_ENABLED: Enable automatic retention policy (default: False)
NOTEFLOW_RETENTION_DAYS: Days to retain completed meetings (default: 90)
NOTEFLOW_RETENTION_CHECK_INTERVAL_HOURS: Hours between retention checks (default: 24)
"""
# Database settings
database_url: Annotated[
PostgresDsn,
Field(
description="PostgreSQL connection URL with asyncpg driver",
examples=["postgresql+asyncpg://user:pass@localhost:5432/noteflow"],
),
]
db_pool_size: Annotated[
int,
Field(default=5, ge=1, le=50, description="Database connection pool size"),
]
db_echo: Annotated[
bool,
Field(default=False, description="Echo SQL statements to log"),
]
# ASR settings
asr_model_size: Annotated[
str,
Field(default="base", description="Whisper model size"),
]
asr_device: Annotated[
str,
Field(default="cpu", description="ASR device (cpu or cuda)"),
]
asr_compute_type: Annotated[
str,
Field(default="int8", description="ASR compute type"),
]
# Server settings
grpc_port: Annotated[
int,
Field(default=50051, ge=1, le=65535, description="gRPC server port"),
]
# Storage settings
meetings_dir: Annotated[
Path,
Field(
default_factory=_default_meetings_dir,
description="Directory for meeting audio and metadata storage",
),
]
# Retention settings
retention_enabled: Annotated[
bool,
Field(default=False, description="Enable automatic retention policy"),
]
retention_days: Annotated[
int,
Field(default=90, ge=1, le=3650, description="Days to retain completed meetings"),
]
retention_check_interval_hours: Annotated[
int,
Field(default=24, ge=1, le=168, description="Hours between retention checks"),
]
# Diarization settings
diarization_enabled: Annotated[
bool,
Field(default=False, description="Enable speaker diarization"),
]
diarization_hf_token: Annotated[
str | None,
Field(default=None, description="HuggingFace token for pyannote models"),
]
diarization_device: Annotated[
str,
Field(default="auto", description="Diarization device (auto, cpu, cuda, mps)"),
]
diarization_streaming_latency: Annotated[
float,
Field(default=0.5, ge=0.1, le=5.0, description="Streaming diarization latency in seconds"),
]
diarization_min_speakers: Annotated[
int,
Field(default=1, ge=1, le=20, description="Minimum expected speakers"),
]
diarization_max_speakers: Annotated[
int,
Field(default=10, ge=1, le=50, description="Maximum expected speakers"),
]
diarization_refinement_enabled: Annotated[
bool,
Field(default=True, description="Enable post-meeting diarization refinement"),
]
@property
def database_url_str(self) -> str:
"""Return database URL as string."""
return str(self.database_url)
def _load_settings() -> Settings:
"""Load settings from environment.
Returns:
Settings instance.
Raises:
ValidationError: If required environment variables are not set.
"""
# pydantic-settings reads from environment; model_validate handles this
return cast("Settings", Settings.model_validate({}))
def _load_trigger_settings() -> TriggerSettings:
"""Load trigger settings from environment."""
return cast("TriggerSettings", TriggerSettings.model_validate({}))
@lru_cache
def get_settings() -> Settings:
"""Get cached settings instance.
Returns:
Cached Settings instance loaded from environment.
Raises:
ValidationError: If required environment variables are not set.
"""
return _load_settings()
@lru_cache
def get_trigger_settings() -> TriggerSettings:
"""Get cached trigger settings instance."""
return _load_trigger_settings()
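# Illustrative examples of list parsing (the `mode="before"` validators above
# accept either CSV or JSON for list-valued settings):
#
#     NOTEFLOW_TRIGGER_MEETING_APPS="zoom, teams, webex"
#     NOTEFLOW_TRIGGER_MEETING_APPS='["zoom", "teams", "webex"]'
#
# Both environment values yield the same parsed list, e.g.:
#
#     assert TriggerSettings.model_validate({}).trigger_meeting_apps == [
#         "zoom", "teams", "webex",
#     ]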
## File: src/noteflow/grpc/client.py
"""NoteFlow gRPC client for Flet app integration."""
from __future__ import annotations
import logging
import queue
import threading
import time
from collections.abc import Callable, Iterator
from dataclasses import dataclass
from typing import TYPE_CHECKING, Final
import grpc
from noteflow.config.constants import DEFAULT_SAMPLE_RATE
from .proto import noteflow_pb2, noteflow_pb2_grpc
if TYPE_CHECKING:
import numpy as np
from numpy.typing import NDArray
logger = logging.getLogger(__name__)
DEFAULT_SERVER: Final[str] = "localhost:50051"
CHUNK_TIMEOUT: Final[float] = 0.1 # Timeout for getting chunks from queue
@dataclass
class TranscriptSegment:
"""Transcript segment from server."""
segment_id: int
text: str
start_time: float
end_time: float
language: str
is_final: bool
speaker_id: str = "" # Speaker identifier from diarization
speaker_confidence: float = 0.0 # Speaker assignment confidence
@dataclass
class ServerInfo:
"""Server information."""
version: str
asr_model: str
asr_ready: bool
uptime_seconds: float
active_meetings: int
diarization_enabled: bool = False
diarization_ready: bool = False
@dataclass
class MeetingInfo:
"""Meeting information."""
id: str
title: str
state: str
created_at: float
started_at: float
ended_at: float
duration_seconds: float
segment_count: int
@dataclass
class AnnotationInfo:
"""Annotation information."""
id: str
meeting_id: str
annotation_type: str
text: str
start_time: float
end_time: float
segment_ids: list[int]
created_at: float
@dataclass
class ExportResult:
"""Export result."""
content: str
format_name: str
file_extension: str
@dataclass
class DiarizationResult:
"""Result of speaker diarization refinement."""
job_id: str
status: str
segments_updated: int
speaker_ids: list[str]
error_message: str = ""
@property
def success(self) -> bool:
"""Check if diarization succeeded."""
return self.status == "completed" and not self.error_message
@property
def is_terminal(self) -> bool:
"""Check if job reached a terminal state."""
return self.status in {"completed", "failed"}
@dataclass
class RenameSpeakerResult:
"""Result of speaker rename operation."""
segments_updated: int
success: bool
# Callback types
TranscriptCallback = Callable[[TranscriptSegment], None]
ConnectionCallback = Callable[[bool, str], None]
class NoteFlowClient:
"""gRPC client for NoteFlow server.
Provides async-safe methods for Flet app integration.
"""
def __init__(
self,
server_address: str = DEFAULT_SERVER,
on_transcript: TranscriptCallback | None = None,
on_connection_change: ConnectionCallback | None = None,
) -> None:
"""Initialize the client.
Args:
server_address: Server address (host:port).
on_transcript: Callback for transcript updates.
on_connection_change: Callback for connection state changes.
"""
self._server_address = server_address
self._on_transcript = on_transcript
self._on_connection_change = on_connection_change
self._channel: grpc.Channel | None = None
self._stub: noteflow_pb2_grpc.NoteFlowServiceStub | None = None
self._connected = False
# Streaming state
self._stream_thread: threading.Thread | None = None
self._audio_queue: queue.Queue[tuple[str, NDArray[np.float32], float]] = queue.Queue()
self._stop_streaming = threading.Event()
self._current_meeting_id: str | None = None
@property
def connected(self) -> bool:
"""Check if connected to server."""
return self._connected
@property
def server_address(self) -> str:
"""Get server address."""
return self._server_address
def connect(self, timeout: float = 5.0) -> bool:
"""Connect to the server.
Args:
timeout: Connection timeout in seconds.
Returns:
True if connected successfully.
"""
try:
self._channel = grpc.insecure_channel(
self._server_address,
options=[
("grpc.max_send_message_length", 100 * 1024 * 1024),
("grpc.max_receive_message_length", 100 * 1024 * 1024),
],
)
# Wait for channel to be ready
grpc.channel_ready_future(self._channel).result(timeout=timeout)
self._stub = noteflow_pb2_grpc.NoteFlowServiceStub(self._channel)
self._connected = True
logger.info("Connected to server at %s", self._server_address)
self._notify_connection(True, "Connected")
return True
except grpc.FutureTimeoutError:
logger.error("Connection timeout: %s", self._server_address)
self._notify_connection(False, "Connection timeout")
return False
except grpc.RpcError as e:
logger.error("Connection failed: %s", e)
self._notify_connection(False, str(e))
return False
def disconnect(self) -> None:
"""Disconnect from the server."""
self.stop_streaming()
if self._channel:
self._channel.close()
self._channel = None
self._stub = None
self._connected = False
logger.info("Disconnected from server")
self._notify_connection(False, "Disconnected")
def get_server_info(self) -> ServerInfo | None:
"""Get server information.
Returns:
ServerInfo or None if request fails.
"""
if not self._stub:
return None
try:
response = self._stub.GetServerInfo(noteflow_pb2.ServerInfoRequest())
return ServerInfo(
version=response.version,
asr_model=response.asr_model,
asr_ready=response.asr_ready,
uptime_seconds=response.uptime_seconds,
active_meetings=response.active_meetings,
diarization_enabled=response.diarization_enabled,
diarization_ready=response.diarization_ready,
)
except grpc.RpcError as e:
logger.error("Failed to get server info: %s", e)
return None
def create_meeting(self, title: str = "") -> MeetingInfo | None:
"""Create a new meeting.
Args:
title: Optional meeting title.
Returns:
MeetingInfo or None if request fails.
"""
if not self._stub:
return None
try:
request = noteflow_pb2.CreateMeetingRequest(title=title)
response = self._stub.CreateMeeting(request)
return self._proto_to_meeting_info(response)
except grpc.RpcError as e:
logger.error("Failed to create meeting: %s", e)
return None
def stop_meeting(self, meeting_id: str) -> MeetingInfo | None:
"""Stop a meeting.
Args:
meeting_id: Meeting ID.
Returns:
Updated MeetingInfo or None if request fails.
"""
if not self._stub:
return None
try:
request = noteflow_pb2.StopMeetingRequest(meeting_id=meeting_id)
response = self._stub.StopMeeting(request)
return self._proto_to_meeting_info(response)
except grpc.RpcError as e:
logger.error("Failed to stop meeting: %s", e)
return None
def get_meeting(self, meeting_id: str) -> MeetingInfo | None:
"""Get meeting details.
Args:
meeting_id: Meeting ID.
Returns:
MeetingInfo or None if not found.
"""
if not self._stub:
return None
try:
request = noteflow_pb2.GetMeetingRequest(
meeting_id=meeting_id,
include_segments=False,
include_summary=False,
)
response = self._stub.GetMeeting(request)
return self._proto_to_meeting_info(response)
except grpc.RpcError as e:
logger.error("Failed to get meeting: %s", e)
return None
def get_meeting_segments(self, meeting_id: str) -> list[TranscriptSegment]:
"""Retrieve transcript segments for a meeting.
Uses existing GetMeetingRequest with include_segments=True.
Args:
meeting_id: Meeting ID.
Returns:
List of TranscriptSegment or empty list if not found.
"""
if not self._stub:
return []
try:
request = noteflow_pb2.GetMeetingRequest(
meeting_id=meeting_id,
include_segments=True,
include_summary=False,
)
response = self._stub.GetMeeting(request)
return [
TranscriptSegment(
segment_id=seg.segment_id,
text=seg.text,
start_time=seg.start_time,
end_time=seg.end_time,
language=seg.language,
is_final=True,
speaker_id=seg.speaker_id,
speaker_confidence=seg.speaker_confidence,
)
for seg in response.segments
]
except grpc.RpcError as e:
logger.error("Failed to get meeting segments: %s", e)
return []
def list_meetings(self, limit: int = 20) -> list[MeetingInfo]:
"""List recent meetings.
Args:
limit: Maximum number to return.
Returns:
List of MeetingInfo.
"""
if not self._stub:
return []
try:
request = noteflow_pb2.ListMeetingsRequest(
limit=limit,
sort_order=noteflow_pb2.SORT_ORDER_CREATED_DESC,
)
response = self._stub.ListMeetings(request)
return [self._proto_to_meeting_info(m) for m in response.meetings]
except grpc.RpcError as e:
logger.error("Failed to list meetings: %s", e)
return []
def start_streaming(self, meeting_id: str) -> bool:
"""Start streaming audio for a meeting.
Args:
meeting_id: Meeting ID to stream to.
Returns:
True if streaming started.
"""
if not self._stub:
logger.error("Not connected")
return False
if self._stream_thread and self._stream_thread.is_alive():
logger.warning("Already streaming")
return False
self._current_meeting_id = meeting_id
self._stop_streaming.clear()
# Clear any pending audio
while not self._audio_queue.empty():
try:
self._audio_queue.get_nowait()
except queue.Empty:
break
# Start streaming thread
self._stream_thread = threading.Thread(
target=self._stream_worker,
daemon=True,
)
self._stream_thread.start()
logger.info("Started streaming for meeting %s", meeting_id)
return True
def stop_streaming(self) -> None:
"""Stop streaming audio."""
self._stop_streaming.set()
if self._stream_thread:
self._stream_thread.join(timeout=2.0)
self._stream_thread = None
self._current_meeting_id = None
logger.info("Stopped streaming")
def send_audio(
self,
audio: NDArray[np.float32],
timestamp: float | None = None,
) -> None:
"""Send audio chunk to server.
Non-blocking - queues audio for streaming thread.
Args:
audio: Audio samples (float32, mono, 16kHz).
timestamp: Optional capture timestamp.
"""
if not self._current_meeting_id:
return
if timestamp is None:
timestamp = time.time()
self._audio_queue.put(
(
self._current_meeting_id,
audio,
timestamp,
)
)
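    # Producer wiring sketch (hypothetical `capture` object): an audio capture
    # callback with signature (frames, timestamp) can feed send_audio()
    # directly; the background worker below drains the queue into the stream.
    #
    #     client = NoteFlowClient(on_transcript=print)
    #     client.connect()
    #     meeting = client.create_meeting("Standup")
    #     if meeting and client.start_streaming(meeting.id):
    #         capture.start(device_id=None, on_frames=client.send_audio)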
def _stream_worker(self) -> None:
"""Background thread for audio streaming."""
if not self._stub:
return
def audio_generator() -> Iterator[noteflow_pb2.AudioChunk]:
"""Generate audio chunks from queue."""
while not self._stop_streaming.is_set():
try:
meeting_id, audio, timestamp = self._audio_queue.get(
timeout=CHUNK_TIMEOUT,
)
yield noteflow_pb2.AudioChunk(
meeting_id=meeting_id,
audio_data=audio.tobytes(),
timestamp=timestamp,
sample_rate=DEFAULT_SAMPLE_RATE,
channels=1,
)
except queue.Empty:
continue
try:
# Start bidirectional stream
responses = self._stub.StreamTranscription(audio_generator())
# Process responses
for response in responses:
if self._stop_streaming.is_set():
break
if response.update_type == noteflow_pb2.UPDATE_TYPE_FINAL:
segment = TranscriptSegment(
segment_id=response.segment.segment_id,
text=response.segment.text,
start_time=response.segment.start_time,
end_time=response.segment.end_time,
language=response.segment.language,
is_final=True,
speaker_id=response.segment.speaker_id,
speaker_confidence=response.segment.speaker_confidence,
)
self._notify_transcript(segment)
elif response.update_type == noteflow_pb2.UPDATE_TYPE_PARTIAL:
segment = TranscriptSegment(
segment_id=0,
text=response.partial_text,
start_time=0,
end_time=0,
language="",
is_final=False,
)
self._notify_transcript(segment)
except grpc.RpcError as e:
logger.error("Stream error: %s", e)
self._notify_connection(False, f"Stream error: {e}")
def _notify_transcript(self, segment: TranscriptSegment) -> None:
"""Notify transcript callback.
Args:
segment: Transcript segment.
"""
if self._on_transcript:
try:
self._on_transcript(segment)
except Exception as e:
logger.error("Transcript callback error: %s", e)
def _notify_connection(self, connected: bool, message: str) -> None:
"""Notify connection callback.
Args:
connected: Connection state.
message: Status message.
"""
if self._on_connection_change:
try:
self._on_connection_change(connected, message)
except Exception as e:
logger.error("Connection callback error: %s", e)
@staticmethod
def _proto_to_meeting_info(meeting: noteflow_pb2.Meeting) -> MeetingInfo:
"""Convert proto Meeting to MeetingInfo.
Args:
meeting: Proto meeting.
Returns:
MeetingInfo dataclass.
"""
state_map = {
noteflow_pb2.MEETING_STATE_UNSPECIFIED: "unknown",
noteflow_pb2.MEETING_STATE_CREATED: "created",
noteflow_pb2.MEETING_STATE_RECORDING: "recording",
noteflow_pb2.MEETING_STATE_STOPPED: "stopped",
noteflow_pb2.MEETING_STATE_COMPLETED: "completed",
noteflow_pb2.MEETING_STATE_ERROR: "error",
}
return MeetingInfo(
id=meeting.id,
title=meeting.title,
state=state_map.get(meeting.state, "unknown"),
created_at=meeting.created_at,
started_at=meeting.started_at,
ended_at=meeting.ended_at,
duration_seconds=meeting.duration_seconds,
segment_count=len(meeting.segments),
)
# =========================================================================
# Annotation Methods
# =========================================================================
def add_annotation(
self,
meeting_id: str,
annotation_type: str,
text: str,
start_time: float,
end_time: float,
segment_ids: list[int] | None = None,
) -> AnnotationInfo | None:
"""Add an annotation to a meeting.
Args:
meeting_id: Meeting ID.
annotation_type: Type of annotation (action_item, decision, note).
text: Annotation text.
start_time: Start time in seconds.
end_time: End time in seconds.
segment_ids: Optional list of linked segment IDs.
Returns:
AnnotationInfo or None if request fails.
"""
if not self._stub:
return None
try:
proto_type = self._annotation_type_to_proto(annotation_type)
request = noteflow_pb2.AddAnnotationRequest(
meeting_id=meeting_id,
annotation_type=proto_type,
text=text,
start_time=start_time,
end_time=end_time,
segment_ids=segment_ids or [],
)
response = self._stub.AddAnnotation(request)
return self._proto_to_annotation_info(response)
except grpc.RpcError as e:
logger.error("Failed to add annotation: %s", e)
return None
def get_annotation(self, annotation_id: str) -> AnnotationInfo | None:
"""Get an annotation by ID.
Args:
annotation_id: Annotation ID.
Returns:
AnnotationInfo or None if not found.
"""
if not self._stub:
return None
try:
request = noteflow_pb2.GetAnnotationRequest(annotation_id=annotation_id)
response = self._stub.GetAnnotation(request)
return self._proto_to_annotation_info(response)
except grpc.RpcError as e:
logger.error("Failed to get annotation: %s", e)
return None
def list_annotations(
self,
meeting_id: str,
start_time: float = 0,
end_time: float = 0,
) -> list[AnnotationInfo]:
"""List annotations for a meeting.
Args:
meeting_id: Meeting ID.
start_time: Optional start time filter.
end_time: Optional end time filter.
Returns:
List of AnnotationInfo.
"""
if not self._stub:
return []
try:
request = noteflow_pb2.ListAnnotationsRequest(
meeting_id=meeting_id,
start_time=start_time,
end_time=end_time,
)
response = self._stub.ListAnnotations(request)
return [self._proto_to_annotation_info(a) for a in response.annotations]
except grpc.RpcError as e:
logger.error("Failed to list annotations: %s", e)
return []
def update_annotation(
self,
annotation_id: str,
annotation_type: str | None = None,
text: str | None = None,
start_time: float | None = None,
end_time: float | None = None,
segment_ids: list[int] | None = None,
) -> AnnotationInfo | None:
"""Update an existing annotation.
Args:
annotation_id: Annotation ID.
annotation_type: Optional new type.
text: Optional new text.
start_time: Optional new start time.
end_time: Optional new end time.
segment_ids: Optional new segment IDs.
Returns:
Updated AnnotationInfo or None if request fails.
"""
if not self._stub:
return None
try:
proto_type = (
self._annotation_type_to_proto(annotation_type)
if annotation_type
else noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED
)
request = noteflow_pb2.UpdateAnnotationRequest(
annotation_id=annotation_id,
annotation_type=proto_type,
text=text or "",
start_time=start_time or 0,
end_time=end_time or 0,
segment_ids=segment_ids or [],
)
response = self._stub.UpdateAnnotation(request)
return self._proto_to_annotation_info(response)
except grpc.RpcError as e:
logger.error("Failed to update annotation: %s", e)
return None
def delete_annotation(self, annotation_id: str) -> bool:
"""Delete an annotation.
Args:
annotation_id: Annotation ID.
Returns:
True if deleted successfully.
"""
if not self._stub:
return False
try:
request = noteflow_pb2.DeleteAnnotationRequest(annotation_id=annotation_id)
response = self._stub.DeleteAnnotation(request)
return response.success
except grpc.RpcError as e:
logger.error("Failed to delete annotation: %s", e)
return False
@staticmethod
def _proto_to_annotation_info(
annotation: noteflow_pb2.Annotation,
) -> AnnotationInfo:
"""Convert proto Annotation to AnnotationInfo.
Args:
annotation: Proto annotation.
Returns:
AnnotationInfo dataclass.
"""
type_map = {
noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED: "note",
noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM: "action_item",
noteflow_pb2.ANNOTATION_TYPE_DECISION: "decision",
noteflow_pb2.ANNOTATION_TYPE_NOTE: "note",
noteflow_pb2.ANNOTATION_TYPE_RISK: "risk",
}
return AnnotationInfo(
id=annotation.id,
meeting_id=annotation.meeting_id,
annotation_type=type_map.get(annotation.annotation_type, "note"),
text=annotation.text,
start_time=annotation.start_time,
end_time=annotation.end_time,
segment_ids=list(annotation.segment_ids),
created_at=annotation.created_at,
)
@staticmethod
def _annotation_type_to_proto(annotation_type: str) -> int:
"""Convert annotation type string to proto enum.
Args:
annotation_type: Type string.
Returns:
Proto enum value.
"""
type_map = {
"action_item": noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM,
"decision": noteflow_pb2.ANNOTATION_TYPE_DECISION,
"note": noteflow_pb2.ANNOTATION_TYPE_NOTE,
"risk": noteflow_pb2.ANNOTATION_TYPE_RISK,
}
return type_map.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_NOTE)
# =========================================================================
# Export Methods
# =========================================================================
def export_transcript(
self,
meeting_id: str,
format_name: str = "markdown",
) -> ExportResult | None:
"""Export meeting transcript.
Args:
meeting_id: Meeting ID.
format_name: Export format (markdown, html).
Returns:
ExportResult or None if request fails.
"""
if not self._stub:
return None
try:
proto_format = self._export_format_to_proto(format_name)
request = noteflow_pb2.ExportTranscriptRequest(
meeting_id=meeting_id,
format=proto_format,
)
response = self._stub.ExportTranscript(request)
return ExportResult(
content=response.content,
format_name=response.format_name,
file_extension=response.file_extension,
)
except grpc.RpcError as e:
logger.error("Failed to export transcript: %s", e)
return None
@staticmethod
def _export_format_to_proto(format_name: str) -> int:
"""Convert export format string to proto enum.
Args:
format_name: Format string.
Returns:
Proto enum value.
"""
format_map = {
"markdown": noteflow_pb2.EXPORT_FORMAT_MARKDOWN,
"md": noteflow_pb2.EXPORT_FORMAT_MARKDOWN,
"html": noteflow_pb2.EXPORT_FORMAT_HTML,
}
return format_map.get(format_name.lower(), noteflow_pb2.EXPORT_FORMAT_MARKDOWN)
@staticmethod
def _job_status_to_str(status: int) -> str:
"""Convert job status enum to string."""
status_map = {
noteflow_pb2.JOB_STATUS_UNSPECIFIED: "unspecified",
noteflow_pb2.JOB_STATUS_QUEUED: "queued",
noteflow_pb2.JOB_STATUS_RUNNING: "running",
noteflow_pb2.JOB_STATUS_COMPLETED: "completed",
noteflow_pb2.JOB_STATUS_FAILED: "failed",
}
return status_map.get(status, "unspecified")
# =========================================================================
# Speaker Diarization Methods
# =========================================================================
def refine_speaker_diarization(
self,
meeting_id: str,
num_speakers: int | None = None,
) -> DiarizationResult | None:
"""Run post-meeting speaker diarization refinement.
Requests the server to run offline diarization on the meeting audio
as a background job and update segment speaker assignments.
Args:
meeting_id: Meeting ID.
num_speakers: Optional known number of speakers (auto-detect if None).
Returns:
DiarizationResult with job status or None if request fails.
"""
if not self._stub:
return None
try:
request = noteflow_pb2.RefineSpeakerDiarizationRequest(
meeting_id=meeting_id,
num_speakers=num_speakers or 0,
)
response = self._stub.RefineSpeakerDiarization(request)
return DiarizationResult(
job_id=response.job_id,
status=self._job_status_to_str(response.status),
segments_updated=response.segments_updated,
speaker_ids=list(response.speaker_ids),
error_message=response.error_message,
)
except grpc.RpcError as e:
logger.error("Failed to refine speaker diarization: %s", e)
return None
def get_diarization_job_status(self, job_id: str) -> DiarizationResult | None:
"""Get status for a diarization background job."""
if not self._stub:
return None
try:
request = noteflow_pb2.GetDiarizationJobStatusRequest(job_id=job_id)
response = self._stub.GetDiarizationJobStatus(request)
return DiarizationResult(
job_id=response.job_id,
status=self._job_status_to_str(response.status),
segments_updated=response.segments_updated,
speaker_ids=list(response.speaker_ids),
error_message=response.error_message,
)
except grpc.RpcError as e:
logger.error("Failed to get diarization job status: %s", e)
return None
def rename_speaker(
self,
meeting_id: str,
old_speaker_id: str,
new_speaker_name: str,
) -> RenameSpeakerResult | None:
"""Rename a speaker in all segments of a meeting.
Args:
meeting_id: Meeting ID.
old_speaker_id: Current speaker ID (e.g., "SPEAKER_00").
new_speaker_name: New speaker name (e.g., "Alice").
Returns:
RenameSpeakerResult or None if request fails.
"""
if not self._stub:
return None
try:
request = noteflow_pb2.RenameSpeakerRequest(
meeting_id=meeting_id,
old_speaker_id=old_speaker_id,
new_speaker_name=new_speaker_name,
)
response = self._stub.RenameSpeaker(request)
return RenameSpeakerResult(
segments_updated=response.segments_updated,
success=response.success,
)
except grpc.RpcError as e:
logger.error("Failed to rename speaker: %s", e)
return None
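The annotation, export, and diarization methods above share one defensive pattern: if the stub is absent or the RPC raises `grpc.RpcError`, they log the failure and return `None` (or an empty list / `False`) instead of propagating. A minimal usage sketch, assuming an already-connected `NoteFlowClient` bound to `client` and a valid `meeting_id` (both hypothetical here, not code from this repository):

```python
# Hedged example: `client` and `meeting_id` are assumed, not shown above.
annotations = client.list_annotations(meeting_id)
for ann in annotations:
    print(f"[{ann.annotation_type}] {ann.start_time:.1f}s: {ann.text}")

# Export the transcript; unknown format names fall back to Markdown.
result = client.export_transcript(meeting_id, format_name="html")
if result is not None:
    with open(f"transcript{result.file_extension}", "w", encoding="utf-8") as fh:
        fh.write(result.content)

# Kick off background diarization, then poll the job status by id.
job = client.refine_speaker_diarization(meeting_id, num_speakers=2)
if job is not None:
    status = client.get_diarization_job_status(job.job_id)
```

Because every call degrades to a sentinel value on failure, callers such as the Flet client below can treat a `None` result as "show nothing" without wrapping each RPC in try/except.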
## File: tests/integration/test_repositories.py
"""Integration tests for SQLAlchemy repositories."""
from __future__ import annotations
from datetime import UTC, datetime
from typing import TYPE_CHECKING
from uuid import uuid4
import pytest
from noteflow.domain.entities import Annotation, Meeting, Segment, Summary, WordTiming
from noteflow.domain.entities.summary import ActionItem, KeyPoint
from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState
from noteflow.infrastructure.persistence.repositories import (
SqlAlchemyAnnotationRepository,
SqlAlchemyMeetingRepository,
SqlAlchemySegmentRepository,
SqlAlchemySummaryRepository,
)
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncSession
@pytest.mark.integration
class TestMeetingRepository:
"""Integration tests for SqlAlchemyMeetingRepository."""
async def test_create_and_get_meeting(self, session: AsyncSession) -> None:
"""Test creating and retrieving a meeting."""
repo = SqlAlchemyMeetingRepository(session)
meeting = Meeting.create(title="Test Meeting", metadata={"key": "value"})
# Create
await repo.create(meeting)
await session.commit()
# Get
retrieved = await repo.get(meeting.id)
assert retrieved is not None
assert retrieved.id == meeting.id
assert retrieved.title == "Test Meeting"
assert retrieved.state == MeetingState.CREATED
assert retrieved.metadata == {"key": "value"}
async def test_get_meeting_not_found(self, session: AsyncSession) -> None:
"""Test retrieving non-existent meeting returns None."""
repo = SqlAlchemyMeetingRepository(session)
meeting_id = MeetingId(Meeting.create().id)
result = await repo.get(meeting_id)
assert result is None
async def test_update_meeting(self, session: AsyncSession) -> None:
"""Test updating a meeting."""
repo = SqlAlchemyMeetingRepository(session)
meeting = Meeting.create(title="Original")
await repo.create(meeting)
await session.commit()
# Update state and title
meeting.start_recording()
await repo.update(meeting)
await session.commit()
# Verify
retrieved = await repo.get(meeting.id)
assert retrieved is not None
assert retrieved.state == MeetingState.RECORDING
assert retrieved.started_at is not None
async def test_delete_meeting(self, session: AsyncSession) -> None:
"""Test deleting a meeting."""
repo = SqlAlchemyMeetingRepository(session)
meeting = Meeting.create(title="To Delete")
await repo.create(meeting)
await session.commit()
# Delete
result = await repo.delete(meeting.id)
await session.commit()
assert result is True
# Verify deleted
retrieved = await repo.get(meeting.id)
assert retrieved is None
async def test_delete_meeting_not_found(self, session: AsyncSession) -> None:
"""Test deleting non-existent meeting returns False."""
repo = SqlAlchemyMeetingRepository(session)
meeting_id = MeetingId(Meeting.create().id)
result = await repo.delete(meeting_id)
assert result is False
async def test_list_all_meetings(self, session: AsyncSession) -> None:
"""Test listing all meetings with pagination."""
repo = SqlAlchemyMeetingRepository(session)
# Create multiple meetings
meetings = [Meeting.create(title=f"Meeting {i}") for i in range(5)]
for m in meetings:
await repo.create(m)
await session.commit()
# List with pagination
result, total = await repo.list_all(limit=3, offset=0)
assert len(result) == 3
assert total == 5
async def test_list_meetings_filter_by_state(self, session: AsyncSession) -> None:
"""Test filtering meetings by state."""
repo = SqlAlchemyMeetingRepository(session)
# Create meetings in different states
created = Meeting.create(title="Created")
await repo.create(created)
recording = Meeting.create(title="Recording")
recording.start_recording()
await repo.create(recording)
await session.commit()
# Filter by RECORDING state
result, _ = await repo.list_all(states=[MeetingState.RECORDING])
assert len(result) == 1
assert result[0].title == "Recording"
async def test_count_by_state(self, session: AsyncSession) -> None:
"""Test counting meetings by state."""
repo = SqlAlchemyMeetingRepository(session)
# Create meetings
for _ in range(3):
await repo.create(Meeting.create())
await session.commit()
count = await repo.count_by_state(MeetingState.CREATED)
assert count == 3
@pytest.mark.integration
class TestSegmentRepository:
"""Integration tests for SqlAlchemySegmentRepository."""
async def test_add_and_get_segments(self, session: AsyncSession) -> None:
"""Test adding and retrieving segments."""
meeting_repo = SqlAlchemyMeetingRepository(session)
segment_repo = SqlAlchemySegmentRepository(session)
# Create meeting first
meeting = Meeting.create(title="Test")
await meeting_repo.create(meeting)
await session.commit()
# Add segments
segment = Segment(
segment_id=0,
text="Hello world",
start_time=0.0,
end_time=2.5,
meeting_id=meeting.id,
language="en",
)
await segment_repo.add(meeting.id, segment)
await session.commit()
# Get segments
result = await segment_repo.get_by_meeting(meeting.id)
assert len(result) == 1
assert result[0].text == "Hello world"
assert result[0].db_id is not None
async def test_add_segment_with_words(self, session: AsyncSession) -> None:
"""Test adding segment with word-level timing."""
meeting_repo = SqlAlchemyMeetingRepository(session)
segment_repo = SqlAlchemySegmentRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
words = [
WordTiming(word="Hello", start_time=0.0, end_time=0.5, probability=0.95),
WordTiming(word="world", start_time=0.5, end_time=1.0, probability=0.98),
]
segment = Segment(
segment_id=0,
text="Hello world",
start_time=0.0,
end_time=1.0,
meeting_id=meeting.id,
words=words,
)
await segment_repo.add(meeting.id, segment)
await session.commit()
result = await segment_repo.get_by_meeting(meeting.id, include_words=True)
assert len(result[0].words) == 2
assert result[0].words[0].word == "Hello"
async def test_add_batch_segments(self, session: AsyncSession) -> None:
"""Test batch adding segments."""
meeting_repo = SqlAlchemyMeetingRepository(session)
segment_repo = SqlAlchemySegmentRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
segments = [
Segment(segment_id=i, text=f"Segment {i}", start_time=float(i), end_time=float(i + 1))
for i in range(3)
]
await segment_repo.add_batch(meeting.id, segments)
await session.commit()
result = await segment_repo.get_by_meeting(meeting.id)
assert len(result) == 3
async def test_get_next_segment_id(self, session: AsyncSession) -> None:
"""Test get_next_segment_id returns max + 1 or 0 when empty."""
meeting_repo = SqlAlchemyMeetingRepository(session)
segment_repo = SqlAlchemySegmentRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
assert await segment_repo.get_next_segment_id(meeting.id) == 0
segments = [
Segment(segment_id=0, text="Segment 0", start_time=0.0, end_time=1.0),
Segment(segment_id=5, text="Segment 5", start_time=1.0, end_time=2.0),
]
await segment_repo.add_batch(meeting.id, segments)
await session.commit()
assert await segment_repo.get_next_segment_id(meeting.id) == 6
async def test_update_embedding_and_retrieve(self, session: AsyncSession) -> None:
"""Test updating a segment embedding persists to the database."""
meeting_repo = SqlAlchemyMeetingRepository(session)
segment_repo = SqlAlchemySegmentRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
segment = Segment(segment_id=0, text="Hello", start_time=0.0, end_time=1.0)
await segment_repo.add(meeting.id, segment)
await session.commit()
assert segment.db_id is not None
embedding = [0.1] * 1536
await segment_repo.update_embedding(segment.db_id, embedding)
await session.commit()
result = await segment_repo.get_by_meeting(meeting.id)
assert result[0].embedding == pytest.approx(embedding)
async def test_search_semantic_orders_by_similarity(self, session: AsyncSession) -> None:
"""Test semantic search returns closest matches first."""
meeting_repo = SqlAlchemyMeetingRepository(session)
segment_repo = SqlAlchemySegmentRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
emb1 = [1.0] + [0.0] * 1535
emb2 = [0.0, 1.0] + [0.0] * 1534
segment1 = Segment(
segment_id=0,
text="First",
start_time=0.0,
end_time=1.0,
embedding=emb1,
)
segment2 = Segment(
segment_id=1,
text="Second",
start_time=1.0,
end_time=2.0,
embedding=emb2,
)
await segment_repo.add_batch(meeting.id, [segment1, segment2])
await session.commit()
results = await segment_repo.search_semantic(query_embedding=emb1, limit=2)
assert len(results) == 2
assert results[0][0].segment_id == 0
assert results[0][1] >= results[1][1]
@pytest.mark.integration
class TestSummaryRepository:
"""Integration tests for SqlAlchemySummaryRepository."""
async def test_save_and_get_summary(self, session: AsyncSession) -> None:
"""Test saving and retrieving summary."""
meeting_repo = SqlAlchemyMeetingRepository(session)
summary_repo = SqlAlchemySummaryRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
summary = Summary(
meeting_id=meeting.id,
executive_summary="This was a productive meeting.",
generated_at=datetime.now(UTC),
model_version="test-v1",
)
await summary_repo.save(summary)
await session.commit()
result = await summary_repo.get_by_meeting(meeting.id)
assert result is not None
assert result.executive_summary == "This was a productive meeting."
assert result.model_version == "test-v1"
async def test_save_summary_with_key_points(self, session: AsyncSession) -> None:
"""Test saving summary with key points."""
meeting_repo = SqlAlchemyMeetingRepository(session)
summary_repo = SqlAlchemySummaryRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
key_points = [
KeyPoint(text="Point 1", segment_ids=[0, 1]),
KeyPoint(text="Point 2", segment_ids=[2]),
]
summary = Summary(
meeting_id=meeting.id,
executive_summary="Summary",
key_points=key_points,
)
await summary_repo.save(summary)
await session.commit()
result = await summary_repo.get_by_meeting(meeting.id)
assert result is not None
assert len(result.key_points) == 2
assert result.key_points[0].text == "Point 1"
async def test_save_summary_with_action_items(self, session: AsyncSession) -> None:
"""Test saving summary with action items."""
meeting_repo = SqlAlchemyMeetingRepository(session)
summary_repo = SqlAlchemySummaryRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
action_items = [
ActionItem(text="Review PR", assignee="Alice", priority=2),
]
summary = Summary(
meeting_id=meeting.id,
executive_summary="Summary",
action_items=action_items,
)
await summary_repo.save(summary)
await session.commit()
result = await summary_repo.get_by_meeting(meeting.id)
assert result is not None
assert len(result.action_items) == 1
assert result.action_items[0].text == "Review PR"
assert result.action_items[0].assignee == "Alice"
async def test_delete_summary(self, session: AsyncSession) -> None:
"""Test deleting summary."""
meeting_repo = SqlAlchemyMeetingRepository(session)
summary_repo = SqlAlchemySummaryRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
summary = Summary(meeting_id=meeting.id, executive_summary="To delete")
await summary_repo.save(summary)
await session.commit()
result = await summary_repo.delete_by_meeting(meeting.id)
await session.commit()
assert result is True
retrieved = await summary_repo.get_by_meeting(meeting.id)
assert retrieved is None
async def test_update_summary_replaces_items(self, session: AsyncSession) -> None:
"""Test saving a summary twice replaces key points and action items."""
meeting_repo = SqlAlchemyMeetingRepository(session)
summary_repo = SqlAlchemySummaryRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
summary_v1 = Summary(
meeting_id=meeting.id,
executive_summary="v1",
key_points=[KeyPoint(text="Old KP")],
action_items=[ActionItem(text="Old AI")],
)
await summary_repo.save(summary_v1)
await session.commit()
summary_v2 = Summary(
meeting_id=meeting.id,
executive_summary="v2",
key_points=[KeyPoint(text="New KP")],
action_items=[ActionItem(text="New AI")],
)
await summary_repo.save(summary_v2)
await session.commit()
result = await summary_repo.get_by_meeting(meeting.id)
assert result is not None
assert result.executive_summary == "v2"
assert [kp.text for kp in result.key_points] == ["New KP"]
assert [ai.text for ai in result.action_items] == ["New AI"]
@pytest.mark.integration
class TestAnnotationRepository:
"""Integration tests for SqlAlchemyAnnotationRepository."""
async def test_add_and_get_annotation(self, session: AsyncSession) -> None:
"""Test adding and retrieving annotation."""
meeting_repo = SqlAlchemyMeetingRepository(session)
annotation_repo = SqlAlchemyAnnotationRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
annotation = Annotation(
id=AnnotationId(uuid4()),
meeting_id=meeting.id,
annotation_type=AnnotationType.NOTE,
text="Decision made",
start_time=1.0,
end_time=2.0,
segment_ids=[0],
)
await annotation_repo.add(annotation)
await session.commit()
retrieved = await annotation_repo.get(annotation.id)
assert retrieved is not None
assert retrieved.text == "Decision made"
assert retrieved.segment_ids == [0]
async def test_get_by_meeting_ordered(self, session: AsyncSession) -> None:
"""Test annotations returned in start_time order."""
meeting_repo = SqlAlchemyMeetingRepository(session)
annotation_repo = SqlAlchemyAnnotationRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
a1 = Annotation(
id=AnnotationId(uuid4()),
meeting_id=meeting.id,
annotation_type=AnnotationType.NOTE,
text="Second",
start_time=2.0,
end_time=3.0,
)
a2 = Annotation(
id=AnnotationId(uuid4()),
meeting_id=meeting.id,
annotation_type=AnnotationType.NOTE,
text="First",
start_time=1.0,
end_time=2.0,
)
await annotation_repo.add(a1)
await annotation_repo.add(a2)
await session.commit()
result = await annotation_repo.get_by_meeting(meeting.id)
assert [a.text for a in result] == ["First", "Second"]
async def test_get_by_time_range_inclusive(self, session: AsyncSession) -> None:
"""Test time range query includes boundary overlaps."""
meeting_repo = SqlAlchemyMeetingRepository(session)
annotation_repo = SqlAlchemyAnnotationRepository(session)
meeting = Meeting.create()
await meeting_repo.create(meeting)
await session.commit()
a1 = Annotation(
id=AnnotationId(uuid4()),
meeting_id=meeting.id,
annotation_type=AnnotationType.NOTE,
text="Ends at boundary",
start_time=0.0,
end_time=1.0,
)
a2 = Annotation(
id=AnnotationId(uuid4()),
meeting_id=meeting.id,
annotation_type=AnnotationType.NOTE,
text="Starts at boundary",
start_time=1.0,
end_time=2.0,
)
await annotation_repo.add(a1)
await annotation_repo.add(a2)
await session.commit()
result = await annotation_repo.get_by_time_range(meeting.id, start_time=1.0, end_time=1.0)
assert {a.text for a in result} == {"Ends at boundary", "Starts at boundary"}
async def test_update_annotation_not_found_raises(self, session: AsyncSession) -> None:
"""Test update raises when annotation does not exist."""
annotation_repo = SqlAlchemyAnnotationRepository(session)
annotation = Annotation(
id=AnnotationId(uuid4()),
meeting_id=MeetingId(uuid4()),
annotation_type=AnnotationType.NOTE,
text="Missing",
start_time=0.0,
end_time=1.0,
)
with pytest.raises(ValueError, match=r"Annotation .* not found"):
await annotation_repo.update(annotation)
async def test_delete_annotation_not_found(self, session: AsyncSession) -> None:
"""Test deleting unknown annotation returns False."""
annotation_repo = SqlAlchemyAnnotationRepository(session)
result = await annotation_repo.delete(AnnotationId(uuid4()))
assert result is False
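These tests depend on a `session: AsyncSession` fixture that is not included in this packed subset. A minimal conftest sketch of what such a fixture could look like, assuming pytest-asyncio (or an equivalent plugin) drives async fixtures and a disposable PostgreSQL database is available; the DSN, schema setup, and per-test rollback strategy here are illustrative, not the repository's actual conftest:

```python
# Hypothetical conftest sketch; the real fixture likely also creates the
# schema and enables pgvector (implied by the embedding tests above).
from collections.abc import AsyncIterator

import pytest
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine


@pytest.fixture
async def session() -> AsyncIterator[AsyncSession]:
    engine = create_async_engine("postgresql+asyncpg://localhost/noteflow_test")
    factory = async_sessionmaker(engine, expire_on_commit=False)
    async with factory() as db_session:
        yield db_session
    await engine.dispose()
```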
## File: src/noteflow/client/components/__init__.py
"""UI components for NoteFlow client.
All components reuse existing types and utilities; nothing is reimplemented here.
"""
from noteflow.client.components._async_mixin import AsyncOperationMixin
from noteflow.client.components._thread_mixin import BackgroundWorkerMixin
from noteflow.client.components.annotation_display import AnnotationDisplayComponent
from noteflow.client.components.annotation_toolbar import AnnotationToolbarComponent
from noteflow.client.components.connection_panel import ConnectionPanelComponent
from noteflow.client.components.meeting_library import MeetingLibraryComponent
from noteflow.client.components.playback_controls import PlaybackControlsComponent
from noteflow.client.components.playback_sync import PlaybackSyncController
from noteflow.client.components.recording_timer import RecordingTimerComponent
from noteflow.client.components.summary_panel import SummaryPanelComponent
from noteflow.client.components.transcript import TranscriptComponent
from noteflow.client.components.vu_meter import VuMeterComponent
__all__ = [
"AnnotationDisplayComponent",
"AnnotationToolbarComponent",
"AsyncOperationMixin",
"BackgroundWorkerMixin",
"ConnectionPanelComponent",
"MeetingLibraryComponent",
"PlaybackControlsComponent",
"PlaybackSyncController",
"RecordingTimerComponent",
"SummaryPanelComponent",
"TranscriptComponent",
"VuMeterComponent",
]
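As `app.py` below shows, each exported component is constructed with the shared `AppState` and contributes a Flet control through its `build()` method. A minimal sketch of that contract, assuming `AppState(server_address=...)` is the only construction surface needed here (its remaining fields are not shown in this subset):

```python
# Hedged sketch based on the component usage visible in app.py below.
import flet as ft

from noteflow.client.components import VuMeterComponent
from noteflow.client.state import AppState


def demo(page: ft.Page) -> None:
    state = AppState(server_address="localhost:50051")
    meter = VuMeterComponent(state=state)
    page.add(meter.build())  # build() returns a Flet control


ft.app(target=demo)
```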
## File: src/noteflow/client/app.py
"""NoteFlow Flet client application.
Captures audio locally and streams to NoteFlow gRPC server for transcription.
Orchestrates UI components; component logic lives in the components themselves.
"""
from __future__ import annotations
import argparse
import asyncio
import logging
import queue
import threading
import time
from typing import TYPE_CHECKING, Final
import flet as ft
from noteflow.application.services import TriggerService
from noteflow.client._trigger_mixin import TriggerMixin
from noteflow.client.components import (
AnnotationDisplayComponent,
AnnotationToolbarComponent,
ConnectionPanelComponent,
MeetingLibraryComponent,
PlaybackControlsComponent,
PlaybackSyncController,
RecordingTimerComponent,
SummaryPanelComponent,
TranscriptComponent,
VuMeterComponent,
)
from noteflow.client.state import AppState
from noteflow.config.constants import DEFAULT_SAMPLE_RATE
from noteflow.config.settings import TriggerSettings, get_settings
from noteflow.infrastructure.audio import (
MeetingAudioReader,
PlaybackState,
SoundDeviceCapture,
TimestampedAudio,
)
from noteflow.infrastructure.security import AesGcmCryptoBox, KeyringKeyStore
from noteflow.infrastructure.summarization import create_summarization_service
if TYPE_CHECKING:
import numpy as np
from numpy.typing import NDArray
from noteflow.application.services.summarization_service import SummarizationService
from noteflow.grpc.client import (
AnnotationInfo,
MeetingInfo,
NoteFlowClient,
ServerInfo,
TranscriptSegment,
)
from noteflow.infrastructure.triggers import AppAudioProvider, CalendarProvider
logger = logging.getLogger(__name__)
DEFAULT_SERVER: Final[str] = "localhost:50051"
class NoteFlowClientApp(TriggerMixin):
"""Flet client application for NoteFlow.
Orchestrates UI components and recording logic.
Inherits trigger detection from TriggerMixin.
"""
def __init__(self, server_address: str = DEFAULT_SERVER) -> None:
"""Initialize the app.
Args:
server_address: NoteFlow server address.
"""
# Centralized state
self._state = AppState(server_address=server_address)
# Audio capture (REUSE existing SoundDeviceCapture)
self._audio_capture: SoundDeviceCapture | None = None
# Client reference (managed by ConnectionPanelComponent)
self._client: NoteFlowClient | None = None
# UI components (initialized in _build_ui)
self._connection_panel: ConnectionPanelComponent | None = None
self._vu_meter: VuMeterComponent | None = None
self._timer: RecordingTimerComponent | None = None
self._transcript: TranscriptComponent | None = None
self._playback_controls: PlaybackControlsComponent | None = None
self._sync_controller: PlaybackSyncController | None = None
self._annotation_toolbar: AnnotationToolbarComponent | None = None
# Meeting library (M4)
self._meeting_library: MeetingLibraryComponent | None = None
# Summarization (M6)
self._summarization_service: SummarizationService | None = None
self._summary_panel: SummaryPanelComponent | None = None
# Annotation display for review mode (M4)
self._annotation_display: AnnotationDisplayComponent | None = None
# Audio reader for archived meetings (M4)
self._audio_reader: MeetingAudioReader | None = None
# Trigger detection (M5)
self._trigger_settings: TriggerSettings | None = None
self._trigger_service: TriggerService | None = None
self._app_audio: AppAudioProvider | None = None
self._calendar_provider: CalendarProvider | None = None
self._trigger_poll_interval: float = 0.0
self._trigger_task: asyncio.Task | None = None
# Recording buttons
self._record_btn: ft.ElevatedButton | None = None
self._stop_btn: ft.ElevatedButton | None = None
# Audio frame consumer thread (processes frames off the audio callback thread)
self._audio_frame_queue: queue.Queue[tuple[NDArray[np.float32], float]] = queue.Queue()
self._audio_consumer_stop = threading.Event()
self._audio_consumer_thread: threading.Thread | None = None
def run(self) -> None:
"""Run the Flet application."""
ft.app(target=self._main)
def _main(self, page: ft.Page) -> None:
"""Flet app entry point.
Args:
page: Flet page.
"""
self._state.set_page(page)
page.title = "NoteFlow Client"
page.window.width = 800
page.window.height = 600
page.padding = 20
page.add(self._build_ui())
page.update()
# Initialize trigger detection (M5)
self._initialize_triggers()
# Start trigger check loop if enabled (opt-in via settings)
if self._state.trigger_enabled:
self._trigger_task = page.run_task(self._trigger_check_loop)
# Ensure background tasks are cancelled when the UI closes
page.on_disconnect = lambda _e: self._shutdown()
def _build_ui(self) -> ft.Column:
"""Build the main UI by composing components.
Returns:
Main UI column.
"""
# Create components with state
self._connection_panel = ConnectionPanelComponent(
state=self._state,
on_connected=self._on_connected,
on_disconnected=self._on_disconnected,
on_transcript_callback=self._on_transcript,
on_connection_change_callback=self._on_connection_change,
)
self._vu_meter = VuMeterComponent(state=self._state)
self._timer = RecordingTimerComponent(state=self._state)
# Transcript with click handler for playback sync
self._transcript = TranscriptComponent(
state=self._state,
on_segment_click=self._on_segment_click,
)
# Playback controls and sync
self._playback_controls = PlaybackControlsComponent(
state=self._state,
on_position_change=self._on_playback_position_change,
)
self._sync_controller = PlaybackSyncController(
state=self._state,
on_highlight_change=self._on_highlight_change,
)
# Annotation toolbar
self._annotation_toolbar = AnnotationToolbarComponent(
state=self._state,
get_client=lambda: self._client,
)
# Annotation display for review mode
self._annotation_display = AnnotationDisplayComponent(
state=self._state,
on_annotation_seek=self._on_annotation_seek,
)
# Meeting library (M4)
self._meeting_library = MeetingLibraryComponent(
state=self._state,
get_client=lambda: self._client,
on_meeting_selected=self._on_meeting_selected,
)
# Initialize summarization service - auto-detects LOCAL/MOCK providers
self._summarization_service = create_summarization_service()
# Summary panel
self._summary_panel = SummaryPanelComponent(
state=self._state,
get_service=lambda: self._summarization_service,
on_citation_click=self._on_citation_click,
)
# Recording controls (still in app.py - orchestration)
self._record_btn = ft.ElevatedButton(
"Start Recording",
on_click=self._on_record_click,
icon=ft.Icons.MIC,
disabled=True,
)
self._stop_btn = ft.ElevatedButton(
"Stop",
on_click=self._on_stop_click,
icon=ft.Icons.STOP,
disabled=True,
)
recording_row = ft.Row([self._record_btn, self._stop_btn])
# Main layout - compose component builds
return ft.Column(
[
ft.Text("NoteFlow Client", size=24, weight=ft.FontWeight.BOLD),
ft.Divider(),
self._connection_panel.build(),
ft.Divider(),
recording_row,
self._vu_meter.build(),
self._timer.build(),
self._annotation_toolbar.build(),
self._annotation_display.build(),
ft.Divider(),
ft.Text("Transcript:", size=16, weight=ft.FontWeight.BOLD),
self._transcript.build(),
self._playback_controls.build(),
ft.Divider(),
self._summary_panel.build(),
ft.Divider(),
ft.Text("Meeting Library:", size=16, weight=ft.FontWeight.BOLD),
self._meeting_library.build(),
],
spacing=10,
)
def _ensure_audio_reader(self) -> MeetingAudioReader | None:
"""Lazily initialize MeetingAudioReader (for review playback)."""
if self._audio_reader:
return self._audio_reader
try:
settings = get_settings()
keystore = KeyringKeyStore()
crypto = AesGcmCryptoBox(keystore)
self._audio_reader = MeetingAudioReader(crypto, settings.meetings_dir)
except (OSError, ValueError, KeyError, RuntimeError) as exc:
logger.exception("Failed to initialize meeting audio reader: %s", exc)
self._audio_reader = None
return self._audio_reader
def _load_meeting_audio(self, meeting: MeetingInfo) -> list[TimestampedAudio]:
"""Load archived audio for a meeting, if available."""
reader = self._ensure_audio_reader()
if not reader:
return []
try:
if not reader.audio_exists(meeting.id):
logger.info("No archived audio for meeting %s", meeting.id)
return []
return reader.load_meeting_audio(meeting.id)
except FileNotFoundError:
logger.info("Audio file missing for meeting %s", meeting.id)
return []
except (OSError, ValueError, RuntimeError) as exc:
logger.exception("Failed to load audio for meeting %s: %s", meeting.id, exc)
return []
def _ensure_audio_capture(self) -> bool:
"""Start audio capture if needed.
Returns:
True if audio capture is running, False if start failed.
"""
if self._audio_capture:
return True
try:
self._audio_capture = SoundDeviceCapture()
self._audio_capture.start(
device_id=None,
on_frames=self._on_audio_frames,
sample_rate=DEFAULT_SAMPLE_RATE,
channels=1,
chunk_duration_ms=100,
)
except (RuntimeError, OSError) as exc:
logger.exception("Failed to start audio capture: %s", exc)
self._audio_capture = None
return False
return True
def _on_connected(self, client: NoteFlowClient, info: ServerInfo) -> None:
"""Handle successful connection.
Args:
client: Connected NoteFlowClient.
info: Server info.
"""
self._client = client
if self._transcript:
self._transcript.display_server_info(info)
if (
self._state.recording
and self._state.current_meeting
and not self._client.start_streaming(self._state.current_meeting.id)
):
logger.error("Failed to resume streaming after reconnect")
self._stop_recording()
self._update_recording_buttons()
# Refresh meeting library on connection
if self._meeting_library:
self._meeting_library.refresh_meetings()
def _on_disconnected(self) -> None:
"""Handle disconnection."""
self._shutdown()
if self._state.recording:
self._stop_recording()
self._client = None
self._update_recording_buttons()
def _on_connection_change(self, _connected: bool, _message: str) -> None:
"""Handle connection state change from client.
Args:
_connected: Connection state (unused).
_message: Status message (unused).
"""
self._update_recording_buttons()
def _on_transcript(self, segment: TranscriptSegment) -> None:
"""Handle transcript update callback.
Args:
segment: Transcript segment from server.
"""
if self._transcript:
self._transcript.add_segment(segment)
self._ensure_summary_panel_ready()
def _on_record_click(self, e: ft.ControlEvent) -> None:
"""Handle record button click.
Args:
e: Control event.
"""
self._start_recording()
def _on_stop_click(self, e: ft.ControlEvent) -> None:
"""Handle stop button click.
Args:
e: Control event.
"""
self._stop_recording()
def _start_recording(self) -> None:
"""Start recording audio."""
if not self._client or not self._state.connected:
return
# Create meeting
meeting = self._client.create_meeting(title=f"Recording {time.strftime('%Y-%m-%d %H:%M')}")
if not meeting:
logger.error("Failed to create meeting")
return
self._state.current_meeting = meeting
# Make summary panel visible once we have meeting context
self._ensure_summary_panel_ready()
# Start streaming
if not self._client.start_streaming(meeting.id):
logger.error("Failed to start streaming")
self._client.stop_meeting(meeting.id)
self._state.current_meeting = None
return
# Start audio capture (reuse existing capture if already running)
if not self._ensure_audio_capture():
self._client.stop_streaming()
self._client.stop_meeting(meeting.id)
self._state.reset_recording_state()
self._update_recording_buttons()
return
self._state.recording = True
# Start audio frame consumer thread
self._start_audio_consumer()
# Clear audio buffer for new recording
self._state.session_audio_buffer.clear()
# Start timer
if self._timer:
self._timer.start()
# Clear transcript
if self._transcript:
self._transcript.clear()
# Enable annotation toolbar
if self._annotation_toolbar:
self._annotation_toolbar.set_visible(True)
self._annotation_toolbar.set_enabled(True)
self._update_recording_buttons()
def _stop_recording(self) -> None:
"""Stop recording audio."""
# Stop audio frame consumer thread
self._stop_audio_consumer()
# Stop audio capture
if self._audio_capture and not self._should_keep_capture_running():
self._audio_capture.stop()
self._audio_capture = None
# Stop streaming
if self._client:
self._client.stop_streaming()
# Stop meeting
if self._state.current_meeting:
self._client.stop_meeting(self._state.current_meeting.id)
# Load buffered audio for playback
if self._state.session_audio_buffer and self._playback_controls:
self._playback_controls.load_audio()
self._playback_controls.set_visible(True)
# Start sync controller for playback
if self._sync_controller:
self._sync_controller.start()
# Keep annotation toolbar visible for playback annotations
if self._annotation_toolbar:
self._annotation_toolbar.set_enabled(True)
# Ensure summary panel reflects current data after recording ends
self._ensure_summary_panel_ready()
# Reset recording state (but keep meeting/transcript for playback)
self._state.recording = False
# Stop timer
if self._timer:
self._timer.stop()
self._update_recording_buttons()
def _on_audio_frames(
self,
frames: NDArray[np.float32],
timestamp: float,
) -> None:
"""Handle audio frames from capture (called from audio thread).
Enqueues frames for processing by consumer thread to avoid blocking
the real-time audio callback.
Args:
frames: Audio samples.
timestamp: Capture timestamp.
"""
self._audio_frame_queue.put_nowait((frames.copy(), timestamp))
def _start_audio_consumer(self) -> None:
"""Start the audio frame consumer thread."""
if self._audio_consumer_thread is not None and self._audio_consumer_thread.is_alive():
return
self._audio_consumer_stop.clear()
self._audio_consumer_thread = threading.Thread(
target=self._audio_consumer_loop,
daemon=True,
name="audio-consumer",
)
self._audio_consumer_thread.start()
def _stop_audio_consumer(self) -> None:
"""Stop the audio frame consumer thread."""
self._audio_consumer_stop.set()
if self._audio_consumer_thread is not None:
self._audio_consumer_thread.join(timeout=1.0)
self._audio_consumer_thread = None
# Drain remaining frames
while not self._audio_frame_queue.empty():
try:
self._audio_frame_queue.get_nowait()
except queue.Empty:
break
def _audio_consumer_loop(self) -> None:
"""Consumer loop that processes audio frames from the queue."""
while not self._audio_consumer_stop.is_set():
try:
frames, timestamp = self._audio_frame_queue.get(timeout=0.1)
self._process_audio_frames(frames, timestamp)
except queue.Empty:
continue
def _process_audio_frames(
self,
frames: NDArray[np.float32],
timestamp: float,
) -> None:
"""Process audio frames from consumer thread.
Args:
frames: Audio samples.
timestamp: Capture timestamp.
"""
# Send to server
if self._client and self._state.recording:
self._client.send_audio(frames, timestamp)
# Buffer for playback
if self._state.recording:
duration = len(frames) / DEFAULT_SAMPLE_RATE
self._state.session_audio_buffer.append(
TimestampedAudio(frames=frames, timestamp=timestamp, duration=duration)
)
# Update VU meter
if self._vu_meter:
self._vu_meter.on_audio_frames(frames)
# Trigger detection uses system output + calendar; no mic-derived updates here.
def _on_segment_click(self, segment_index: int) -> None:
"""Handle transcript segment click - seek playback to segment.
Args:
segment_index: Index of clicked segment.
"""
if self._sync_controller:
self._sync_controller.seek_to_segment(segment_index)
def _on_citation_click(self, segment_id: int) -> None:
"""Handle citation chip click - seek to segment by segment_id.
Args:
segment_id: Segment ID from citation.
"""
# Find segment index by segment_id
for idx, seg in enumerate(self._state.transcript_segments):
if seg.segment_id == segment_id:
self._on_segment_click(idx)
break
def _on_annotation_seek(self, timestamp: float) -> None:
"""Handle annotation click - seek to timestamp.
Args:
timestamp: Timestamp in seconds to seek to.
"""
if self._playback_controls:
self._playback_controls.seek(timestamp)
def _on_meeting_selected(self, meeting: MeetingInfo) -> None:
"""Handle meeting selection from library.
Loads transcript segments, annotations, and prepares for playback review.
Args:
meeting: Selected meeting info.
"""
if not self._client:
return
# 1. Stop any existing playback
if self._state.playback.state != PlaybackState.STOPPED:
self._state.playback.stop()
if self._sync_controller:
self._sync_controller.stop()
# Capture client reference for closure (may run in background thread)
client = self._client
def load_and_apply() -> None:
if not client:
return
try:
segments = client.get_meeting_segments(meeting.id)
annotations = client.list_annotations(meeting.id)
audio_chunks = self._load_meeting_audio(meeting)
except (ConnectionError, ValueError, OSError, RuntimeError) as exc:
logger.exception("Failed to load meeting %s: %s", meeting.id, exc)
return
# Apply results on UI thread to avoid race conditions
self._state.run_on_ui_thread(
lambda: self._apply_meeting_data(meeting, segments, annotations, audio_chunks)
)
page = self._state._page
if page and hasattr(page, "run_thread"):
page.run_thread(load_and_apply)
else:
load_and_apply()
def _apply_meeting_data(
self,
meeting: MeetingInfo,
segments: list[TranscriptSegment],
annotations: list[AnnotationInfo],
audio_chunks: list[TimestampedAudio],
) -> None:
"""Apply loaded meeting data to state and UI (UI thread only)."""
# Clear state and UI before populating with fresh data
self._state.clear_transcript()
self._state.annotations.clear()
self._state.current_summary = None
self._state.highlighted_segment_index = None
self._state.clear_session_audio()
if self._transcript:
self._transcript.clear()
if self._annotation_display:
self._annotation_display.clear()
# Populate transcript
if self._transcript:
for segment in segments:
self._transcript.add_segment(segment)
# Populate annotations
self._state.annotations = annotations
if self._annotation_display:
self._annotation_display.load_annotations(annotations)
# Update meeting state
self._state.current_meeting = meeting
self._state.selected_meeting = meeting
# Enable annotation toolbar for adding new annotations
if self._annotation_toolbar:
self._annotation_toolbar.set_visible(True)
self._annotation_toolbar.set_enabled(True)
# Load audio for playback if available
if audio_chunks:
self._state.session_audio_buffer = audio_chunks
if self._playback_controls:
self._playback_controls.load_audio()
self._playback_controls.set_visible(True)
else:
# Hide controls when no audio is available
if self._playback_controls:
self._playback_controls.set_visible(False)
self._state.playback.stop()
self._state.playback_position = 0.0
# Update summary panel visibility/enabled state
self._ensure_summary_panel_ready()
# Start sync controller for playback highlighting
if self._sync_controller:
self._sync_controller.start()
logger.info(
"Loaded meeting: %s (%d segments, %d annotations, %d audio chunks)",
meeting.title,
len(segments),
len(annotations),
len(audio_chunks),
)
def _ensure_summary_panel_ready(self) -> None:
"""Update summary panel visibility/enabled state based on data availability."""
if not self._summary_panel:
return
has_meeting = self._state.current_meeting is not None
has_segments = bool(self._state.transcript_segments)
# Visible once there is a meeting context; enabled when segments exist.
self._summary_panel.set_visible(has_meeting or has_segments)
self._summary_panel.set_enabled(has_segments and not self._state.summary_loading)
def _on_highlight_change(self, index: int | None) -> None:
"""Handle highlight change from sync controller.
Args:
index: Segment index to highlight, or None to clear.
"""
if self._transcript:
self._transcript.update_highlight(index)
def _on_playback_position_change(self, position: float) -> None:
"""Handle playback position change.
Args:
position: Current playback position in seconds.
"""
# Sync controller handles segment matching internally
_ = position # Position tracked in state
def _shutdown(self) -> None:
"""Stop background tasks and capture started for triggers."""
if self._trigger_task:
self._trigger_task.cancel()
self._trigger_task = None
# Stop audio consumer if running
self._stop_audio_consumer()
if self._app_audio:
self._app_audio.close()
if self._audio_capture and not self._state.recording:
try:
self._audio_capture.stop()
except RuntimeError:
logger.debug("Error stopping audio capture during shutdown", exc_info=True)
self._audio_capture = None
def _update_recording_buttons(self) -> None:
"""Update recording button states."""
if self._record_btn:
self._record_btn.disabled = not self._state.connected or self._state.recording
if self._stop_btn:
self._stop_btn.disabled = not self._state.recording
self._state.request_update()
def main() -> None:
"""Run the NoteFlow client application."""
parser = argparse.ArgumentParser(description="NoteFlow Client")
parser.add_argument(
"-s",
"--server",
type=str,
default=DEFAULT_SERVER,
help=f"Server address (default: {DEFAULT_SERVER})",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
help="Enable verbose logging",
)
args = parser.parse_args()
# Configure logging
log_level = logging.DEBUG if args.verbose else logging.INFO
logging.basicConfig(
level=log_level,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
# Run app
app = NoteFlowClientApp(server_address=args.server)
app.run()
if __name__ == "__main__":
main()
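The audio path in the app above is a textbook real-time producer/consumer handoff: `_on_audio_frames` runs on the sounddevice callback thread and must never block, so it copies frames onto a `queue.Queue`, and `_audio_consumer_loop` drains the queue on a daemon thread where sending to the server, buffering, and VU updates are safe. A stripped-down, self-contained sketch of the same pattern (illustrative names only, not code from the repository):

```python
# Minimal producer/consumer sketch of the audio handoff pattern above.
import queue
import threading

import numpy as np

frames_q: queue.Queue[np.ndarray] = queue.Queue()
stop = threading.Event()


def audio_callback(frames: np.ndarray) -> None:
    # Producer: runs on the real-time audio thread; must not block.
    frames_q.put_nowait(frames.copy())


def consumer() -> None:
    # Consumer: blocks with a timeout so it can observe the stop flag.
    while not stop.is_set():
        try:
            frames = frames_q.get(timeout=0.1)
        except queue.Empty:
            continue
        _ = float(np.abs(frames).mean())  # stand-in for send/buffer/VU work


worker = threading.Thread(target=consumer, daemon=True, name="audio-consumer")
worker.start()
audio_callback(np.zeros(1600, dtype=np.float32))  # one 100 ms chunk at 16 kHz
stop.set()
worker.join(timeout=1.0)
```

The 0.1 s `get` timeout mirrors the app's loop: it bounds shutdown latency without busy-waiting, which is why `_stop_audio_consumer` can join the thread with a one-second timeout.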
## File: src/noteflow/grpc/service.py
"""NoteFlow gRPC service implementation (async with UoW)."""
from __future__ import annotations
import asyncio
import logging
import struct
import time
from collections.abc import AsyncIterator
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar, Final
from uuid import UUID, uuid4
import grpc.aio
import numpy as np
from numpy.typing import NDArray
from noteflow.application.services.export_service import ExportFormat, ExportService
from noteflow.application.services.summarization_service import SummarizationService
from noteflow.config.constants import DEFAULT_SAMPLE_RATE as _DEFAULT_SAMPLE_RATE
from noteflow.domain.entities import Annotation, Meeting, Segment, Summary
from noteflow.domain.summarization import ProviderUnavailableError
from noteflow.domain.value_objects import AnnotationId, AnnotationType, MeetingId, MeetingState
from noteflow.infrastructure.asr import Segmenter, SegmenterConfig, StreamingVad
from noteflow.infrastructure.audio.reader import MeetingAudioReader
from noteflow.infrastructure.audio.writer import MeetingAudioWriter
from noteflow.infrastructure.converters import AsrConverter
from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork
from noteflow.infrastructure.security.crypto import AesGcmCryptoBox
from noteflow.infrastructure.security.keystore import KeyringKeyStore
from .meeting_store import MeetingStore
from .proto import noteflow_pb2, noteflow_pb2_grpc
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from noteflow.infrastructure.asr import FasterWhisperEngine
from noteflow.infrastructure.asr.dto import AsrResult
from noteflow.infrastructure.diarization import DiarizationEngine
logger = logging.getLogger(__name__)
@dataclass
class _StreamSessionInit:
"""Result of stream session initialization."""
next_segment_id: int
error_code: grpc.StatusCode | None = None
error_message: str | None = None
@property
def success(self) -> bool:
"""Check if initialization succeeded."""
return self.error_code is None
@dataclass
class _DiarizationJob:
"""Track background diarization job state."""
job_id: str
meeting_id: str
status: int
segments_updated: int = 0
speaker_ids: list[str] = field(default_factory=list)
error_message: str = ""
created_at: float = field(default_factory=time.time)
updated_at: float = field(default_factory=time.time)
task: asyncio.Task[None] | None = None
def to_proto(self) -> noteflow_pb2.DiarizationJobStatus:
return noteflow_pb2.DiarizationJobStatus(
job_id=self.job_id,
status=self.status,
segments_updated=self.segments_updated,
speaker_ids=self.speaker_ids,
error_message=self.error_message,
)
class NoteFlowServicer(noteflow_pb2_grpc.NoteFlowServiceServicer):
"""Async gRPC service implementation for NoteFlow with PostgreSQL persistence."""
VERSION: Final[str] = "0.2.0"
MAX_CHUNK_SIZE: Final[int] = 1024 * 1024 # 1MB
DEFAULT_SAMPLE_RATE: Final[int] = _DEFAULT_SAMPLE_RATE
SUPPORTED_SAMPLE_RATES: ClassVar[list[int]] = [16000, 44100, 48000]
PARTIAL_CADENCE_SECONDS: Final[float] = 2.0 # Emit partials every 2 seconds
MIN_PARTIAL_AUDIO_SECONDS: Final[float] = 0.5 # Minimum audio for partial inference
def __init__(
self,
asr_engine: FasterWhisperEngine | None = None,
session_factory: async_sessionmaker[AsyncSession] | None = None,
meetings_dir: Path | None = None,
summarization_service: SummarizationService | None = None,
diarization_engine: DiarizationEngine | None = None,
) -> None:
"""Initialize the service.
Args:
asr_engine: Optional ASR engine.
session_factory: Optional async session factory for database persistence.
If not provided, falls back to in-memory MeetingStore.
meetings_dir: Optional directory for meeting audio storage.
Defaults to ~/.noteflow/meetings.
summarization_service: Optional summarization service for generating summaries.
diarization_engine: Optional diarization engine for speaker identification.
"""
self._asr_engine = asr_engine
self._session_factory = session_factory
self._summarization_service = summarization_service
self._diarization_engine = diarization_engine
self._start_time = time.time()
# Fallback to in-memory store if no database configured
self._memory_store: MeetingStore | None = (
MeetingStore() if session_factory is None else None
)
# Audio writing infrastructure
self._meetings_dir = meetings_dir or (Path.home() / ".noteflow" / "meetings")
self._keystore = KeyringKeyStore()
self._crypto = AesGcmCryptoBox(self._keystore)
self._audio_writers: dict[str, MeetingAudioWriter] = {}
# VAD and segmentation state per meeting
self._vad_instances: dict[str, StreamingVad] = {}
self._segmenters: dict[str, Segmenter] = {}
self._was_speaking: dict[str, bool] = {}
self._segment_counters: dict[str, int] = {}
self._stream_formats: dict[str, tuple[int, int]] = {}
self._active_streams: set[str] = set()
# Partial transcription state per meeting
self._partial_buffers: dict[str, list[NDArray[np.float32]]] = {}
self._last_partial_time: dict[str, float] = {}
self._last_partial_text: dict[str, str] = {}
# Streaming diarization state per meeting
self._diarization_turns: dict[str, list[SpeakerTurn]] = {}
self._diarization_stream_time: dict[str, float] = {}
self._diarization_streaming_failed: set[str] = set()
# Track audio write failures to avoid log spam
self._audio_write_failed: set[str] = set()
# Background diarization jobs
self._diarization_jobs: dict[str, _DiarizationJob] = {}
@property
def asr_engine(self) -> FasterWhisperEngine | None:
"""Get the ASR engine."""
return self._asr_engine
def set_asr_engine(self, engine: FasterWhisperEngine) -> None:
"""Set the ASR engine."""
self._asr_engine = engine
@property
def diarization_engine(self) -> DiarizationEngine | None:
"""Get the diarization engine."""
return self._diarization_engine
def set_diarization_engine(self, engine: DiarizationEngine) -> None:
"""Set the diarization engine."""
self._diarization_engine = engine
def _use_database(self) -> bool:
"""Check if database persistence is configured."""
return self._session_factory is not None
def _get_memory_store(self) -> MeetingStore:
"""Get the in-memory store, raising if not configured."""
if self._memory_store is None:
raise RuntimeError("Memory store not configured")
return self._memory_store
def _create_uow(self) -> SqlAlchemyUnitOfWork:
"""Create a new Unit of Work."""
if self._session_factory is None:
raise RuntimeError("Database not configured")
return SqlAlchemyUnitOfWork(self._session_factory)
def _init_streaming_state(self, meeting_id: str, next_segment_id: int) -> None:
"""Initialize VAD, Segmenter, speaking state, and partial buffers for a meeting."""
self._vad_instances[meeting_id] = StreamingVad()
self._segmenters[meeting_id] = Segmenter(
config=SegmenterConfig(sample_rate=self.DEFAULT_SAMPLE_RATE)
)
self._was_speaking[meeting_id] = False
self._segment_counters[meeting_id] = next_segment_id
self._partial_buffers[meeting_id] = []
self._last_partial_time[meeting_id] = time.time()
self._last_partial_text[meeting_id] = ""
self._diarization_turns[meeting_id] = []
self._diarization_stream_time[meeting_id] = 0.0
self._diarization_streaming_failed.discard(meeting_id)
if self._diarization_engine is not None:
self._diarization_engine.reset_streaming()
def _cleanup_streaming_state(self, meeting_id: str) -> None:
"""Clean up VAD, Segmenter, speaking state, and partial buffers for a meeting."""
self._vad_instances.pop(meeting_id, None)
self._segmenters.pop(meeting_id, None)
self._was_speaking.pop(meeting_id, None)
self._segment_counters.pop(meeting_id, None)
self._stream_formats.pop(meeting_id, None)
self._partial_buffers.pop(meeting_id, None)
self._last_partial_time.pop(meeting_id, None)
self._last_partial_text.pop(meeting_id, None)
self._diarization_turns.pop(meeting_id, None)
self._diarization_stream_time.pop(meeting_id, None)
self._diarization_streaming_failed.discard(meeting_id)
def _ensure_meeting_dek(self, meeting: Meeting) -> tuple[bytes, bytes, bool]:
"""Ensure meeting has a DEK, generating one if needed.
Args:
meeting: Meeting entity.
Returns:
Tuple of (dek, wrapped_dek, needs_update).
"""
if meeting.wrapped_dek is None:
dek = self._crypto.generate_dek()
wrapped_dek = self._crypto.wrap_dek(dek)
meeting.wrapped_dek = wrapped_dek
return dek, wrapped_dek, True
wrapped_dek = meeting.wrapped_dek
dek = self._crypto.unwrap_dek(wrapped_dek)
return dek, wrapped_dek, False
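# Note on the pattern above (envelope encryption): each meeting gets its own
# data-encryption key (DEK); only the wrapped, KEK-encrypted form is persisted
# on the Meeting entity, and the plaintext DEK is recovered on demand via
# unwrap_dek(). The key-encryption key lives behind KeyringKeyStore
# (presumably the OS keyring), so a single meeting's audio can be re-keyed or
# discarded without touching any other meeting.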
def _start_meeting_if_needed(self, meeting: Meeting) -> tuple[bool, str | None]:
"""Start recording on meeting if not already recording.
Args:
meeting: Meeting entity.
Returns:
Tuple of (needs_update, error_message).
"""
if meeting.state == MeetingState.RECORDING:
return False, None
try:
meeting.start_recording()
return True, None
except ValueError as e:
return False, str(e)
def _open_meeting_audio_writer(
self,
meeting_id: str,
dek: bytes,
wrapped_dek: bytes,
) -> None:
"""Open audio writer for a meeting.
Args:
meeting_id: Meeting ID string.
dek: Data encryption key.
wrapped_dek: Wrapped DEK.
"""
writer = MeetingAudioWriter(self._crypto, self._meetings_dir)
writer.open(
meeting_id=meeting_id,
dek=dek,
wrapped_dek=wrapped_dek,
sample_rate=self.DEFAULT_SAMPLE_RATE,
)
self._audio_writers[meeting_id] = writer
logger.info("Audio writer opened for meeting %s", meeting_id)
async def _init_stream_session_db(self, meeting_id: str) -> _StreamSessionInit:
"""Initialize stream session using database persistence.
Args:
meeting_id: Meeting ID string.
Returns:
Stream session initialization result.
"""
async with self._create_uow() as uow:
meeting = await uow.meetings.get(MeetingId(UUID(meeting_id)))
if meeting is None:
return _StreamSessionInit(
next_segment_id=0,
error_code=grpc.StatusCode.NOT_FOUND,
error_message=f"Meeting {meeting_id} not found",
)
dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting)
recording_updated, error_msg = self._start_meeting_if_needed(meeting)
if error_msg:
return _StreamSessionInit(
next_segment_id=0,
error_code=grpc.StatusCode.INVALID_ARGUMENT,
error_message=error_msg,
)
if dek_updated or recording_updated:
await uow.meetings.update(meeting)
await uow.commit()
next_segment_id = await uow.segments.get_next_segment_id(meeting.id)
self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek)
self._init_streaming_state(meeting_id, next_segment_id)
return _StreamSessionInit(next_segment_id=next_segment_id)
def _init_stream_session_memory(self, meeting_id: str) -> _StreamSessionInit:
"""Initialize stream session using in-memory store.
Args:
meeting_id: Meeting ID string.
Returns:
Stream session initialization result.
"""
store = self._get_memory_store()
meeting = store.get(meeting_id)
if meeting is None:
return _StreamSessionInit(
next_segment_id=0,
error_code=grpc.StatusCode.NOT_FOUND,
error_message=f"Meeting {meeting_id} not found",
)
dek, wrapped_dek, dek_updated = self._ensure_meeting_dek(meeting)
recording_updated, error_msg = self._start_meeting_if_needed(meeting)
if error_msg:
return _StreamSessionInit(
next_segment_id=0,
error_code=grpc.StatusCode.INVALID_ARGUMENT,
error_message=error_msg,
)
if dek_updated or recording_updated:
store.update(meeting)
next_segment_id = meeting.next_segment_id
self._open_meeting_audio_writer(meeting_id, dek, wrapped_dek)
self._init_streaming_state(meeting_id, next_segment_id)
return _StreamSessionInit(next_segment_id=next_segment_id)
def _next_segment_id(self, meeting_id: str, fallback: int = 0) -> int:
"""Get and increment the next segment id for a meeting."""
next_id = self._segment_counters.get(meeting_id)
if next_id is None:
next_id = fallback
self._segment_counters[meeting_id] = next_id + 1
return next_id
def _normalize_stream_format(
self,
meeting_id: str,
sample_rate: int,
channels: int,
) -> tuple[int, int]:
"""Validate and persist stream audio format for a meeting."""
normalized_rate = sample_rate or self.DEFAULT_SAMPLE_RATE
normalized_channels = channels or 1
if normalized_rate not in self.SUPPORTED_SAMPLE_RATES:
raise ValueError(
"Unsupported sample_rate "
f"{normalized_rate}; supported: {self.SUPPORTED_SAMPLE_RATES}"
)
if normalized_channels < 1:
raise ValueError("channels must be >= 1")
existing = self._stream_formats.get(meeting_id)
if existing and existing != (normalized_rate, normalized_channels):
raise ValueError("Stream audio format cannot change mid-stream")
self._stream_formats.setdefault(meeting_id, (normalized_rate, normalized_channels))
return normalized_rate, normalized_channels
def _convert_audio_format(
self,
audio: NDArray[np.float32],
sample_rate: int,
channels: int,
) -> NDArray[np.float32]:
"""Downmix/resample audio to the server's expected format."""
if channels > 1:
if audio.size % channels != 0:
raise ValueError("Audio buffer size is not divisible by channel count")
audio = audio.reshape(-1, channels).mean(axis=1)
if sample_rate != self.DEFAULT_SAMPLE_RATE:
audio = self._resample_audio(audio, sample_rate, self.DEFAULT_SAMPLE_RATE)
return audio
@staticmethod
def _resample_audio(
audio: NDArray[np.float32],
src_rate: int,
dst_rate: int,
) -> NDArray[np.float32]:
"""Resample audio using linear interpolation."""
if src_rate == dst_rate or audio.size == 0:
return audio
ratio = dst_rate / src_rate
new_length = round(audio.shape[0] * ratio)
if new_length <= 0:
return np.array([], dtype=np.float32)
old_indices = np.arange(audio.shape[0])
new_indices = np.arange(new_length) / ratio
return np.interp(new_indices, old_indices, audio).astype(np.float32)
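# Worked example for the two conversion helpers above: a 100 ms stereo chunk
# at 48 kHz arrives as 9600 interleaved samples. _convert_audio_format
# reshapes it to (4800, 2) and averages across channels, then _resample_audio
# maps it to 16 kHz with ratio = 16000 / 48000 = 1/3, so
# new_length = round(4800 * 1/3) = 1600 and np.interp samples the signal at
# source indices 0, 3, 6, ..., i.e. every third original frame.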
def _close_audio_writer(self, meeting_id: str) -> None:
"""Close and remove the audio writer for a meeting."""
# Clean up write failure tracking
self._audio_write_failed.discard(meeting_id)
if meeting_id not in self._audio_writers:
return
try:
writer = self._audio_writers.pop(meeting_id)
writer.close()
logger.info(
"Audio writer closed for meeting %s: %d bytes written",
meeting_id,
writer.bytes_written,
)
except Exception as e:
logger.error(
"Failed to close audio writer for meeting %s: %s",
meeting_id,
e,
)
async def _count_active_meetings_db(self) -> int:
"""Count active meetings using database state."""
async with self._create_uow() as uow:
total = 0
for state in (MeetingState.RECORDING, MeetingState.STOPPING):
total += await uow.meetings.count_by_state(state)
return total
async def StreamTranscription(
self,
request_iterator: AsyncIterator[noteflow_pb2.AudioChunk],
context: grpc.aio.ServicerContext,
) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]:
"""Handle bidirectional audio streaming with persistence.
Receives audio chunks from client, processes through ASR,
persists segments, and yields transcript updates.
"""
if self._asr_engine is None or not self._asr_engine.is_loaded:
await context.abort(
grpc.StatusCode.FAILED_PRECONDITION,
"ASR engine not loaded",
)
current_meeting_id: str | None = None
try:
async for chunk in request_iterator:
meeting_id = chunk.meeting_id
if not meeting_id:
await context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
"meeting_id required",
)
# Initialize stream on first chunk
if current_meeting_id is None:
init_result = await self._init_stream_for_meeting(meeting_id, context)
if init_result is None:
return # Error already sent via context.abort
current_meeting_id = meeting_id
elif meeting_id != current_meeting_id:
await context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
"Stream may only contain a single meeting_id",
)
# Process audio chunk
async for update in self._process_stream_chunk(current_meeting_id, chunk, context):
yield update
# Flush any remaining audio from segmenter
if current_meeting_id and current_meeting_id in self._segmenters:
async for update in self._flush_segmenter(current_meeting_id):
yield update
finally:
if current_meeting_id:
self._cleanup_streaming_state(current_meeting_id)
self._close_audio_writer(current_meeting_id)
self._active_streams.discard(current_meeting_id)
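# Sketch of a typical update sequence per utterance (assuming default cadence
# constants): UPDATE_TYPE_VAD_START, zero or more UPDATE_TYPE_PARTIAL,
# UPDATE_TYPE_VAD_END, then UPDATE_TYPE_FINAL once the segmenter closes the
# segment.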
async def _init_stream_for_meeting(
self,
meeting_id: str,
context: grpc.aio.ServicerContext,
) -> _StreamSessionInit | None:
"""Initialize streaming for a meeting.
Args:
meeting_id: Meeting ID string.
context: gRPC context for error handling.
Returns:
Initialization result, or None if error was sent.
"""
if meeting_id in self._active_streams:
await context.abort(
grpc.StatusCode.FAILED_PRECONDITION,
f"Meeting {meeting_id} already streaming",
)
self._active_streams.add(meeting_id)
if self._use_database():
init_result = await self._init_stream_session_db(meeting_id)
else:
init_result = self._init_stream_session_memory(meeting_id)
if not init_result.success:
self._active_streams.discard(meeting_id)
await context.abort(init_result.error_code, init_result.error_message or "")
return init_result
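# Note: _active_streams enforces a single live stream per meeting. The id is
# reserved before session init and released either here on init failure or in
# StreamTranscription's finally block when the stream ends.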
async def _process_stream_chunk(
self,
meeting_id: str,
chunk: noteflow_pb2.AudioChunk,
context: grpc.aio.ServicerContext,
) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]:
"""Process a single audio chunk from the stream.
Args:
meeting_id: Meeting ID string.
chunk: Audio chunk from client.
context: gRPC context for error handling.
Yields:
Transcript updates from processing.
"""
try:
sample_rate, channels = self._normalize_stream_format(
meeting_id,
chunk.sample_rate,
chunk.channels,
)
except ValueError as e:
await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e))
audio = self._decode_audio_chunk(chunk)
if audio is None:
return
try:
audio = self._convert_audio_format(audio, sample_rate, channels)
except ValueError as e:
await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e))
# Write to encrypted audio file
self._write_audio_chunk_safe(meeting_id, audio)
# VAD-driven segmentation
async for update in self._process_audio_with_vad(meeting_id, audio):
yield update
def _write_audio_chunk_safe(
self,
meeting_id: str,
audio: NDArray[np.float32],
) -> None:
"""Write audio chunk to encrypted file, logging errors without raising.
Args:
meeting_id: Meeting ID string.
audio: Audio samples to write.
"""
if meeting_id not in self._audio_writers:
return
if meeting_id in self._audio_write_failed:
return # Already failed, skip to avoid log spam
try:
self._audio_writers[meeting_id].write_chunk(audio)
except Exception as e:
logger.error(
"Audio write failed for meeting %s: %s. Recording may be incomplete.",
meeting_id,
e,
)
self._audio_write_failed.add(meeting_id)
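# Note: the first write failure latches the meeting into _audio_write_failed,
# so subsequent chunks are skipped instead of logging once per chunk;
# _close_audio_writer discards the latch when the stream ends.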
def _decode_audio_chunk(
self,
chunk: noteflow_pb2.AudioChunk,
) -> NDArray[np.float32] | None:
"""Decode audio chunk from protobuf to numpy array."""
if not chunk.audio_data:
return None
try:
return np.frombuffer(chunk.audio_data, dtype=np.float32)
except (ValueError, struct.error) as e:
logger.warning("Failed to decode audio chunk: %s", e)
return None
async def _process_audio_with_vad(
self,
meeting_id: str,
audio: NDArray[np.float32],
) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]:
"""Process audio chunk through VAD and Segmenter.
Args:
meeting_id: Meeting identifier.
audio: Audio samples (float32, mono).
Yields:
TranscriptUpdates for VAD events, partials, and finals.
"""
vad = self._vad_instances.get(meeting_id)
segmenter = self._segmenters.get(meeting_id)
if vad is None or segmenter is None:
return
# Get VAD decision
is_speech = vad.process_chunk(audio)
# Streaming diarization (optional)
self._process_streaming_diarization(meeting_id, audio)
# Emit VAD state change events
was_speaking = self._was_speaking.get(meeting_id, False)
if is_speech and not was_speaking:
# Speech started
yield self._create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_START)
self._was_speaking[meeting_id] = True
elif not is_speech and was_speaking:
# Speech ended
yield self._create_vad_update(meeting_id, noteflow_pb2.UPDATE_TYPE_VAD_END)
self._was_speaking[meeting_id] = False
# Buffer audio for partial transcription
if is_speech:
if meeting_id in self._partial_buffers:
self._partial_buffers[meeting_id].append(audio.copy())
# Check if we should emit a partial
partial_update = await self._maybe_emit_partial(meeting_id)
if partial_update is not None:
yield partial_update
# Process through segmenter
for audio_segment in segmenter.process_audio(audio, is_speech):
# Clear partial buffer when we get a final segment
self._clear_partial_buffer(meeting_id)
async for update in self._process_audio_segment(
meeting_id,
audio_segment.audio,
audio_segment.start_time,
):
yield update
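# Note: _was_speaking latches the previous VAD decision per meeting, so only
# edges emit events: False -> True yields VAD_START, True -> False yields
# VAD_END, and steady states yield nothing.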
async def _maybe_emit_partial(
self,
meeting_id: str,
) -> noteflow_pb2.TranscriptUpdate | None:
"""Check if it's time to emit a partial and generate if so.
Args:
meeting_id: Meeting identifier.
Returns:
TranscriptUpdate with partial text, or None if not time yet.
"""
if self._asr_engine is None or not self._asr_engine.is_loaded:
return None
last_time = self._last_partial_time.get(meeting_id, 0)
now = time.time()
# Check if enough time has passed since last partial
if now - last_time < self.PARTIAL_CADENCE_SECONDS:
return None
# Check if we have enough audio
buffer = self._partial_buffers.get(meeting_id, [])
if not buffer:
return None
# Concatenate buffered audio
combined = np.concatenate(buffer)
audio_seconds = len(combined) / self.DEFAULT_SAMPLE_RATE
if audio_seconds < self.MIN_PARTIAL_AUDIO_SECONDS:
return None
# Run inference on buffered audio (async to avoid blocking event loop)
results = await self._asr_engine.transcribe_async(combined)
partial_text = " ".join(result.text for result in results)
# Clear buffer after inference to keep partials incremental and bounded
self._partial_buffers[meeting_id] = []
# Only emit if text changed (debounce)
last_text = self._last_partial_text.get(meeting_id, "")
if partial_text and partial_text != last_text:
self._last_partial_time[meeting_id] = now
self._last_partial_text[meeting_id] = partial_text
return noteflow_pb2.TranscriptUpdate(
meeting_id=meeting_id,
update_type=noteflow_pb2.UPDATE_TYPE_PARTIAL,
partial_text=partial_text,
server_timestamp=now,
)
self._last_partial_time[meeting_id] = now
return None
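# Cadence example (illustrative constants): with PARTIAL_CADENCE_SECONDS = 1.0
# and MIN_PARTIAL_AUDIO_SECONDS = 0.5, a partial is emitted at most once per
# second, and only when at least 0.5 s of speech is buffered and the
# transcribed text differs from the previous partial.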
def _clear_partial_buffer(self, meeting_id: str) -> None:
"""Clear the partial buffer and reset state after a final is emitted.
Args:
meeting_id: Meeting identifier.
"""
if meeting_id in self._partial_buffers:
self._partial_buffers[meeting_id] = []
if meeting_id in self._last_partial_text:
self._last_partial_text[meeting_id] = ""
if meeting_id in self._last_partial_time:
self._last_partial_time[meeting_id] = time.time()
def _process_streaming_diarization(
self,
meeting_id: str,
audio: NDArray[np.float32],
) -> None:
"""Process an audio chunk for streaming diarization (best-effort)."""
if self._diarization_engine is None:
return
if meeting_id in self._diarization_streaming_failed:
return
if audio.size == 0:
return
if not self._diarization_engine.is_streaming_loaded:
try:
self._diarization_engine.load_streaming_model()
except (RuntimeError, ValueError) as exc:
logger.warning(
"Streaming diarization disabled for meeting %s: %s",
meeting_id,
exc,
)
self._diarization_streaming_failed.add(meeting_id)
return
stream_time = self._diarization_stream_time.get(meeting_id, 0.0)
duration = len(audio) / self.DEFAULT_SAMPLE_RATE
try:
turns = self._diarization_engine.process_chunk(
audio,
sample_rate=self.DEFAULT_SAMPLE_RATE,
)
except Exception as exc:
logger.warning(
"Streaming diarization failed for meeting %s: %s",
meeting_id,
exc,
)
self._diarization_streaming_failed.add(meeting_id)
return
diarization_turns = self._diarization_turns.setdefault(meeting_id, [])
for turn in turns:
diarization_turns.append(
SpeakerTurn(
speaker=turn.speaker,
start=turn.start + stream_time,
end=turn.end + stream_time,
confidence=turn.confidence,
)
)
self._diarization_stream_time[meeting_id] = stream_time + duration
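# Timeline example (illustrative): after 10.0 s of audio has streamed, a turn
# the engine reports as (start=0.5, end=2.0) is stored as (10.5, 12.0), so
# turns stay in absolute stream time for _maybe_assign_speaker.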
async def _flush_segmenter(
self,
meeting_id: str,
) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]:
"""Flush remaining audio from segmenter at stream end.
Args:
meeting_id: Meeting identifier.
Yields:
TranscriptUpdates for final segment.
"""
segmenter = self._segmenters.get(meeting_id)
if segmenter is None:
return
# Clear partial buffer since we're flushing to final
self._clear_partial_buffer(meeting_id)
final_segment = segmenter.flush()
if final_segment is not None:
async for update in self._process_audio_segment(
meeting_id,
final_segment.audio,
final_segment.start_time,
):
yield update
async def _process_audio_segment(
self,
meeting_id: str,
audio: NDArray[np.float32],
segment_start_time: float,
) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]:
"""Process a complete audio segment through ASR.
Args:
meeting_id: Meeting identifier.
audio: Complete audio segment.
segment_start_time: Segment start time in stream seconds.
Yields:
TranscriptUpdates for transcribed segments.
"""
if len(audio) == 0 or self._asr_engine is None:
return
if self._use_database():
async with self._create_uow() as uow:
meeting = await uow.meetings.get(MeetingId(UUID(meeting_id)))
if meeting is None:
return
results = await self._asr_engine.transcribe_async(audio)
for result in results:
segment_id = self._next_segment_id(
meeting_id,
fallback=meeting.next_segment_id,
)
segment = self._create_segment_from_asr(
meeting.id,
segment_id,
result,
segment_start_time,
)
self._maybe_assign_speaker(meeting_id, segment)
meeting.add_segment(segment)
await uow.segments.add(meeting.id, segment)
await uow.commit()
yield self._segment_to_proto_update(meeting_id, segment)
else:
store = self._get_memory_store()
meeting = store.get(meeting_id)
if meeting is None:
return
results = await self._asr_engine.transcribe_async(audio)
for result in results:
segment_id = self._next_segment_id(
meeting_id,
fallback=meeting.next_segment_id,
)
segment = self._create_segment_from_asr(
meeting.id,
segment_id,
result,
segment_start_time,
)
self._maybe_assign_speaker(meeting_id, segment)
store.add_segment(meeting_id, segment)
yield self._segment_to_proto_update(meeting_id, segment)
def _create_vad_update(
self,
meeting_id: str,
update_type: int,
) -> noteflow_pb2.TranscriptUpdate:
"""Create a VAD event update.
Args:
meeting_id: Meeting identifier.
update_type: VAD_START or VAD_END.
Returns:
TranscriptUpdate with VAD event.
"""
return noteflow_pb2.TranscriptUpdate(
meeting_id=meeting_id,
update_type=update_type,
server_timestamp=time.time(),
)
def _create_segment_from_asr(
self,
meeting_id: MeetingId,
segment_id: int,
result: AsrResult,
segment_start_time: float,
) -> Segment:
"""Create a Segment from ASR result.
Use converters to transform ASR DTO to domain entities.
"""
words = AsrConverter.result_to_domain_words(result)
if segment_start_time:
for word in words:
word.start_time += segment_start_time
word.end_time += segment_start_time
return Segment(
segment_id=segment_id,
text=result.text,
start_time=result.start + segment_start_time,
end_time=result.end + segment_start_time,
meeting_id=meeting_id,
words=words,
language=result.language,
language_confidence=result.language_probability,
avg_logprob=result.avg_logprob,
no_speech_prob=result.no_speech_prob,
)
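# Offset example (illustrative): for a segment starting at stream second 42.0,
# an ASR word timed (0.3, 0.6) relative to the segment becomes (42.3, 42.6),
# consistent with the shifted segment start/end times.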
def _maybe_assign_speaker(self, meeting_id: str, segment: Segment) -> None:
"""Assign speaker to a segment using streaming diarization turns (best-effort)."""
if self._diarization_engine is None:
return
if meeting_id in self._diarization_streaming_failed:
return
turns = self._diarization_turns.get(meeting_id)
if not turns:
return
speaker_id, confidence = assign_speaker(
segment.start_time,
segment.end_time,
turns,
)
if speaker_id is None:
return
segment.speaker_id = speaker_id
segment.speaker_confidence = confidence
def _segment_to_proto_update(
self,
meeting_id: str,
segment: Segment,
) -> noteflow_pb2.TranscriptUpdate:
"""Convert domain Segment to protobuf TranscriptUpdate."""
words = [
noteflow_pb2.WordTiming(
word=w.word,
start_time=w.start_time,
end_time=w.end_time,
probability=w.probability,
)
for w in segment.words
]
final_segment = noteflow_pb2.FinalSegment(
segment_id=segment.segment_id,
text=segment.text,
start_time=segment.start_time,
end_time=segment.end_time,
words=words,
language=segment.language,
language_confidence=segment.language_confidence,
avg_logprob=segment.avg_logprob,
no_speech_prob=segment.no_speech_prob,
speaker_id=segment.speaker_id or "",
speaker_confidence=segment.speaker_confidence,
)
return noteflow_pb2.TranscriptUpdate(
meeting_id=meeting_id,
update_type=noteflow_pb2.UPDATE_TYPE_FINAL,
segment=final_segment,
server_timestamp=time.time(),
)
async def CreateMeeting(
self,
request: noteflow_pb2.CreateMeetingRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Meeting:
"""Create a new meeting."""
metadata = dict(request.metadata) if request.metadata else {}
if self._use_database():
async with self._create_uow() as uow:
meeting = Meeting.create(title=request.title, metadata=metadata)
saved = await uow.meetings.create(meeting)
await uow.commit()
return self._meeting_to_proto(saved)
else:
store = self._get_memory_store()
meeting = store.create(title=request.title, metadata=metadata)
return self._meeting_to_proto(meeting)
async def StopMeeting(
self,
request: noteflow_pb2.StopMeetingRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Meeting:
"""Stop a meeting using graceful STOPPING -> STOPPED transition."""
meeting_id = request.meeting_id
# Close audio writer if open
if meeting_id in self._audio_writers:
self._close_audio_writer(meeting_id)
if self._use_database():
async with self._create_uow() as uow:
meeting = await uow.meetings.get(MeetingId(UUID(meeting_id)))
if meeting is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Meeting {meeting_id} not found",
)
try:
# Graceful shutdown: RECORDING -> STOPPING -> STOPPED
meeting.begin_stopping()
meeting.stop_recording()
except ValueError as e:
await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e))
await uow.meetings.update(meeting)
await uow.commit()
return self._meeting_to_proto(meeting)
store = self._get_memory_store()
meeting = store.get(meeting_id)
if meeting is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Meeting {meeting_id} not found",
)
try:
# Graceful shutdown: RECORDING -> STOPPING -> STOPPED
meeting.begin_stopping()
meeting.stop_recording()
except ValueError as e:
await context.abort(grpc.StatusCode.INVALID_ARGUMENT, str(e))
store.update(meeting)
return self._meeting_to_proto(meeting)
async def refine_speaker_diarization(
self,
meeting_id: str,
num_speakers: int | None = None,
) -> int:
"""Run post-meeting speaker diarization refinement.
Loads the full meeting audio, runs offline diarization, and updates
segment speaker assignments. This provides higher quality speaker
labels than streaming diarization.
Args:
meeting_id: Meeting UUID string.
num_speakers: Known number of speakers (None for auto-detect).
Returns:
Number of segments updated with speaker labels.
Raises:
RuntimeError: If diarization engine not available or meeting not found.
"""
turns = await asyncio.to_thread(
self._run_diarization_inference,
meeting_id,
num_speakers,
)
updated_count = await self._apply_diarization_turns(meeting_id, turns)
logger.info(
"Updated %d segments with speaker labels for meeting %s",
updated_count,
meeting_id,
)
return updated_count
def _run_diarization_inference(
self,
meeting_id: str,
num_speakers: int | None,
) -> list[SpeakerTurn]:
"""Run offline diarization and return speaker turns (blocking)."""
if self._diarization_engine is None:
raise RuntimeError("Diarization engine not configured")
if not self._diarization_engine.is_offline_loaded:
logger.info("Loading offline diarization model for refinement...")
self._diarization_engine.load_offline_model()
audio_reader = MeetingAudioReader(self._crypto, self._meetings_dir)
if not audio_reader.audio_exists(meeting_id):
raise RuntimeError("No audio file found for meeting")
logger.info("Loading audio for meeting %s", meeting_id)
try:
audio_chunks = audio_reader.load_meeting_audio(meeting_id)
except (FileNotFoundError, ValueError) as exc:
raise RuntimeError(f"Failed to load audio: {exc}") from exc
if not audio_chunks:
raise RuntimeError("No audio chunks loaded for meeting")
sample_rate = audio_reader.sample_rate
all_audio = np.concatenate([chunk.frames for chunk in audio_chunks])
logger.info(
"Running offline diarization on %.2f seconds of audio",
len(all_audio) / sample_rate,
)
turns = self._diarization_engine.diarize_full(
all_audio,
sample_rate=sample_rate,
num_speakers=num_speakers,
)
logger.info("Diarization found %d speaker turns", len(turns))
return list(turns)
async def _apply_diarization_turns(
self,
meeting_id: str,
turns: list[SpeakerTurn],
) -> int:
"""Apply diarization turns to segments and return updated count."""
updated_count = 0
if self._use_database():
async with self._create_uow() as uow:
segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id)))
for segment in segments:
if segment.db_id is None:
continue
speaker_id, confidence = assign_speaker(
segment.start_time,
segment.end_time,
turns,
)
if speaker_id is None:
continue
await uow.segments.update_speaker(
segment.db_id,
speaker_id,
confidence,
)
updated_count += 1
await uow.commit()
else:
store = self._get_memory_store()
if meeting := store.get(meeting_id):
for segment in meeting.segments:
speaker_id, confidence = assign_speaker(
segment.start_time,
segment.end_time,
turns,
)
if speaker_id is None:
continue
segment.speaker_id = speaker_id
segment.speaker_confidence = confidence
updated_count += 1
return updated_count
async def _collect_speaker_ids(self, meeting_id: str) -> list[str]:
"""Collect distinct speaker IDs for a meeting."""
if self._use_database():
async with self._create_uow() as uow:
segments = await uow.segments.get_by_meeting(MeetingId(UUID(meeting_id)))
return sorted({s.speaker_id for s in segments if s.speaker_id})
store = self._get_memory_store()
if meeting := store.get(meeting_id):
return sorted({s.speaker_id for s in meeting.segments if s.speaker_id})
return []
async def ListMeetings(
self,
request: noteflow_pb2.ListMeetingsRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.ListMeetingsResponse:
"""List meetings."""
limit = request.limit or 100
offset = request.offset or 0
sort_desc = request.sort_order != noteflow_pb2.SORT_ORDER_CREATED_ASC
states = [MeetingState(s) for s in request.states] if request.states else None
if self._use_database():
async with self._create_uow() as uow:
meetings, total = await uow.meetings.list_all(
states=states,
limit=limit,
offset=offset,
sort_desc=sort_desc,
)
return noteflow_pb2.ListMeetingsResponse(
meetings=[self._meeting_to_proto(m, include_segments=False) for m in meetings],
total_count=total,
)
else:
store = self._get_memory_store()
meetings, total = store.list_all(
states=states,
limit=limit,
offset=offset,
sort_desc=sort_desc,
)
return noteflow_pb2.ListMeetingsResponse(
meetings=[self._meeting_to_proto(m, include_segments=False) for m in meetings],
total_count=total,
)
async def GetMeeting(
self,
request: noteflow_pb2.GetMeetingRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Meeting:
"""Get meeting details."""
if self._use_database():
async with self._create_uow() as uow:
meeting = await uow.meetings.get(MeetingId(UUID(request.meeting_id)))
if meeting is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Meeting {request.meeting_id} not found",
)
# Load segments if requested
if request.include_segments:
segments = await uow.segments.get_by_meeting(meeting.id)
meeting.segments = list(segments)
# Load summary if requested
if request.include_summary:
summary = await uow.summaries.get_by_meeting(meeting.id)
meeting.summary = summary
return self._meeting_to_proto(
meeting,
include_segments=request.include_segments,
include_summary=request.include_summary,
)
store = self._get_memory_store()
meeting = store.get(request.meeting_id)
if meeting is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Meeting {request.meeting_id} not found",
)
return self._meeting_to_proto(
meeting,
include_segments=request.include_segments,
include_summary=request.include_summary,
)
async def DeleteMeeting(
self,
request: noteflow_pb2.DeleteMeetingRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.DeleteMeetingResponse:
"""Delete a meeting."""
if self._use_database():
async with self._create_uow() as uow:
success = await uow.meetings.delete(MeetingId(UUID(request.meeting_id)))
if success:
await uow.commit()
return noteflow_pb2.DeleteMeetingResponse(success=True)
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Meeting {request.meeting_id} not found",
)
store = self._get_memory_store()
success = store.delete(request.meeting_id)
if not success:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Meeting {request.meeting_id} not found",
)
return noteflow_pb2.DeleteMeetingResponse(success=True)
async def GenerateSummary(
self,
request: noteflow_pb2.GenerateSummaryRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Summary:
"""Generate meeting summary using SummarizationService with fallback."""
if self._use_database():
return await self._generate_summary_db(request, context)
return await self._generate_summary_memory(request, context)
async def _generate_summary_db(
self,
request: noteflow_pb2.GenerateSummaryRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Summary:
"""Generate summary for a meeting stored in the database.
The potentially slow summarization step is executed outside the UoW to
avoid holding database connections while waiting on LLMs.
"""
meeting_id = MeetingId(UUID(request.meeting_id))
# 1) Load meeting, existing summary, and segments inside a short UoW
async with self._create_uow() as uow:
meeting = await uow.meetings.get(meeting_id)
if meeting is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Meeting {request.meeting_id} not found",
)
existing = await uow.summaries.get_by_meeting(meeting.id)
if existing and not request.force_regenerate:
return self._summary_to_proto(existing)
segments = list(await uow.segments.get_by_meeting(meeting.id))
# 2) Run summarization outside DB transaction
summary = await self._summarize_or_placeholder(meeting_id, segments)
# 3) Persist in a fresh UoW
async with self._create_uow() as uow:
saved = await uow.summaries.save(summary)
await uow.commit()
return self._summary_to_proto(saved)
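# Note: two short-lived units of work bracket the slow step: the first reads
# the meeting, any existing summary, and the segments; a fresh one persists
# the result, so no connection is held while the summarizer runs.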
async def _generate_summary_memory(
self,
request: noteflow_pb2.GenerateSummaryRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Summary:
"""Generate summary for meetings held in the in-memory store."""
store = self._get_memory_store()
meeting = store.get(request.meeting_id)
if meeting is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Meeting {request.meeting_id} not found",
)
if meeting.summary and not request.force_regenerate:
return self._summary_to_proto(meeting.summary)
summary = await self._summarize_or_placeholder(meeting.id, meeting.segments)
store.set_summary(request.meeting_id, summary)
return self._summary_to_proto(summary)
async def _summarize_or_placeholder(
self,
meeting_id: MeetingId,
segments: list[Segment],
) -> Summary:
"""Try to summarize via service, fallback to placeholder on failure."""
if self._summarization_service is None:
logger.warning("SummarizationService not configured; using placeholder summary")
return self._generate_placeholder_summary(meeting_id, segments)
try:
result = await self._summarization_service.summarize(
meeting_id=meeting_id,
segments=segments,
)
logger.info(
"Generated summary using %s (fallback=%s)",
result.provider_used,
result.fallback_used,
)
return result.summary
except ProviderUnavailableError as exc:
logger.warning("Summarization provider unavailable; using placeholder: %s", exc)
except (TimeoutError, RuntimeError, ValueError) as exc:
logger.exception(
"Summarization failed (%s); using placeholder summary", type(exc).__name__
)
return self._generate_placeholder_summary(meeting_id, segments)
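# Fallback chain: a missing service, ProviderUnavailableError, TimeoutError,
# RuntimeError, and ValueError all degrade to the placeholder summary; any
# other exception propagates to the gRPC caller.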
def _generate_placeholder_summary(
self,
meeting_id: MeetingId,
segments: list[Segment],
) -> Summary:
"""Generate a lightweight placeholder summary when summarization fails."""
full_text = " ".join(s.text for s in segments)
executive = f"{full_text[:200]}..." if len(full_text) > 200 else full_text
executive = executive or "No transcript available."
return Summary(
meeting_id=meeting_id,
executive_summary=executive,
key_points=[],
action_items=[],
model_version="placeholder-v0",
)
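# Truncation example (illustrative): a 450-character transcript yields its
# first 200 characters plus "..." as the executive summary; an empty
# transcript yields "No transcript available.".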
async def GetServerInfo(
self,
request: noteflow_pb2.ServerInfoRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.ServerInfo:
"""Get server information."""
asr_model = ""
asr_ready = False
if self._asr_engine:
asr_ready = self._asr_engine.is_loaded
asr_model = self._asr_engine.model_size or ""
diarization_enabled = self._diarization_engine is not None
diarization_ready = self._diarization_engine is not None and (
self._diarization_engine.is_streaming_loaded
or self._diarization_engine.is_offline_loaded
)
if self._use_database():
active = await self._count_active_meetings_db()
else:
active = self._get_memory_store().active_count
return noteflow_pb2.ServerInfo(
version=self.VERSION,
asr_model=asr_model,
asr_ready=asr_ready,
supported_sample_rates=self.SUPPORTED_SAMPLE_RATES,
max_chunk_size=self.MAX_CHUNK_SIZE,
uptime_seconds=time.time() - self._start_time,
active_meetings=active,
diarization_enabled=diarization_enabled,
diarization_ready=diarization_ready,
)
def _meeting_to_proto(
self,
meeting: Meeting,
include_segments: bool = True,
include_summary: bool = True,
) -> noteflow_pb2.Meeting:
"""Convert domain Meeting to protobuf."""
segments = []
if include_segments:
for seg in meeting.segments:
words = [
noteflow_pb2.WordTiming(
word=w.word,
start_time=w.start_time,
end_time=w.end_time,
probability=w.probability,
)
for w in seg.words
]
segments.append(
noteflow_pb2.FinalSegment(
segment_id=seg.segment_id,
text=seg.text,
start_time=seg.start_time,
end_time=seg.end_time,
words=words,
language=seg.language,
language_confidence=seg.language_confidence,
avg_logprob=seg.avg_logprob,
no_speech_prob=seg.no_speech_prob,
speaker_id=seg.speaker_id or "",
speaker_confidence=seg.speaker_confidence,
)
)
summary = None
if include_summary and meeting.summary:
summary = self._summary_to_proto(meeting.summary)
return noteflow_pb2.Meeting(
id=str(meeting.id),
title=meeting.title,
state=meeting.state.value,
created_at=meeting.created_at.timestamp(),
started_at=meeting.started_at.timestamp() if meeting.started_at else 0,
ended_at=meeting.ended_at.timestamp() if meeting.ended_at else 0,
duration_seconds=meeting.duration_seconds,
segments=segments,
summary=summary,
metadata=meeting.metadata,
)
def _summary_to_proto(self, summary: Summary) -> noteflow_pb2.Summary:
"""Convert domain Summary to protobuf."""
key_points = [
noteflow_pb2.KeyPoint(
text=kp.text,
segment_ids=kp.segment_ids,
start_time=kp.start_time,
end_time=kp.end_time,
)
for kp in summary.key_points
]
action_items = [
noteflow_pb2.ActionItem(
text=ai.text,
assignee=ai.assignee,
due_date=ai.due_date.timestamp() if ai.due_date is not None else 0,
priority=ai.priority,
segment_ids=ai.segment_ids,
)
for ai in summary.action_items
]
return noteflow_pb2.Summary(
meeting_id=str(summary.meeting_id),
executive_summary=summary.executive_summary,
key_points=key_points,
action_items=action_items,
generated_at=(
summary.generated_at.timestamp() if summary.generated_at is not None else 0
),
model_version=summary.model_version,
)
# =========================================================================
# Annotation Methods
# =========================================================================
async def AddAnnotation(
self,
request: noteflow_pb2.AddAnnotationRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Annotation:
"""Add an annotation to a meeting."""
if not self._use_database():
await context.abort(
grpc.StatusCode.UNIMPLEMENTED,
"Annotations require database persistence",
)
annotation_type = self._proto_to_annotation_type(request.annotation_type)
annotation = Annotation(
id=AnnotationId(uuid4()),
meeting_id=MeetingId(UUID(request.meeting_id)),
annotation_type=annotation_type,
text=request.text,
start_time=request.start_time,
end_time=request.end_time,
segment_ids=list(request.segment_ids),
)
async with self._create_uow() as uow:
saved = await uow.annotations.add(annotation)
await uow.commit()
return self._annotation_to_proto(saved)
async def GetAnnotation(
self,
request: noteflow_pb2.GetAnnotationRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Annotation:
"""Get an annotation by ID."""
if not self._use_database():
await context.abort(
grpc.StatusCode.UNIMPLEMENTED,
"Annotations require database persistence",
)
async with self._create_uow() as uow:
annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id)))
if annotation is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Annotation {request.annotation_id} not found",
)
return self._annotation_to_proto(annotation)
async def ListAnnotations(
self,
request: noteflow_pb2.ListAnnotationsRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.ListAnnotationsResponse:
"""List annotations for a meeting."""
if not self._use_database():
await context.abort(
grpc.StatusCode.UNIMPLEMENTED,
"Annotations require database persistence",
)
async with self._create_uow() as uow:
meeting_id = MeetingId(UUID(request.meeting_id))
# Check if time range filter is specified
if request.start_time > 0 or request.end_time > 0:
annotations = await uow.annotations.get_by_time_range(
meeting_id,
request.start_time,
request.end_time,
)
else:
annotations = await uow.annotations.get_by_meeting(meeting_id)
return noteflow_pb2.ListAnnotationsResponse(
annotations=[self._annotation_to_proto(a) for a in annotations]
)
async def UpdateAnnotation(
self,
request: noteflow_pb2.UpdateAnnotationRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Annotation:
"""Update an existing annotation."""
if not self._use_database():
await context.abort(
grpc.StatusCode.UNIMPLEMENTED,
"Annotations require database persistence",
)
async with self._create_uow() as uow:
annotation = await uow.annotations.get(AnnotationId(UUID(request.annotation_id)))
if annotation is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Annotation {request.annotation_id} not found",
)
# Update fields if provided
if request.annotation_type != noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED:
annotation.annotation_type = self._proto_to_annotation_type(request.annotation_type)
if request.text:
annotation.text = request.text
if request.start_time > 0:
annotation.start_time = request.start_time
if request.end_time > 0:
annotation.end_time = request.end_time
if request.segment_ids:
annotation.segment_ids = list(request.segment_ids)
updated = await uow.annotations.update(annotation)
await uow.commit()
return self._annotation_to_proto(updated)
async def DeleteAnnotation(
self,
request: noteflow_pb2.DeleteAnnotationRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.DeleteAnnotationResponse:
"""Delete an annotation."""
if not self._use_database():
await context.abort(
grpc.StatusCode.UNIMPLEMENTED,
"Annotations require database persistence",
)
async with self._create_uow() as uow:
success = await uow.annotations.delete(AnnotationId(UUID(request.annotation_id)))
if success:
await uow.commit()
return noteflow_pb2.DeleteAnnotationResponse(success=True)
await context.abort(
grpc.StatusCode.NOT_FOUND,
f"Annotation {request.annotation_id} not found",
)
def _annotation_to_proto(
self,
annotation: Annotation,
) -> noteflow_pb2.Annotation:
"""Convert domain Annotation to protobuf."""
return noteflow_pb2.Annotation(
id=str(annotation.id),
meeting_id=str(annotation.meeting_id),
annotation_type=self._annotation_type_to_proto(annotation.annotation_type),
text=annotation.text,
start_time=annotation.start_time,
end_time=annotation.end_time,
segment_ids=annotation.segment_ids,
created_at=annotation.created_at.timestamp(),
)
def _annotation_type_to_proto(
self,
annotation_type: AnnotationType,
) -> int:
"""Convert domain AnnotationType to protobuf enum."""
mapping = {
AnnotationType.ACTION_ITEM: noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM,
AnnotationType.DECISION: noteflow_pb2.ANNOTATION_TYPE_DECISION,
AnnotationType.NOTE: noteflow_pb2.ANNOTATION_TYPE_NOTE,
AnnotationType.RISK: noteflow_pb2.ANNOTATION_TYPE_RISK,
}
return mapping.get(annotation_type, noteflow_pb2.ANNOTATION_TYPE_UNSPECIFIED)
def _proto_to_annotation_type(
self,
proto_type: int,
) -> AnnotationType:
"""Convert protobuf enum to domain AnnotationType."""
mapping: dict[int, AnnotationType] = {
int(noteflow_pb2.ANNOTATION_TYPE_ACTION_ITEM): AnnotationType.ACTION_ITEM,
int(noteflow_pb2.ANNOTATION_TYPE_DECISION): AnnotationType.DECISION,
int(noteflow_pb2.ANNOTATION_TYPE_NOTE): AnnotationType.NOTE,
int(noteflow_pb2.ANNOTATION_TYPE_RISK): AnnotationType.RISK,
}
return mapping.get(proto_type, AnnotationType.NOTE)
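# Note: the two converters are not exact inverses. ANNOTATION_TYPE_UNSPECIFIED
# (and any unknown proto value) decodes to AnnotationType.NOTE, which then
# re-encodes as ANNOTATION_TYPE_NOTE.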
# =========================================================================
# Export Methods
# =========================================================================
async def ExportTranscript(
self,
request: noteflow_pb2.ExportTranscriptRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.ExportTranscriptResponse:
"""Export meeting transcript to specified format."""
if not self._use_database():
await context.abort(
grpc.StatusCode.UNIMPLEMENTED,
"Export requires database persistence",
)
# Map proto format to ExportFormat
fmt = self._proto_to_export_format(request.format)
export_service = ExportService(self._create_uow())
try:
content = await export_service.export_transcript(
MeetingId(UUID(request.meeting_id)),
fmt,
)
exporter_info = export_service.get_supported_formats()
expected_ext = ".html" if fmt == ExportFormat.HTML else ".md"
fmt_name = ""
fmt_ext = ""
for name, ext in exporter_info:
if ext == expected_ext:
fmt_name, fmt_ext = name, ext
break
return noteflow_pb2.ExportTranscriptResponse(
content=content,
format_name=fmt_name,
file_extension=fmt_ext,
)
except ValueError as e:
await context.abort(
grpc.StatusCode.NOT_FOUND,
str(e),
)
def _proto_to_export_format(self, proto_format: int) -> ExportFormat:
"""Convert protobuf ExportFormat to domain ExportFormat."""
if proto_format == noteflow_pb2.EXPORT_FORMAT_HTML:
return ExportFormat.HTML
return ExportFormat.MARKDOWN # Default to Markdown
# =========================================================================
# Speaker Diarization Methods
# =========================================================================
async def RefineSpeakerDiarization(
self,
request: noteflow_pb2.RefineSpeakerDiarizationRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.RefineSpeakerDiarizationResponse:
"""Run post-meeting speaker diarization refinement.
Loads the full meeting audio, runs offline diarization, and updates
segment speaker assignments.
"""
if self._diarization_engine is None:
return noteflow_pb2.RefineSpeakerDiarizationResponse(
error_message="Diarization not enabled on server",
status=noteflow_pb2.JOB_STATUS_FAILED,
)
try:
meeting_uuid = UUID(request.meeting_id)
except ValueError:
return noteflow_pb2.RefineSpeakerDiarizationResponse(
error_message="Invalid meeting_id",
status=noteflow_pb2.JOB_STATUS_FAILED,
)
if self._use_database():
async with self._create_uow() as uow:
meeting = await uow.meetings.get(MeetingId(meeting_uuid))
else:
store = self._get_memory_store()
meeting = store.get(request.meeting_id)
if meeting is None:
return noteflow_pb2.RefineSpeakerDiarizationResponse(
error_message="Meeting not found",
status=noteflow_pb2.JOB_STATUS_FAILED,
)
meeting_state = meeting.state
if meeting_state in (
MeetingState.UNSPECIFIED,
MeetingState.CREATED,
MeetingState.RECORDING,
MeetingState.STOPPING,
):
return noteflow_pb2.RefineSpeakerDiarizationResponse(
error_message=(
"Meeting must be stopped before refinement "
f"(state: {meeting_state.name.lower()})"
),
status=noteflow_pb2.JOB_STATUS_FAILED,
)
num_speakers = request.num_speakers if request.num_speakers > 0 else None
job_id = str(uuid4())
job = _DiarizationJob(
job_id=job_id,
meeting_id=request.meeting_id,
status=noteflow_pb2.JOB_STATUS_QUEUED,
)
self._diarization_jobs[job_id] = job
# Runs in the background; storing the task on the job keeps a live reference
# so it is not garbage-collected before completion.
task = asyncio.create_task(self._run_diarization_job(job_id, num_speakers))
job.task = task
return noteflow_pb2.RefineSpeakerDiarizationResponse(
job_id=job_id,
status=noteflow_pb2.JOB_STATUS_QUEUED,
)
async def _run_diarization_job(self, job_id: str, num_speakers: int | None) -> None:
job = self._diarization_jobs.get(job_id)
if job is None:
return
job.status = noteflow_pb2.JOB_STATUS_RUNNING
job.updated_at = time.time()
try:
updated_count = await self.refine_speaker_diarization(
meeting_id=job.meeting_id,
num_speakers=num_speakers,
)
speaker_ids = await self._collect_speaker_ids(job.meeting_id)
job.segments_updated = updated_count
job.speaker_ids = speaker_ids
job.status = noteflow_pb2.JOB_STATUS_COMPLETED
except Exception as exc:
logger.exception("Diarization failed for meeting %s", job.meeting_id)
job.error_message = str(exc)
job.status = noteflow_pb2.JOB_STATUS_FAILED
finally:
job.updated_at = time.time()
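# Job lifecycle: RefineSpeakerDiarization returns immediately with
# JOB_STATUS_QUEUED; this task moves the job through JOB_STATUS_RUNNING to
# JOB_STATUS_COMPLETED or JOB_STATUS_FAILED, and clients poll via
# GetDiarizationJobStatus using the returned job_id.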
async def RenameSpeaker(
self,
request: noteflow_pb2.RenameSpeakerRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.RenameSpeakerResponse:
"""Rename a speaker ID in all segments of a meeting.
Updates all segments where speaker_id matches old_speaker_id
to use new_speaker_name instead.
"""
if not request.old_speaker_id or not request.new_speaker_name:
await context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
"old_speaker_id and new_speaker_name are required",
)
try:
meeting_uuid = UUID(request.meeting_id)
except ValueError:
await context.abort(
grpc.StatusCode.INVALID_ARGUMENT,
"Invalid meeting_id",
)
updated_count = 0
if self._use_database():
async with self._create_uow() as uow:
segments = await uow.segments.get_by_meeting(MeetingId(meeting_uuid))
for segment in segments:
if segment.speaker_id == request.old_speaker_id and segment.db_id:
await uow.segments.update_speaker(
segment.db_id,
request.new_speaker_name,
segment.speaker_confidence,
)
updated_count += 1
await uow.commit()
else:
store = self._get_memory_store()
if meeting := store.get(request.meeting_id):
for segment in meeting.segments:
if segment.speaker_id == request.old_speaker_id:
segment.speaker_id = request.new_speaker_name
updated_count += 1
return noteflow_pb2.RenameSpeakerResponse(
segments_updated=updated_count,
success=updated_count > 0,
)
async def GetDiarizationJobStatus(
self,
request: noteflow_pb2.GetDiarizationJobStatusRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.DiarizationJobStatus:
"""Return current status for a diarization job."""
job = self._diarization_jobs.get(request.job_id)
if job is None:
await context.abort(
grpc.StatusCode.NOT_FOUND,
"Diarization job not found",
)
return job.to_proto()