Enhance summarization model attributes and database schema

- Updated the Summary entity to include provider and model names, along with tokens used and latency metrics for better tracking of summarization performance.
- Modified the ORM converters and repository methods to accommodate new attributes, ensuring backward compatibility.
- Introduced word timing position indexing to maintain word order within transcript segments.
- Added a new SQLAlchemy model structure for improved organization of persistence layers, including core, identity, and integration models.
- Removed deprecated models and files to streamline the codebase.
commit 6fa792990b (parent 038f63866e), 2025-12-25 13:46:00 -05:00
52 changed files with 13196 additions and 11789 deletions

.claude/settings.json (new file)

@@ -0,0 +1,5 @@
{
"enabledPlugins": {
"pyright-lsp@claude-plugins-official": true
}
}


@@ -15,9 +15,7 @@
# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
languages:
- python
# the encoding used by text files in the project
# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
- typescript
encoding: "utf-8"
# whether to use the project's gitignore file to ignore files

docker/db/schema.sql (new file)

@@ -0,0 +1,599 @@
-- noteflow_init.sql
-- Creates schema + tables + placeholder data for local dev.
-- Extensions (safe to run repeatedly)
CREATE EXTENSION IF NOT EXISTS pgcrypto;
CREATE EXTENSION IF NOT EXISTS citext;
CREATE EXTENSION IF NOT EXISTS vector;
-- Schema
CREATE SCHEMA IF NOT EXISTS noteflow;
SET search_path TO noteflow, public;
-- updated_at trigger helper
CREATE OR REPLACE FUNCTION noteflow.set_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = now();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
--------------------------------------------------------------------------------
-- Identity / tenancy (future-ready)
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.workspaces (
id uuid PRIMARY KEY,
slug text UNIQUE,
name text NOT NULL,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
metadata jsonb NOT NULL DEFAULT '{}'::jsonb
);
CREATE TABLE IF NOT EXISTS noteflow.users (
id uuid PRIMARY KEY,
email citext UNIQUE,
display_name text NOT NULL,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
metadata jsonb NOT NULL DEFAULT '{}'::jsonb
);
CREATE TABLE IF NOT EXISTS noteflow.workspace_memberships (
workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
user_id uuid NOT NULL REFERENCES noteflow.users(id) ON DELETE CASCADE,
role text NOT NULL DEFAULT 'owner',
created_at timestamptz NOT NULL DEFAULT now(),
PRIMARY KEY (workspace_id, user_id)
);
CREATE TRIGGER trg_workspaces_updated_at
BEFORE UPDATE ON noteflow.workspaces
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE TRIGGER trg_users_updated_at
BEFORE UPDATE ON noteflow.users
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
--------------------------------------------------------------------------------
-- Core domain (matches current project shape)
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.meetings (
id uuid PRIMARY KEY,
-- Forward-looking fields: safe defaults for current code
workspace_id uuid NOT NULL DEFAULT '00000000-0000-0000-0000-000000000001'::uuid
REFERENCES noteflow.workspaces(id) ON DELETE RESTRICT,
created_by_id uuid NULL DEFAULT '00000000-0000-0000-0000-000000000001'::uuid
REFERENCES noteflow.users(id) ON DELETE SET NULL,
title varchar(255) NOT NULL,
state integer NOT NULL DEFAULT 1, -- 1..5 (Created..Error)
created_at timestamptz NOT NULL DEFAULT now(),
started_at timestamptz NULL,
ended_at timestamptz NULL,
metadata jsonb NOT NULL DEFAULT '{}'::jsonb,
wrapped_dek bytea NULL,
asset_path text NULL,
deleted_at timestamptz NULL
);
-- Postgres does not support ADD CONSTRAINT IF NOT EXISTS; use an idempotent DO block instead.
DO $$ BEGIN
    ALTER TABLE noteflow.meetings
        ADD CONSTRAINT meetings_state_chk CHECK (state BETWEEN 1 AND 5);
EXCEPTION WHEN duplicate_object OR duplicate_table THEN NULL;
END $$;
CREATE INDEX IF NOT EXISTS idx_meetings_workspace_created_at
ON noteflow.meetings(workspace_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_meetings_state
ON noteflow.meetings(state);
CREATE TABLE IF NOT EXISTS noteflow.segments (
id bigserial PRIMARY KEY,
meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
segment_id integer NOT NULL, -- stable ordering within meeting
text text NOT NULL,
start_time double precision NOT NULL,
end_time double precision NOT NULL,
language varchar(10) NOT NULL DEFAULT 'en',
language_confidence double precision NOT NULL DEFAULT 0,
avg_logprob double precision NOT NULL DEFAULT 0,
no_speech_prob double precision NOT NULL DEFAULT 0,
embedding vector(1536) NULL,
speaker_id varchar(50) NULL,
speaker_confidence double precision NOT NULL DEFAULT 0.0,
created_at timestamptz NOT NULL DEFAULT now()
);
DO $$ BEGIN
    ALTER TABLE noteflow.segments
        ADD CONSTRAINT segments_unique_per_meeting UNIQUE (meeting_id, segment_id);
EXCEPTION WHEN duplicate_object OR duplicate_table THEN NULL;
END $$;
CREATE INDEX IF NOT EXISTS idx_segments_meeting_id
ON noteflow.segments(meeting_id);
CREATE INDEX IF NOT EXISTS idx_segments_meeting_time
ON noteflow.segments(meeting_id, start_time);
-- Vector index (ivfflat is broadly supported; you can switch to hnsw later)
CREATE INDEX IF NOT EXISTS idx_segments_embedding_ivfflat
ON noteflow.segments USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
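-- Sketch of the hnsw alternative mentioned above (assumes pgvector >= 0.5); kept commented out:
-- CREATE INDEX IF NOT EXISTS idx_segments_embedding_hnsw
--     ON noteflow.segments USING hnsw (embedding vector_cosine_ops);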
CREATE TABLE IF NOT EXISTS noteflow.word_timings (
id bigserial PRIMARY KEY,
segment_pk bigint NOT NULL REFERENCES noteflow.segments(id) ON DELETE CASCADE,
word_index integer NOT NULL,
word varchar(255) NOT NULL,
start_time double precision NOT NULL,
end_time double precision NOT NULL,
probability double precision NOT NULL,
UNIQUE (segment_pk, word_index)
);
CREATE INDEX IF NOT EXISTS idx_word_timings_segment_pk
ON noteflow.word_timings(segment_pk);
CREATE TABLE IF NOT EXISTS noteflow.summaries (
id bigserial PRIMARY KEY,
meeting_id uuid NOT NULL UNIQUE REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
executive_summary text NOT NULL DEFAULT '',
generated_at timestamptz NOT NULL DEFAULT now(),
provider_name text NOT NULL DEFAULT '',
model_name text NOT NULL DEFAULT '',
tokens_used integer NULL,
latency_ms double precision NULL,
verification jsonb NOT NULL DEFAULT '{}'::jsonb
);
CREATE TABLE IF NOT EXISTS noteflow.key_points (
id bigserial PRIMARY KEY,
summary_id bigint NOT NULL REFERENCES noteflow.summaries(id) ON DELETE CASCADE,
position integer NOT NULL,
text text NOT NULL,
segment_ids integer[] NOT NULL DEFAULT '{}'::integer[],
start_time double precision NOT NULL DEFAULT 0,
end_time double precision NOT NULL DEFAULT 0,
UNIQUE (summary_id, position)
);
CREATE TABLE IF NOT EXISTS noteflow.action_items (
id bigserial PRIMARY KEY,
summary_id bigint NOT NULL REFERENCES noteflow.summaries(id) ON DELETE CASCADE,
position integer NOT NULL,
text text NOT NULL,
segment_ids integer[] NOT NULL DEFAULT '{}'::integer[],
start_time double precision NOT NULL DEFAULT 0,
end_time double precision NOT NULL DEFAULT 0,
assignee text NOT NULL DEFAULT '',
due_date timestamptz NULL,
priority integer NOT NULL DEFAULT 0,
UNIQUE (summary_id, position)
);
CREATE TABLE IF NOT EXISTS noteflow.annotations (
id bigserial PRIMARY KEY,
annotation_id uuid NOT NULL DEFAULT gen_random_uuid(),
meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
annotation_type varchar(50) NOT NULL,
text text NOT NULL,
start_time double precision NOT NULL DEFAULT 0,
end_time double precision NOT NULL DEFAULT 0,
segment_ids integer[] NOT NULL DEFAULT '{}'::integer[],
created_at timestamptz NOT NULL DEFAULT now(),
UNIQUE (annotation_id)
);
CREATE INDEX IF NOT EXISTS idx_annotations_meeting_id
ON noteflow.annotations(meeting_id);
CREATE TABLE IF NOT EXISTS noteflow.diarization_jobs (
id varchar(36) PRIMARY KEY,
meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
status integer NOT NULL DEFAULT 0,
segments_updated integer NOT NULL DEFAULT 0,
speaker_ids text[] NOT NULL DEFAULT '{}'::text[],
error_message text NOT NULL DEFAULT '',
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now()
);
CREATE TRIGGER trg_diarization_jobs_updated_at
BEFORE UPDATE ON noteflow.diarization_jobs
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE TABLE IF NOT EXISTS noteflow.streaming_diarization_turns (
id bigserial PRIMARY KEY,
meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
speaker varchar(50) NOT NULL,
start_time double precision NOT NULL,
end_time double precision NOT NULL,
confidence double precision NOT NULL DEFAULT 0.0,
created_at timestamptz NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_streaming_turns_meeting_time
ON noteflow.streaming_diarization_turns(meeting_id, start_time);
-- Existing style KV preferences (compat with current repo pattern)
CREATE TABLE IF NOT EXISTS noteflow.user_preferences (
key varchar(64) PRIMARY KEY,
value jsonb NOT NULL DEFAULT '{}'::jsonb,
updated_at timestamptz NOT NULL DEFAULT now()
);
--------------------------------------------------------------------------------
-- Future-facing but safe additions: people, tags, tasks, integrations, settings
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.persons (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
display_name text NOT NULL,
email citext NULL,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
metadata jsonb NOT NULL DEFAULT '{}'::jsonb,
UNIQUE (workspace_id, email)
);
CREATE TRIGGER trg_persons_updated_at
BEFORE UPDATE ON noteflow.persons
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE TABLE IF NOT EXISTS noteflow.meeting_speakers (
meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
speaker_id varchar(50) NOT NULL,
display_name text NULL,
person_id uuid NULL REFERENCES noteflow.persons(id) ON DELETE SET NULL,
created_at timestamptz NOT NULL DEFAULT now(),
PRIMARY KEY (meeting_id, speaker_id)
);
CREATE TABLE IF NOT EXISTS noteflow.tags (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
name text NOT NULL,
color text NOT NULL DEFAULT '#888888',
created_at timestamptz NOT NULL DEFAULT now(),
UNIQUE (workspace_id, name)
);
CREATE TABLE IF NOT EXISTS noteflow.meeting_tags (
meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
tag_id uuid NOT NULL REFERENCES noteflow.tags(id) ON DELETE CASCADE,
PRIMARY KEY (meeting_id, tag_id)
);
CREATE TABLE IF NOT EXISTS noteflow.tasks (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
meeting_id uuid NULL REFERENCES noteflow.meetings(id) ON DELETE SET NULL,
action_item_id bigint NULL REFERENCES noteflow.action_items(id) ON DELETE SET NULL,
text text NOT NULL,
status text NOT NULL DEFAULT 'open',
assignee_person_id uuid NULL REFERENCES noteflow.persons(id) ON DELETE SET NULL,
due_date timestamptz NULL,
priority integer NOT NULL DEFAULT 0,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
completed_at timestamptz NULL,
metadata jsonb NOT NULL DEFAULT '{}'::jsonb
);
DO $$ BEGIN
    ALTER TABLE noteflow.tasks
        ADD CONSTRAINT tasks_status_chk CHECK (status IN ('open','done','dismissed'));
EXCEPTION WHEN duplicate_object OR duplicate_table THEN NULL;
END $$;
CREATE INDEX IF NOT EXISTS idx_tasks_workspace_status
ON noteflow.tasks(workspace_id, status);
CREATE TRIGGER trg_tasks_updated_at
BEFORE UPDATE ON noteflow.tasks
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE TABLE IF NOT EXISTS noteflow.integrations (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
name text NOT NULL,
type text NOT NULL,
status text NOT NULL DEFAULT 'disconnected',
config jsonb NOT NULL DEFAULT '{}'::jsonb,
last_sync timestamptz NULL,
error_message text NULL,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now()
);
DO $$ BEGIN
    ALTER TABLE noteflow.integrations
        ADD CONSTRAINT integrations_type_chk CHECK (type IN ('auth','email','calendar','pkm','custom'));
EXCEPTION WHEN duplicate_object OR duplicate_table THEN NULL;
END $$;
DO $$ BEGIN
    ALTER TABLE noteflow.integrations
        ADD CONSTRAINT integrations_status_chk CHECK (status IN ('disconnected','connected','error'));
EXCEPTION WHEN duplicate_object OR duplicate_table THEN NULL;
END $$;
CREATE TRIGGER trg_integrations_updated_at
BEFORE UPDATE ON noteflow.integrations
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE TABLE IF NOT EXISTS noteflow.integration_secrets (
integration_id uuid NOT NULL REFERENCES noteflow.integrations(id) ON DELETE CASCADE,
secret_key text NOT NULL,
secret_value bytea NOT NULL,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
PRIMARY KEY (integration_id, secret_key)
);
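-- Assumption (based on the security notes elsewhere in this commit): secret_value holds
-- ciphertext produced by the application layer; plaintext secrets are never stored here.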
CREATE TRIGGER trg_integration_secrets_updated_at
BEFORE UPDATE ON noteflow.integration_secrets
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE TABLE IF NOT EXISTS noteflow.integration_sync_runs (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
integration_id uuid NOT NULL REFERENCES noteflow.integrations(id) ON DELETE CASCADE,
status text NOT NULL,
started_at timestamptz NOT NULL DEFAULT now(),
ended_at timestamptz NULL,
duration_ms integer NULL,
error_message text NULL,
stats jsonb NOT NULL DEFAULT '{}'::jsonb
);
DO $$ BEGIN
    ALTER TABLE noteflow.integration_sync_runs
        ADD CONSTRAINT integration_sync_runs_status_chk CHECK (status IN ('running','success','error'));
EXCEPTION WHEN duplicate_object OR duplicate_table THEN NULL;
END $$;
CREATE INDEX IF NOT EXISTS idx_sync_runs_integration_started
ON noteflow.integration_sync_runs(integration_id, started_at DESC);
CREATE TABLE IF NOT EXISTS noteflow.calendar_events (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
integration_id uuid NOT NULL REFERENCES noteflow.integrations(id) ON DELETE CASCADE,
external_id text NOT NULL,
calendar_id text NOT NULL,
calendar_name text NOT NULL,
title text NOT NULL,
description text NULL,
start_time timestamptz NOT NULL,
end_time timestamptz NOT NULL,
location text NULL,
attendees text[] NULL,
is_all_day boolean NOT NULL DEFAULT false,
meeting_link text NULL,
raw jsonb NOT NULL DEFAULT '{}'::jsonb,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
UNIQUE (integration_id, external_id)
);
CREATE TRIGGER trg_calendar_events_updated_at
BEFORE UPDATE ON noteflow.calendar_events
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE TABLE IF NOT EXISTS noteflow.meeting_calendar_links (
meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
calendar_event_id uuid NOT NULL REFERENCES noteflow.calendar_events(id) ON DELETE CASCADE,
PRIMARY KEY (meeting_id, calendar_event_id)
);
CREATE TABLE IF NOT EXISTS noteflow.external_refs (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
integration_id uuid NOT NULL REFERENCES noteflow.integrations(id) ON DELETE CASCADE,
entity_type text NOT NULL,
entity_id text NOT NULL,
external_id text NOT NULL,
external_url text NULL,
created_at timestamptz NOT NULL DEFAULT now(),
UNIQUE (integration_id, entity_type, entity_id)
);
CREATE TABLE IF NOT EXISTS noteflow.settings (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
scope text NOT NULL, -- system | workspace | user
workspace_id uuid NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
user_id uuid NULL REFERENCES noteflow.users(id) ON DELETE CASCADE,
key text NOT NULL,
value jsonb NOT NULL DEFAULT '{}'::jsonb,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
UNIQUE (scope, workspace_id, user_id, key)
);
DO $$ BEGIN
    ALTER TABLE noteflow.settings
        ADD CONSTRAINT settings_scope_chk CHECK (scope IN ('system','workspace','user'));
EXCEPTION WHEN duplicate_object OR duplicate_table THEN NULL;
END $$;
CREATE TRIGGER trg_settings_updated_at
BEFORE UPDATE ON noteflow.settings
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
--------------------------------------------------------------------------------
-- Seed data (safe to re-run)
--------------------------------------------------------------------------------
-- Deterministic IDs for local dev
-- workspace/user share the same UUID to simplify defaults
INSERT INTO noteflow.workspaces (id, slug, name, metadata)
VALUES (
'00000000-0000-0000-0000-000000000001',
'default',
'Default Workspace',
'{"seed":true}'::jsonb
)
ON CONFLICT (id) DO NOTHING;
INSERT INTO noteflow.users (id, email, display_name, metadata)
VALUES (
'00000000-0000-0000-0000-000000000001',
'local@noteflow.local',
'Local User',
'{"seed":true}'::jsonb
)
ON CONFLICT (id) DO NOTHING;
INSERT INTO noteflow.workspace_memberships (workspace_id, user_id, role)
VALUES (
'00000000-0000-0000-0000-000000000001',
'00000000-0000-0000-0000-000000000001',
'owner'
)
ON CONFLICT DO NOTHING;
-- Sample meeting
INSERT INTO noteflow.meetings (
id, title, state, created_at, started_at, ended_at, metadata, asset_path
) VALUES (
'11111111-1111-1111-1111-111111111111',
'Seed Meeting: Project Kickoff',
4,
now() - interval '2 days',
now() - interval '2 days' + interval '5 minutes',
now() - interval '2 days' + interval '47 minutes',
'{"source":"seed","topic":"kickoff"}'::jsonb,
'11111111-1111-1111-1111-111111111111'
)
ON CONFLICT (id) DO NOTHING;
-- Sample segments
INSERT INTO noteflow.segments (
id, meeting_id, segment_id, text, start_time, end_time, language, speaker_id, speaker_confidence
) VALUES
(1, '11111111-1111-1111-1111-111111111111', 0, 'Welcome everyone. Today we will align on goals and deliverables.', 0.0, 6.2, 'en', 'SPEAKER_00', 0.92),
(2, '11111111-1111-1111-1111-111111111111', 1, 'We should prioritize the database schema first, then build the UI around it.', 6.2, 12.4, 'en', 'SPEAKER_01', 0.88),
(3, '11111111-1111-1111-1111-111111111111', 2, 'Action item: draft an initial schema and seed script for local development.', 12.4, 18.0, 'en', 'SPEAKER_00', 0.90)
ON CONFLICT (id) DO NOTHING;
-- Word timings (a few illustrative words)
INSERT INTO noteflow.word_timings (segment_pk, word_index, word, start_time, end_time, probability)
VALUES
(1, 0, 'Welcome', 0.00, 0.40, 0.98),
(1, 1, 'everyone.', 0.41, 0.80, 0.97),
(2, 0, 'We', 6.20, 6.30, 0.99),
(2, 1, 'should', 6.31, 6.55, 0.99),
(3, 0, 'Action', 12.40, 12.62, 0.97),
(3, 1, 'item:', 12.63, 12.82, 0.95)
ON CONFLICT DO NOTHING;
-- Summary + points + items
INSERT INTO noteflow.summaries (
id, meeting_id, executive_summary, provider_name, model_name, tokens_used, latency_ms, verification
) VALUES (
1,
'11111111-1111-1111-1111-111111111111',
'Aligned on building a scalable schema first; UI will follow. Identified a concrete next action to draft schema + seeds.',
'local',
'mock',
123,
42.0,
'{"seed":true}'::jsonb
)
ON CONFLICT (id) DO NOTHING;
INSERT INTO noteflow.key_points (id, summary_id, position, text, segment_ids, start_time, end_time)
VALUES
(1, 1, 0, 'Schema-first development to accelerate UI work.', ARRAY[1], 6.2, 12.4)
ON CONFLICT (id) DO NOTHING;
INSERT INTO noteflow.action_items (id, summary_id, position, text, segment_ids, start_time, end_time, assignee, priority)
VALUES
(1, 1, 0, 'Draft initial database schema + seed script.', ARRAY[2], 12.4, 18.0, 'Local User', 2)
ON CONFLICT (id) DO NOTHING;
-- Task derived from action item (future task workflow)
INSERT INTO noteflow.tasks (id, workspace_id, meeting_id, action_item_id, text, status, priority)
VALUES (
'22222222-2222-2222-2222-222222222222',
'00000000-0000-0000-0000-000000000001',
'11111111-1111-1111-1111-111111111111',
1,
'Draft initial database schema + seed script.',
'open',
2
)
ON CONFLICT (id) DO NOTHING;
-- Annotation
INSERT INTO noteflow.annotations (id, meeting_id, annotation_type, text, start_time, end_time, segment_ids)
VALUES
(1, '11111111-1111-1111-1111-111111111111', 'ANNOTATION_TYPE_NOTE', 'Remember to keep schema modular and future-proof.', 6.0, 10.0, ARRAY[1])
ON CONFLICT (id) DO NOTHING;
-- Speaker/person mapping
INSERT INTO noteflow.persons (id, workspace_id, display_name, email)
VALUES
('33333333-3333-3333-3333-333333333333', '00000000-0000-0000-0000-000000000001', 'Alex Example', 'alex@example.com')
ON CONFLICT (id) DO NOTHING;
INSERT INTO noteflow.meeting_speakers (meeting_id, speaker_id, display_name, person_id)
VALUES
('11111111-1111-1111-1111-111111111111', 'SPEAKER_00', 'Alex', '33333333-3333-3333-3333-333333333333'),
('11111111-1111-1111-1111-111111111111', 'SPEAKER_01', 'Jordan', NULL)
ON CONFLICT DO NOTHING;
-- Tags
INSERT INTO noteflow.tags (id, workspace_id, name, color)
VALUES
('44444444-4444-4444-4444-444444444444', '00000000-0000-0000-0000-000000000001', 'seed', '#00AEEF')
ON CONFLICT (id) DO NOTHING;
INSERT INTO noteflow.meeting_tags (meeting_id, tag_id)
VALUES
('11111111-1111-1111-1111-111111111111', '44444444-4444-4444-4444-444444444444')
ON CONFLICT DO NOTHING;
-- Mock integration + a calendar event (shape matches the client-side config model)
INSERT INTO noteflow.integrations (id, workspace_id, name, type, status, config, last_sync)
VALUES (
'55555555-5555-5555-5555-555555555555',
'00000000-0000-0000-0000-000000000001',
'Mock Calendar',
'calendar',
'connected',
'{"sync_interval_minutes":60,"calendar_ids":["primary"],"webhook_url":"https://example.invalid/webhook"}'::jsonb,
now() - interval '1 day'
)
ON CONFLICT (id) DO NOTHING;
INSERT INTO noteflow.calendar_events (
id, integration_id, external_id, calendar_id, calendar_name, title, start_time, end_time, attendees, meeting_link
) VALUES (
'66666666-6666-6666-6666-666666666666',
'55555555-5555-5555-5555-555555555555',
'evt_seed_001',
'primary',
'Primary',
'Seed Meeting: Project Kickoff',
now() - interval '2 days' + interval '5 minutes',
now() - interval '2 days' + interval '47 minutes',
ARRAY['alex@example.com'],
'https://meet.example.invalid/seed'
)
ON CONFLICT (id) DO NOTHING;
INSERT INTO noteflow.meeting_calendar_links (meeting_id, calendar_event_id)
VALUES ('11111111-1111-1111-1111-111111111111', '66666666-6666-6666-6666-666666666666')
ON CONFLICT DO NOTHING;
-- Preferences KV used by server-side logic (stored as {"value": ...})
INSERT INTO noteflow.user_preferences (key, value)
VALUES
('cloud_consent_granted', '{"value": false}'::jsonb),
('schema_seeded', '{"value": true}'::jsonb)
ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value, updated_at = now();
-- Keep sequences sane if you re-run
SELECT setval('noteflow.segments_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.segments));
SELECT setval('noteflow.summaries_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.summaries));
SELECT setval('noteflow.key_points_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.key_points));
SELECT setval('noteflow.action_items_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.action_items));
SELECT setval('noteflow.annotations_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.annotations));
SELECT setval('noteflow.word_timings_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.word_timings));
SELECT setval('noteflow.streaming_diarization_turns_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.streaming_diarization_turns));


@@ -1,633 +0,0 @@
# Code Quality Correction Plan
This plan addresses code quality issues identified by automated testing across the NoteFlow codebase.
## Executive Summary
| Area | Failing Tests | Issues Found | Status |
|------|---------------|--------------|--------|
| Python Backend Code | 10 | 17 violations | 🔴 Thresholds tightened |
| Python Test Smells | 7 | 223 smells | 🔴 Thresholds tightened |
| React/TypeScript Frontend | 6 | 23 violations | 🔴 Already strict |
| Rust/Tauri | 0 | 4 large files | ⚪ No quality tests |
**2024-12-24 Update:** Quality test thresholds have been aggressively tightened to expose real technical debt. Previously, all tests passed because thresholds were set just above actual violation counts.
---
## Phase 1: Python Backend (High Priority)
### 1.1 Split `NoteFlowClient` God Class
**File:** `src/noteflow/grpc/client.py` (942 lines, 32 methods)
**Problem:** Single class combines 6 distinct concerns: connection management, streaming, meeting CRUD, annotation CRUD, export, and diarization.
**Solution:** Apply mixin pattern (already used successfully in `grpc/_mixins/`).
```
src/noteflow/grpc/
├── client.py # Thin facade (~100 lines)
├── _client_mixins/
│ ├── __init__.py
│ ├── connection.py # GrpcConnectionMixin (~100 lines)
│ ├── streaming.py # AudioStreamingMixin (~150 lines)
│ ├── meeting.py # MeetingClientMixin (~100 lines)
│ ├── annotation.py # AnnotationClientMixin (~150 lines)
│ ├── export.py # ExportClientMixin (~50 lines)
│ ├── diarization.py # DiarizationClientMixin (~100 lines)
│ └── converters.py # Proto conversion helpers (~100 lines)
└── ...
```
**Steps:**
1. Create `_client_mixins/` directory structure
2. Extract `converters.py` with static proto conversion functions
3. Extract each mixin with focused responsibilities
4. Compose `NoteFlowClient` from mixins
5. Update imports in dependent code
**Estimated Impact:** -800 lines in single file, +750 lines across 7 focused files
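For illustration, a minimal sketch of the composed facade under the structure above (mixin names come from the plan; method bodies and signatures are assumptions, not the actual implementation):
```python
# Hypothetical sketch only; real mixins would wrap the generated gRPC stubs.
class GrpcConnectionMixin:
    """Owns channel and stub lifecycle."""

    def connect(self, host: str, port: int) -> None:
        self._address = f"{host}:{port}"  # placeholder for real channel setup


class MeetingClientMixin:
    """Meeting CRUD calls built on the shared connection."""

    def create_meeting(self, title: str) -> str:
        raise NotImplementedError  # would call the CreateMeeting RPC


class NoteFlowClient(GrpcConnectionMixin, MeetingClientMixin):
    """Thin facade composed from focused mixins (remaining mixins omitted)."""
```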
---
### 1.2 Reduce `StreamTranscription` Complexity
**File:** `src/noteflow/grpc/_mixins/streaming.py` (579 lines, complexity=16)
**Problem:** 11 per-meeting state dictionaries, deeply nested async generators.
**Solution:** Create `StreamingSession` class to encapsulate per-meeting state.
```python
# New file: src/noteflow/grpc/_mixins/_streaming_session.py
@dataclass
class StreamingSession:
"""Encapsulates all per-meeting streaming state."""
meeting_id: str
vad: StreamingVad
segmenter: Segmenter
partial_state: PartialState
diarization_state: DiarizationState | None
audio_writer: BufferedAudioWriter | None
next_segment_id: int
stop_requested: bool = False
@classmethod
async def create(cls, meeting_id: str, host: ServicerHost, ...) -> "StreamingSession":
"""Factory method for session initialization."""
...
```
**Steps:**
1. Define `StreamingSession` dataclass with all session state
2. Extract `PartialState` and `DiarizationState` as nested dataclasses
3. Replace dictionary lookups (`self._vad_instances[meeting_id]`) with session attributes
4. Move helper methods into session class where appropriate
5. Simplify `StreamTranscription` to manage session lifecycle
**Estimated Impact:** Complexity 16 → 10, clearer state management
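A minimal sketch of what those nested state dataclasses could look like (field names are assumptions for illustration, not the project's actual attributes):
```python
from dataclasses import dataclass, field


@dataclass
class PartialState:
    """Rolling state for not-yet-final transcript text (illustrative fields)."""

    text: str = ""
    last_emit_time: float = 0.0


@dataclass
class DiarizationState:
    """Per-meeting speaker-turn bookkeeping (illustrative fields)."""

    speaker_ids: list[str] = field(default_factory=list)
    pending_turns: int = 0
```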
---
### 1.3 Create Server Configuration Objects
**File:** `src/noteflow/grpc/server.py` (430 lines)
**Problem:** `run_server()` has 12 parameters, `main()` has 124 lines of argument parsing.
**Solution:** Create configuration dataclasses.
```python
# New file: src/noteflow/grpc/_config.py
@dataclass(frozen=True)
class AsrConfig:
    model: str
    device: str
    compute_type: str


@dataclass(frozen=True)
class DiarizationConfig:
    enabled: bool = False
    hf_token: str | None = None
    device: str = "auto"
    streaming_latency: float | None = None
    min_speakers: int | None = None
    max_speakers: int | None = None
    refinement_enabled: bool = True


@dataclass(frozen=True)
class ServerConfig:
    port: int
    asr: AsrConfig
    database_url: str | None = None
    diarization: DiarizationConfig | None = None
```
**Steps:**
1. Create `_config.py` with config dataclasses
2. Refactor `run_server()` to accept `ServerConfig`
3. Extract `_parse_arguments()` function from `main()`
4. Create `_build_config()` to construct config from args
5. Extract `ServerBootstrap` class for initialization phases
**Estimated Impact:** 12 params → 3, functions 146 → ~60 lines each
---
### 1.4 Simplify `parse_llm_response`
**File:** `src/noteflow/infrastructure/summarization/_parsing.py` (complexity=21)
**Problem:** Multiple parsing phases, repeated patterns for key_points/action_items.
**Solution:** Extract helper functions for common patterns.
```python
# Refactored structure
def _strip_markdown_fences(text: str) -> str:
"""Remove markdown code block delimiters."""
...
def _parse_items[T](
raw_items: list[dict],
valid_segment_ids: set[int],
segments: Sequence[Segment],
item_factory: Callable[..., T],
) -> list[T]:
"""Generic parser for key_points and action_items."""
...
def parse_llm_response(
raw_response: str,
request: SummarizationRequest,
) -> Summary:
"""Parse LLM JSON response into Summary entity."""
text = _strip_markdown_fences(raw_response)
data = json.loads(text)
valid_ids = {seg.id for seg in request.segments}
key_points = _parse_items(data.get("key_points", []), valid_ids, ...)
action_items = _parse_items(data.get("action_items", []), valid_ids, ...)
return Summary(...)
```
**Steps:**
1. Extract `_strip_markdown_fences()` helper
2. Create generic `_parse_items()` function
3. Simplify `parse_llm_response()` to use helpers
4. Add unit tests for extracted functions
**Estimated Impact:** Complexity 21 → 12
---
### 1.5 Update Quality Test Thresholds
The feature envy test has 39 false positives because converters and repositories legitimately work with external objects.
**File:** `tests/quality/test_code_smells.py`
**Changes:**
```python
def test_no_feature_envy() -> None:
"""Detect methods that use other objects more than self."""
# Exclude known patterns that are NOT feature envy:
# - Converter classes (naturally transform external objects)
# - Repository methods (query + convert pattern)
# - Exporter classes (transform domain to output)
excluded_patterns = [
"converter",
"repo",
"exporter",
"_to_domain",
"_to_proto",
"_proto_to_",
]
...
```
---
## Phase 2: React/TypeScript Frontend (High Priority)
### 2.1 Split `Settings.tsx` into Sub-Components
**File:** `client/src/pages/Settings.tsx` (1,831 lines)
**Problem:** Monolithic page with 7+ concerns mixed together.
**Solution:** Extract into settings module.
```
client/src/pages/settings/
├── Settings.tsx # Page orchestrator (~150 lines)
├── components/
│ ├── ServerConnectionPanel.tsx # Connection settings (~150 lines)
│ ├── AudioDevicePanel.tsx # Audio device selection (~200 lines)
│ ├── ProviderConfigPanel.tsx # AI provider configs (~400 lines)
│ ├── AITemplatePanel.tsx # Tone/format/verbosity (~150 lines)
│ ├── SyncPanel.tsx # Sync settings (~100 lines)
│ ├── IntegrationsPanel.tsx # Third-party integrations (~200 lines)
│ └── QuickActionsPanel.tsx # Quick actions bar (~80 lines)
└── hooks/
├── useProviderConfig.ts # Provider state management (~150 lines)
└── useServerConnection.ts # Connection state (~100 lines)
```
**Steps:**
1. Create `settings/` directory structure
2. Extract `useProviderConfig` hook for shared provider logic
3. Extract each accordion section into focused component
4. Create shared `ProviderConfigCard` component for reuse
5. Update routing to use new `Settings.tsx`
**Estimated Impact:** 1,831 lines → ~150 lines main + 1,500 distributed
---
### 2.2 Centralize Configuration Constants
**Problem:** Hardcoded endpoints scattered across 4 files.
**Solution:** Create centralized configuration.
```typescript
// client/src/lib/config/index.ts
export * from './provider-endpoints';
export * from './defaults';
export * from './server';
// client/src/lib/config/provider-endpoints.ts
export const PROVIDER_ENDPOINTS = {
  openai: 'https://api.openai.com/v1',
  anthropic: 'https://api.anthropic.com/v1',
  google: 'https://generativelanguage.googleapis.com/v1',
  azure: 'https://{resource}.openai.azure.com',
  ollama: 'http://localhost:11434/api',
  deepgram: 'https://api.deepgram.com/v1',
  elevenlabs: 'https://api.elevenlabs.io/v1',
} as const;

// client/src/lib/config/server.ts
export const SERVER_DEFAULTS = {
  HOST: 'localhost',
  PORT: 50051,
} as const;

// client/src/lib/config/defaults.ts
export const DEFAULT_PREFERENCES = { ... };
```
**Files to Update:**
- `lib/ai-providers.ts` - Import from config
- `lib/preferences.ts` - Import defaults from config
- `pages/Settings.tsx` - Import server defaults
**Estimated Impact:** Eliminates 16 hardcoded endpoint violations
---
### 2.3 Extract Shared Adapter Utilities
**Files:** `api/mock-adapter.ts` (637 lines), `api/tauri-adapter.ts` (635 lines)
**Problem:** ~150 lines of duplicated helper code.
**Solution:** Extract shared utilities.
```typescript
// client/src/api/constants.ts
export const TauriCommands = { ... };
export const TauriEvents = { ... };
// client/src/api/helpers.ts
export function isRecord(value: unknown): value is Record<string, unknown> { ... }
export function extractStringArrayFromRecords(records: unknown[], key: string): string[] { ... }
export function getErrorMessage(value: unknown): string | undefined { ... }
export function normalizeSuccessResponse(response: boolean | { success: boolean }): boolean { ... }
export function stateToGrpcEnum(state: string): number { ... }
```
**Steps:**
1. Create `api/constants.ts` with shared command/event names
2. Create `api/helpers.ts` with type guards and converters
3. Update both adapters to import from shared modules
4. Remove duplicated code
**Estimated Impact:** -150 lines of duplication
---
### 2.4 Refactor `lib/preferences.ts`
**File:** `client/src/lib/preferences.ts` (670 lines)
**Problem:** 15 identical setter patterns.
**Solution:** Create generic setter factory.
```typescript
// Before: 15 methods like this
setTranscriptionProvider(provider: TranscriptionProviderType, baseUrl: string): void {
  const prefs = loadPreferences();
  prefs.ai_config.transcription.provider = provider;
  prefs.ai_config.transcription.base_url = baseUrl;
  prefs.ai_config.transcription.test_status = 'untested';
  savePreferences(prefs);
}

// After: Single generic function
updateAIConfig<K extends keyof AIConfig>(
  configType: K,
  updates: Partial<AIConfig[K]>
): void {
  const prefs = loadPreferences();
  prefs.ai_config[configType] = {
    ...prefs.ai_config[configType],
    ...updates,
    test_status: 'untested',
  };
  savePreferences(prefs);
}
```
**Steps:**
1. Create generic `updateAIConfig()` function
2. Deprecate individual setter methods
3. Update Settings.tsx to use generic setter
4. Remove deprecated methods after migration
**Estimated Impact:** -200 lines of repetitive code
---
### 2.5 Split Type Definitions
**File:** `client/src/api/types.ts` (659 lines)
**Solution:** Organize into focused modules.
```
client/src/api/types/
├── index.ts # Re-exports all
├── enums.ts # All enum types (~100 lines)
├── messages.ts # Core DTOs (Meeting, Segment, etc.) (~200 lines)
├── requests.ts # Request/Response types (~150 lines)
├── config.ts # Provider config types (~100 lines)
└── integrations.ts # Integration types (~80 lines)
```
**Steps:**
1. Create `types/` directory
2. Split types by domain (safe refactor - no logic changes)
3. Create `index.ts` with re-exports
4. Update imports across codebase
**Estimated Impact:** Better organization, easier navigation
---
## Phase 3: Component Refactoring (Medium Priority)
### 3.1 Split `Recording.tsx`
**File:** `client/src/pages/Recording.tsx` (641 lines)
**Solution:** Extract hooks and components.
```
client/src/pages/recording/
├── Recording.tsx # Orchestrator (~100 lines)
├── hooks/
│ ├── useRecordingState.ts # State machine (~150 lines)
│ ├── useTranscriptionStream.ts # Stream handling (~120 lines)
│ └── useRecordingControls.ts # Control actions (~80 lines)
└── components/
├── RecordingHeader.tsx # Title + timer (~50 lines)
├── TranscriptPanel.tsx # Transcript display (~80 lines)
├── NotesPanel.tsx # Notes editor (~70 lines)
└── RecordingControls.tsx # Control buttons (~50 lines)
```
---
### 3.2 Split `sidebar.tsx`
**File:** `client/src/components/ui/sidebar.tsx` (639 lines)
**Solution:** Split into sidebar module with sub-components.
```
client/src/components/ui/sidebar/
├── index.ts # Re-exports
├── context.ts # SidebarContext + useSidebar (~50 lines)
├── provider.tsx # SidebarProvider (~200 lines)
└── components/
├── sidebar-trigger.tsx # (~40 lines)
├── sidebar-rail.tsx # (~40 lines)
├── sidebar-content.tsx # (~40 lines)
├── sidebar-menu.tsx # (~60 lines)
└── sidebar-inset.tsx # (~20 lines)
```
---
### 3.3 Refactor `ai-providers.ts`
**File:** `client/src/lib/ai-providers.ts` (618 lines)
**Problem:** 7 provider-specific fetch functions with duplicated error handling.
**Solution:** Create provider metadata + generic fetcher.
```typescript
// client/src/lib/ai-providers/provider-metadata.ts
interface ProviderMetadata {
  value: string;
  label: string;
  defaultUrl: string;
  authHeader: { name: string; prefix: string };
  modelsEndpoint: string | null;
  modelKey: string;
  fallbackModels: string[];
}

export const PROVIDERS: Record<string, ProviderMetadata> = {
  openai: {
    value: 'openai',
    label: 'OpenAI',
    defaultUrl: PROVIDER_ENDPOINTS.openai,
    authHeader: { name: 'Authorization', prefix: 'Bearer ' },
    modelsEndpoint: '/models',
    modelKey: 'id',
    fallbackModels: ['gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo'],
  },
  // ... other providers
};

// client/src/lib/ai-providers/model-fetcher.ts
export async function fetchModels(
  provider: string,
  baseUrl: string,
  apiKey: string
): Promise<string[]> {
  const meta = PROVIDERS[provider];
  if (!meta?.modelsEndpoint) return meta?.fallbackModels ?? [];
  const response = await fetch(`${baseUrl}${meta.modelsEndpoint}`, {
    headers: { [meta.authHeader.name]: `${meta.authHeader.prefix}${apiKey}` },
  });
  const data = await response.json();
  return extractModels(data, meta.modelKey);
}
```
---
## Phase 4: Rust/Tauri (Low Priority)
### 4.1 Add Clippy Lints
**File:** `client/src-tauri/Cargo.toml`
Add additional clippy lints:
```toml
[lints.clippy]
unwrap_used = "warn"
expect_used = "warn"
todo = "warn"
cognitive_complexity = "warn"
```
### 4.2 Review Clone Usage
Run quality script and address files with excessive `.clone()` calls.
---
## Implementation Order
### Week 1: Configuration & Quick Wins
1. ✅ Create `lib/config/` with centralized endpoints
2. ✅ Extract `api/helpers.ts` shared utilities
3. ✅ Update quality test thresholds for false positives
4. ✅ Tighten Python quality test thresholds (2024-12-24)
5. ✅ Add test smell detection suite (15 tests) (2024-12-24)
### Week 2: Python Backend Core
6. Create `ServerConfig` dataclasses
7. Refactor `run_server()` to use config
8. Extract `parse_llm_response` helpers
### Week 3: Client God Class
9. Create `_client_mixins/converters.py`
10. Extract connection mixin
11. Extract streaming mixin
12. Extract remaining mixins
13. Compose `NoteFlowClient` from mixins
### Week 4: Frontend Pages
14. Split `Settings.tsx` into sub-components
15. Create `useProviderConfig` hook
16. Refactor `preferences.ts` with generic setter
### Week 5: Streaming & Types
17. Create `StreamingSession` class
18. Split `api/types.ts` into modules
19. Refactor `ai-providers.ts` with metadata
### Week 6: Component Cleanup
20. Split `Recording.tsx`
21. Split `sidebar.tsx`
22. Final quality test run & verification
---
## Current Quality Test Status (2024-12-24)
### Python Backend Tests (17 failures)
| Test | Found | Threshold | Key Offenders |
|------|-------|-----------|---------------|
| Long parameter lists | 4 | ≤2 | `run_server` (12), `add_segment` (11) |
| God classes | 3 | ≤1 | `NoteFlowClient` (32 methods, 815 lines) |
| Long methods | 7 | ≤4 | `run_server` (145 lines), `main` (123) |
| Module size (hard >750) | 1 | ≤0 | `client.py` (942 lines) |
| Module size (soft >500) | 3 | ≤1 | `streaming.py`, `diarization.py` |
| Scattered helpers | 21 | ≤10 | Helpers across unrelated modules |
| Duplicate helper signatures | 32 | ≤20 | `is_enabled` (7x), `get_by_meeting` (6x) |
| Repeated code patterns | 92 | ≤50 | Docstring blocks, method signatures |
| Magic numbers | 15 | ≤10 | `10` (20x), `1024` (14x), `5` (13x) |
| Repeated strings | 53 | ≤30 | Log messages, schema names |
| Thin wrappers | 46 | ≤25 | Passthrough functions |
### Python Test Smell Tests (7 failures)
| Test | Found | Threshold | Issue |
|------|-------|-----------|-------|
| Assertion roulette | 91 | ≤50 | Tests with naked asserts (no messages) |
| Conditional test logic | 75 | ≤40 | Loops/ifs in test bodies |
| Sleepy tests | 5 | ≤3 | Uses `time.sleep()` |
| Broad exception handling | 5 | ≤3 | Catches generic `Exception` |
| Sensitive equality | 12 | ≤10 | Comparing `str()` output |
| Duplicate test names | 26 | ≤15 | Same test name in multiple files |
| Long test methods | 5 | ≤3 | Tests exceeding 50 lines |
### Frontend Tests (6 failures)
| Test | Found | Threshold | Key Offenders |
|------|-------|-----------|---------------|
| Overly long files | 9 | ≤3 | `Settings.tsx` (1832!), 8 others >500 |
| Hardcoded endpoints | 4 | 0 | API URLs outside config |
| Nested ternaries | 1 | 0 | Complex conditional |
| TODO/FIXME comments | >15 | ≤15 | Technical debt markers |
| Commented-out code | >10 | ≤10 | Stale code blocks |
### Rust/Tauri (no quality tests yet)
Large files that could benefit from splitting:
- `noteflow.rs`: 1205 lines (generated proto)
- `recording.rs`: 897 lines
- `app_state.rs`: 851 lines
- `client.rs`: 681 lines
---
## Success Metrics
| Metric | Current | Target |
|--------|---------|--------|
| Python files > 750 lines | 1 | 0 |
| TypeScript files > 500 lines | 9 | 3 |
| Functions > 100 lines | 8 | 2 |
| Cyclomatic complexity > 15 | 2 | 0 |
| Functions with > 7 params | 4 | 0 |
| Hardcoded endpoints | 4 | 0 |
| Duplicated adapter code | ~150 lines | 0 |
| Python quality tests passing | 23/40 (58%) | 38/40 (95%) |
| Frontend quality tests passing | 15/21 (71%) | 20/21 (95%) |
---
## Notes
### False Positives to Ignore
The following "feature envy" detections are **correct design patterns** and should NOT be refactored:
1. **Converter classes** (`OrmConverter`, `AsrConverter`) - Inherently transform external objects
2. **Repository methods** - Query→fetch→convert is the standard pattern
3. **Exporter classes** - Transformation classes work with domain entities
4. **Proto converters in gRPC** - Proto→DTO adaptation is appropriate
### Patterns to Preserve
- Mixin architecture in `grpc/_mixins/` - Apply to client
- Repository base class helpers - Keep shared utilities
- Export formatting helpers - Already well-centralized
- Domain utilities in `domain/utils/` - Appropriate location

File diff suppressed because it is too large


@@ -1,466 +0,0 @@
# Code Quality Analysis Report
**Date:** 2024-12-24
**Sprint:** Comprehensive Backend QA Scan
**Scope:** `/home/trav/repos/noteflow/src/noteflow/`
---
## Executive Summary
**Status:** PASS ✅
The NoteFlow Python backend demonstrates excellent code quality with:
- **0 type checking errors** (basedpyright clean)
- **0 remaining lint violations** (all Ruff issues auto-fixed)
- **0 security issues** detected
- **3 complexity violations** requiring architectural improvements
### Quality Metrics
| Category | Status | Details |
|----------|--------|---------|
| Type Safety | ✅ PASS | 0 errors (basedpyright strict mode) |
| Code Linting | ✅ PASS | 1 fix applied, 0 remaining |
| Formatting | ⚠️ SKIP | Black not installed in venv |
| Security | ✅ PASS | 0 vulnerabilities (Bandit rules) |
| Complexity | ⚠️ WARN | 3 functions exceed threshold |
| Architecture | ✅ GOOD | Modular mixin pattern, clean separation |
---
## 1. Type Safety Analysis (basedpyright)
### Result: PASS ✅
**Command:** `basedpyright --pythonversion 3.12 src/noteflow/`
**Outcome:** `0 errors, 0 warnings, 0 notes`
#### Configuration Strengths
- `typeCheckingMode = "standard"`
- Python 3.12 target with modern type syntax
- Appropriate exclusions for generated proto files
- SQLAlchemy-specific overrides for known false positives
#### Notes
The mypy output showed numerous errors, but these are **false positives** due to:
1. Missing type stubs for third-party libraries (`grpc`, `pgvector`, `diart`, `sounddevice`)
2. Generated protobuf files (excluded from analysis scope)
3. SQLAlchemy's dynamic attribute system (correctly configured in basedpyright)
**Recommendation:** Basedpyright is the authoritative type checker for this project. The mypy configuration should be removed or aligned with basedpyright's exclusions.
---
## 2. Linting Analysis (Ruff)
### Result: PASS ✅ (1 fix applied)
**Command:** `ruff check --fix src/noteflow/`
#### Fixed Issues
| File | Code | Issue | Fix Applied |
|------|------|-------|-------------|
| `grpc/_config.py:95` | UP037 | Quoted type annotation | Removed unnecessary quotes from `GrpcServerConfig` |
#### Configuration Issues
**Deprecated settings detected:**
```toml
# Current (deprecated)
[tool.ruff]
select = [...]
ignore = [...]
per-file-ignores = {...}
# Required migration
[tool.ruff.lint]
select = [...]
ignore = [...]
per-file-ignores = {...}
```
**Action Required:** Update `pyproject.toml` to use `[tool.ruff.lint]` section.
#### Selected Rules (Good Coverage)
- E/W: pycodestyle errors/warnings
- F: Pyflakes
- I: isort (import sorting)
- B: flake8-bugbear (bug detection)
- C4: flake8-comprehensions
- UP: pyupgrade (modern syntax)
- SIM: flake8-simplify
- RUF: Ruff-specific rules
---
## 3. Complexity Analysis
### Result: WARN ⚠️ (3 violations)
**Command:** `ruff check --select C901 src/noteflow/`
| File | Function | Complexity | Threshold | Severity |
|------|----------|------------|-----------|----------|
| `grpc/_mixins/diarization.py:102` | `_process_streaming_diarization` | 11 | ≤10 | 🟡 LOW |
| `grpc/_mixins/streaming.py:55` | `StreamTranscription` | 14 | ≤10 | 🟠 MEDIUM |
| `grpc/server.py:159` | `run_server_with_config` | 16 | ≤10 | 🔴 HIGH |
---
### 3.1 HIGH Priority: `run_server_with_config` (CC=16)
**Location:** `src/noteflow/grpc/server.py:159-254`
**Issues:**
- 96 lines with multiple initialization phases
- Deeply nested conditionals for database/diarization/consent logic
- Mixes infrastructure setup with business logic
**Suggested Refactoring:**
```python
# Extract helper functions to reduce complexity
async def _initialize_database(
    config: GrpcServerConfig
) -> tuple[AsyncSessionFactory | None, RecoveryResult | None]:
    """Initialize database connection and run recovery."""
    if not config.database_url:
        return None, None
    session_factory = create_async_session_factory(config.database_url)
    await ensure_schema_ready(session_factory, config.database_url)
    recovery_service = RecoveryService(
        SqlAlchemyUnitOfWork(session_factory),
        meetings_dir=get_settings().meetings_dir,
    )
    recovery_result = await recovery_service.recover_all()
    return session_factory, recovery_result


async def _initialize_consent_persistence(
    session_factory: AsyncSessionFactory,
    summarization_service: SummarizationService,
) -> None:
    """Load cloud consent from DB and set up persistence callback."""
    async with SqlAlchemyUnitOfWork(session_factory) as uow:
        cloud_consent = await uow.preferences.get_bool("cloud_consent_granted", False)
        summarization_service.settings.cloud_consent_granted = cloud_consent

    async def persist_consent(granted: bool) -> None:
        async with SqlAlchemyUnitOfWork(session_factory) as uow:
            await uow.preferences.set("cloud_consent_granted", granted)
            await uow.commit()

    summarization_service.on_consent_change = persist_consent


def _initialize_diarization(
    config: GrpcServerConfig
) -> DiarizationEngine | None:
    """Create diarization engine if enabled and configured."""
    diarization = config.diarization
    if not diarization.enabled:
        return None
    if not diarization.hf_token:
        logger.warning("Diarization enabled but no HF token provided")
        return None
    diarization_kwargs = {
        "device": diarization.device,
        "hf_token": diarization.hf_token,
    }
    if diarization.streaming_latency is not None:
        diarization_kwargs["streaming_latency"] = diarization.streaming_latency
    if diarization.min_speakers is not None:
        diarization_kwargs["min_speakers"] = diarization.min_speakers
    if diarization.max_speakers is not None:
        diarization_kwargs["max_speakers"] = diarization.max_speakers
    return DiarizationEngine(**diarization_kwargs)


async def run_server_with_config(config: GrpcServerConfig) -> None:
    """Run the async gRPC server with structured configuration."""
    # Initialize database and recovery
    session_factory, recovery_result = await _initialize_database(config)
    if recovery_result:
        _log_recovery_results(recovery_result)
    # Initialize summarization
    summarization_service = create_summarization_service()
    if session_factory:
        await _initialize_consent_persistence(session_factory, summarization_service)
    # Initialize diarization
    diarization_engine = _initialize_diarization(config)
    # Create and start server
    server = NoteFlowServer(
        port=config.port,
        asr_model=config.asr.model,
        asr_device=config.asr.device,
        asr_compute_type=config.asr.compute_type,
        session_factory=session_factory,
        summarization_service=summarization_service,
        diarization_engine=diarization_engine,
        diarization_refinement_enabled=config.diarization.refinement_enabled,
    )
    await server.start()
    await server.wait_for_termination()
```
**Expected Impact:** CC 16 → ~6 (main function becomes orchestration only)
---
### 3.2 MEDIUM Priority: `StreamTranscription` (CC=14)
**Location:** `src/noteflow/grpc/_mixins/streaming.py:55-115`
**Issues:**
- Multiple conditional checks for stream initialization
- Nested error handling with context managers
- Mixed concerns: stream lifecycle + chunk processing
**Suggested Refactoring:**
The codebase already has `_streaming_session.py` created. Recommendation:
```python
# Use StreamingSession to encapsulate per-meeting state
async def StreamTranscription(
    self: ServicerHost,
    request_iterator: AsyncIterator[noteflow_pb2.AudioChunk],
    context: grpc.aio.ServicerContext,
) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]:
    """Handle bidirectional audio streaming with persistence."""
    if self._asr_engine is None or not self._asr_engine.is_loaded:
        await abort_failed_precondition(context, "ASR engine not loaded")
    session: StreamingSession | None = None
    try:
        async for chunk in request_iterator:
            # Initialize session on first chunk
            if session is None:
                session = await StreamingSession.create(chunk.meeting_id, self, context)
                if session is None:
                    return
            # Check for stop request
            if session.should_stop():
                logger.info("Stop requested, exiting stream gracefully")
                break
            # Process chunk
            async for update in session.process_chunk(chunk):
                yield update
        # Flush remaining audio
        if session:
            async for update in session.flush():
                yield update
    finally:
        if session:
            await session.cleanup()
```
**Expected Impact:** CC 14 → ~8 (move complexity into StreamingSession methods)
---
### 3.3 LOW Priority: `_process_streaming_diarization` (CC=11)
**Location:** `src/noteflow/grpc/_mixins/diarization.py:102-174`
**Issues:**
- Multiple early returns (guard clauses)
- Lock-based session management
- Error handling for streaming pipeline
**Analysis:**
This function is already well-structured with clear separation:
1. Early validation checks (lines 114-119)
2. Session creation under lock (lines 124-145)
3. Chunk processing in thread pool (lines 148-164)
4. Turn persistence (lines 167-174)
**Recommendation:** Accept CC=11 as reasonable for this complex concurrent operation. The early returns are defensive programming, not complexity.
---
## 4. Security Analysis (Bandit/Ruff S Rules)
### Result: PASS ✅
**Command:** `ruff check --select S src/noteflow/`
**Outcome:** 0 security issues detected
**Scanned Patterns:**
- S101: Use of assert
- S102: Use of exec
- S103: Insecure file permissions
- S104-S113: Cryptographic issues
- S301-S324: SQL injection, pickle usage, etc.
**Notable Security Strengths:**
1. **Encryption:** `infrastructure/security/crypto.py` uses AES-GCM (authenticated encryption)
2. **Key Management:** `infrastructure/security/keystore.py` uses system keyring
3. **Database:** SQLAlchemy ORM prevents SQL injection
4. **No hardcoded secrets:** Uses environment variables and keyring
---
## 5. Architecture Quality
### Result: EXCELLENT ✅
**Strengths:**
#### 5.1 Hexagonal Architecture
```
domain/ (pure business logic)
↓ depends on
application/ (use cases)
↓ depends on
infrastructure/ (adapters)
```
Clean dependency direction with no circular imports.
#### 5.2 Modular gRPC Mixins
```
grpc/_mixins/
├── streaming.py # ASR streaming
├── diarization.py # Speaker diarization
├── summarization.py # Summary generation
├── meeting.py # Meeting CRUD
├── annotation.py # Annotations
├── export.py # Document export
└── protocols.py # ServicerHost protocol
```
Each mixin focuses on single responsibility, composed via `ServicerHost` protocol.
#### 5.3 Repository Pattern with Unit of Work
```python
async with SqlAlchemyUnitOfWork(session_factory) as uow:
    meeting = await uow.meetings.get(meeting_id)
    await uow.segments.add(segment)
    await uow.commit()  # Atomic transaction
```
Proper transaction boundaries and separation of concerns.
#### 5.4 Protocol-Based Dependency Injection
```python
# domain/ports/
class MeetingRepository(Protocol):
    async def get(self, meeting_id: MeetingId) -> Meeting | None: ...


# infrastructure/persistence/repositories/
class SqlAlchemyMeetingRepository:
    """Concrete implementation."""
```
Testable, swappable implementations (DB vs memory).
---
## 6. File Size Analysis
### Result: GOOD ✅
| File | Lines | Status | Notes |
|------|-------|--------|-------|
| `grpc/server.py` | 489 | ✅ Good | Under 500-line soft limit |
| `grpc/_mixins/streaming.py` | 579 | ⚠️ Review | Near 750-line hard limit |
| `grpc/_mixins/diarization.py` | 578 | ⚠️ Review | Near 750-line hard limit |
**Recommendation:** Both large mixins are candidates for splitting into sub-modules once complexity is addressed.
---
## 7. Missing Quality Tools
### 7.1 Black Formatter
**Status:** Not installed in venv
**Impact:** Cannot verify formatting compliance
**Action Required:**
```bash
source .venv/bin/activate
uv pip install black
black --check src/noteflow/
```
### 7.2 Pyrefly
**Status:** Not available
**Impact:** Missing semantic bug detection
**Action:** Optional enhancement (not critical)
---
## Next Actions
### Critical (Do Before Next Commit)
1. ✅ **Fixed:** Remove quoted type annotation in `_config.py` (auto-fixed by Ruff)
2. ⚠️ **Required:** Update `pyproject.toml` to use `[tool.ruff.lint]` section
3. ⚠️ **Required:** Install Black and verify formatting: `uv pip install black && black src/noteflow/`
### High Priority (This Sprint)
4. **Extract helpers from `run_server_with_config`** to reduce CC from 16 → ~6
- Create `_initialize_database()`, `_initialize_consent_persistence()`, `_initialize_diarization()`
- Target: <10 complexity per function
5. **Complete `StreamingSession` refactoring** to reduce `StreamTranscription` CC from 14 → ~8
- File already created: `grpc/_streaming_session.py`
- Move per-meeting state into session class
- Simplify main async generator
### Medium Priority (Next Sprint)
6. **Split large mixin files** if they exceed 750 lines after complexity fixes
- `streaming.py` (579 lines) → `streaming/` package
- `diarization.py` (578 lines) → `diarization/` package
7. **Add mypy exclusions** to align with basedpyright configuration
- Exclude proto files, third-party libraries without stubs
### Low Priority (Backlog)
8. Consider adding `pyrefly` for additional semantic checks
9. Review duplication patterns from code-quality-correction-plan.md
---
## Summary
### Mechanical Fixes Applied ✅
- **Ruff:** Removed quoted type annotation in `grpc/_config.py:95`
### Configuration Issues ⚠️
- **pyproject.toml:** Migrate to `[tool.ruff.lint]` section (deprecated warning)
- **Black:** Not installed in venv (cannot verify formatting)
### Architectural Recommendations 📋
#### Complexity Violations (3 total)
| Priority | Function | Current CC | Target | Effort |
|----------|----------|------------|--------|--------|
| 🔴 HIGH | `run_server_with_config` | 16 | ≤10 | 2-3 hours |
| 🟠 MEDIUM | `StreamTranscription` | 14 | ≤10 | 3-4 hours |
| 🟡 LOW | `_process_streaming_diarization` | 11 | Accept | N/A |
**Total Estimated Effort:** 5-7 hours to address HIGH and MEDIUM priorities
### Pass Criteria Met ✅
- [x] Type safety (basedpyright): 0 errors
- [x] Linting (Ruff): 0 violations remaining
- [x] Security (Bandit): 0 vulnerabilities
- [x] Architecture: Clean hexagonal design
- [x] No critical issues blocking development
### Status: PASS ✅
The NoteFlow backend demonstrates **excellent code quality** with well-architected patterns, strong type safety, and zero critical issues. The complexity violations are isolated to 3 functions and have clear refactoring paths. All mechanical fixes have been applied successfully.
---
**QA Agent:** Code-Quality Agent
**Report Generated:** 2024-12-24
**Next Review:** After complexity refactoring (estimated 1 week)


@@ -1,6 +1,6 @@
# NoteFlow Feature Gap Analysis & Development Roadmap
> Generated: 2025-12-23
> Generated: 2025-12-23 | Updated: 2025-12-25
> Focus: Core pipeline completion (transcription → summary → diarization → export)
---
@@ -19,10 +19,23 @@ This document identifies features not yet developed or fully connected between t
| **Export** | Partial | Markdown/HTML working, PDF missing |
| **Integrations** | Stub | UI exists, backend handlers missing |
### Sprint Overview
| Sprint | Name | Phase | Status | Prerequisites |
|--------|------|-------|--------|---------------|
| **0** | Proto & Schema Foundation | Foundation | New | — |
| 1 | AI Templates Pass-Through | Core Pipeline | Planned | Sprint 0 |
| 2 | Diarization Application Service | Core Pipeline | Planned | Sprint 0 |
| 3 | PDF Export | Core Pipeline | Planned | Sprint 0 |
| 4 | Named Entity Extraction | Intelligence | Planned | Sprint 0 |
| 5 | Calendar Sync | Integrations | Planned | Sprint 0 |
| 6 | Webhook Execution | Integrations | Planned | Sprint 0 |
### Feature Gap Summary
| Priority | Feature | Owner | Complexity | Status |
|----------|---------|-------|------------|--------|
| 0 | Proto & Schema Foundation | Backend | Medium | **NEW** - Consolidates proto/DB changes |
| 1 | AI Templates Pass-Through | Both | Low | Not connected |
| 2 | Diarization Application Service | Backend | Medium | Engine exists, service missing |
| 3 | PDF Export | Backend | Low-Medium | Not implemented |
@@ -32,6 +45,32 @@ This document identifies features not yet developed or fully connected between t
---
## Sprint 0: Proto & Schema Foundation (NEW)
> **Priority**: 0 | **Owner**: Backend | **Complexity**: Medium
> **Documentation**: `docs/sprints/phase-0-foundation/sprint-0-proto-schema/README.md`
### Objective
Consolidate all proto and database schema changes required by Sprints 1-6 into a single, atomic foundation sprint. This prevents proto version conflicts and ensures all sprints start from a consistent base.
### Key Components
1. **Consolidated Proto Definitions**: All RPCs and messages for Sprints 1-6
2. **Alembic Migrations**: `named_entities`, `webhooks`, `webhook_deliveries` tables
3. **Feature Flags**: Toggle experimental features (`ner_extraction_enabled`, `calendar_sync_enabled`)
4. **Docker Integration**: spaCy model downloads, database initialization
5. **Proto Regeneration Script**: Consistent stub generation
### Critical Fixes Included
- Resolves proto version conflicts across sprints
- Ensures database schema exists before feature sprints
- Provides feature flags for gradual rollout
- Documents proto changelog for sync points
---
## Ownership Guidelines
### Backend (Python gRPC) Responsibilities
@@ -70,7 +109,6 @@ This document identifies features not yet developed or fully connected between t
**Priority**: 1
**Owner**: Both (proto change + frontend wiring)
**Complexity**: Low
**Estimated Effort**: 1-2 days
#### Current State
@@ -222,7 +260,6 @@ async generateSummary(
**Priority**: 2
**Owner**: Backend
**Complexity**: Medium
**Estimated Effort**: 2-3 days
#### Current State
@@ -435,7 +472,6 @@ class DiarizationPort(Protocol):
**Priority**: 3
**Owner**: Backend
**Complexity**: Low-Medium
**Estimated Effort**: 1-2 days
#### Current State
@@ -645,7 +681,6 @@ Note: weasyprint requires system dependencies (cairo, pango). Document in README
**Priority**: 4
**Owner**: Backend
**Complexity**: Medium
**Estimated Effort**: 3-4 days
#### Current State
@@ -1012,7 +1047,6 @@ Post-install: `python -m spacy download en_core_web_sm`
**Priority**: 5
**Owner**: Backend
**Complexity**: Medium-High
**Estimated Effort**: 4-5 days
#### Current State
@@ -1317,7 +1351,6 @@ google-auth-oauthlib = "^1.1"
**Priority**: 6
**Owner**: Backend
**Complexity**: Medium
**Estimated Effort**: 2-3 days
#### Current State
@@ -1580,25 +1613,49 @@ After `GenerateSummary` completes successfully, call `webhook_service.trigger_su
## Implementation Order & Dependencies
```
Phase 1 (Parallel where possible):
├── Feature 1: AI Templates ─────────────────┐
├── Feature 3: PDF Export ───────────────────┤─→ Proto regeneration
└── Feature 2: Diarization Service ──────────┘
Sprint 0: Foundation (MUST complete first)
└── Proto & Schema Foundation ───────────────→ All proto + DB migrations + feature flags
Phase 2:
└── Feature 4: NER ──────────────────────────→ Requires proto changes
Phase 1: Core Pipeline (Parallel, after Sprint 0):
├── Sprint 1: AI Templates ─────────────────┐
├── Sprint 3: PDF Export ───────────────────┤─→ Use proto from Sprint 0
└── Sprint 2: Diarization Service ──────────┘ (DB persistence, application layer)
Phase 3 (Sequential):
├── Feature 5: Calendar Sync ────────────────→ OAuth infrastructure
└── Feature 6: Webhooks ─────────────────────→ Can start after Phase 1
Phase 2: Intelligence (after Phase 1):
└── Sprint 4: NER ──────────────────────────→ Uses NerService application layer
Phase 3: Integrations (Sequential, after Phase 2):
├── Sprint 5: Calendar Sync ────────────────→ Complete OAuth flow with PKCE
└── Sprint 6: Webhooks ─────────────────────→ HMAC signing, retry logic
```
### Critical Path
1. **Proto changes** (Features 1, 3, 4, 5) must be done and regenerated together
2. **Diarization Service** blocks nothing, can proceed independently
3. **Calendar Sync** requires OAuth token storage infrastructure
4. **Webhooks** can be implemented at any time
1. **Sprint 0** is the **mandatory prerequisite** for all other sprints
2. **All proto/DB changes consolidated** in Sprint 0 - no more scattered migrations
3. **Feature flags** control feature availability before full rollout
4. **Application service layer** required for Sprints 2, 4, 5 (hexagonal architecture)
5. **Sprint 5 OAuth** now includes complete PKCE flow, token persistence, and refresh
### Architectural Decisions (Updated)
| Sprint | Key Improvement |
|--------|-----------------|
| Sprint 0 | Consolidated proto + feature flags + Docker model downloads |
| Sprint 2 | Database persistence via repository (not in-memory `_jobs` dict) |
| Sprint 4 | `NerService` application layer (gRPC → Service → Engine) |
| Sprint 5 | Complete OAuth with PKCE, token storage, and auto-refresh |
### Quality Gates
Each sprint must pass before merge:
```bash
pytest tests/quality/ # 23+ quality checks
ruff check src/noteflow # Linting
basedpyright # Type checking
```
See `docs/sprints/QUALITY_STANDARDS.md` for thresholds and reduction targets.
---

View File

@@ -0,0 +1,575 @@
# NoteFlow Quality Standards Reference
> All sprint implementations MUST comply with these standards. Run quality gates before PR.
---
## Quick Reference: Quality Commands
```bash
# Python Backend
pytest tests/quality/ # All quality checks (23+ rules)
ruff check src/noteflow # Linting
mypy src/noteflow # Type checking (strict)
basedpyright # Additional type checks
# TypeScript/React Frontend
cd client
npm run test:quality # Frontend quality checks
npm run lint # ESLint
# Rust/Tauri
cd client
npm run quality:rs # Rust quality script
cargo clippy # Rust linting
# Full Suite
npm run quality:all # TS + Rust quality
```
---
## Python Standards (`src/noteflow/`)
### Type Safety (STRICT)
| Rule | Description | Enforcement |
|------|-------------|-------------|
| No `# type: ignore` | Forbidden without justification | mypy strict |
| No `Any` type | Use specific types always | basedpyright |
| Union syntax | Use `str \| None` over `Optional[str]` | ruff UP |
| Return annotations | All public functions must have returns | mypy |
### Code Limits
| Metric | Soft Limit | Hard Limit | Location |
|--------|------------|------------|----------|
| Module lines | 500 | 750 | `test_code_smells.py` |
| Function lines | 50 (tests), 75 (src) | — | `test_code_smells.py` |
| Function complexity | 15 | — | `test_code_smells.py` |
| Parameters | 7 | — | `test_code_smells.py` |
| Class methods | 20 | — | `test_code_smells.py` |
| Nesting depth | 5 | — | `test_code_smells.py` |
### Test Requirements
**Current thresholds** (to be reduced each sprint):
| Rule | Max Allowed | Target | File |
|------|-------------|--------|------|
| Assertion roulette (>3 assertions without msg) | 25 | 0 | `test_test_smells.py` |
| Conditional test logic | 15 | 0 | `test_test_smells.py` |
| Empty tests | 0 | 0 | `test_test_smells.py` |
| Sleepy tests (time.sleep) | 3 | 0 | `test_test_smells.py` |
| Tests without assertions | 3 | 0 | `test_test_smells.py` |
| Redundant assertions | 0 | 0 | `test_test_smells.py` |
| Print statements in tests | 3 | 0 | `test_test_smells.py` |
| Skipped tests without reason | 0 | 0 | `test_test_smells.py` |
| Exception handling (try/except) | 3 | 0 | `test_test_smells.py` |
| Magic numbers in assertions | 25 | 10 | `test_test_smells.py` |
| Duplicate test names | 5 | 0 | `test_test_smells.py` |
| Long test methods (>50 lines) | 3 | 0 | `test_test_smells.py` |
| unittest-style assertions | 0 | 0 | `test_test_smells.py` |
| Fixtures without type hints | 5 | 0 | `test_test_smells.py` |
| Unused fixture parameters | 3 | 0 | `test_test_smells.py` |
| pytest.raises without match= | 20 | 0 | `test_test_smells.py` |
| Cross-file fixture duplicates | 0 | 0 | `test_test_smells.py` |
**Reduction schedule**:
- After each sprint, reduce non-zero thresholds by 20% (rounded down)
- Goal: All thresholds at target values by Sprint 6
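Applied to the largest current threshold (25), the 20% schedule works out as below; a quick illustration of the arithmetic, not a mandated sequence:
```python
# Walk a starting threshold of 25 through six sprints of 20% reductions, rounded down.
threshold = 25
for sprint in range(1, 7):
    threshold = int(threshold * 0.8)
    print(f"After sprint {sprint}: {threshold}")
# Prints 20, 16, 12, 9, 7, 5
```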
### Docstring Requirements
- Write imperatively with proper punctuation
- All public functions, classes, modules documented
- Document complex business rules and edge cases
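For example, a compliant docstring is imperative, punctuated, and spells out the non-obvious rule it encodes (the function below is purely illustrative):
```python
def normalize_speaker_label(label: str) -> str:
    """Normalize a speaker label for display.

    Strip surrounding whitespace and collapse internal runs of spaces so
    labels compare consistently across segments.
    """
    return " ".join(label.split())
```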
---
## TypeScript/React Standards (`client/src/`)
### Type Safety
| Rule | Max Allowed | File |
|------|-------------|------|
| `any` type usage | 10 | `code-quality.test.ts` |
| Unsafe type assertions (`as any/unknown/never`) | 5 | `code-quality.test.ts` |
| TypeScript suppressions (@ts-ignore) | 3 | `code-quality.test.ts` |
### Code Quality
| Rule | Max Allowed | Description |
|------|-------------|-------------|
| Repeated string literals | 5 | Same string in multiple files |
| Complex JSX patterns | 10 | Repeated component structures |
| Scattered helper functions | 2 | format/parse/convert scattered |
| TODO/FIXME comments | 15 | Unaddressed tech debt |
| Commented-out code | 10 | Stale code blocks |
| Trivial wrapper components | 3 | Components that just spread props |
| Magic numbers (>3 digits) | 5 | Use named constants |
| Hardcoded colors in JSX | 3 | Use theme/CSS variables |
| Hardcoded API endpoints | 0 | Use config |
| Long files (>500 lines) | 3 | Split into modules |
| Complex inline styles | 5 | Use CSS/Tailwind |
| Deeply nested ternaries | 0 | Use if/switch |
| Excessive prop spreading | 2 | Consider context |
### Naming Conventions
- Components: PascalCase (`RecordingPanel`, not `recordingPanel`)
- Hooks: `use` prefix (`useAudioLevel`)
- Utils: camelCase (`formatDuration`)
- Constants: SCREAMING_SNAKE_CASE (`MAX_RETRIES`)
---
## Rust/Tauri Standards (`client/src-tauri/src/`)
### Code Quality Checks
| Check | Threshold | Description |
|-------|-----------|-------------|
| Magic numbers | Warning | Numbers >100 not in const |
| Repeated strings | >3 occurrences | Extract to constants |
| TODO/FIXME comments | >10 | Address or remove |
| Long functions | >100 lines | Split into helpers |
| Deep nesting | >5 levels (20 spaces) | Flatten control flow |
| unwrap() calls | >20 | Use ? or expect() |
| clone() per file | >10 | Review ownership |
| Parameters | >5 | Use struct/builder |
| Duplicate error messages | >2 | Use error enum |
| File size | >500 lines | Split module |
### Clippy Enforcement
```bash
cargo clippy -- -W unused_imports -W dead_code
```
Must pass with zero warnings for:
- Unused imports
- Dead code
- Missing docs on public items
---
## Pre-Commit Checklist
Before any PR:
```markdown
## Python
- [ ] `pytest tests/quality/` passes
- [ ] `ruff check --fix .` run
- [ ] `mypy src/noteflow` clean
- [ ] No `# type: ignore` without comment
- [ ] Docstrings on all new public functions
## TypeScript/React
- [ ] `npm run test:quality` passes
- [ ] `npm run lint` clean
- [ ] No `any` types added
- [ ] Components use PascalCase
## Rust
- [ ] `npm run quality:rs` passes
- [ ] `cargo clippy` clean
- [ ] No unwrap() in error paths
- [ ] Error types documented
```
---
## Architecture Patterns
### Hexagonal Architecture (Python)
```
domain/ → Entities, value objects, ports (interfaces)
application/ → Use cases, services, orchestration
infrastructure/ → Implementations, adapters, external services
grpc/ → Transport layer, proto definitions
```
### File Organization
| Layer | Pattern | Example |
|-------|---------|---------|
| Domain | `entities/meeting.py`, `ports/repository.py` | Pure business logic |
| Application | `services/meeting_service.py` | Orchestrates domain |
| Infrastructure | `persistence/repositories/meeting_repo.py` | Implements ports |
| gRPC | `_mixins/meeting.py` | Transport handlers |
### Naming Conventions
| Type | Convention | Example |
|------|------------|---------|
| Domain entity | Singular noun | `Meeting`, `Segment` |
| Service | NounService | `MeetingService`, `SummarizationService` |
| Repository | NounRepository | `MeetingRepository` |
| Port | NounPort (Protocol) | `SummarizationPort` |
| Mixin | NounMixin | `StreamingMixin` |
| Factory | create_noun() | `create_summarization_service()` |
---
## Testable Code Patterns
### Protocol-Based Dependency Injection
All services MUST use **constructor injection** with **Protocol-based abstractions** for testability.
**References**:
- [ArjanCodes: Python DI Best Practices](https://arjancodes.com/blog/python-dependency-injection-best-practices/)
- [Real Python: SOLID Principles](https://realpython.com/solid-principles-python/)
### Key Principles
| Principle | Description | Example |
|-----------|-------------|---------|
| Constructor injection | All dependencies passed via `__init__` | `Service(repo: RepositoryPort)` |
| Protocol abstractions | Use `typing.Protocol` for interfaces | `class RepositoryPort(Protocol)` |
| Factory functions | Create configured instances | `create_service() -> Service` |
| No global state | Avoid singletons and module-level state | Use DI instead of `get_instance()` |
### Pattern: Service with Protocol Dependencies
```python
from typing import Protocol
# 1. Define port (interface) in domain layer
class NerPort(Protocol):
"""Port for NER operations."""
def extract(self, text: str) -> list[NamedEntity]:
"""Extract named entities from text."""
...
# 2. Application service depends on protocol (not concrete impl)
class NerService:
"""Application service for NER operations."""
def __init__(
self,
ner_engine: NerPort, # Protocol, not SpacyNerEngine
uow_factory: Callable[[], UnitOfWork],
) -> None:
self._ner_engine = ner_engine
self._uow_factory = uow_factory
async def extract_entities(self, meeting_id: MeetingId) -> list[NamedEntity]:
"""Extract entities from meeting transcript."""
async with self._uow_factory() as uow:
meeting = await uow.meetings.get(meeting_id)
return self._ner_engine.extract(meeting.transcript)
# 3. Infrastructure implements the protocol
class SpacyNerEngine:
"""spaCy implementation of NerPort."""
def __init__(self, model_name: str = "en_core_web_sm") -> None:
self._nlp = spacy.load(model_name)
def extract(self, text: str) -> list[NamedEntity]:
"""Extract entities using spaCy."""
doc = self._nlp(text)
return [NamedEntity.from_spacy(ent) for ent in doc.ents]
# 4. Factory function wires dependencies
def create_ner_service(
model_name: str = "en_core_web_sm",
uow_factory: Callable[[], UnitOfWork] | None = None,
) -> NerService:
"""Create NER service with dependencies."""
engine = SpacyNerEngine(model_name)
factory = uow_factory or SQLAlchemyUnitOfWork
return NerService(engine, factory)
```
### Testing with Mock Protocols
```python
from collections.abc import Callable
from unittest.mock import MagicMock
from uuid import uuid4

import pytest


@pytest.fixture
def mock_ner_engine() -> MagicMock:
"""Create mock NER engine implementing NerPort."""
engine = MagicMock(spec=NerPort)
engine.extract.return_value = [
NamedEntity.create("Test Person", EntityCategory.PERSON, [1], 0.9),
]
return engine
@pytest.fixture
def ner_service(mock_ner_engine: MagicMock, mock_uow_factory: Callable) -> NerService:
"""Create NER service with mock dependencies."""
return NerService(mock_ner_engine, mock_uow_factory)
@pytest.mark.asyncio
async def test_extract_entities_calls_engine(
ner_service: NerService,
mock_ner_engine: MagicMock,
) -> None:
"""Extraction delegates to NER engine."""
await ner_service.extract_entities(MeetingId(uuid4()))
mock_ner_engine.extract.assert_called_once()
```
### Anti-Patterns (AVOID)
```python
# ❌ WRONG: Direct instantiation in service
class BadService:
def __init__(self) -> None:
self._engine = SpacyNerEngine() # Untestable!
# ❌ WRONG: Module-level singleton
_engine = SpacyNerEngine() # Global state!
def get_engine() -> SpacyNerEngine:
return _engine
# ❌ WRONG: Concrete type dependency
class BadService:
def __init__(self, engine: SpacyNerEngine) -> None: # Concrete, not Protocol!
self._engine = engine
```
### Modern Library Recommendations
| Category | Library | Rationale |
|----------|---------|-----------|
| OAuth 2.0 | **Authlib** | Built-in PKCE, async support, handles edge cases |
| HTTP Client | **httpx** | Modern async, compatible with Authlib |
| NER | **spaCy** or **GLiNER** | spaCy for production, GLiNER for zero-shot |
| Validation | **Pydantic** | Already used in project |
| Testing | **pytest** | With `pytest.mark.parametrize` |
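Authlib generates PKCE material internally, but for reference, the S256 challenge the Sprint 5 OAuth flow depends on reduces to this stdlib-only computation (a sketch, not the planned OAuth manager):
```python
import base64
import hashlib
import secrets


def make_pkce_pair() -> tuple[str, str]:
    """Return a (code_verifier, code_challenge) pair per RFC 7636 S256."""
    verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
    digest = hashlib.sha256(verifier.encode("ascii")).digest()
    challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode()
    return verifier, challenge
```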
---
## Test Patterns
### CRITICAL: No Conditionals in Tests
**FORBIDDEN in test code:**
- `if`/`else` statements with assertions
- `for` loops with assertions
- `while` loops
- Conditional logic that determines test behavior
**USE INSTEAD: `pytest.mark.parametrize`**
```python
# ❌ WRONG: Conditional test logic
def test_entity_extraction(engine: NerEngine) -> None:
for text, expected in test_cases: # FORBIDDEN
entities = engine.extract(text)
if expected: # FORBIDDEN
assert entities
# ✅ CORRECT: Parametrized tests
@pytest.mark.parametrize(
("text", "expected_category"),
[
pytest.param("John Smith", EntityCategory.PERSON, id="person"),
pytest.param("Google", EntityCategory.COMPANY, id="company"),
pytest.param("New York", EntityCategory.LOCATION, id="location"),
],
)
def test_entity_extraction(
engine: NerEngine,
text: str,
expected_category: EntityCategory,
) -> None:
"""Extract entity of expected category."""
entities = engine.extract(text)
matching = [e for e in entities if e.category == expected_category]
assert matching, f"Expected {expected_category.value} in: {text}"
```
### Parametrization Best Practices
```python
# Use pytest.param with descriptive IDs
@pytest.mark.parametrize(
("input_value", "expected_output", "description"),
[
pytest.param("", [], id="empty-input"),
pytest.param("hello", ["hello"], id="single-word"),
pytest.param("a b c", ["a", "b", "c"], id="multiple-words"),
],
)
def test_tokenize(input_value: str, expected_output: list[str], description: str) -> None:
"""Tokenize input produces expected tokens."""
assert tokenize(input_value) == expected_output
# Class-based organization for related tests
class TestAuthorizationUrl:
"""Test authorization URL generation."""
@pytest.mark.parametrize(
("provider", "expected_host"),
[
pytest.param(OAuthProvider.GOOGLE, "accounts.google.com", id="google"),
pytest.param(OAuthProvider.MICROSOFT, "login.microsoftonline.com", id="microsoft"),
],
)
def test_generates_valid_url(self, provider: OAuthProvider, expected_host: str) -> None:
"""Generate URL for each provider."""
url = generate_auth_url(provider)
assert expected_host in url
```
### Fixture Scoping for Performance
```python
# Module-scoped for expensive operations (model loading, DB setup)
@pytest.fixture(scope="module")
def ner_engine() -> SpacyNerEngine:
"""Load spaCy model once per test module."""
return SpacyNerEngine("en_core_web_sm")
# Function-scoped for mutable state
@pytest.fixture
def mock_uow() -> AsyncMock:
"""Fresh mock for each test."""
return AsyncMock(spec=UnitOfWork)
```
### Required Test Elements
1. **Type hints** on fixtures and test functions
2. **Docstring** explaining what's being tested
3. **AAA pattern** (Arrange/Act/Assert) with comments
4. **Specific assertions** with messages for complex checks
5. **pytest.raises with match=** for exception tests
6. **`pytest.param` with IDs** for parametrized tests
7. **No conditionals or loops** around assertions
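A hypothetical test combining these elements (`DurationParser` is invented for the example and is not part of the codebase):
```python
import pytest


class DurationParser:
    """Toy parser used only to illustrate the required test elements."""

    def parse(self, raw: str) -> int:
        minutes, _, seconds = raw.partition(":")
        if not (minutes.isdigit() and seconds.isdigit()):
            raise ValueError(f"{raw!r} is not a valid duration")
        return int(minutes) * 60 + int(seconds)


@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        pytest.param("1:05", 65, id="minutes-and-seconds"),
        pytest.param("0:30", 30, id="seconds-only"),
    ],
)
def test_parse_duration(raw: str, expected: int) -> None:
    """Parse a mm:ss string into total seconds."""
    # Arrange
    parser = DurationParser()
    # Act
    result = parser.parse(raw)
    # Assert
    assert result == expected, f"expected {expected} seconds for {raw!r}"


def test_parse_duration_rejects_garbage() -> None:
    """Reject non-numeric input with a clear error."""
    with pytest.raises(ValueError, match="not a valid duration"):
        DurationParser().parse("abc")
```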
---
## Code Reuse Checklist
Before creating new code, check:
| Location | Contains |
|----------|----------|
| `domain/entities/` | Existing entity types |
| `domain/ports/` | Existing port interfaces |
| `infrastructure/converters/` | Entity ↔ ORM converters |
| `grpc/_mixins/converters.py` | Proto ↔ Domain converters |
| `infrastructure/*/protocols.py` | Infrastructure interfaces |
| `application/services/` | Existing service patterns |
### Shared Utilities
| File | Functions |
|------|-----------|
| `infrastructure/export/_formatting.py` | `format_timestamp()`, `format_datetime()` |
| `infrastructure/security/keystore.py` | `_generate_key()`, `_decode_and_validate_key()` |
| `infrastructure/summarization/_parsing.py` | `build_transcript_prompt()`, `parse_llm_response()` |
| `infrastructure/diarization/assigner.py` | `assign_speaker()`, `assign_speakers_batch()` |
---
## Documentation Requirements
Every new feature must include:
1. **Unit tests** covering core logic
2. **Integration tests** for end-to-end flow (where applicable)
3. **Docstrings** on all public APIs
4. **CLAUDE.md updates** if architectural patterns change
5. **Proto changes** documented in commit message
---
## Shared Test Fixtures
### Available Fixtures (`tests/conftest.py`)
**DO NOT redefine these fixtures in test files**. Use them from conftest.py.
| Fixture | Type | Scope | Description |
|---------|------|-------|-------------|
| `crypto` | `CryptoService` | function | Encryption service for test data |
| `meetings_dir` | `Path` | function | Temporary directory for meeting assets |
| `mock_uow` | `AsyncMock` | function | Mock Unit of Work with all repositories |
| `mock_uow_factory` | `type` | function | Factory that returns mock_uow |
| `temp_db` | `Engine` | session | Temporary SQLite database |
| `async_session` | `AsyncSession` | function | Async SQLAlchemy session |
| `grpc_server` | `NoteFlowServicer` | function | Test gRPC server instance |
| `grpc_client` | `NoteFlowClient` | function | Test gRPC client |
| `sample_meeting` | `Meeting` | function | Pre-populated meeting entity |
| `sample_segments` | `list[Segment]` | function | Sample transcript segments |
| `mock_ner_engine` | `MagicMock` | function | Mock NER engine |
| `mock_oauth_manager` | `MagicMock` | function | Mock OAuth manager |
| `mock_calendar_settings` | `CalendarSettings` | function | Calendar settings with test OAuth creds |
### Usage Pattern
```python
# CORRECT: Use shared fixtures
def test_meeting_creation(mock_uow: AsyncMock, sample_meeting: Meeting) -> None:
"""Create meeting uses repository correctly."""
mock_uow.meetings.save.return_value = None
# ... test logic
# INCORRECT: Do not redefine fixtures
@pytest.fixture
def mock_uow(): # DON'T DO THIS - use conftest.py fixture
return AsyncMock()
```
### Cross-File Fixture Detection
The `test_test_smells.py` quality check detects when fixtures are redefined:
```python
# Fails quality check - fixture "mock_uow" already in conftest.py
@pytest.fixture
def mock_uow():
...
```
Move new shared fixtures to `tests/conftest.py` to avoid duplication.
### Adding New Shared Fixtures
When adding a fixture that could be reused:
1. Check if a similar fixture exists in `tests/conftest.py`
2. If not, add it to `tests/conftest.py` with:
- Type annotation on the return
- Docstring explaining the fixture
- Appropriate scope (`function`, `class`, `module`, `session`)
```python
@pytest.fixture
def new_shared_fixture() -> SomeType:
"""Provide X for Y tests.
Returns:
Configured SomeType instance.
"""
return SomeType(...)
```
---
## Sprint-Specific Quality Requirements
Each sprint must:
1. **Not increase** any quality threshold violations
2. **Reduce** at least one threshold toward target
3. **Add fixtures** to conftest.py (not test files)
4. **Run quality suite** before PR:
```bash
pytest tests/quality/ -v
```
5. **Document** any threshold exceptions in PR description

View File

@@ -0,0 +1,989 @@
# Sprint 0: Proto & Schema Foundation
> **Priority**: 0 | **Owner**: Backend | **Complexity**: Medium | **Prerequisite for all other sprints**
---
## Objective
Consolidate all protobuf schema changes and database migrations required by Sprints 1-6 into a single coordinated release. This prevents proto conflicts, ensures backward compatibility, and establishes the persistence foundation for all features.
---
## Rationale
Multiple sprints modify shared infrastructure:
| Sprint | Proto Changes | DB Changes |
|--------|---------------|------------|
| 1 (AI Templates) | `SummarizationOptions` message | None |
| 3 (PDF Export) | `EXPORT_FORMAT_PDF` enum | None |
| 4 (NER) | `ExtractEntities` RPC + messages | `named_entities` table |
| 5 (Calendar) | `ListCalendarEvents` RPC + messages | Uses existing tables |
| 6 (Webhooks) | None | `webhook_configs`, `webhook_deliveries` tables |
Without coordination:
- Proto regeneration conflicts between parallel sprints
- Migration ordering issues
- Client/server version mismatches
---
## Phased Implementation
Sprint 0 is split into four sub-increments to enable independent verification and reduce blast radius:
| Increment | Scope | Verification Gate |
|-----------|-------|-------------------|
| **0a** | Proto schema + stub regeneration | `python -c "from noteflow.grpc.proto import noteflow_pb2"` |
| **0b** | Database schema (schema.sql) | `psql -f docker/db/schema.sql` on fresh DB |
| **0c** | Alembic migrations | `alembic upgrade head && alembic downgrade -1` |
| **0d** | Dependencies + Docker + Feature flags | `pip install -e ".[all]" && pytest tests/` |
### Increment 0a: Proto Schema
**Files**: `noteflow.proto`, `*_pb2.py`, `*_pb2_grpc.py`, `*_pb2.pyi`
**Tasks**: Task 1, Task 7, Task 9
**Done when**:
- [ ] Proto compiles without errors
- [ ] Python stubs import cleanly
- [ ] Rust/TS stubs generate via `client/build.rs`
- [ ] PROTO_CHANGELOG.md committed
### Increment 0b: Database Schema
**Files**: `docker/db/schema.sql`
**Tasks**: Task 2
**Done when**:
- [ ] Schema applies to fresh PostgreSQL
- [ ] All tables have proper indexes
- [ ] Foreign key constraints validated
- [ ] Triggers for `updated_at` in place
### Increment 0c: Alembic Migrations
**Files**: `migrations/versions/001_*.py`, `migrations/versions/002_*.py`
**Tasks**: Task 3
**Done when**:
- [ ] Migrations apply to existing database
- [ ] Downgrade path works for each migration
- [ ] Schema matches schema.sql output
### Increment 0d: Dependencies and Docker
**Files**: `pyproject.toml`, `Dockerfile`, `docker-compose.yml`, `settings.py`, `cli/models.py`
**Tasks**: Task 4, Task 5, Task 6, Task 8
**Done when**:
- [ ] All optional dependencies install
- [ ] Feature flags control availability
- [ ] Model download CLI works
- [ ] Docker build completes with NER support
---
## Target/Affected Code
### Files to Modify
| File | Change Type |
|------|-------------|
| `src/noteflow/grpc/proto/noteflow.proto` | All proto additions |
| `src/noteflow/grpc/proto/noteflow_pb2.py` | Regenerated |
| `src/noteflow/grpc/proto/noteflow_pb2_grpc.py` | Regenerated |
| `src/noteflow/grpc/proto/noteflow_pb2.pyi` | Regenerated |
| `docker/db/schema.sql` | All table additions |
| `pyproject.toml` | All new dependencies |
| `client/src-tauri/build.rs` | Proto path verification |
### Files to Create
| File | Purpose |
|------|---------|
| `src/noteflow/infrastructure/persistence/migrations/versions/001_add_named_entities.py` | NER tables |
| `src/noteflow/infrastructure/persistence/migrations/versions/002_add_webhooks.py` | Webhook tables |
| `docs/sprints/phase-0-foundation/PROTO_CHANGELOG.md` | Proto version history |
---
## Implementation Tasks
### Task 1: Proto Schema Consolidation
**File**: `src/noteflow/grpc/proto/noteflow.proto`
Add all new messages and RPCs in a single commit:
```protobuf
// =============================================================================
// Sprint 0: Consolidated Proto Changes
// Version: 2.0.0
// Date: 2025-XX-XX
// =============================================================================
// -----------------------------------------------------------------------------
// Sprint 1: AI Templates
// -----------------------------------------------------------------------------
// Summarization style options passed from frontend settings
message SummarizationOptions {
// Tone: professional, casual, technical, friendly
string tone = 1;
// Format: bullet_points, narrative, structured, concise
string format = 2;
// Verbosity: minimal, balanced, detailed, comprehensive
string verbosity = 3;
}
// Modify existing GenerateSummaryRequest (add field 3)
// message GenerateSummaryRequest {
// string meeting_id = 1;
// bool force_regenerate = 2;
// SummarizationOptions options = 3; // NEW
// }
// -----------------------------------------------------------------------------
// Sprint 3: PDF Export
// -----------------------------------------------------------------------------
// Add to existing ExportFormat enum
// enum ExportFormat {
// EXPORT_FORMAT_UNSPECIFIED = 0;
// EXPORT_FORMAT_MARKDOWN = 1;
// EXPORT_FORMAT_HTML = 2;
// EXPORT_FORMAT_PDF = 3; // NEW
// }
// -----------------------------------------------------------------------------
// Sprint 4: Named Entity Extraction
// -----------------------------------------------------------------------------
// Add to service definition
// rpc ExtractEntities(ExtractEntitiesRequest) returns (ExtractEntitiesResponse);
message ExtractEntitiesRequest {
string meeting_id = 1;
bool force_refresh = 2; // Re-extract even if entities exist
}
message ExtractedEntity {
string id = 1;
string text = 2;
// Category: person, company, product, technical, acronym, location, date, other
string category = 3;
repeated int32 segment_ids = 4;
float confidence = 5;
bool is_pinned = 6; // User-confirmed
}
message ExtractEntitiesResponse {
repeated ExtractedEntity entities = 1;
int32 total_count = 2;
bool cached = 3; // True if returning cached results
}
// -----------------------------------------------------------------------------
// Sprint 5: Calendar Sync
// -----------------------------------------------------------------------------
// Add to service definition
// rpc ListCalendarEvents(ListCalendarEventsRequest) returns (ListCalendarEventsResponse);
// rpc GetCalendarProviders(GetCalendarProvidersRequest) returns (GetCalendarProvidersResponse);
// rpc InitiateCalendarAuth(InitiateCalendarAuthRequest) returns (InitiateCalendarAuthResponse);
// rpc CompleteCalendarAuth(CompleteCalendarAuthRequest) returns (CompleteCalendarAuthResponse);
message CalendarEvent {
string id = 1;
string title = 2;
int64 start_time = 3; // Unix timestamp (seconds)
int64 end_time = 4; // Unix timestamp (seconds)
repeated string attendees = 5;
string location = 6;
string description = 7;
string meeting_url = 8;
bool is_recurring = 9;
string provider = 10; // google, outlook
}
message ListCalendarEventsRequest {
int32 hours_ahead = 1; // How far ahead to look (default: 24)
int32 limit = 2; // Max events to return (default: 10)
string provider = 3; // Optional: specific provider name
}
message ListCalendarEventsResponse {
repeated CalendarEvent events = 1;
int32 total_count = 2;
}
message GetCalendarProvidersRequest {}
message CalendarProvider {
string name = 1;
bool is_authenticated = 2;
string display_name = 3; // "Google Calendar", "Microsoft Outlook"
}
message GetCalendarProvidersResponse {
repeated CalendarProvider providers = 1;
}
// OAuth flow messages
message InitiateCalendarAuthRequest {
string provider = 1; // google, outlook
string redirect_uri = 2; // Where to redirect after auth
}
message InitiateCalendarAuthResponse {
string auth_url = 1; // URL to redirect user to
string state = 2; // CSRF token to verify callback
}
message CompleteCalendarAuthRequest {
string provider = 1;
string code = 2; // Authorization code from OAuth callback
string state = 3; // CSRF token for verification
}
message CompleteCalendarAuthResponse {
bool success = 1;
string error_message = 2;
string provider_email = 3; // Email of authenticated account
}
```
---
### Task 2: Database Schema Additions
**File**: `docker/db/schema.sql`
Add after existing tables (preserve insertion order for foreign keys):
```sql
--------------------------------------------------------------------------------
-- Sprint 4: Named Entities
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.named_entities (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
text text NOT NULL,
normalized_text text NOT NULL, -- Lowercase, trimmed for deduplication
category varchar(50) NOT NULL, -- person, company, product, location, etc.
segment_ids integer[] NOT NULL DEFAULT '{}'::integer[],
confidence double precision NOT NULL DEFAULT 0.0,
is_pinned boolean NOT NULL DEFAULT false,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
-- Unique constraint for deduplication within a meeting
CONSTRAINT uq_named_entities_meeting_text UNIQUE (meeting_id, normalized_text)
);
CREATE TRIGGER trg_named_entities_updated_at
BEFORE UPDATE ON noteflow.named_entities
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE INDEX IF NOT EXISTS idx_named_entities_meeting_id
ON noteflow.named_entities(meeting_id);
CREATE INDEX IF NOT EXISTS idx_named_entities_category
ON noteflow.named_entities(category);
--------------------------------------------------------------------------------
-- Sprint 6: Webhooks
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.webhook_configs (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
name varchar(255) NOT NULL DEFAULT 'Webhook',
url text NOT NULL,
events text[] NOT NULL DEFAULT '{}'::text[],
secret text NULL, -- HMAC signing secret
enabled boolean NOT NULL DEFAULT true,
timeout_ms integer NOT NULL DEFAULT 10000,
max_retries integer NOT NULL DEFAULT 3,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
-- Validate URL format
CONSTRAINT chk_webhook_url_format CHECK (url ~ '^https?://')
);
CREATE TRIGGER trg_webhook_configs_updated_at
BEFORE UPDATE ON noteflow.webhook_configs
FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();
CREATE INDEX IF NOT EXISTS idx_webhook_configs_workspace_id
ON noteflow.webhook_configs(workspace_id);
CREATE TABLE IF NOT EXISTS noteflow.webhook_deliveries (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
webhook_id uuid NOT NULL REFERENCES noteflow.webhook_configs(id) ON DELETE CASCADE,
event_type text NOT NULL,
payload jsonb NOT NULL DEFAULT '{}'::jsonb,
status_code integer NULL,
response_body text NULL, -- First 1KB of response for debugging
error_message text NULL,
attempt_count integer NOT NULL DEFAULT 1,
duration_ms integer NULL, -- Request duration for monitoring
delivered_at timestamptz NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_webhook_id
ON noteflow.webhook_deliveries(webhook_id, delivered_at DESC);
CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_event_type
ON noteflow.webhook_deliveries(event_type, delivered_at DESC);
-- Partition by month for large deployments (optional)
-- CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_delivered_at
-- ON noteflow.webhook_deliveries(delivered_at);
```
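The `secret` column above exists for request signing; a minimal sketch of how a delivery could be signed, assuming an HMAC-SHA256 signature header (the header name and payload shape are placeholders until Sprint 6 defines the contract):
```python
import hashlib
import hmac
import json


def sign_webhook_payload(secret: str, payload: dict[str, object]) -> dict[str, str]:
    """Return headers carrying an HMAC-SHA256 signature of the JSON body."""
    body = json.dumps(payload, separators=(",", ":"), sort_keys=True)
    signature = hmac.new(secret.encode(), body.encode(), hashlib.sha256).hexdigest()
    return {
        "Content-Type": "application/json",
        "X-NoteFlow-Signature": f"sha256={signature}",
    }
```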
---
### Task 3: Alembic Migrations
**File**: `src/noteflow/infrastructure/persistence/migrations/versions/001_add_named_entities.py`
```python
"""Add named_entities table.
Revision ID: 001_named_entities
Revises: <previous_revision>
Create Date: 2025-XX-XX
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision = "001_named_entities"
down_revision = "<previous_revision>"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Create named_entities table."""
op.create_table(
"named_entities",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"meeting_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("normalized_text", sa.Text(), nullable=False),
sa.Column("category", sa.String(50), nullable=False),
sa.Column(
"segment_ids",
postgresql.ARRAY(sa.Integer()),
nullable=False,
server_default="{}",
),
sa.Column(
"confidence",
sa.Float(),
nullable=False,
server_default="0.0",
),
sa.Column(
"is_pinned",
sa.Boolean(),
nullable=False,
server_default="false",
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
sa.UniqueConstraint(
"meeting_id",
"normalized_text",
name="uq_named_entities_meeting_text",
),
schema="noteflow",
)
op.create_index(
"idx_named_entities_meeting_id",
"named_entities",
["meeting_id"],
schema="noteflow",
)
op.create_index(
"idx_named_entities_category",
"named_entities",
["category"],
schema="noteflow",
)
def downgrade() -> None:
"""Drop named_entities table."""
op.drop_index("idx_named_entities_category", schema="noteflow")
op.drop_index("idx_named_entities_meeting_id", schema="noteflow")
op.drop_table("named_entities", schema="noteflow")
```
**File**: `src/noteflow/infrastructure/persistence/migrations/versions/002_add_webhooks.py`
```python
"""Add webhook tables.
Revision ID: 002_webhooks
Revises: 001_named_entities
Create Date: 2025-XX-XX
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision = "002_webhooks"
down_revision = "001_named_entities"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Create webhook tables."""
# webhook_configs
op.create_table(
"webhook_configs",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"workspace_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("name", sa.String(255), nullable=False, server_default="Webhook"),
sa.Column("url", sa.Text(), nullable=False),
sa.Column(
"events",
postgresql.ARRAY(sa.Text()),
nullable=False,
server_default="{}",
),
sa.Column("secret", sa.Text(), nullable=True),
sa.Column("enabled", sa.Boolean(), nullable=False, server_default="true"),
sa.Column("timeout_ms", sa.Integer(), nullable=False, server_default="10000"),
sa.Column("max_retries", sa.Integer(), nullable=False, server_default="3"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
sa.CheckConstraint("url ~ '^https?://'", name="chk_webhook_url_format"),
schema="noteflow",
)
op.create_index(
"idx_webhook_configs_workspace_id",
"webhook_configs",
["workspace_id"],
schema="noteflow",
)
# webhook_deliveries
op.create_table(
"webhook_deliveries",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"webhook_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("noteflow.webhook_configs.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("event_type", sa.Text(), nullable=False),
sa.Column(
"payload",
postgresql.JSONB(),
nullable=False,
server_default="{}",
),
sa.Column("status_code", sa.Integer(), nullable=True),
sa.Column("response_body", sa.Text(), nullable=True),
sa.Column("error_message", sa.Text(), nullable=True),
sa.Column("attempt_count", sa.Integer(), nullable=False, server_default="1"),
sa.Column("duration_ms", sa.Integer(), nullable=True),
sa.Column(
"delivered_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
schema="noteflow",
)
op.create_index(
"idx_webhook_deliveries_webhook_id",
"webhook_deliveries",
["webhook_id", "delivered_at"],
schema="noteflow",
)
op.create_index(
"idx_webhook_deliveries_event_type",
"webhook_deliveries",
["event_type", "delivered_at"],
schema="noteflow",
)
def downgrade() -> None:
"""Drop webhook tables."""
op.drop_index("idx_webhook_deliveries_event_type", schema="noteflow")
op.drop_index("idx_webhook_deliveries_webhook_id", schema="noteflow")
op.drop_table("webhook_deliveries", schema="noteflow")
op.drop_index("idx_webhook_configs_workspace_id", schema="noteflow")
op.drop_table("webhook_configs", schema="noteflow")
```
---
### Task 4: Dependency Consolidation
**File**: `pyproject.toml`
Add all new dependencies in a single update:
```toml
[project]
dependencies = [
# ... existing dependencies ...
# Sprint 0: Consolidated new dependencies
"httpx>=0.27", # HTTP client (webhooks, future integrations)
]
[project.optional-dependencies]
# PDF Export (Sprint 3)
pdf = [
"weasyprint>=62.0",
]
# Named Entity Recognition (Sprint 4)
ner = [
"spacy>=3.7",
]
# Calendar Integration (Sprint 5)
calendar = [
"google-api-python-client>=2.100",
"google-auth>=2.23",
"google-auth-oauthlib>=1.1",
# Outlook support (future)
# "msal>=1.24",
]
# All optional features
all = [
"noteflow[pdf,ner,calendar]",
]
[project.scripts]
# Model download helper
noteflow-download-models = "noteflow.cli.models:download_all"
```
---
### Task 5: Model Download CLI
**File**: `src/noteflow/cli/models.py`
```python
"""CLI for downloading ML models."""
from __future__ import annotations
import subprocess
import sys
def download_spacy_model(model: str = "en_core_web_sm") -> None:
"""Download spaCy model.
Args:
model: Model name to download.
"""
print(f"Downloading spaCy model: {model}")
subprocess.run(
[sys.executable, "-m", "spacy", "download", model],
check=True,
)
print(f"Successfully downloaded: {model}")
def download_all() -> None:
"""Download all required ML models."""
print("Downloading all NoteFlow ML models...")
try:
download_spacy_model("en_core_web_sm")
except subprocess.CalledProcessError as e:
print(f"Failed to download spaCy model: {e}")
sys.exit(1)
print("\nAll models downloaded successfully!")
print("You can now use NER features.")
if __name__ == "__main__":
download_all()
```
---
### Task 6: Docker Integration
**File**: `Dockerfile` (additions)
```dockerfile
# Stage: Download ML models (optional, for NER support)
FROM python:3.12-slim AS models
# Install spacy and download model
RUN pip install "spacy>=3.7" && \
python -m spacy download en_core_web_sm
# Stage: Runtime with models
FROM noteflow-base AS runtime-with-ner
# Copy spaCy model from models stage
COPY --from=models /usr/local/lib/python3.12/site-packages/en_core_web_sm \
/usr/local/lib/python3.12/site-packages/en_core_web_sm
# Verify model is available
RUN python -c "import spacy; spacy.load('en_core_web_sm')"
```
**File**: `docker-compose.yml` (additions)
```yaml
services:
noteflow:
build:
context: .
target: runtime-with-ner # Use runtime-with-ner for NER support
environment:
# Feature flags
NOTEFLOW_FEATURE_NER_ENABLED: "true"
NOTEFLOW_FEATURE_CALENDAR_ENABLED: "true"
NOTEFLOW_FEATURE_WEBHOOKS_ENABLED: "true"
```
---
### Task 7: Proto Regeneration Script
**File**: `scripts/regenerate_proto.sh`
```bash
#!/usr/bin/env bash
set -euo pipefail
PROTO_DIR="src/noteflow/grpc/proto"
PROTO_FILE="$PROTO_DIR/noteflow.proto"
echo "Regenerating protobuf stubs..."
python -m grpc_tools.protoc \
-I "$PROTO_DIR" \
--python_out="$PROTO_DIR" \
--grpc_python_out="$PROTO_DIR" \
--pyi_out="$PROTO_DIR" \
"$PROTO_FILE"
echo "Fixing imports for Python 3.12+ compatibility..."
# Fix relative imports in generated files
sed -i '' 's/^import noteflow_pb2/from . import noteflow_pb2/' "$PROTO_DIR/noteflow_pb2_grpc.py" 2>/dev/null || \
sed -i 's/^import noteflow_pb2/from . import noteflow_pb2/' "$PROTO_DIR/noteflow_pb2_grpc.py"
echo "Proto stubs regenerated successfully!"
echo ""
echo "Files updated:"
echo " - $PROTO_DIR/noteflow_pb2.py"
echo " - $PROTO_DIR/noteflow_pb2_grpc.py"
echo " - $PROTO_DIR/noteflow_pb2.pyi"
echo ""
echo "Next steps:"
echo " 1. Run 'cd client && npm run build:proto' to update Rust/TS stubs"
echo " 2. Run tests: pytest tests/grpc/"
echo " 3. Commit all generated files together"
```
---
### Task 8: Feature Flags
**File**: `src/noteflow/config/settings.py` (additions)
```python
from functools import lru_cache

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class FeatureFlags(BaseSettings):
"""Feature flag settings for gradual rollout."""
model_config = SettingsConfigDict(env_prefix="NOTEFLOW_FEATURE_")
# Sprint 1: AI Templates
templates_enabled: bool = Field(
default=True,
description="Enable summarization template options",
)
# Sprint 3: PDF Export
pdf_export_enabled: bool = Field(
default=True,
description="Enable PDF export format",
)
# Sprint 4: NER
ner_enabled: bool = Field(
default=False, # Disabled by default (requires model download)
description="Enable named entity extraction",
)
# Sprint 5: Calendar
calendar_enabled: bool = Field(
default=False, # Disabled by default (requires OAuth setup)
description="Enable calendar integration",
)
# Sprint 6: Webhooks
webhooks_enabled: bool = Field(
default=True,
description="Enable webhook notifications",
)
class Settings(BaseSettings):
"""Main application settings."""
# ... existing fields ...
features: FeatureFlags = Field(default_factory=FeatureFlags)
@lru_cache
def get_feature_flags() -> FeatureFlags:
"""Get cached feature flags."""
return get_settings().features
```
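How a handler might consult these flags; a sketch only, since the guard helper and error message below are assumptions rather than existing code:
```python
import grpc

from noteflow.config.settings import get_feature_flags


async def ensure_ner_enabled(context: grpc.aio.ServicerContext) -> None:
    """Abort the RPC with FAILED_PRECONDITION when the NER flag is off."""
    if not get_feature_flags().ner_enabled:
        await context.abort(
            grpc.StatusCode.FAILED_PRECONDITION,
            "NER is disabled; set NOTEFLOW_FEATURE_NER_ENABLED=true to enable it.",
        )
```
An `ExtractEntities` handler would await this guard before touching the service layer.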
---
### Task 9: Proto Changelog
**File**: `docs/sprints/phase-0-foundation/PROTO_CHANGELOG.md`
```markdown
# Proto Changelog
All notable changes to `noteflow.proto` are documented here.
## [2.0.0] - 2025-XX-XX
### Added
#### Messages
- `SummarizationOptions` - AI template preferences (tone, format, verbosity)
- `ExtractEntitiesRequest` / `ExtractEntitiesResponse` - NER extraction
- `ExtractedEntity` - Named entity with category, segments, confidence
- `CalendarEvent` - Calendar event representation
- `ListCalendarEventsRequest` / `ListCalendarEventsResponse` - Calendar listing
- `CalendarProvider` - Provider info with auth status
- `GetCalendarProvidersRequest` / `GetCalendarProvidersResponse` - Provider listing
- `InitiateCalendarAuthRequest` / `InitiateCalendarAuthResponse` - OAuth initiation
- `CompleteCalendarAuthRequest` / `CompleteCalendarAuthResponse` - OAuth completion
#### RPCs
- `ExtractEntities` - Extract named entities from meeting
- `ListCalendarEvents` - List upcoming calendar events
- `GetCalendarProviders` - Get available calendar providers
- `InitiateCalendarAuth` - Start OAuth flow
- `CompleteCalendarAuth` - Complete OAuth flow
#### Enums
- `ExportFormat.EXPORT_FORMAT_PDF` - PDF export support
### Modified
#### Messages
- `GenerateSummaryRequest` - Added optional `options` field (field 3)
### Compatibility Notes
- All new fields are optional or have defaults
- Existing clients will continue to work without changes
- New features require updated clients to access
## [1.x.x] - Previous Versions
See git history for earlier changes.
```
---
## Acceptance Criteria
### Functional
- [ ] All proto messages compile without errors
- [ ] Proto stubs regenerate cleanly
- [ ] Alembic migrations apply to fresh database
- [ ] Alembic migrations apply to existing database (upgrade path)
- [ ] Feature flags control feature availability
- [ ] Model download CLI works correctly
### Technical
- [ ] Proto backward compatible (existing clients work)
- [ ] No breaking changes to existing RPCs
- [ ] All new tables have proper indexes
- [ ] Foreign key constraints correct
- [ ] Triggers for `updated_at` in place
### Quality Gates
- [ ] `pytest tests/quality/` passes
- [ ] `ruff check src/noteflow` clean
- [ ] `mypy src/noteflow` clean
- [ ] `alembic upgrade head` succeeds on fresh DB
- [ ] `alembic downgrade -1` succeeds for each migration
- [ ] Proto regeneration produces identical output (idempotent)
---
## Test Plan
### Migration Tests
**File**: `tests/infrastructure/persistence/test_migrations.py`
```python
from pathlib import Path

import pytest
from alembic import command
from alembic.config import Config


@pytest.fixture
def alembic_config(tmp_path: Path) -> Config:
"""Create Alembic config for testing."""
config = Config()
config.set_main_option("script_location", "src/noteflow/infrastructure/persistence/migrations")
config.set_main_option("sqlalchemy.url", f"sqlite:///{tmp_path}/test.db")
return config
def test_migrations_upgrade_downgrade(alembic_config: Config) -> None:
"""All migrations can upgrade and downgrade."""
# Upgrade to head
command.upgrade(alembic_config, "head")
# Downgrade each migration
command.downgrade(alembic_config, "-1")
command.downgrade(alembic_config, "-1")
# Upgrade again
command.upgrade(alembic_config, "head")
```
### Proto Tests
**File**: `tests/grpc/test_proto_compilation.py`
```python
def test_proto_imports() -> None:
"""Proto stubs import without errors."""
from noteflow.grpc.proto import noteflow_pb2, noteflow_pb2_grpc
# Verify new messages exist
assert hasattr(noteflow_pb2, "SummarizationOptions")
assert hasattr(noteflow_pb2, "ExtractEntitiesRequest")
assert hasattr(noteflow_pb2, "CalendarEvent")
# Verify new enum values
assert noteflow_pb2.EXPORT_FORMAT_PDF == 3
def test_proto_message_defaults() -> None:
"""New messages have correct defaults."""
from noteflow.grpc.proto import noteflow_pb2
# SummarizationOptions defaults
opts = noteflow_pb2.SummarizationOptions()
assert opts.tone == ""
assert opts.format == ""
assert opts.verbosity == ""
# ExtractedEntity defaults
entity = noteflow_pb2.ExtractedEntity()
assert entity.confidence == 0.0
assert entity.is_pinned is False
```
---
## Definition of Done
- [ ] All proto changes committed in single commit
- [ ] All migrations committed and tested
- [ ] Proto regeneration script works
- [ ] Feature flags documented
- [ ] PROTO_CHANGELOG.md updated
- [ ] Client proto sync verified (`cd client && npm run build:proto`)
- [ ] Integration tests pass with new schema
- [ ] CLAUDE.md updated with new proto messages
- [ ] README updated with new optional dependencies
---
## Dependencies
- None (this is the foundation sprint)
## Blocks
- All other sprints depend on Sprint 0
## Post-Sprint
- Monitor for proto compatibility issues
- Consider proto versioning strategy for future breaking changes
- Document migration rollback procedures

View File

@@ -0,0 +1,548 @@
# Sprint 1: AI Templates Pass-Through
> **Priority**: 1 | **Owner**: Both (Backend + Frontend) | **Complexity**: Low
---
## Objective
Enable user-configured summarization style preferences (tone, format, verbosity) to flow from frontend settings through gRPC to the LLM prompt builder.
---
## Current State Analysis
### What Exists
| Component | Location | Status |
|-----------|----------|--------|
| Frontend UI | `client/src/pages/Settings.tsx` | AI template controls saved to local preferences |
| gRPC Proto | `src/noteflow/grpc/proto/noteflow.proto:291` | `GenerateSummaryRequest` lacks options field |
| Summarization Service | `src/noteflow/application/services/summarization_service.py:167` | `summarize()` has no template params |
| Prompt Builder | `src/noteflow/infrastructure/summarization/_parsing.py` | Only `build_transcript_prompt()`, no style builder |
### Gap
User preferences in Settings are never transmitted to the backend. `GenerateSummaryRequest` only contains:
```protobuf
message GenerateSummaryRequest {
string meeting_id = 1;
bool force_regenerate = 2;
// Missing: SummarizationOptions options = 3;
}
```
---
## Target/Affected Code
### Files to Modify
| File | Change Type | Lines Est. |
|------|-------------|------------|
| `src/noteflow/grpc/proto/noteflow.proto` | Add message + field | +15 |
| `src/noteflow/infrastructure/summarization/_parsing.py` | Add `build_template_prompt()` | +40 |
| `src/noteflow/application/services/summarization_service.py` | Accept options param | +10 |
| `src/noteflow/grpc/_mixins/summarization.py` | Extract and pass options | +15 |
| `client/src-tauri/src/commands/summary.rs` | Accept template params | +20 |
| `client/src/api/tauri-adapter.ts` | Read prefs, pass to command | +15 |
### Files to Create
None - all changes are modifications to existing files.
---
## Implementation Tasks
### Task 1: Proto Update
**File**: `src/noteflow/grpc/proto/noteflow.proto`
```protobuf
// Add after line 288 (before GenerateSummaryRequest)
message SummarizationOptions {
// Tone: professional, casual, technical, friendly
string tone = 1;
// Format: bullet_points, narrative, structured, concise
string format = 2;
// Verbosity: minimal, balanced, detailed, comprehensive
string verbosity = 3;
}
// Modify existing GenerateSummaryRequest (line 291)
message GenerateSummaryRequest {
string meeting_id = 1;
bool force_regenerate = 2;
SummarizationOptions options = 3; // NEW
}
```
**Post-change**: Regenerate proto stubs:
```bash
python -m grpc_tools.protoc -I src/noteflow/grpc/proto \
--python_out=src/noteflow/grpc/proto \
--grpc_python_out=src/noteflow/grpc/proto \
--pyi_out=src/noteflow/grpc/proto \
src/noteflow/grpc/proto/noteflow.proto
```
---
### Task 2: Template Prompt Builder
**File**: `src/noteflow/infrastructure/summarization/_parsing.py`
**Insert after** `SYSTEM_PROMPT` constant:
```python
from noteflow.grpc.proto import noteflow_pb2
_TONE_INSTRUCTIONS: dict[str, str] = {
"professional": "Use formal, business-appropriate language.",
"casual": "Use conversational, approachable language.",
"technical": "Use precise technical terminology.",
"friendly": "Use warm, personable language.",
}
_FORMAT_INSTRUCTIONS: dict[str, str] = {
"bullet_points": "Present information in bullet points.",
"narrative": "Write in flowing paragraphs.",
"structured": "Use headers and organized sections.",
"concise": "Be extremely brief and to the point.",
}
_VERBOSITY_INSTRUCTIONS: dict[str, str] = {
"minimal": "Provide only essential information.",
"balanced": "Include moderate detail.",
"detailed": "Include comprehensive information.",
"comprehensive": "Include all relevant details and context.",
}
def build_template_prompt(
options: noteflow_pb2.SummarizationOptions | None,
) -> str:
"""Build prompt prefix based on user template preferences.
Args:
options: User's summarization style preferences.
Returns:
Style instruction string to prepend to system prompt.
"""
if not options:
return ""
parts: list[str] = []
if options.tone and options.tone in _TONE_INSTRUCTIONS:
parts.append(_TONE_INSTRUCTIONS[options.tone])
if options.format and options.format in _FORMAT_INSTRUCTIONS:
parts.append(_FORMAT_INSTRUCTIONS[options.format])
if options.verbosity and options.verbosity in _VERBOSITY_INSTRUCTIONS:
parts.append(_VERBOSITY_INSTRUCTIONS[options.verbosity])
return " ".join(parts)
```
---
### Task 3: Service Update
**File**: `src/noteflow/application/services/summarization_service.py`
**Modify** `summarize()` signature (line 167):
```python
async def summarize(
self,
meeting_id: MeetingId,
segments: Sequence[Segment],
mode: SummarizationMode | None = None,
max_key_points: int | None = None,
max_action_items: int | None = None,
style_prompt: str | None = None, # NEW PARAMETER
) -> SummarizationServiceResult:
```
**Update** request building (around line 205):
```python
request = SummarizationRequest(
meeting_id=meeting_id,
segments=segments,
max_key_points=max_key_points or self.settings.max_key_points,
max_action_items=max_action_items or self.settings.max_action_items,
style_prompt=style_prompt, # NEW FIELD
)
```
**Note**: Also update `SummarizationRequest` dataclass in domain to include `style_prompt`.
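A minimal sketch of that change; the field list mirrors the request construction above, and the domain imports are omitted (`MeetingId` and `Segment` are the existing domain types):
```python
from collections.abc import Sequence
from dataclasses import dataclass


@dataclass(frozen=True)
class SummarizationRequest:
    """Request handed from the application service to the summarization provider."""

    meeting_id: MeetingId  # existing domain type (import omitted in this sketch)
    segments: Sequence[Segment]  # existing domain type (import omitted in this sketch)
    max_key_points: int
    max_action_items: int
    style_prompt: str | None = None  # NEW: style instructions built from SummarizationOptions
```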
---
### Task 4: gRPC Mixin Update
**File**: `src/noteflow/grpc/_mixins/summarization.py`
**Modify** `GenerateSummary` method:
```python
async def GenerateSummary(
self: ServicerHost,
request: noteflow_pb2.GenerateSummaryRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Summary:
"""Generate AI summary for meeting."""
from noteflow.infrastructure.summarization._parsing import build_template_prompt
meeting_id = self._parse_meeting_id(request.meeting_id)
# Build style prompt from options
    style_prompt = build_template_prompt(request.options) if request.HasField("options") else None
# ... existing meeting fetch logic ...
result = await self._summarization_service.summarize(
meeting_id=meeting_id,
segments=meeting.segments,
style_prompt=style_prompt, # Pass style prompt
)
```
---
### Task 5: Rust Command Update
**File**: `client/src-tauri/src/commands/summary.rs`
```rust
#[derive(Debug, Serialize, Deserialize)]
pub struct SummarizationOptions {
pub tone: Option<String>,
pub format: Option<String>,
pub verbosity: Option<String>,
}
#[tauri::command]
pub async fn generate_summary(
meeting_id: String,
force_regenerate: Option<bool>,
options: Option<SummarizationOptions>, // NEW
state: State<'_, AppState>,
) -> Result<Summary, String> {
let client = state.grpc_client.lock().await;
let proto_options = options.map(|o| proto::SummarizationOptions {
tone: o.tone.unwrap_or_default(),
format: o.format.unwrap_or_default(),
verbosity: o.verbosity.unwrap_or_default(),
});
let request = proto::GenerateSummaryRequest {
meeting_id,
force_regenerate: force_regenerate.unwrap_or(false),
options: proto_options,
};
// ... rest of gRPC call
}
```
---
### Task 6: TypeScript Adapter Update
**File**: `client/src/api/tauri-adapter.ts`
```typescript
interface SummarizationOptions {
tone?: 'professional' | 'casual' | 'technical' | 'friendly';
format?: 'bullet_points' | 'narrative' | 'structured' | 'concise';
verbosity?: 'minimal' | 'balanced' | 'detailed' | 'comprehensive';
}
async generateSummary(
meetingId: string,
forceRegenerate?: boolean,
): Promise<Summary> {
// Read from local preferences
const prefs = await this.getPreferences();
const template = prefs.ai_template;
const options: SummarizationOptions | undefined = template ? {
tone: template.tone,
format: template.format,
verbosity: template.verbosity,
} : undefined;
return invoke(Commands.GENERATE_SUMMARY, {
meetingId,
forceRegenerate,
options,
});
}
```
---
## Code Segments to Reuse
### Existing Prompt Building
**Location**: `src/noteflow/infrastructure/summarization/_parsing.py:20-80`
```python
SYSTEM_PROMPT = """You are an expert meeting analyst..."""
def build_transcript_prompt(segments: Sequence[Segment], ...) -> str:
"""Build transcript with segment markers."""
```
Use this pattern for `build_template_prompt()`.
### Existing Service Pattern
**Location**: `src/noteflow/application/services/summarization_service.py:167-249`
The `summarize()` method shows how to:
- Accept optional parameters with defaults
- Pass through to providers
- Handle verification and persistence
### Rust Command Pattern
**Location**: `client/src-tauri/src/commands/meeting.rs`
Follow the pattern for:
- Deriving `Serialize`, `Deserialize` on structs
- Using `Option<T>` for optional command params
- Converting to proto types
---
## Acceptance Criteria
### Functional
- [ ] User can select tone (professional/casual/technical/friendly) in Settings
- [ ] User can select format (bullet_points/narrative/structured/concise) in Settings
- [ ] User can select verbosity (minimal/balanced/detailed/comprehensive) in Settings
- [ ] When generating summary, selected options affect the output style
- [ ] Default behavior (no options) produces same result as before
### Technical
- [ ] Proto regenerated and compiles cleanly
- [ ] No breaking changes to existing clients (options field is optional)
- [ ] Style prompt logged at DEBUG level for troubleshooting
- [ ] Unit tests cover all tone/format/verbosity combinations
### Quality Gates
- [ ] `pytest tests/quality/` passes
- [ ] `ruff check src/noteflow` clean
- [ ] `mypy src/noteflow` clean
- [ ] `npm run test:quality` passes (client)
- [ ] `cargo clippy` clean (Rust)
---
## Test Plan
### Unit Tests
**File**: `tests/infrastructure/summarization/test_parsing.py`
```python
import pytest
from noteflow.grpc.proto import noteflow_pb2
from noteflow.infrastructure.summarization._parsing import build_template_prompt
@pytest.mark.parametrize(
"tone,expected_fragment",
[
("professional", "formal, business-appropriate"),
("casual", "conversational, approachable"),
("technical", "precise technical terminology"),
("friendly", "warm, personable"),
],
)
def test_build_template_prompt_tone(tone: str, expected_fragment: str) -> None:
"""Template prompt includes correct tone instruction."""
options = noteflow_pb2.SummarizationOptions(tone=tone)
result = build_template_prompt(options)
assert expected_fragment in result
def test_build_template_prompt_combines_all_options() -> None:
"""Template prompt combines tone, format, and verbosity."""
options = noteflow_pb2.SummarizationOptions(
tone="professional",
format="bullet_points",
verbosity="detailed",
)
result = build_template_prompt(options)
assert "formal" in result
assert "bullet points" in result
assert "comprehensive" in result.lower() or "detailed" in result.lower()
def test_build_template_prompt_none_returns_empty() -> None:
"""No options returns empty string."""
result = build_template_prompt(None)
assert result == ""
def test_build_template_prompt_unknown_values_ignored() -> None:
"""Unknown option values are safely ignored."""
options = noteflow_pb2.SummarizationOptions(
tone="unknown_tone",
format="unknown_format",
)
result = build_template_prompt(options)
assert result == ""
```
### Integration Tests
**File**: `tests/integration/test_summarization_templates.py`
```python
@pytest.mark.integration
async def test_generate_summary_with_professional_tone(
grpc_client: NoteFlowClient,
meeting_with_segments: Meeting,
) -> None:
"""Summary generation respects professional tone setting."""
options = noteflow_pb2.SummarizationOptions(tone="professional")
summary = await grpc_client.generate_summary(
meeting_id=str(meeting_with_segments.id),
options=options,
)
# Verify summary was generated (content verification is model-dependent)
assert summary.executive_summary
assert summary.key_points
```
### Frontend Tests
**File**: `client/src/api/tauri-adapter.test.ts`
```typescript
describe('generateSummary', () => {
it('should pass template options from preferences', async () => {
// Mock preferences with AI template
mockPreferences.ai_template = {
tone: 'professional',
format: 'bullet_points',
verbosity: 'detailed',
};
await adapter.generateSummary('meeting-123');
expect(invoke).toHaveBeenCalledWith(
Commands.GENERATE_SUMMARY,
expect.objectContaining({
options: {
tone: 'professional',
format: 'bullet_points',
verbosity: 'detailed',
},
})
);
});
});
```
---
## Rollback Plan
If issues arise:
1. **Proto rollback**: Remove `options` field (clients ignore unknown fields)
2. **Backend**: `build_template_prompt()` returns empty string if options invalid (illustrated below)
3. **Frontend**: Gracefully handle missing options in existing summaries
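As a sketch of why items 1 and 2 are backward compatible, assuming the style prefix is simply prepended to the existing system prompt (per Task 2, an empty prefix leaves the prompt unchanged):
```python
from noteflow.infrastructure.summarization._parsing import (
    SYSTEM_PROMPT,
    build_template_prompt,
)

# No options (or a rolled-back proto field) yields an empty style prefix,
# so the effective prompt is identical to the pre-feature prompt.
style = build_template_prompt(None)
assert style == ""
effective_prompt = f"{style} {SYSTEM_PROMPT}".strip() if style else SYSTEM_PROMPT
assert effective_prompt == SYSTEM_PROMPT
```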
---
## Frontend/Backend Sync Protocol
### Architecture Decision: Per-Request Transmission
Preferences are stored **only on the frontend** (local storage) and transmitted **per-request** via gRPC. The backend is stateless regarding user preferences.
```
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ Local Storage │────────▶│ Tauri Command │────────▶│ gRPC Request │
│ (preferences) │ read │ (summary.rs) │ proto │ (options field)│
└─────────────────┘ └─────────────────┘ └─────────────────┘
```
### Why Per-Request (Not Persisted on Backend)
| Approach | Pros | Cons |
|----------|------|------|
| **Per-request (chosen)** | No sync conflicts; works offline; privacy-preserving | Slightly larger request payloads |
| Backend-persisted | Single source of truth | Sync complexity; requires user accounts; offline failures |
### Failure Handling
| Scenario | Behavior |
|----------|----------|
| Backend unreachable | Summary generation fails (as expected); preferences remain in local storage |
| Invalid preference value | Backend ignores unknown values; uses default behavior |
| Missing preferences | `options` field omitted; backend uses default prompts |
| Corrupted local storage | `getPreferences()` returns defaults; user re-configures in Settings |
### Implementation Notes
1. **No caching on backend**: Each `GenerateSummary` call reads `options` fresh from the request
2. **No version conflicts**: Frontend preferences are authoritative; no bidirectional sync
3. **Offline-first**: Preferences are always available locally; only summary generation requires connectivity
4. **Migration path**: If backend persistence is needed later, add `UserPreferences` table and sync endpoint
### TypeScript Preference Loading
```typescript
// client/src/api/tauri-adapter.ts
private async getPreferences(): Promise<UserPreferences> {
try {
const stored = localStorage.getItem('noteflow_preferences');
return stored ? JSON.parse(stored) : DEFAULT_PREFERENCES;
} catch {
// Corrupted storage: reset to defaults
localStorage.removeItem('noteflow_preferences');
return DEFAULT_PREFERENCES;
}
}
```
---
## Dependencies
- None (standalone feature)
## Blocks
- None
## Post-Sprint
- Update CLAUDE.md with new proto message
- Consider adding template presets ("Meeting Notes", "Executive Brief")

File diff suppressed because it is too large.

View File

@@ -0,0 +1,778 @@
# Sprint 3: PDF Export
> **Priority**: 3 | **Owner**: Backend | **Complexity**: Low-Medium
---
## Objective
Add PDF export capability to complement existing Markdown and HTML exports. Users expect to export transcripts as PDF for sharing and archival.
---
## Current State Analysis
### What Exists
| Component | Location | Status |
|-----------|----------|--------|
| Export Protocol | `src/noteflow/infrastructure/export/protocols.py` | `TranscriptExporter` interface |
| Markdown Exporter | `src/noteflow/infrastructure/export/markdown.py` | Working |
| HTML Exporter | `src/noteflow/infrastructure/export/html.py` | Working |
| Formatting Utils | `src/noteflow/infrastructure/export/_formatting.py` | `format_timestamp()`, `format_datetime()` |
| gRPC Mixin | `src/noteflow/grpc/_mixins/export.py` | `ExportTranscript` RPC |
| Proto Enum | `noteflow.proto:420` | `ExportFormat` (MARKDOWN, HTML only) |
### Gap
No PDF exporter exists. The `ExportFormat` proto enum lacks `EXPORT_FORMAT_PDF`.
---
## Target/Affected Code
### Files to Create
| File | Purpose | Lines Est. |
|------|---------|------------|
| `src/noteflow/infrastructure/export/pdf.py` | PDF exporter class | ~100 |
| `tests/infrastructure/export/test_pdf.py` | Unit tests | ~80 |
### Files to Modify
| File | Change Type | Lines Est. |
|------|-------------|------------|
| `src/noteflow/grpc/proto/noteflow.proto` | Add PDF enum value | +1 |
| `src/noteflow/infrastructure/export/__init__.py` | Export `PdfExporter` | +2 |
| `src/noteflow/grpc/_mixins/export.py` | Handle PDF format | +15 |
| `pyproject.toml` | Add weasyprint dependency | +1 |
| `client/src-tauri/src/commands/export.rs` | Handle PDF format | +5 |
| `client/src/pages/MeetingDetail.tsx` | Add PDF button | +5 |
---
## Implementation Tasks
### Task 1: Add Dependency
**File**: `pyproject.toml`
Add to dependencies:
```toml
dependencies = [
# ... existing ...
"weasyprint>=62.0",
]
```
**Note**: weasyprint requires system dependencies (cairo, pango, gdk-pixbuf). Document in README.
System packages (Ubuntu/Debian):
```bash
apt-get install libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0
```
System packages (macOS):
```bash
brew install pango cairo gdk-pixbuf
```
---
### Task 2: Proto Update
**File**: `src/noteflow/grpc/proto/noteflow.proto`
Modify `ExportFormat` enum (around line 420):
```protobuf
enum ExportFormat {
EXPORT_FORMAT_UNSPECIFIED = 0;
EXPORT_FORMAT_MARKDOWN = 1;
EXPORT_FORMAT_HTML = 2;
EXPORT_FORMAT_PDF = 3; // NEW
}
```
Regenerate stubs after change.
---
### Task 3: Create PDF Exporter
**File**: `src/noteflow/infrastructure/export/pdf.py`
```python
"""PDF transcript exporter using weasyprint."""
from __future__ import annotations
from typing import TYPE_CHECKING
from weasyprint import HTML
from noteflow.infrastructure.export._formatting import (
format_datetime,
format_timestamp,
)
from noteflow.infrastructure.export.protocols import TranscriptExporter
if TYPE_CHECKING:
from noteflow.domain.entities.meeting import Meeting
# PDF-optimized CSS
_PDF_CSS = """
@page {
size: A4;
margin: 2cm;
}
body {
font-family: 'Helvetica Neue', Arial, sans-serif;
font-size: 11pt;
line-height: 1.6;
color: #333;
}
h1 {
color: #1a1a1a;
border-bottom: 2px solid #333;
padding-bottom: 8px;
margin-bottom: 16px;
}
h2 {
color: #444;
margin-top: 24px;
margin-bottom: 12px;
}
.metadata {
color: #666;
font-size: 10pt;
margin-bottom: 20px;
padding-bottom: 10px;
border-bottom: 1px solid #ddd;
}
.summary {
background-color: #f8f9fa;
padding: 16px;
border-radius: 4px;
margin-bottom: 24px;
page-break-inside: avoid;
}
.summary h2 {
color: #2563eb;
margin-top: 0;
}
.key-points {
margin: 12px 0;
}
.key-points li {
margin-bottom: 8px;
}
.action-item {
background-color: #fef3c7;
padding: 8px 12px;
margin: 8px 0;
border-left: 3px solid #f59e0b;
page-break-inside: avoid;
}
.segment {
margin: 12px 0;
padding: 8px 0;
border-bottom: 1px solid #eee;
page-break-inside: avoid;
}
.speaker {
font-weight: bold;
color: #2563eb;
}
.timestamp {
color: #888;
font-size: 9pt;
margin-left: 8px;
}
.text {
margin-top: 4px;
}
"""
class PdfExporter(TranscriptExporter):
"""Export transcripts to PDF format."""
def export(self, meeting: Meeting) -> bytes:
"""Export meeting transcript to PDF bytes.
Args:
meeting: Meeting entity with segments and optional summary.
Returns:
PDF document as bytes.
"""
html_content = self._build_html(meeting)
pdf_bytes: bytes = HTML(string=html_content).write_pdf()
return pdf_bytes
def _build_html(self, meeting: Meeting) -> str:
"""Build HTML content for PDF rendering."""
title = meeting.title or f"Meeting {meeting.id}"
date = format_datetime(meeting.created_at) if meeting.created_at else "Unknown"
duration = (
format_timestamp(meeting.duration_seconds)
if meeting.duration_seconds
else "Unknown"
)
# Build segments HTML
segments_html = self._build_segments_html(meeting)
# Build summary HTML
summary_html = self._build_summary_html(meeting) if meeting.summary else ""
return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{self._escape(title)}</title>
<style>{_PDF_CSS}</style>
</head>
<body>
<h1>{self._escape(title)}</h1>
<div class="metadata">
<strong>Date:</strong> {date} |
<strong>Duration:</strong> {duration} |
<strong>Segments:</strong> {len(meeting.segments)}
</div>
{summary_html}
<h2>Transcript</h2>
{segments_html}
</body>
</html>"""
def _build_segments_html(self, meeting: Meeting) -> str:
"""Build HTML for transcript segments."""
parts: list[str] = []
for segment in meeting.segments:
speaker = self._escape(segment.speaker_id or "Unknown")
timestamp = format_timestamp(segment.start_time)
text = self._escape(segment.text)
parts.append(f"""
<div class="segment">
<span class="speaker">{speaker}</span>
<span class="timestamp">[{timestamp}]</span>
<div class="text">{text}</div>
</div>""")
return "\n".join(parts)
def _build_summary_html(self, meeting: Meeting) -> str:
"""Build HTML for meeting summary."""
summary = meeting.summary
if not summary:
return ""
# Executive summary
exec_summary = self._escape(summary.executive_summary)
# Key points
key_points_html = ""
if summary.key_points:
items = "\n".join(
f"<li>{self._escape(kp.text)}</li>"
for kp in summary.key_points
)
key_points_html = f"""
<h3>Key Points</h3>
<ul class="key-points">
{items}
</ul>"""
# Action items
action_items_html = ""
if summary.action_items:
items = "\n".join(
f'<div class="action-item">{self._escape(ai.text)}</div>'
for ai in summary.action_items
)
action_items_html = f"""
<h3>Action Items</h3>
{items}"""
return f"""
<div class="summary">
<h2>Summary</h2>
<p>{exec_summary}</p>
{key_points_html}
{action_items_html}
</div>"""
@staticmethod
def _escape(text: str) -> str:
"""Escape HTML special characters."""
return (
text.replace("&", "&amp;")
.replace("<", "&lt;")
.replace(">", "&gt;")
.replace('"', "&quot;")
.replace("'", "&#39;")
)
```
---
### Task 4: Register Exporter
**File**: `src/noteflow/infrastructure/export/__init__.py`
```python
"""Export infrastructure module."""
from noteflow.infrastructure.export.html import HtmlExporter
from noteflow.infrastructure.export.markdown import MarkdownExporter
from noteflow.infrastructure.export.pdf import PdfExporter
from noteflow.infrastructure.export.protocols import TranscriptExporter
__all__ = [
"HtmlExporter",
"MarkdownExporter",
"PdfExporter",
"TranscriptExporter",
]
```
---
### Task 5: Update gRPC Mixin
**File**: `src/noteflow/grpc/_mixins/export.py`
Modify `ExportTranscript` to handle PDF:
```python
from noteflow.infrastructure.export import (
HtmlExporter,
MarkdownExporter,
PdfExporter,
)
from noteflow.grpc.proto import noteflow_pb2
# Exporter registry
_EXPORTERS = {
noteflow_pb2.EXPORT_FORMAT_MARKDOWN: (MarkdownExporter, "markdown", ".md"),
noteflow_pb2.EXPORT_FORMAT_HTML: (HtmlExporter, "html", ".html"),
noteflow_pb2.EXPORT_FORMAT_PDF: (PdfExporter, "pdf", ".pdf"),
}
class ExportMixin:
"""Mixin for export RPC methods."""
async def ExportTranscript(
self: ServicerHost,
request: noteflow_pb2.ExportTranscriptRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.ExportTranscriptResponse:
"""Export meeting transcript to specified format."""
meeting_id = self._parse_meeting_id(request.meeting_id)
# Get exporter
exporter_info = _EXPORTERS.get(request.format)
if not exporter_info:
context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
context.set_details(f"Unsupported format: {request.format}")
return noteflow_pb2.ExportTranscriptResponse()
exporter_class, format_name, extension = exporter_info
# Fetch meeting
async with self._create_repository_provider() as provider:
meeting = await provider.meetings.get(meeting_id)
if not meeting:
context.set_code(grpc.StatusCode.NOT_FOUND)
context.set_details(f"Meeting {meeting_id} not found")
return noteflow_pb2.ExportTranscriptResponse()
# Export
exporter = exporter_class()
result = exporter.export(meeting)
# Handle bytes vs string
if isinstance(result, bytes):
# PDF returns bytes - base64 encode for transport
import base64
content = base64.b64encode(result).decode("ascii")
else:
content = result
return noteflow_pb2.ExportTranscriptResponse(
content=content,
format_name=format_name,
file_extension=extension,
)
```
**Note**: For PDF, content is base64-encoded. Frontend must decode.
---
### Task 6: Frontend Updates
**File**: `client/src-tauri/src/commands/export.rs`
```rust
#[tauri::command]
pub async fn export_transcript(
meeting_id: String,
format: String,
state: State<'_, AppState>,
) -> Result<ExportResult, String> {
let proto_format = match format.as_str() {
"markdown" => proto::ExportFormat::Markdown,
"html" => proto::ExportFormat::Html,
"pdf" => proto::ExportFormat::Pdf, // NEW
_ => return Err(format!("Invalid format: {}", format)),
};
// ... gRPC call ...
// For PDF, the content arrives base64-encoded (see backend note); decode it
// to raw bytes at the point where the file is written. Other formats pass
// through unchanged, so no branching is needed here.
let content = response.content;
Ok(ExportResult {
content,
format_name: response.format_name,
file_extension: response.file_extension,
})
}
```
**File**: `client/src/pages/MeetingDetail.tsx`
Add PDF export button alongside existing exports:
```tsx
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button variant="outline">
<Download className="w-4 h-4 mr-2" />
Export
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuItem onClick={() => handleExport('markdown')}>
Markdown (.md)
</DropdownMenuItem>
<DropdownMenuItem onClick={() => handleExport('html')}>
HTML (.html)
</DropdownMenuItem>
<DropdownMenuItem onClick={() => handleExport('pdf')}>
PDF (.pdf)
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
```
---
## Code Segments to Reuse
### Existing Formatting Utilities
**Location**: `src/noteflow/infrastructure/export/_formatting.py`
```python
def format_timestamp(seconds: float) -> str:
"""Format seconds as MM:SS or HH:MM:SS."""
def format_datetime(dt: datetime) -> str:
"""Format datetime for display."""
```
### Existing HTML Exporter Pattern
**Location**: `src/noteflow/infrastructure/export/html.py`
Follow the same structure:
- `export()` method returning string
- `_build_*` helper methods
- CSS embedded in output
### Existing Exporter Protocol
**Location**: `src/noteflow/infrastructure/export/protocols.py`
```python
class TranscriptExporter(Protocol):
"""Protocol for transcript exporters."""
def export(self, meeting: Meeting) -> str:
"""Export meeting to string format."""
...
```
**Note**: PDF returns `bytes`, not `str`. Either:
1. Update protocol to `str | bytes` (sketched below)
2. Create separate `BinaryExporter` protocol
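A sketch of option 1, widening the existing protocol's return type (option 2 would instead add a parallel `BinaryExporter` protocol whose `export()` returns `bytes`):
```python
from __future__ import annotations

from typing import TYPE_CHECKING, Protocol

if TYPE_CHECKING:
    from noteflow.domain.entities.meeting import Meeting


class TranscriptExporter(Protocol):
    """Protocol for transcript exporters."""

    def export(self, meeting: Meeting) -> str | bytes:
        """Export meeting to text (Markdown/HTML) or binary (PDF) content."""
        ...
```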
---
## Acceptance Criteria
### Functional
- [ ] Export dropdown includes PDF option
- [ ] Clicking PDF export downloads valid PDF file
- [ ] PDF contains title, date, duration, segment count
- [ ] PDF contains all transcript segments with speakers/timestamps
- [ ] PDF contains summary (if present) with key points and action items
- [ ] PDF renders cleanly on A4 paper
### Technical
- [ ] PDF generation uses weasyprint (not reportlab)
- [ ] Content properly HTML-escaped to prevent injection
- [ ] Base64 encoding/decoding works correctly
- [ ] Error handling for missing weasyprint (see the import-guard sketch below)
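One possible shape for that guard (a sketch, not the current implementation) is a module-level import check in `pdf.py`:
```python
# Hypothetical import guard: surface a clear error when weasyprint or its
# system libraries are missing, instead of an opaque ImportError at call time.
try:
    from weasyprint import HTML
except ImportError as exc:
    raise RuntimeError(
        "PDF export requires weasyprint and its system libraries "
        "(pango, cairo, gdk-pixbuf); see the README install notes."
    ) from exc
```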
### Quality Gates
- [ ] `pytest tests/quality/` passes
- [ ] Module size < 200 lines
- [ ] All functions documented
- [ ] No hardcoded strings (use constants)
---
## Test Plan
### Unit Tests
**File**: `tests/infrastructure/export/test_pdf.py`
```python
import pytest
from datetime import datetime, UTC
from uuid import uuid4
from noteflow.domain.entities.meeting import Meeting, MeetingId, MeetingState
from noteflow.domain.entities.segment import Segment
from noteflow.domain.entities.summary import Summary, KeyPoint, ActionItem
from noteflow.infrastructure.export.pdf import PdfExporter
@pytest.fixture
def meeting_with_segments() -> Meeting:
"""Create meeting with segments for testing."""
return Meeting(
id=MeetingId(uuid4()),
title="Test Meeting",
state=MeetingState.COMPLETED,
created_at=datetime.now(UTC),
duration_seconds=3600.0,
segments=[
Segment(
segment_id=1,
text="Hello, welcome to the meeting.",
start_time=0.0,
end_time=5.0,
speaker_id="Alice",
),
Segment(
segment_id=2,
text="Thank you for joining.",
start_time=5.0,
end_time=10.0,
speaker_id="Bob",
),
],
)
@pytest.fixture
def meeting_with_summary(meeting_with_segments: Meeting) -> Meeting:
"""Add summary to meeting."""
meeting_with_segments.summary = Summary(
meeting_id=meeting_with_segments.id,
executive_summary="This was a productive meeting.",
key_points=[
KeyPoint(text="Discussed project timeline", segment_ids=[1]),
],
action_items=[
ActionItem(text="Follow up with client", assignee="Alice", segment_ids=[2]),
],
generated_at=datetime.now(UTC),
)
return meeting_with_segments
def test_export_returns_bytes(meeting_with_segments: Meeting) -> None:
"""PDF export returns bytes."""
exporter = PdfExporter()
result = exporter.export(meeting_with_segments)
assert isinstance(result, bytes)
assert len(result) > 0
def test_export_is_valid_pdf(meeting_with_segments: Meeting) -> None:
"""PDF export produces valid PDF file."""
exporter = PdfExporter()
result = exporter.export(meeting_with_segments)
# PDF files start with %PDF-
assert result.startswith(b"%PDF-")
def test_export_includes_title(meeting_with_segments: Meeting) -> None:
"""PDF contains meeting title."""
exporter = PdfExporter()
# Check HTML content (before PDF conversion)
html = exporter._build_html(meeting_with_segments)
assert "Test Meeting" in html
def test_export_includes_segments(meeting_with_segments: Meeting) -> None:
"""PDF contains all segments."""
exporter = PdfExporter()
html = exporter._build_html(meeting_with_segments)
assert "Hello, welcome" in html
assert "Thank you for joining" in html
assert "Alice" in html
assert "Bob" in html
def test_export_includes_summary(meeting_with_summary: Meeting) -> None:
"""PDF contains summary when present."""
exporter = PdfExporter()
html = exporter._build_html(meeting_with_summary)
assert "productive meeting" in html
assert "project timeline" in html
assert "Follow up with client" in html
def test_export_escapes_html_characters(meeting_with_segments: Meeting) -> None:
"""PDF properly escapes HTML special characters."""
meeting_with_segments.segments[0].text = "<script>alert('xss')</script>"
exporter = PdfExporter()
html = exporter._build_html(meeting_with_segments)
assert "<script>" not in html
assert "&lt;script&gt;" in html
def test_export_handles_empty_meeting() -> None:
"""PDF export handles meeting with no segments."""
meeting = Meeting(
id=MeetingId(uuid4()),
title="Empty Meeting",
state=MeetingState.COMPLETED,
segments=[],
)
exporter = PdfExporter()
result = exporter.export(meeting)
assert isinstance(result, bytes)
assert result.startswith(b"%PDF-")
```
### Integration Tests
**File**: `tests/integration/test_export_pdf.py`
```python
@pytest.mark.integration
async def test_export_pdf_via_grpc(
grpc_client: NoteFlowClient,
meeting_with_segments: Meeting,
) -> None:
"""Export PDF via gRPC."""
import base64
response = await grpc_client.export_transcript(
meeting_id=str(meeting_with_segments.id),
format=ExportFormat.PDF,
)
assert response.format_name == "pdf"
assert response.file_extension == ".pdf"
# Decode base64 content
pdf_bytes = base64.b64decode(response.content)
assert pdf_bytes.startswith(b"%PDF-")
```
---
## Dependencies
- **weasyprint**: PDF generation library
- **System packages**: cairo, pango (documented in README)
## Blocks
- None (can proceed independently)
## Performance Note: Base64 Encoding Overhead
The current implementation base64-encodes PDF bytes for gRPC transport, which inflates payload size by ~33%.
**Alternative for large PDFs** (future enhancement):
For transcripts > 1MB, consider streaming the PDF file directly:
```python
# Alternative: Stream PDF to temp file, return file path
async def ExportTranscriptToFile(
self: ServicerHost,
request: noteflow_pb2.ExportTranscriptRequest,
context: grpc.aio.ServicerContext,
) -> noteflow_pb2.ExportTranscriptFileResponse:
"""Export to file and return path (for large exports)."""
# ... generate PDF ...
temp_path = Path(tempfile.gettempdir()) / f"export_{meeting_id}.pdf"
temp_path.write_bytes(pdf_bytes)
return noteflow_pb2.ExportTranscriptFileResponse(
file_path=str(temp_path),
format_name="pdf",
)
```
This avoids base64 overhead for large files while maintaining the current API for typical exports.
---
## Post-Sprint
- Add PDF settings (page size, margins)
- Consider async PDF generation for large transcripts
- Add print-optimized CSS media query

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

View File

@@ -1,93 +0,0 @@
# Uncommitted Changes Review (2025-12-21)
## Scope
- Reviewed uncommitted changes in Tauri playback/audio, annotations UI/commands, and preferences UI/commands.
## Resolution Status (Session 3 - 2025-12-21)
| # | Issue | Status |
|---|-------|--------|
| 1 | Playback position tracking stops after pause/resume | ✅ FIXED |
| 2 | Highlight state sticks on gaps/after seek | ✅ Already correct |
| 3 | Hard-coded 16k sample rate | ✅ Already correct |
| 4 | Sample rate validation | ✅ Already correct |
| 5 | Selecting meeting doesn't stop playback | ✅ Already correct |
| 6 | Audio device IDs unstable | ✅ FIXED |
| 7 | Preferences in-memory only | ✅ FIXED |
| 8 | Annotation UI wired to stubs | ✅ FIXED |
**Fixes Applied:**
- `playback.rs`: Position tracker now respawns on resume, accumulates samples in state
- `devices.rs`: Device IDs now use stable hash of device name
- `preferences.rs`: Preferences persist to JSON file on disk
- `preferences.rs`: API keys stored securely in system keychain
- `grpc/client.rs`: Annotation methods now make actual gRPC calls via `NoteFlowServiceClient`
---
## Findings & Recommendations
### 1) Playback position tracking stops after pause/resume (High) ✅ FIXED
Observation: `spawn_position_tracker` exits when `playing_flag` flips false, but `resume_playback` never restarts it, so position/highlight updates stop after the first pause. `pause` flips the flag to false. Evidence: `client/src-tauri/src/commands/playback.rs:146`, `client/src-tauri/src/commands/playback.rs:166`, `client/src-tauri/src/audio/playback.rs:73`.
Example: user pauses at 00:30, resumes, audio plays but playback position and highlights stop updating.
Recommendation: keep a single tracker thread alive and gate it on `playback_state`, or re-spawn the tracker inside `resume_playback` when resuming. Also consider syncing position from the playback sink instead of only time math. Evidence: `client/src-tauri/src/commands/playback.rs:146`.
### 2) Highlight state can stick on gaps or after seek (Medium) ✅ Already correct
Observation: `seek` emits `HIGHLIGHT_CHANGE` only when a segment is found, and the tracker only emits when entering a segment, never clearing on gaps. Evidence: `client/src-tauri/src/commands/playback.rs:83`, `client/src-tauri/src/commands/playback.rs:86`, `client/src-tauri/src/commands/playback.rs:183`.
Example: seek into silence or between segments and the previous segment remains highlighted indefinitely.
Recommendation: emit `HIGHLIGHT_CHANGE` with `null` when `find_segment_at_position` returns `None`, and clear when leaving a segment in the tracker loop. Evidence: `client/src-tauri/src/commands/playback.rs:83`.
### 3) Hard-coded 16k sample rate ignores actual file sample rate (High) ✅ Already correct
Observation: `load_audio_file` reads the sample rate, but `select_meeting` ignores it and playback uses `DEFAULT_SAMPLE_RATE` for both audio and position tracking. Evidence: `client/src-tauri/src/audio/loader.rs:40`, `client/src-tauri/src/commands/meeting.rs:147`, `client/src-tauri/src/commands/playback.rs:123`, `client/src-tauri/src/commands/playback.rs:160`.
Example: a 48kHz recording will play at ~3x speed and the UI highlight will drift from the audio.
Recommendation: store `sample_rate` in `AppState` when loading audio, pass it into `AudioPlayback::play_buffer`, and use it for the tracker loop. Fallback to 16k only when the value is missing. Evidence: `client/src-tauri/src/commands/meeting.rs:147`.
### 4) Missing validation for `sample_rate` can infinite-loop or divide by zero (Medium) ✅ Already correct
Observation: `samples_to_chunks` computes `chunk_samples` from `sample_rate` and loops until offset advances; if `sample_rate` is 0 (or extremely small), `chunk_samples` becomes 0 and the loop never progresses. `play_buffer` also divides by `sample_rate`. Evidence: `client/src-tauri/src/audio/loader.rs:88`, `client/src-tauri/src/audio/loader.rs:92`, `client/src-tauri/src/audio/playback.rs:61`.
Example: a corrupted audio file with `sample_rate = 0` will hang the loader or produce invalid duration math.
Recommendation: validate `sample_rate > 0` and `chunk_samples >= 1` in `load_audio_file`, returning a clear error for invalid files. Guard divisions in playback accordingly. Evidence: `client/src-tauri/src/audio/loader.rs:40`.
### 5) Selecting a meeting doesn't stop active playback; stale position when audio missing (Medium) ✅ Already correct
Observation: `select_meeting` only flips `playback_state` to `Stopped`; it never calls `AudioPlayback::stop` or clears the playback handle. When no audio is found, it clears duration but doesn't reset `playback_position`. Evidence: `client/src-tauri/src/commands/meeting.rs:90`, `client/src-tauri/src/commands/meeting.rs:128`.
Example: switching meetings mid-playback can continue the old audio; selecting a meeting with no audio leaves the previous playback position in the UI.
Recommendation: reuse the stop logic (or shared helper) to stop playback and clear highlight/position when changing meetings; explicitly reset `playback_position` in the no-audio path. Evidence: `client/src-tauri/src/commands/playback.rs:53`.
### 6) Audio device IDs are unstable across runs (Medium) ✅ FIXED
Observation: device IDs are assigned from enumeration order, and `get_default_input_device` always returns `id = 0`. Preferences store that id and later match by id. Evidence: `client/src-tauri/src/audio/devices.rs:16`, `client/src-tauri/src/audio/devices.rs:57`, `client/src-tauri/src/commands/audio.rs:22`, `client/src-tauri/src/commands/audio.rs:42`.
Example: unplugging/replugging devices changes enumeration order; the stored id may point to a different mic next launch.
Recommendation: persist a stable identifier (device name + host, or a hashed name), and resolve by that; handle duplicate names gracefully. Evidence: `client/src-tauri/src/audio/devices.rs:21`.
### 7) Preferences are in-memory only and include sensitive fields (Low/Medium) ✅ FIXED
Observation: preferences are stored in an in-memory `HashMap`; there's no persistence or secure storage, even for API keys. The UI stores and loads these values. Evidence: `client/src-tauri/src/state/app_state.rs:262`, `client/src-tauri/src/commands/preferences.rs:120`, `client/src/components/settings/SettingsPanel.tsx:167`.
Example: restarting the app loses `serverUrl`, `dataDirectory`, and `cloudApiKey`; API keys are kept in plain memory and re-exposed to the UI.
Recommendation: persist preferences to disk (config file) and store secrets in the OS keychain/credential vault; avoid returning stored secrets to the UI unless explicitly requested. Evidence: `client/src-tauri/src/commands/preferences.rs:151`.
### 8) Annotation UI wired to stubbed gRPC methods (Medium) ✅ FIXED
Observation: the new UI calls annotation add/delete, but Rust gRPC client methods were TODO/NotImplemented or returned empty lists.
**Fix Applied:** Replaced all 5 annotation stub methods in `grpc/client.rs` with actual gRPC calls:
- Added `tonic_client()` helper to create `NoteFlowServiceClient` from existing `Channel`
- Added `annotation_from_proto()` converter for proto → local type mapping
- Added `impl From<i32> for AnnotationType` in `types.rs`
- `add_annotation`, `get_annotation`, `list_annotations`, `update_annotation`, `delete_annotation` now make real server calls
- Removed dead `AnnotationInfo::new()` constructor (no longer needed)
## Suggested Tests
- Playback pause/resume keeps position/highlight updates flowing (unit/integration around playback events).
- Playback speed/duration is correct for a 48kHz `.nfaudio` fixture.
- `select_meeting` stops audio and resets position when switching meetings or when audio is missing.
- Device selection resolves the intended microphone across restarts.

docs/ui.md (9032 changed lines)

File diff suppressed because it is too large.

View File

@@ -12,7 +12,7 @@
"files": true,
"removeComments": true,
"removeEmptyLines": true,
"compress": false,
"compress": true,
"topFilesLength": 5,
"showLineNumbers": false,
"truncateBase64": false,
@@ -26,7 +26,7 @@
"includeLogsCount": 50
}
},
"include": ["src/"],
"include": ["src/", "client/"],
"ignore": {
"useGitignore": true,
"useDefaultPatterns": true,

View File

@@ -324,7 +324,8 @@ class MeetingService:
executive_summary: str,
key_points: list[KeyPoint] | None = None,
action_items: list[ActionItem] | None = None,
- model_version: str = "",
+ provider_name: str = "",
+ model_name: str = "",
) -> Summary:
"""Save or update a meeting summary.
@@ -333,7 +334,8 @@ class MeetingService:
executive_summary: Executive summary text.
key_points: List of key points.
action_items: List of action items.
- model_version: Model version that generated the summary.
+ provider_name: Name of the provider that generated the summary.
+ model_name: Name of the model that generated the summary.
Returns:
Saved summary.
@@ -344,7 +346,8 @@ class MeetingService:
key_points=key_points or [],
action_items=action_items or [],
generated_at=datetime.now(UTC),
- model_version=model_version,
+ provider_name=provider_name,
+ model_name=model_name,
)
async with self._uow:

View File

@@ -21,6 +21,7 @@ class KeyPoint:
segment_ids: list[int] = field(default_factory=list)
start_time: float = 0.0
end_time: float = 0.0
position: int = 0 # Ordering within the summary
# Database primary key (set after persistence)
db_id: int | None = None
@@ -42,6 +43,9 @@ class ActionItem:
due_date: datetime | None = None
priority: int = 0 # 0=unspecified, 1=low, 2=medium, 3=high
segment_ids: list[int] = field(default_factory=list)
start_time: float = 0.0
end_time: float = 0.0
position: int = 0 # Ordering within the summary
# Database primary key (set after persistence)
db_id: int | None = None
@@ -72,7 +76,15 @@ class Summary:
key_points: list[KeyPoint] = field(default_factory=list)
action_items: list[ActionItem] = field(default_factory=list)
generated_at: datetime | None = None
- model_version: str = ""
+ # Provider tracking
+ provider_name: str = ""
+ model_name: str = ""
+ tokens_used: int | None = None
+ latency_ms: float | None = None
+ # Verification/citation metadata
verification: dict[str, object] = field(default_factory=dict)
# Database primary key (set after persistence)
db_id: int | None = None
@@ -108,3 +120,13 @@ class Summary:
def unevidenced_actions(self) -> list[ActionItem]:
"""Action items without transcript evidence."""
return [ai for ai in self.action_items if not ai.has_evidence()]
@property
def model_version(self) -> str:
"""Backward-compatible model version string.
Computes from provider_name and model_name for API compatibility.
"""
if self.provider_name and self.model_name:
return f"{self.provider_name}/{self.model_name}"
return self.model_name or self.provider_name

View File

@@ -114,5 +114,6 @@ class SummarizationMixin:
executive_summary=executive,
key_points=[],
action_items=[],
model_version="placeholder-v0",
provider_name="placeholder",
model_name="v0",
)

View File

@@ -60,7 +60,9 @@ class OrmConverter:
)
@staticmethod
- def word_timing_to_orm_kwargs(word: DomainWordTiming) -> dict[str, str | float]:
+ def word_timing_to_orm_kwargs(
+     word: DomainWordTiming, word_index: int
+ ) -> dict[str, str | float | int]:
"""Convert domain WordTiming to ORM model kwargs.
Return a dict of kwargs rather than instantiating WordTimingModel directly
@@ -68,12 +70,14 @@ class OrmConverter:
Args:
word: Domain WordTiming entity.
word_index: Position of word in the segment.
Returns:
- Dict with word, start_time, end_time, probability for ORM construction.
+ Dict with word, word_index, start_time, end_time, probability for ORM construction.
"""
return {
"word": word.word,
"word_index": word_index,
"start_time": word.start_time,
"end_time": word.end_time,
"probability": word.probability,
@@ -180,6 +184,7 @@ class OrmConverter:
segment_ids=model.segment_ids,
start_time=model.start_time,
end_time=model.end_time,
position=model.position,
db_id=model.id,
)
@@ -199,6 +204,9 @@ class OrmConverter:
due_date=model.due_date,
priority=model.priority,
segment_ids=model.segment_ids,
start_time=model.start_time,
end_time=model.end_time,
position=model.position,
db_id=model.id,
)
@@ -219,6 +227,10 @@ class OrmConverter:
key_points=[OrmConverter.key_point_to_domain(kp) for kp in model.key_points],
action_items=[OrmConverter.action_item_to_domain(ai) for ai in model.action_items],
generated_at=model.generated_at,
- model_version=model.model_version or "",
+ provider_name=model.provider_name,
+ model_name=model.model_name,
+ tokens_used=model.tokens_used,
+ latency_ms=model.latency_ms,
+ verification=dict(model.verification),
db_id=model.id,
)

View File

@@ -1,396 +0,0 @@
"""SQLAlchemy ORM models for NoteFlow."""
from __future__ import annotations
from datetime import datetime
from typing import ClassVar
from uuid import uuid4
from pgvector.sqlalchemy import Vector
from sqlalchemy import (
DateTime,
Float,
ForeignKey,
Integer,
LargeBinary,
String,
Text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
# Vector dimension for embeddings (OpenAI compatible)
EMBEDDING_DIM = 1536
class Base(DeclarativeBase):
"""Base class for all ORM models."""
pass
class MeetingModel(Base):
"""SQLAlchemy model for meetings table."""
__tablename__ = "meetings"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
title: Mapped[str] = mapped_column(String(255), nullable=False)
state: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
started_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
ended_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
metadata_: Mapped[dict[str, str]] = mapped_column(
"metadata",
JSONB,
nullable=False,
default=dict,
)
wrapped_dek: Mapped[bytes | None] = mapped_column(
LargeBinary,
nullable=True,
)
asset_path: Mapped[str | None] = mapped_column(
Text,
nullable=True,
)
# Relationships
segments: Mapped[list[SegmentModel]] = relationship(
"SegmentModel",
back_populates="meeting",
cascade="all, delete-orphan",
lazy="selectin",
)
summary: Mapped[SummaryModel | None] = relationship(
"SummaryModel",
back_populates="meeting",
cascade="all, delete-orphan",
uselist=False,
lazy="selectin",
)
annotations: Mapped[list[AnnotationModel]] = relationship(
"AnnotationModel",
back_populates="meeting",
cascade="all, delete-orphan",
lazy="selectin",
)
class SegmentModel(Base):
"""SQLAlchemy model for segments table."""
__tablename__ = "segments"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
meeting_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
)
segment_id: Mapped[int] = mapped_column(Integer, nullable=False)
text: Mapped[str] = mapped_column(Text, nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
language_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
avg_logprob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
no_speech_prob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
embedding: Mapped[list[float] | None] = mapped_column(
Vector(EMBEDDING_DIM),
nullable=True,
)
speaker_id: Mapped[str | None] = mapped_column(String(50), nullable=True)
speaker_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="segments",
)
words: Mapped[list[WordTimingModel]] = relationship(
"WordTimingModel",
back_populates="segment",
cascade="all, delete-orphan",
lazy="selectin",
)
class WordTimingModel(Base):
"""SQLAlchemy model for word_timings table."""
__tablename__ = "word_timings"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
segment_pk: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.segments.id", ondelete="CASCADE"),
nullable=False,
)
word: Mapped[str] = mapped_column(String(255), nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
probability: Mapped[float] = mapped_column(Float, nullable=False)
# Relationships
segment: Mapped[SegmentModel] = relationship(
"SegmentModel",
back_populates="words",
)
class SummaryModel(Base):
"""SQLAlchemy model for summaries table."""
__tablename__ = "summaries"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
meeting_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
unique=True,
)
executive_summary: Mapped[str | None] = mapped_column(Text, nullable=True)
generated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
model_version: Mapped[str | None] = mapped_column(String(50), nullable=True)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="summary",
)
key_points: Mapped[list[KeyPointModel]] = relationship(
"KeyPointModel",
back_populates="summary",
cascade="all, delete-orphan",
lazy="selectin",
)
action_items: Mapped[list[ActionItemModel]] = relationship(
"ActionItemModel",
back_populates="summary",
cascade="all, delete-orphan",
lazy="selectin",
)
class KeyPointModel(Base):
"""SQLAlchemy model for key_points table."""
__tablename__ = "key_points"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
summary_id: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
)
text: Mapped[str] = mapped_column(Text, nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
# Relationships
summary: Mapped[SummaryModel] = relationship(
"SummaryModel",
back_populates="key_points",
)
class ActionItemModel(Base):
"""SQLAlchemy model for action_items table."""
__tablename__ = "action_items"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
summary_id: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
)
text: Mapped[str] = mapped_column(Text, nullable=False)
assignee: Mapped[str] = mapped_column(String(255), nullable=False, default="")
due_date: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
# Relationships
summary: Mapped[SummaryModel] = relationship(
"SummaryModel",
back_populates="action_items",
)
class AnnotationModel(Base):
"""SQLAlchemy model for annotations table.
User-created annotations during recording. Distinct from LLM-extracted
ActionItem/KeyPoint which belong to Summary. Annotations belong directly
to Meeting and are created in real-time.
"""
__tablename__ = "annotations"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
annotation_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
nullable=False,
unique=True,
default=uuid4,
)
meeting_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
)
annotation_type: Mapped[str] = mapped_column(String(50), nullable=False)
text: Mapped[str] = mapped_column(Text, nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="annotations",
)
class UserPreferencesModel(Base):
"""SQLAlchemy model for user_preferences table.
Stores key-value user preferences for persistence across server restarts.
Currently used for cloud consent and other settings.
"""
__tablename__ = "user_preferences"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
key: Mapped[str] = mapped_column(String(64), unique=True, index=True, nullable=False)
value: Mapped[dict[str, object]] = mapped_column(JSONB, nullable=False, default=dict)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
class DiarizationJobModel(Base):
"""SQLAlchemy model for diarization_jobs table.
Tracks background speaker diarization jobs. Persisting job state
allows recovery after server restart and provides client visibility.
"""
__tablename__ = "diarization_jobs"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[str] = mapped_column(String(36), primary_key=True)
meeting_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
status: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
segments_updated: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
speaker_ids: Mapped[list[str]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
error_message: Mapped[str] = mapped_column(Text, nullable=False, default="")
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
class StreamingDiarizationTurnModel(Base):
"""SQLAlchemy model for streaming_diarization_turns table.
Stores speaker turns from real-time streaming diarization for crash
resilience. These turns are persisted as they arrive and can be reloaded
if the server restarts during a recording session.
"""
__tablename__ = "streaming_diarization_turns"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
meeting_id: Mapped[UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
speaker: Mapped[str] = mapped_column(String(50), nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)

View File

@@ -0,0 +1,100 @@
"""SQLAlchemy ORM models for NoteFlow.
All models are re-exported here for backward compatibility with existing imports.
Models are organized into subdomain packages:
- core/: Meeting, segments, summaries, annotations, diarization
- identity/: Workspaces, users, settings
- entities/: Persons, speakers (knowledge graph entities)
- organization/: Tags, tasks
- integrations/: External service integrations, calendar
"""
from noteflow.infrastructure.persistence.models._base import (
DEFAULT_USER_ID,
DEFAULT_WORKSPACE_ID,
EMBEDDING_DIM,
Base,
)
# Core domain models
from noteflow.infrastructure.persistence.models.core import (
ActionItemModel,
AnnotationModel,
DiarizationJobModel,
KeyPointModel,
MeetingModel,
SegmentModel,
StreamingDiarizationTurnModel,
SummaryModel,
WordTimingModel,
)
# Entity models (knowledge graph)
from noteflow.infrastructure.persistence.models.entities import (
MeetingSpeakerModel,
PersonModel,
)
# Identity and tenancy models
from noteflow.infrastructure.persistence.models.identity import (
SettingsModel,
UserModel,
UserPreferencesModel,
WorkspaceMembershipModel,
WorkspaceModel,
)
# Integration models
from noteflow.infrastructure.persistence.models.integrations import (
CalendarEventModel,
ExternalRefModel,
IntegrationModel,
IntegrationSecretModel,
IntegrationSyncRunModel,
MeetingCalendarLinkModel,
)
# Organization models
from noteflow.infrastructure.persistence.models.organization import (
MeetingTagModel,
TagModel,
TaskModel,
)
__all__ = [
"DEFAULT_USER_ID",
"DEFAULT_WORKSPACE_ID",
"EMBEDDING_DIM",
# Core domain
"ActionItemModel",
"AnnotationModel",
# Base and constants
"Base",
# Integrations
"CalendarEventModel",
"DiarizationJobModel",
"ExternalRefModel",
"IntegrationModel",
"IntegrationSecretModel",
"IntegrationSyncRunModel",
"KeyPointModel",
"MeetingCalendarLinkModel",
"MeetingModel",
# Entities
"MeetingSpeakerModel",
# Organization
"MeetingTagModel",
"PersonModel",
"SegmentModel",
# Identity
"SettingsModel",
"StreamingDiarizationTurnModel",
"SummaryModel",
"TagModel",
"TaskModel",
"UserModel",
"UserPreferencesModel",
"WordTimingModel",
"WorkspaceMembershipModel",
"WorkspaceModel",
]

View File

@@ -0,0 +1,18 @@
"""Base class and shared constants for SQLAlchemy ORM models."""
from __future__ import annotations
from sqlalchemy.orm import DeclarativeBase
# Vector dimension for embeddings (OpenAI compatible)
EMBEDDING_DIM = 1536
# Default workspace/user UUID for single-user mode
DEFAULT_WORKSPACE_ID = "00000000-0000-0000-0000-000000000001"
DEFAULT_USER_ID = "00000000-0000-0000-0000-000000000001"
class Base(DeclarativeBase):
"""Base class for all ORM models."""
pass

View File

@@ -0,0 +1,29 @@
"""Core meeting domain models."""
from noteflow.infrastructure.persistence.models.core.annotation import AnnotationModel
from noteflow.infrastructure.persistence.models.core.diarization import (
DiarizationJobModel,
StreamingDiarizationTurnModel,
)
from noteflow.infrastructure.persistence.models.core.meeting import (
MeetingModel,
SegmentModel,
WordTimingModel,
)
from noteflow.infrastructure.persistence.models.core.summary import (
ActionItemModel,
KeyPointModel,
SummaryModel,
)
__all__ = [
"ActionItemModel",
"AnnotationModel",
"DiarizationJobModel",
"KeyPointModel",
"MeetingModel",
"SegmentModel",
"StreamingDiarizationTurnModel",
"SummaryModel",
"WordTimingModel",
]

View File

@@ -0,0 +1,64 @@
"""User annotation models."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4
from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
class AnnotationModel(Base):
"""Represent a user-created annotation during recording.
Distinct from LLM-extracted ActionItem/KeyPoint which belong to Summary.
Annotations belong directly to Meeting and are created in real-time.
"""
__tablename__ = "annotations"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
annotation_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
nullable=False,
unique=True,
default=uuid4,
)
meeting_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
annotation_type: Mapped[str] = mapped_column(String(50), nullable=False)
text: Mapped[str] = mapped_column(Text, nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="annotations",
)

View File

@@ -0,0 +1,96 @@
"""Speaker diarization models."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
class DiarizationJobModel(Base):
"""Track background speaker diarization jobs.
Persisting job state allows recovery after server restart and provides
client visibility into job progress.
"""
__tablename__ = "diarization_jobs"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[str] = mapped_column(String(36), primary_key=True)
meeting_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
status: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
segments_updated: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
speaker_ids: Mapped[list[str]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
error_message: Mapped[str] = mapped_column(Text, nullable=False, default="")
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="diarization_jobs",
)
class StreamingDiarizationTurnModel(Base):
"""Store speaker turns from real-time streaming diarization.
These turns are persisted as they arrive for crash resilience
and can be reloaded if the server restarts during a recording session.
"""
__tablename__ = "streaming_diarization_turns"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
meeting_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
speaker: Mapped[str] = mapped_column(String(50), nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="streaming_turns",
)

View File

@@ -0,0 +1,244 @@
"""Core meeting domain models."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4
from pgvector.sqlalchemy import Vector
from sqlalchemy import (
DateTime,
Float,
ForeignKey,
Integer,
LargeBinary,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import DEFAULT_USER_ID, DEFAULT_WORKSPACE_ID, EMBEDDING_DIM, Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.annotation import AnnotationModel
from noteflow.infrastructure.persistence.models.core.diarization import (
DiarizationJobModel,
StreamingDiarizationTurnModel,
)
from noteflow.infrastructure.persistence.models.core.summary import SummaryModel
from noteflow.infrastructure.persistence.models.entities.speaker import (
MeetingSpeakerModel,
)
from noteflow.infrastructure.persistence.models.identity.identity import (
UserModel,
WorkspaceModel,
)
from noteflow.infrastructure.persistence.models.integrations.integration import (
MeetingCalendarLinkModel,
)
from noteflow.infrastructure.persistence.models.organization.tagging import (
MeetingTagModel,
)
from noteflow.infrastructure.persistence.models.organization.task import TaskModel
class MeetingModel(Base):
"""Represent a meeting recording session."""
__tablename__ = "meetings"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
# Forward-looking tenancy fields with safe defaults for current single-user mode
workspace_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.workspaces.id", ondelete="RESTRICT"),
nullable=False,
default=lambda: PyUUID(DEFAULT_WORKSPACE_ID),
)
created_by_id: Mapped[PyUUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.users.id", ondelete="SET NULL"),
nullable=True,
default=lambda: PyUUID(DEFAULT_USER_ID),
)
title: Mapped[str] = mapped_column(String(255), nullable=False)
state: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
started_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
ended_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
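# SQLAlchemy's declarative base reserves the "metadata" attribute, so the Python attribute is metadata_ while the column itself keeps the name "metadata".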
metadata_: Mapped[dict[str, object]] = mapped_column(
"metadata",
JSONB,
nullable=False,
default=dict,
)
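# Wrapped per-meeting data-encryption key (envelope encryption is assumed); stored as opaque bytes and presumably unwrapped by a crypto layer outside persistence.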
wrapped_dek: Mapped[bytes | None] = mapped_column(
LargeBinary,
nullable=True,
)
asset_path: Mapped[str | None] = mapped_column(
Text,
nullable=True,
)
# Soft delete support
deleted_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
# Relationships
workspace: Mapped[WorkspaceModel] = relationship(
"WorkspaceModel",
back_populates="meetings",
)
created_by: Mapped[UserModel | None] = relationship(
"UserModel",
back_populates="created_meetings",
foreign_keys=[created_by_id],
)
segments: Mapped[list[SegmentModel]] = relationship(
"SegmentModel",
back_populates="meeting",
cascade="all, delete-orphan",
lazy="selectin",
)
summary: Mapped[SummaryModel | None] = relationship(
"SummaryModel",
back_populates="meeting",
cascade="all, delete-orphan",
uselist=False,
lazy="selectin",
)
annotations: Mapped[list[AnnotationModel]] = relationship(
"AnnotationModel",
back_populates="meeting",
cascade="all, delete-orphan",
lazy="selectin",
)
diarization_jobs: Mapped[list[DiarizationJobModel]] = relationship(
"DiarizationJobModel",
back_populates="meeting",
cascade="all, delete-orphan",
)
streaming_turns: Mapped[list[StreamingDiarizationTurnModel]] = relationship(
"StreamingDiarizationTurnModel",
back_populates="meeting",
cascade="all, delete-orphan",
)
speakers: Mapped[list[MeetingSpeakerModel]] = relationship(
"MeetingSpeakerModel",
back_populates="meeting",
cascade="all, delete-orphan",
)
meeting_tags: Mapped[list[MeetingTagModel]] = relationship(
"MeetingTagModel",
back_populates="meeting",
cascade="all, delete-orphan",
)
tasks: Mapped[list[TaskModel]] = relationship(
"TaskModel",
back_populates="meeting",
)
calendar_links: Mapped[list[MeetingCalendarLinkModel]] = relationship(
"MeetingCalendarLinkModel",
back_populates="meeting",
cascade="all, delete-orphan",
)
class SegmentModel(Base):
"""Represent a transcript segment within a meeting."""
__tablename__ = "segments"
__table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
UniqueConstraint("meeting_id", "segment_id", name="segments_unique_per_meeting"),
{"schema": "noteflow"},
)
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
meeting_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
)
segment_id: Mapped[int] = mapped_column(Integer, nullable=False)
text: Mapped[str] = mapped_column(Text, nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
language_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
avg_logprob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
no_speech_prob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
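# Optional pgvector embedding of the segment text; the dimension is fixed by EMBEDDING_DIM in _base.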
embedding: Mapped[list[float] | None] = mapped_column(
Vector(EMBEDDING_DIM),
nullable=True,
)
speaker_id: Mapped[str | None] = mapped_column(String(50), nullable=True)
speaker_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="segments",
)
words: Mapped[list[WordTimingModel]] = relationship(
"WordTimingModel",
back_populates="segment",
cascade="all, delete-orphan",
lazy="selectin",
)
class WordTimingModel(Base):
"""Represent word-level timing within a segment."""
__tablename__ = "word_timings"
__table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
UniqueConstraint("segment_pk", "word_index", name="word_timings_unique_per_segment"),
{"schema": "noteflow"},
)
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
segment_pk: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.segments.id", ondelete="CASCADE"),
nullable=False,
)
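# Zero-based position of the word within its segment (the repository assigns it via enumerate()); together with the unique constraint above it lets word order be reconstructed reliably.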
word_index: Mapped[int] = mapped_column(Integer, nullable=False)
word: Mapped[str] = mapped_column(String(255), nullable=False)
start_time: Mapped[float] = mapped_column(Float, nullable=False)
end_time: Mapped[float] = mapped_column(Float, nullable=False)
probability: Mapped[float] = mapped_column(Float, nullable=False)
# Relationships
segment: Mapped[SegmentModel] = relationship(
"SegmentModel",
back_populates="words",
)

View File

@@ -0,0 +1,143 @@
"""Summary and intelligence output models."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from sqlalchemy import DateTime, Float, ForeignKey, Integer, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
from noteflow.infrastructure.persistence.models.organization.task import TaskModel
class SummaryModel(Base):
"""Represent an LLM-generated meeting summary."""
__tablename__ = "summaries"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
meeting_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
unique=True,
)
executive_summary: Mapped[str] = mapped_column(Text, nullable=False, default="")
generated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Provider tracking
provider_name: Mapped[str] = mapped_column(Text, nullable=False, default="")
model_name: Mapped[str] = mapped_column(Text, nullable=False, default="")
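# Usage metrics are optional; presumably not every provider reports token counts or latency.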
tokens_used: Mapped[int | None] = mapped_column(Integer, nullable=True)
latency_ms: Mapped[float | None] = mapped_column(Float, nullable=True)
# Verification/citation data
verification: Mapped[dict[str, object]] = mapped_column(
JSONB,
nullable=False,
default=dict,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="summary",
)
key_points: Mapped[list[KeyPointModel]] = relationship(
"KeyPointModel",
back_populates="summary",
cascade="all, delete-orphan",
lazy="selectin",
)
action_items: Mapped[list[ActionItemModel]] = relationship(
"ActionItemModel",
back_populates="summary",
cascade="all, delete-orphan",
lazy="selectin",
)
class KeyPointModel(Base):
"""Represent an extracted key point from a summary."""
__tablename__ = "key_points"
__table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
UniqueConstraint("summary_id", "position", name="key_points_unique_position"),
{"schema": "noteflow"},
)
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
summary_id: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
)
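# Ordinal position of this key point within its summary; the repository assigns it with enumerate() so ordering survives a round trip.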
position: Mapped[int] = mapped_column(Integer, nullable=False)
text: Mapped[str] = mapped_column(Text, nullable=False)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
# Relationships
summary: Mapped[SummaryModel] = relationship(
"SummaryModel",
back_populates="key_points",
)
class ActionItemModel(Base):
"""Represent an extracted action item from a summary."""
__tablename__ = "action_items"
__table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
UniqueConstraint("summary_id", "position", name="action_items_unique_position"),
{"schema": "noteflow"},
)
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
summary_id: Mapped[int] = mapped_column(
Integer,
ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
)
position: Mapped[int] = mapped_column(Integer, nullable=False)
text: Mapped[str] = mapped_column(Text, nullable=False)
segment_ids: Mapped[list[int]] = mapped_column(
JSONB,
nullable=False,
default=list,
)
start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
assignee: Mapped[str] = mapped_column(Text, nullable=False, default="")
due_date: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
# Relationships
summary: Mapped[SummaryModel] = relationship(
"SummaryModel",
back_populates="action_items",
)
tasks: Mapped[list[TaskModel]] = relationship(
"TaskModel",
back_populates="action_item",
)

View File

@@ -0,0 +1,11 @@
"""Entity models for knowledge graph (persons, speakers, future: orgs, topics)."""
from noteflow.infrastructure.persistence.models.entities.speaker import (
MeetingSpeakerModel,
PersonModel,
)
__all__ = [
"MeetingSpeakerModel",
"PersonModel",
]

View File

@@ -0,0 +1,115 @@
"""Speaker identity models."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4
from sqlalchemy import DateTime, ForeignKey, String, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
from noteflow.infrastructure.persistence.models.identity.identity import (
WorkspaceModel,
)
from noteflow.infrastructure.persistence.models.organization.task import TaskModel
class PersonModel(Base):
"""Represent a known person (speaker identity) in a workspace.
Enables cross-meeting speaker recognition once voice embeddings are added.
"""
__tablename__ = "persons"
__table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
UniqueConstraint("workspace_id", "email", name="persons_unique_email_per_workspace"),
{"schema": "noteflow"},
)
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
workspace_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
nullable=False,
)
display_name: Mapped[str] = mapped_column(Text, nullable=False)
email: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
metadata_: Mapped[dict[str, object]] = mapped_column(
"metadata",
JSONB,
nullable=False,
default=dict,
)
# Relationships
workspace: Mapped[WorkspaceModel] = relationship(
"WorkspaceModel",
back_populates="persons",
)
meeting_speakers: Mapped[list[MeetingSpeakerModel]] = relationship(
"MeetingSpeakerModel",
back_populates="person",
)
assigned_tasks: Mapped[list[TaskModel]] = relationship(
"TaskModel",
back_populates="assignee_person",
)
class MeetingSpeakerModel(Base):
"""Map speaker labels to display names and persons within a meeting."""
__tablename__ = "meeting_speakers"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
meeting_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
primary_key=True,
)
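# Raw diarization speaker label scoped to the meeting; display_name and person_id attach a human identity on top.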
speaker_id: Mapped[str] = mapped_column(String(50), primary_key=True)
display_name: Mapped[str | None] = mapped_column(Text, nullable=True)
person_id: Mapped[PyUUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.persons.id", ondelete="SET NULL"),
nullable=True,
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="speakers",
)
person: Mapped[PersonModel | None] = relationship(
"PersonModel",
back_populates="meeting_speakers",
)

View File

@@ -0,0 +1,19 @@
"""Identity and tenancy models."""
from noteflow.infrastructure.persistence.models.identity.identity import (
UserModel,
WorkspaceMembershipModel,
WorkspaceModel,
)
from noteflow.infrastructure.persistence.models.identity.settings import (
SettingsModel,
UserPreferencesModel,
)
__all__ = [
"SettingsModel",
"UserModel",
"UserPreferencesModel",
"WorkspaceMembershipModel",
"WorkspaceModel",
]

View File

@@ -0,0 +1,150 @@
"""Identity and tenancy models for multi-user support."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from sqlalchemy import DateTime, ForeignKey, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
from noteflow.infrastructure.persistence.models.entities.speaker import PersonModel
from noteflow.infrastructure.persistence.models.organization.tagging import TagModel
from noteflow.infrastructure.persistence.models.organization.task import TaskModel
class WorkspaceModel(Base):
"""Represent a workspace for multi-tenant support."""
__tablename__ = "workspaces"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[PyUUID] = mapped_column(UUID(as_uuid=True), primary_key=True)
slug: Mapped[str | None] = mapped_column(Text, unique=True, nullable=True)
name: Mapped[str] = mapped_column(Text, nullable=False)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
metadata_: Mapped[dict[str, object]] = mapped_column(
"metadata",
JSONB,
nullable=False,
default=dict,
)
# Relationships
memberships: Mapped[list[WorkspaceMembershipModel]] = relationship(
"WorkspaceMembershipModel",
back_populates="workspace",
cascade="all, delete-orphan",
)
meetings: Mapped[list[MeetingModel]] = relationship(
"MeetingModel",
back_populates="workspace",
cascade="all, delete-orphan",
)
persons: Mapped[list[PersonModel]] = relationship(
"PersonModel",
back_populates="workspace",
cascade="all, delete-orphan",
)
tags: Mapped[list[TagModel]] = relationship(
"TagModel",
back_populates="workspace",
cascade="all, delete-orphan",
)
tasks: Mapped[list[TaskModel]] = relationship(
"TaskModel",
back_populates="workspace",
cascade="all, delete-orphan",
)
class UserModel(Base):
"""Represent a user account."""
__tablename__ = "users"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
id: Mapped[PyUUID] = mapped_column(UUID(as_uuid=True), primary_key=True)
email: Mapped[str | None] = mapped_column(Text, unique=True, nullable=True)
display_name: Mapped[str] = mapped_column(Text, nullable=False)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
metadata_: Mapped[dict[str, object]] = mapped_column(
"metadata",
JSONB,
nullable=False,
default=dict,
)
# Relationships
memberships: Mapped[list[WorkspaceMembershipModel]] = relationship(
"WorkspaceMembershipModel",
back_populates="user",
cascade="all, delete-orphan",
)
created_meetings: Mapped[list[MeetingModel]] = relationship(
"MeetingModel",
back_populates="created_by",
foreign_keys="MeetingModel.created_by_id",
)
class WorkspaceMembershipModel(Base):
"""Represent workspace membership with role."""
__tablename__ = "workspace_memberships"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
workspace_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
primary_key=True,
)
user_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.users.id", ondelete="CASCADE"),
primary_key=True,
)
role: Mapped[str] = mapped_column(String(50), nullable=False, default="owner")
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
workspace: Mapped[WorkspaceModel] = relationship(
"WorkspaceModel",
back_populates="memberships",
)
user: Mapped[UserModel] = relationship(
"UserModel",
back_populates="memberships",
)

View File

@@ -0,0 +1,101 @@
"""Settings and preferences models."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4
from sqlalchemy import CheckConstraint, DateTime, ForeignKey, String, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.identity.identity import (
UserModel,
WorkspaceModel,
)
class SettingsModel(Base):
"""Represent scoped settings (system, workspace, or user level)."""
__tablename__ = "settings"
__table_args__: ClassVar[tuple[UniqueConstraint, CheckConstraint, dict[str, str]]] = (
UniqueConstraint(
"scope",
"workspace_id",
"user_id",
"key",
name="settings_unique_scope_key",
),
CheckConstraint(
"scope IN ('system', 'workspace', 'user')",
name="settings_scope_chk",
),
{"schema": "noteflow"},
)
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
scope: Mapped[str] = mapped_column(Text, nullable=False)
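# Which of workspace_id / user_id is populated presumably follows the scope value; the check constraint above only restricts scope itself.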
workspace_id: Mapped[PyUUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
nullable=True,
)
user_id: Mapped[PyUUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.users.id", ondelete="CASCADE"),
nullable=True,
)
key: Mapped[str] = mapped_column(Text, nullable=False)
value: Mapped[dict[str, object]] = mapped_column(
JSONB,
nullable=False,
default=dict,
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
# Relationships
workspace: Mapped[WorkspaceModel | None] = relationship("WorkspaceModel")
user: Mapped[UserModel | None] = relationship("UserModel")
class UserPreferencesModel(Base):
"""Store key-value user preferences for persistence across server restarts.
Simple KV store compatible with the current codebase pattern.
Currently used for cloud consent and other settings.
"""
__tablename__ = "user_preferences"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
# Using key as primary key (matching schema.sql design for KV store simplicity)
key: Mapped[str] = mapped_column(String(64), primary_key=True)
value: Mapped[dict[str, object]] = mapped_column(JSONB, nullable=False, default=dict)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
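# Illustrative sketch, not part of this commit: the KV shape keeps a preference
# write to a single PostgreSQL upsert. The helper name is hypothetical; an
# AsyncSession and a JSON-serializable value are assumed.
async def _example_set_preference(session, key: str, value: dict) -> None:
    from sqlalchemy.dialects.postgresql import insert  # local import for the sketch
    stmt = insert(UserPreferencesModel).values(key=key, value=value)
    stmt = stmt.on_conflict_do_update(index_elements=["key"], set_={"value": value})
    await session.execute(stmt)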

View File

@@ -0,0 +1,19 @@
"""External integration models (calendar, auth, PKM, etc.)."""
from noteflow.infrastructure.persistence.models.integrations.integration import (
CalendarEventModel,
ExternalRefModel,
IntegrationModel,
IntegrationSecretModel,
IntegrationSyncRunModel,
MeetingCalendarLinkModel,
)
__all__ = [
"CalendarEventModel",
"ExternalRefModel",
"IntegrationModel",
"IntegrationSecretModel",
"IntegrationSyncRunModel",
"MeetingCalendarLinkModel",
]

View File

@@ -0,0 +1,323 @@
"""Integration and calendar models."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4
from sqlalchemy import (
CheckConstraint,
DateTime,
ForeignKey,
Integer,
LargeBinary,
Text,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
from noteflow.infrastructure.persistence.models.identity.identity import (
WorkspaceModel,
)
class IntegrationModel(Base):
"""Represent an external service integration."""
__tablename__ = "integrations"
__table_args__: ClassVar[tuple[CheckConstraint, CheckConstraint, dict[str, str]]] = (
CheckConstraint(
"type IN ('auth', 'email', 'calendar', 'pkm', 'custom')",
name="integrations_type_chk",
),
CheckConstraint(
"status IN ('disconnected', 'connected', 'error')",
name="integrations_status_chk",
),
{"schema": "noteflow"},
)
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
workspace_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
nullable=False,
)
name: Mapped[str] = mapped_column(Text, nullable=False)
type: Mapped[str] = mapped_column(Text, nullable=False)
status: Mapped[str] = mapped_column(Text, nullable=False, default="disconnected")
config: Mapped[dict[str, object]] = mapped_column(
JSONB,
nullable=False,
default=dict,
)
last_sync: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
# Relationships
workspace: Mapped[WorkspaceModel] = relationship("WorkspaceModel")
secrets: Mapped[list[IntegrationSecretModel]] = relationship(
"IntegrationSecretModel",
back_populates="integration",
cascade="all, delete-orphan",
)
sync_runs: Mapped[list[IntegrationSyncRunModel]] = relationship(
"IntegrationSyncRunModel",
back_populates="integration",
cascade="all, delete-orphan",
)
calendar_events: Mapped[list[CalendarEventModel]] = relationship(
"CalendarEventModel",
back_populates="integration",
cascade="all, delete-orphan",
)
external_refs: Mapped[list[ExternalRefModel]] = relationship(
"ExternalRefModel",
back_populates="integration",
cascade="all, delete-orphan",
)
class IntegrationSecretModel(Base):
"""Store encrypted secrets for an integration."""
__tablename__ = "integration_secrets"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
integration_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.integrations.id", ondelete="CASCADE"),
primary_key=True,
)
secret_key: Mapped[str] = mapped_column(Text, primary_key=True)
secret_value: Mapped[bytes] = mapped_column(LargeBinary, nullable=False)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
# Relationships
integration: Mapped[IntegrationModel] = relationship(
"IntegrationModel",
back_populates="secrets",
)
class IntegrationSyncRunModel(Base):
"""Track sync operation history for an integration."""
__tablename__ = "integration_sync_runs"
__table_args__: ClassVar[tuple[CheckConstraint, dict[str, str]]] = (
CheckConstraint(
"status IN ('running', 'success', 'error')",
name="integration_sync_runs_status_chk",
),
{"schema": "noteflow"},
)
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
integration_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.integrations.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
status: Mapped[str] = mapped_column(Text, nullable=False)
started_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
ended_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
stats: Mapped[dict[str, object]] = mapped_column(
JSONB,
nullable=False,
default=dict,
)
# Relationships
integration: Mapped[IntegrationModel] = relationship(
"IntegrationModel",
back_populates="sync_runs",
)
class CalendarEventModel(Base):
"""Cache calendar event data from an integration."""
__tablename__ = "calendar_events"
__table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
UniqueConstraint(
"integration_id",
"external_id",
name="calendar_events_unique_external_id",
),
{"schema": "noteflow"},
)
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
integration_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.integrations.id", ondelete="CASCADE"),
nullable=False,
)
external_id: Mapped[str] = mapped_column(Text, nullable=False)
calendar_id: Mapped[str] = mapped_column(Text, nullable=False)
calendar_name: Mapped[str] = mapped_column(Text, nullable=False)
title: Mapped[str] = mapped_column(Text, nullable=False)
description: Mapped[str | None] = mapped_column(Text, nullable=True)
start_time: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
)
end_time: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
)
location: Mapped[str | None] = mapped_column(Text, nullable=True)
attendees: Mapped[list[str] | None] = mapped_column(ARRAY(Text), nullable=True)
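# Column type and NOT NULL are inferred from the Mapped[bool] annotation (SQLAlchemy 2.0 typed mappings), so no explicit type is needed here.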
is_all_day: Mapped[bool] = mapped_column(default=False)
meeting_link: Mapped[str | None] = mapped_column(Text, nullable=True)
raw: Mapped[dict[str, object]] = mapped_column(
JSONB,
nullable=False,
default=dict,
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
# Relationships
integration: Mapped[IntegrationModel] = relationship(
"IntegrationModel",
back_populates="calendar_events",
)
meeting_links: Mapped[list[MeetingCalendarLinkModel]] = relationship(
"MeetingCalendarLinkModel",
back_populates="calendar_event",
cascade="all, delete-orphan",
)
class MeetingCalendarLinkModel(Base):
"""Junction table linking meetings to calendar events."""
__tablename__ = "meeting_calendar_links"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
meeting_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
primary_key=True,
)
calendar_event_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.calendar_events.id", ondelete="CASCADE"),
primary_key=True,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="calendar_links",
)
calendar_event: Mapped[CalendarEventModel] = relationship(
"CalendarEventModel",
back_populates="meeting_links",
)
class ExternalRefModel(Base):
"""Track references to external entities (generic ID mapping)."""
__tablename__ = "external_refs"
__table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
UniqueConstraint(
"integration_id",
"entity_type",
"entity_id",
name="external_refs_unique_entity",
),
{"schema": "noteflow"},
)
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
integration_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.integrations.id", ondelete="CASCADE"),
nullable=False,
)
entity_type: Mapped[str] = mapped_column(Text, nullable=False)
entity_id: Mapped[str] = mapped_column(Text, nullable=False)
external_id: Mapped[str] = mapped_column(Text, nullable=False)
external_url: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
integration: Mapped[IntegrationModel] = relationship(
"IntegrationModel",
back_populates="external_refs",
)

View File

@@ -0,0 +1,13 @@
"""Organization and workflow models (tagging, tasks)."""
from noteflow.infrastructure.persistence.models.organization.tagging import (
MeetingTagModel,
TagModel,
)
from noteflow.infrastructure.persistence.models.organization.task import TaskModel
__all__ = [
"MeetingTagModel",
"TagModel",
"TaskModel",
]

View File

@@ -0,0 +1,89 @@
"""Tagging models for meetings."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4
from sqlalchemy import DateTime, ForeignKey, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
from noteflow.infrastructure.persistence.models.identity.identity import (
WorkspaceModel,
)
class TagModel(Base):
"""Represent a tag that can be applied to meetings."""
__tablename__ = "tags"
__table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
UniqueConstraint("workspace_id", "name", name="tags_unique_name_per_workspace"),
{"schema": "noteflow"},
)
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
workspace_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
nullable=False,
)
name: Mapped[str] = mapped_column(Text, nullable=False)
color: Mapped[str] = mapped_column(Text, nullable=False, default="#888888")
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
# Relationships
workspace: Mapped[WorkspaceModel] = relationship(
"WorkspaceModel",
back_populates="tags",
)
meeting_tags: Mapped[list[MeetingTagModel]] = relationship(
"MeetingTagModel",
back_populates="tag",
cascade="all, delete-orphan",
)
class MeetingTagModel(Base):
"""Junction table linking meetings to tags."""
__tablename__ = "meeting_tags"
__table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}
meeting_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
primary_key=True,
)
tag_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.tags.id", ondelete="CASCADE"),
primary_key=True,
)
# Relationships
meeting: Mapped[MeetingModel] = relationship(
"MeetingModel",
back_populates="meeting_tags",
)
tag: Mapped[TagModel] = relationship(
"TagModel",
back_populates="meeting_tags",
)

View File

@@ -0,0 +1,110 @@
"""Task management models."""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4
from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Integer, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from noteflow.domain.utils.time import utc_now
from .._base import Base
if TYPE_CHECKING:
from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
from noteflow.infrastructure.persistence.models.core.summary import ActionItemModel
from noteflow.infrastructure.persistence.models.entities.speaker import PersonModel
from noteflow.infrastructure.persistence.models.identity.identity import (
WorkspaceModel,
)
class TaskModel(Base):
"""Represent a user-managed task, optionally derived from an action item."""
__tablename__ = "tasks"
__table_args__: ClassVar[tuple[CheckConstraint, dict[str, str]]] = (
CheckConstraint(
"status IN ('open', 'done', 'dismissed')",
name="tasks_status_chk",
),
{"schema": "noteflow"},
)
id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
primary_key=True,
default=uuid4,
)
workspace_id: Mapped[PyUUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
meeting_id: Mapped[PyUUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.meetings.id", ondelete="SET NULL"),
nullable=True,
)
action_item_id: Mapped[int | None] = mapped_column(
Integer,
ForeignKey("noteflow.action_items.id", ondelete="SET NULL"),
nullable=True,
)
text: Mapped[str] = mapped_column(Text, nullable=False)
status: Mapped[str] = mapped_column(Text, nullable=False, default="open")
assignee_person_id: Mapped[PyUUID | None] = mapped_column(
UUID(as_uuid=True),
ForeignKey("noteflow.persons.id", ondelete="SET NULL"),
nullable=True,
)
due_date: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
default=utc_now,
onupdate=utc_now,
)
completed_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True),
nullable=True,
)
metadata_: Mapped[dict[str, object]] = mapped_column(
"metadata",
JSONB,
nullable=False,
default=dict,
)
# Relationships
workspace: Mapped[WorkspaceModel] = relationship(
"WorkspaceModel",
back_populates="tasks",
)
meeting: Mapped[MeetingModel | None] = relationship(
"MeetingModel",
back_populates="tasks",
)
action_item: Mapped[ActionItemModel | None] = relationship(
"ActionItemModel",
back_populates="tasks",
)
assignee_person: Mapped[PersonModel | None] = relationship(
"PersonModel",
back_populates="assigned_tasks",
)

View File

@@ -40,9 +40,9 @@ class SqlAlchemySegmentRepository(BaseRepository):
speaker_confidence=segment.speaker_confidence,
)
# Add word timings
for word in segment.words:
word_kwargs = OrmConverter.word_timing_to_orm_kwargs(word)
# Add word timings with position index
for word_index, word in enumerate(segment.words):
word_kwargs = OrmConverter.word_timing_to_orm_kwargs(word, word_index)
word_model = WordTimingModel(**word_kwargs)
model.words.append(word_model)

View File

@@ -32,9 +32,10 @@ class SqlAlchemySummaryRepository(BaseRepository):
key_points: Key points to add. Their db_id fields are updated in place.
"""
models: list[tuple[KeyPointModel, KeyPoint]] = []
for kp in key_points:
for position, kp in enumerate(key_points):
kp_model = KeyPointModel(
summary_id=summary_id,
position=position,
text=kp.text,
start_time=kp.start_time,
end_time=kp.end_time,
@@ -46,6 +47,7 @@ class SqlAlchemySummaryRepository(BaseRepository):
await self._session.flush()
for kp_model, kp in models:
kp.db_id = kp_model.id
kp.position = kp_model.position
async def _add_action_items(self, summary_id: int, action_items: Sequence[ActionItem]) -> None:
"""Add action items to a summary.
@@ -55,14 +57,17 @@ class SqlAlchemySummaryRepository(BaseRepository):
action_items: Action items to add. Their db_id fields are updated in place.
"""
models: list[tuple[ActionItemModel, ActionItem]] = []
for ai in action_items:
for position, ai in enumerate(action_items):
ai_model = ActionItemModel(
summary_id=summary_id,
position=position,
text=ai.text,
assignee=ai.assignee,
due_date=ai.due_date,
priority=ai.priority,
segment_ids=ai.segment_ids,
start_time=ai.start_time,
end_time=ai.end_time,
)
self._session.add(ai_model)
models.append((ai_model, ai))
@@ -70,6 +75,7 @@ class SqlAlchemySummaryRepository(BaseRepository):
await self._session.flush()
for ai_model, ai in models:
ai.db_id = ai_model.id
ai.position = ai_model.position
async def save(self, summary: Summary) -> Summary:
"""Save or update a meeting summary.
@@ -88,7 +94,11 @@ class SqlAlchemySummaryRepository(BaseRepository):
existing.executive_summary = summary.executive_summary
if summary.generated_at is not None:
existing.generated_at = summary.generated_at
existing.model_version = summary.model_version
existing.provider_name = summary.provider_name
existing.model_name = summary.model_name
existing.tokens_used = summary.tokens_used
existing.latency_ms = summary.latency_ms
existing.verification = summary.verification
# Delete old key points and action items
await self._session.execute(
@@ -108,7 +118,11 @@ class SqlAlchemySummaryRepository(BaseRepository):
meeting_id=UUID(str(summary.meeting_id)),
executive_summary=summary.executive_summary,
generated_at=summary.generated_at,
model_version=summary.model_version,
provider_name=summary.provider_name,
model_name=summary.model_name,
tokens_used=summary.tokens_used,
latency_ms=summary.latency_ms,
verification=summary.verification,
)
self._session.add(model)
await self._session.flush()

View File

@@ -119,6 +119,10 @@ class SegmentCitationVerifier:
key_points=filtered_key_points,
action_items=filtered_action_items,
generated_at=summary.generated_at,
model_version=summary.model_version,
provider_name=summary.provider_name,
model_name=summary.model_name,
tokens_used=summary.tokens_used,
latency_ms=summary.latency_ms,
verification=summary.verification,
db_id=summary.db_id,
)

View File

@@ -157,7 +157,8 @@ class CloudSummarizer:
key_points=[],
action_items=[],
generated_at=datetime.now(UTC),
model_version=self._model,
provider_name=self.provider_name,
model_name=self._model,
),
model_name=self._model,
provider_name=self.provider_name,
@@ -173,14 +174,15 @@ class CloudSummarizer:
content, tokens_used = await asyncio.to_thread(self._call_anthropic, user_prompt)
# Parse into Summary
summary = parse_llm_response(content, request)
parsed = parse_llm_response(content, request)
summary = Summary(
meeting_id=summary.meeting_id,
executive_summary=summary.executive_summary,
key_points=summary.key_points,
action_items=summary.action_items,
generated_at=summary.generated_at,
model_version=self._model,
meeting_id=parsed.meeting_id,
executive_summary=parsed.executive_summary,
key_points=parsed.key_points,
action_items=parsed.action_items,
generated_at=parsed.generated_at,
provider_name=self.provider_name,
model_name=self._model,
)
elapsed_ms = (time.monotonic() - start) * 1000

View File

@@ -97,7 +97,8 @@ class MockSummarizer:
key_points=key_points,
action_items=action_items,
generated_at=datetime.now(UTC),
model_version="mock-1.0",
provider_name=self.provider_name,
model_name="mock-1.0",
)
elapsed = (time.monotonic() - start) * 1000 + self._latency_ms

View File

@@ -116,7 +116,8 @@ class OllamaSummarizer:
key_points=[],
action_items=[],
generated_at=datetime.now(UTC),
model_version=self._model,
provider_name=self.provider_name,
model_name=self._model,
),
model_name=self._model,
provider_name=self.provider_name,
@@ -157,14 +158,15 @@ class OllamaSummarizer:
raise InvalidResponseError("Empty response from Ollama")
# Parse into Summary
summary = parse_llm_response(content, request)
parsed = parse_llm_response(content, request)
summary = Summary(
meeting_id=summary.meeting_id,
executive_summary=summary.executive_summary,
key_points=summary.key_points,
action_items=summary.action_items,
generated_at=summary.generated_at,
model_version=self._model,
meeting_id=parsed.meeting_id,
executive_summary=parsed.executive_summary,
key_points=parsed.key_points,
action_items=parsed.action_items,
generated_at=parsed.generated_at,
provider_name=self.provider_name,
model_name=self._model,
)
elapsed_ms = (time.monotonic() - start) * 1000

View File

@@ -333,7 +333,8 @@ class TestMeetingServiceSummaries:
meeting_id=meeting_id,
executive_summary="Test summary",
generated_at=datetime.now(UTC),
model_version="test-v1",
provider_name="test",
model_name="v1",
)
mock_uow.summaries.save = AsyncMock(return_value=summary)
@@ -341,7 +342,8 @@ class TestMeetingServiceSummaries:
result = await service.save_summary(
meeting_id=meeting_id,
executive_summary="Test summary",
model_version="test-v1",
provider_name="test",
model_name="v1",
)
assert result.executive_summary == "Test summary"

View File

@@ -36,7 +36,7 @@ async def test_generate_summary_uses_placeholder_when_service_missing() -> None:
)
assert response.executive_summary != ""
assert response.model_version == "placeholder-v0"
assert response.model_version == "placeholder/v0"
retrieved_meeting = store.get(str(meeting.id))
assert retrieved_meeting is not None, "Meeting should exist after creation"
assert retrieved_meeting.summary is not None
@@ -70,4 +70,4 @@ async def test_generate_summary_falls_back_when_provider_unavailable() -> None:
)
assert response.executive_summary != ""
assert response.model_version == "placeholder-v0"
assert response.model_version == "placeholder/v0"

View File

@@ -231,7 +231,7 @@ class TestFilterInvalidCitations:
("key_points[0].start_time", 1.0),
("action_items[0].assignee", "Alice"),
("action_items[0].priority", 2),
("model_version", "test-1.0"),
("model_version", "test/1.0"),
],
)
def test_filter_preserves_other_fields(
@@ -244,7 +244,8 @@ class TestFilterInvalidCitations:
executive_summary="Important meeting",
key_points=[KeyPoint(text="Key point", segment_ids=[0], start_time=1.0, end_time=2.0)],
action_items=[ActionItem(text="Action", segment_ids=[0], assignee="Alice", priority=2)],
model_version="test-1.0",
provider_name="test",
model_name="1.0",
)
filtered = verifier.filter_invalid_citations(summary, segments)
# Navigate the attribute path

View File

@@ -100,13 +100,14 @@ class TestOrmConverterToOrmKwargs:
probability=0.9,
)
result = OrmConverter.word_timing_to_orm_kwargs(word)
result = OrmConverter.word_timing_to_orm_kwargs(word, word_index=0)
assert result == {
"word": "test",
"start_time": 1.5,
"end_time": 2.0,
"probability": 0.9,
"word_index": 0,
}
def test_preserves_precision(self) -> None:
@@ -118,8 +119,9 @@ class TestOrmConverterToOrmKwargs:
probability=0.111111,
)
result = OrmConverter.word_timing_to_orm_kwargs(word)
result = OrmConverter.word_timing_to_orm_kwargs(word, word_index=5)
assert result["start_time"] == 0.123456789
assert result["end_time"] == 0.987654321
assert result["probability"] == 0.111111
assert result["word_index"] == 5

View File

@@ -75,7 +75,8 @@ class TestSummarizationGeneration:
action_items=[
ActionItem(text="Action 1", assignee="Alice"),
],
model_version="test-model-v1",
provider_name="test-model",
model_name="v1",
)
mock_service = MagicMock()
@@ -207,7 +208,7 @@ class TestSummarizationGeneration:
result = await servicer.GenerateSummary(request, MockContext())
assert "Segment 0" in result.executive_summary or "Segment 1" in result.executive_summary
assert result.model_version == "placeholder-v0"
assert result.model_version == "placeholder/v0"
async def test_generate_summary_placeholder_on_service_error(
self, session_factory: async_sessionmaker[AsyncSession]
@@ -242,7 +243,7 @@ class TestSummarizationGeneration:
result = await servicer.GenerateSummary(request, MockContext())
assert "Content that should appear" in result.executive_summary
assert result.model_version == "placeholder-v0"
assert result.model_version == "placeholder/v0"
@pytest.mark.integration

View File

@@ -321,7 +321,8 @@ class TestSummaryRepository:
meeting_id=meeting.id,
executive_summary="This was a productive meeting.",
generated_at=datetime.now(UTC),
model_version="test-v1",
provider_name="test",
model_name="v1",
)
await summary_repo.save(summary)
await session.commit()
@@ -330,7 +331,7 @@ class TestSummaryRepository:
assert result is not None
assert result.executive_summary == "This was a productive meeting."
assert result.model_version == "test-v1"
assert result.model_version == "test/v1"
async def test_save_summary_with_key_points(self, session: AsyncSession) -> None:
"""Test saving summary with key points."""