Enhance summarization model attributes and database schema
- Updated the Summary entity to include provider and model names, along with tokens used and latency metrics, for better tracking of summarization performance.
- Modified the ORM converters and repository methods to accommodate the new attributes, ensuring backward compatibility.
- Introduced word-timing position indexing to maintain word order within segments.
- Added a new SQLAlchemy model structure for improved organization of the persistence layer, including core, identity, and integration models.
- Removed deprecated models and files to streamline the codebase.
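For context, a minimal sketch of the enriched Summary entity; the field names mirror the `noteflow.summaries` columns in the schema below, while the dataclass shape itself is an assumption, not code from this commit:

```python
# Sketch of the enriched Summary entity; field names follow the
# noteflow.summaries columns below, the class shape is an assumption.
from dataclasses import dataclass
from datetime import datetime

@dataclass
class Summary:
    meeting_id: str
    executive_summary: str = ""
    generated_at: datetime | None = None
    # New in this commit: provenance plus performance metrics.
    provider_name: str = ""
    model_name: str = ""
    tokens_used: int | None = None
    latency_ms: float | None = None
```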
5
.claude/settings.json
Normal file
@@ -0,0 +1,5 @@
{
  "enabledPlugins": {
    "pyright-lsp@claude-plugins-official": true
  }
}
@@ -15,9 +15,7 @@
# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
languages:
- python
- typescript

# the encoding used by text files in the project
# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
encoding: "utf-8"

# whether to use the project's gitignore file to ignore files
599
docker/db/schema.sql
Normal file
@@ -0,0 +1,599 @@
-- noteflow_init.sql
-- Creates schema + tables + placeholder data for local dev.

-- Extensions (safe to run repeatedly)
CREATE EXTENSION IF NOT EXISTS pgcrypto;
CREATE EXTENSION IF NOT EXISTS citext;
CREATE EXTENSION IF NOT EXISTS vector;

-- Schema
CREATE SCHEMA IF NOT EXISTS noteflow;
SET search_path TO noteflow, public;

-- updated_at trigger helper
CREATE OR REPLACE FUNCTION noteflow.set_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = now();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

--------------------------------------------------------------------------------
-- Identity / tenancy (future-ready)
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.workspaces (
    id uuid PRIMARY KEY,
    slug text UNIQUE,
    name text NOT NULL,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now(),
    metadata jsonb NOT NULL DEFAULT '{}'::jsonb
);

CREATE TABLE IF NOT EXISTS noteflow.users (
    id uuid PRIMARY KEY,
    email citext UNIQUE,
    display_name text NOT NULL,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now(),
    metadata jsonb NOT NULL DEFAULT '{}'::jsonb
);

CREATE TABLE IF NOT EXISTS noteflow.workspace_memberships (
    workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
    user_id uuid NOT NULL REFERENCES noteflow.users(id) ON DELETE CASCADE,
    role text NOT NULL DEFAULT 'owner',
    created_at timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (workspace_id, user_id)
);

CREATE TRIGGER trg_workspaces_updated_at
    BEFORE UPDATE ON noteflow.workspaces
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE TRIGGER trg_users_updated_at
    BEFORE UPDATE ON noteflow.users
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

--------------------------------------------------------------------------------
-- Core domain (matches current project shape)
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.meetings (
    id uuid PRIMARY KEY,
    -- Forward-looking fields: safe defaults for current code
    workspace_id uuid NOT NULL DEFAULT '00000000-0000-0000-0000-000000000001'::uuid
        REFERENCES noteflow.workspaces(id) ON DELETE RESTRICT,
    created_by_id uuid NULL DEFAULT '00000000-0000-0000-0000-000000000001'::uuid
        REFERENCES noteflow.users(id) ON DELETE SET NULL,

    title varchar(255) NOT NULL,
    state integer NOT NULL DEFAULT 1, -- 1..5 (Created..Error)
    created_at timestamptz NOT NULL DEFAULT now(),
    started_at timestamptz NULL,
    ended_at timestamptz NULL,

    metadata jsonb NOT NULL DEFAULT '{}'::jsonb,
    wrapped_dek bytea NULL,
    asset_path text NULL,

    deleted_at timestamptz NULL
);
-- PostgreSQL does not support ADD CONSTRAINT IF NOT EXISTS; drop-then-add keeps this re-runnable.
ALTER TABLE noteflow.meetings
    DROP CONSTRAINT IF EXISTS meetings_state_chk;
ALTER TABLE noteflow.meetings
    ADD CONSTRAINT meetings_state_chk
    CHECK (state BETWEEN 1 AND 5);

CREATE INDEX IF NOT EXISTS idx_meetings_workspace_created_at
    ON noteflow.meetings(workspace_id, created_at DESC);

CREATE INDEX IF NOT EXISTS idx_meetings_state
    ON noteflow.meetings(state);

CREATE TABLE IF NOT EXISTS noteflow.segments (
    id bigserial PRIMARY KEY,
    meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    segment_id integer NOT NULL, -- stable ordering within meeting
    text text NOT NULL,
    start_time double precision NOT NULL,
    end_time double precision NOT NULL,

    language varchar(10) NOT NULL DEFAULT 'en',
    language_confidence double precision NOT NULL DEFAULT 0,
    avg_logprob double precision NOT NULL DEFAULT 0,
    no_speech_prob double precision NOT NULL DEFAULT 0,

    embedding vector(1536) NULL,

    speaker_id varchar(50) NULL,
    speaker_confidence double precision NOT NULL DEFAULT 0.0,

    created_at timestamptz NOT NULL DEFAULT now()
);
ALTER TABLE noteflow.segments
    DROP CONSTRAINT IF EXISTS segments_unique_per_meeting;
ALTER TABLE noteflow.segments
    ADD CONSTRAINT segments_unique_per_meeting
    UNIQUE (meeting_id, segment_id);

CREATE INDEX IF NOT EXISTS idx_segments_meeting_id
    ON noteflow.segments(meeting_id);

CREATE INDEX IF NOT EXISTS idx_segments_meeting_time
    ON noteflow.segments(meeting_id, start_time);

-- Vector index (ivfflat is broadly supported; you can switch to hnsw later)
CREATE INDEX IF NOT EXISTS idx_segments_embedding_ivfflat
    ON noteflow.segments USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

CREATE TABLE IF NOT EXISTS noteflow.word_timings (
    id bigserial PRIMARY KEY,
    segment_pk bigint NOT NULL REFERENCES noteflow.segments(id) ON DELETE CASCADE,
    word_index integer NOT NULL,
    word varchar(255) NOT NULL,
    start_time double precision NOT NULL,
    end_time double precision NOT NULL,
    probability double precision NOT NULL,
    UNIQUE (segment_pk, word_index)
);

CREATE INDEX IF NOT EXISTS idx_word_timings_segment_pk
    ON noteflow.word_timings(segment_pk);

CREATE TABLE IF NOT EXISTS noteflow.summaries (
    id bigserial PRIMARY KEY,
    meeting_id uuid NOT NULL UNIQUE REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    executive_summary text NOT NULL DEFAULT '',
    generated_at timestamptz NOT NULL DEFAULT now(),

    provider_name text NOT NULL DEFAULT '',
    model_name text NOT NULL DEFAULT '',
    tokens_used integer NULL,
    latency_ms double precision NULL,

    verification jsonb NOT NULL DEFAULT '{}'::jsonb
);

CREATE TABLE IF NOT EXISTS noteflow.key_points (
    id bigserial PRIMARY KEY,
    summary_id bigint NOT NULL REFERENCES noteflow.summaries(id) ON DELETE CASCADE,
    position integer NOT NULL,
    text text NOT NULL,
    segment_ids integer[] NOT NULL DEFAULT '{}'::integer[],
    start_time double precision NOT NULL DEFAULT 0,
    end_time double precision NOT NULL DEFAULT 0,
    UNIQUE (summary_id, position)
);

CREATE TABLE IF NOT EXISTS noteflow.action_items (
    id bigserial PRIMARY KEY,
    summary_id bigint NOT NULL REFERENCES noteflow.summaries(id) ON DELETE CASCADE,
    position integer NOT NULL,
    text text NOT NULL,
    segment_ids integer[] NOT NULL DEFAULT '{}'::integer[],
    start_time double precision NOT NULL DEFAULT 0,
    end_time double precision NOT NULL DEFAULT 0,

    assignee text NOT NULL DEFAULT '',
    due_date timestamptz NULL,
    priority integer NOT NULL DEFAULT 0,
    UNIQUE (summary_id, position)
);

CREATE TABLE IF NOT EXISTS noteflow.annotations (
    id bigserial PRIMARY KEY,
    annotation_id uuid NOT NULL DEFAULT gen_random_uuid(),
    meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    annotation_type varchar(50) NOT NULL,
    text text NOT NULL,
    start_time double precision NOT NULL DEFAULT 0,
    end_time double precision NOT NULL DEFAULT 0,
    segment_ids integer[] NOT NULL DEFAULT '{}'::integer[],
    created_at timestamptz NOT NULL DEFAULT now(),
    UNIQUE (annotation_id)
);

CREATE INDEX IF NOT EXISTS idx_annotations_meeting_id
    ON noteflow.annotations(meeting_id);

CREATE TABLE IF NOT EXISTS noteflow.diarization_jobs (
    id varchar(36) PRIMARY KEY,
    meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    status integer NOT NULL DEFAULT 0,
    segments_updated integer NOT NULL DEFAULT 0,
    speaker_ids text[] NOT NULL DEFAULT '{}'::text[],
    error_message text NOT NULL DEFAULT '',
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
);

CREATE TRIGGER trg_diarization_jobs_updated_at
    BEFORE UPDATE ON noteflow.diarization_jobs
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE TABLE IF NOT EXISTS noteflow.streaming_diarization_turns (
    id bigserial PRIMARY KEY,
    meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    speaker varchar(50) NOT NULL,
    start_time double precision NOT NULL,
    end_time double precision NOT NULL,
    confidence double precision NOT NULL DEFAULT 0.0,
    created_at timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_streaming_turns_meeting_time
    ON noteflow.streaming_diarization_turns(meeting_id, start_time);

-- Existing style KV preferences (compat with current repo pattern)
CREATE TABLE IF NOT EXISTS noteflow.user_preferences (
    key varchar(64) PRIMARY KEY,
    value jsonb NOT NULL DEFAULT '{}'::jsonb,
    updated_at timestamptz NOT NULL DEFAULT now()
);

--------------------------------------------------------------------------------
-- Future-facing but safe additions: people, tags, tasks, integrations, settings
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.persons (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
    display_name text NOT NULL,
    email citext NULL,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now(),
    metadata jsonb NOT NULL DEFAULT '{}'::jsonb,
    UNIQUE (workspace_id, email)
);

CREATE TRIGGER trg_persons_updated_at
    BEFORE UPDATE ON noteflow.persons
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE TABLE IF NOT EXISTS noteflow.meeting_speakers (
    meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    speaker_id varchar(50) NOT NULL,
    display_name text NULL,
    person_id uuid NULL REFERENCES noteflow.persons(id) ON DELETE SET NULL,
    created_at timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (meeting_id, speaker_id)
);

CREATE TABLE IF NOT EXISTS noteflow.tags (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
    name text NOT NULL,
    color text NOT NULL DEFAULT '#888888',
    created_at timestamptz NOT NULL DEFAULT now(),
    UNIQUE (workspace_id, name)
);

CREATE TABLE IF NOT EXISTS noteflow.meeting_tags (
    meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    tag_id uuid NOT NULL REFERENCES noteflow.tags(id) ON DELETE CASCADE,
    PRIMARY KEY (meeting_id, tag_id)
);

CREATE TABLE IF NOT EXISTS noteflow.tasks (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
    meeting_id uuid NULL REFERENCES noteflow.meetings(id) ON DELETE SET NULL,
    action_item_id bigint NULL REFERENCES noteflow.action_items(id) ON DELETE SET NULL,
    text text NOT NULL,
    status text NOT NULL DEFAULT 'open',
    assignee_person_id uuid NULL REFERENCES noteflow.persons(id) ON DELETE SET NULL,
    due_date timestamptz NULL,
    priority integer NOT NULL DEFAULT 0,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now(),
    completed_at timestamptz NULL,
    metadata jsonb NOT NULL DEFAULT '{}'::jsonb
);
ALTER TABLE noteflow.tasks
    DROP CONSTRAINT IF EXISTS tasks_status_chk;
ALTER TABLE noteflow.tasks
    ADD CONSTRAINT tasks_status_chk
    CHECK (status IN ('open','done','dismissed'));

CREATE INDEX IF NOT EXISTS idx_tasks_workspace_status
    ON noteflow.tasks(workspace_id, status);

CREATE TRIGGER trg_tasks_updated_at
    BEFORE UPDATE ON noteflow.tasks
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE TABLE IF NOT EXISTS noteflow.integrations (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
    name text NOT NULL,
    type text NOT NULL,
    status text NOT NULL DEFAULT 'disconnected',
    config jsonb NOT NULL DEFAULT '{}'::jsonb,
    last_sync timestamptz NULL,
    error_message text NULL,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
);
ALTER TABLE noteflow.integrations
    DROP CONSTRAINT IF EXISTS integrations_type_chk;
ALTER TABLE noteflow.integrations
    ADD CONSTRAINT integrations_type_chk
    CHECK (type IN ('auth','email','calendar','pkm','custom'));

ALTER TABLE noteflow.integrations
    DROP CONSTRAINT IF EXISTS integrations_status_chk;
ALTER TABLE noteflow.integrations
    ADD CONSTRAINT integrations_status_chk
    CHECK (status IN ('disconnected','connected','error'));

CREATE TRIGGER trg_integrations_updated_at
    BEFORE UPDATE ON noteflow.integrations
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE TABLE IF NOT EXISTS noteflow.integration_secrets (
    integration_id uuid NOT NULL REFERENCES noteflow.integrations(id) ON DELETE CASCADE,
    secret_key text NOT NULL,
    secret_value bytea NOT NULL,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (integration_id, secret_key)
);

CREATE TRIGGER trg_integration_secrets_updated_at
    BEFORE UPDATE ON noteflow.integration_secrets
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE TABLE IF NOT EXISTS noteflow.integration_sync_runs (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    integration_id uuid NOT NULL REFERENCES noteflow.integrations(id) ON DELETE CASCADE,
    status text NOT NULL,
    started_at timestamptz NOT NULL DEFAULT now(),
    ended_at timestamptz NULL,
    duration_ms integer NULL,
    error_message text NULL,
    stats jsonb NOT NULL DEFAULT '{}'::jsonb
);
ALTER TABLE noteflow.integration_sync_runs
    DROP CONSTRAINT IF EXISTS integration_sync_runs_status_chk;
ALTER TABLE noteflow.integration_sync_runs
    ADD CONSTRAINT integration_sync_runs_status_chk
    CHECK (status IN ('running','success','error'));

CREATE INDEX IF NOT EXISTS idx_sync_runs_integration_started
    ON noteflow.integration_sync_runs(integration_id, started_at DESC);

CREATE TABLE IF NOT EXISTS noteflow.calendar_events (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    integration_id uuid NOT NULL REFERENCES noteflow.integrations(id) ON DELETE CASCADE,
    external_id text NOT NULL,
    calendar_id text NOT NULL,
    calendar_name text NOT NULL,
    title text NOT NULL,
    description text NULL,
    start_time timestamptz NOT NULL,
    end_time timestamptz NOT NULL,
    location text NULL,
    attendees text[] NULL,
    is_all_day boolean NOT NULL DEFAULT false,
    meeting_link text NULL,
    raw jsonb NOT NULL DEFAULT '{}'::jsonb,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now(),
    UNIQUE (integration_id, external_id)
);

CREATE TRIGGER trg_calendar_events_updated_at
    BEFORE UPDATE ON noteflow.calendar_events
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE TABLE IF NOT EXISTS noteflow.meeting_calendar_links (
    meeting_id uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    calendar_event_id uuid NOT NULL REFERENCES noteflow.calendar_events(id) ON DELETE CASCADE,
    PRIMARY KEY (meeting_id, calendar_event_id)
);

CREATE TABLE IF NOT EXISTS noteflow.external_refs (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    integration_id uuid NOT NULL REFERENCES noteflow.integrations(id) ON DELETE CASCADE,
    entity_type text NOT NULL,
    entity_id text NOT NULL,
    external_id text NOT NULL,
    external_url text NULL,
    created_at timestamptz NOT NULL DEFAULT now(),
    UNIQUE (integration_id, entity_type, entity_id)
);

CREATE TABLE IF NOT EXISTS noteflow.settings (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    scope text NOT NULL, -- system | workspace | user
    workspace_id uuid NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
    user_id uuid NULL REFERENCES noteflow.users(id) ON DELETE CASCADE,
    key text NOT NULL,
    value jsonb NOT NULL DEFAULT '{}'::jsonb,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now(),
    UNIQUE (scope, workspace_id, user_id, key)
);
ALTER TABLE noteflow.settings
    DROP CONSTRAINT IF EXISTS settings_scope_chk;
ALTER TABLE noteflow.settings
    ADD CONSTRAINT settings_scope_chk
    CHECK (scope IN ('system','workspace','user'));

CREATE TRIGGER trg_settings_updated_at
    BEFORE UPDATE ON noteflow.settings
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

--------------------------------------------------------------------------------
-- Seed data (safe to re-run)
--------------------------------------------------------------------------------
-- Deterministic IDs for local dev
-- workspace/user share the same UUID to simplify defaults
INSERT INTO noteflow.workspaces (id, slug, name, metadata)
VALUES (
    '00000000-0000-0000-0000-000000000001',
    'default',
    'Default Workspace',
    '{"seed":true}'::jsonb
)
ON CONFLICT (id) DO NOTHING;

INSERT INTO noteflow.users (id, email, display_name, metadata)
VALUES (
    '00000000-0000-0000-0000-000000000001',
    'local@noteflow.local',
    'Local User',
    '{"seed":true}'::jsonb
)
ON CONFLICT (id) DO NOTHING;

INSERT INTO noteflow.workspace_memberships (workspace_id, user_id, role)
VALUES (
    '00000000-0000-0000-0000-000000000001',
    '00000000-0000-0000-0000-000000000001',
    'owner'
)
ON CONFLICT DO NOTHING;

-- Sample meeting
INSERT INTO noteflow.meetings (
    id, title, state, created_at, started_at, ended_at, metadata, asset_path
) VALUES (
    '11111111-1111-1111-1111-111111111111',
    'Seed Meeting: Project Kickoff',
    4,
    now() - interval '2 days',
    now() - interval '2 days' + interval '5 minutes',
    now() - interval '2 days' + interval '47 minutes',
    '{"source":"seed","topic":"kickoff"}'::jsonb,
    '11111111-1111-1111-1111-111111111111'
)
ON CONFLICT (id) DO NOTHING;

-- Sample segments
INSERT INTO noteflow.segments (
    id, meeting_id, segment_id, text, start_time, end_time, language, speaker_id, speaker_confidence
) VALUES
    (1, '11111111-1111-1111-1111-111111111111', 0, 'Welcome everyone. Today we will align on goals and deliverables.', 0.0, 6.2, 'en', 'SPEAKER_00', 0.92),
    (2, '11111111-1111-1111-1111-111111111111', 1, 'We should prioritize the database schema first, then build the UI around it.', 6.2, 12.4, 'en', 'SPEAKER_01', 0.88),
    (3, '11111111-1111-1111-1111-111111111111', 2, 'Action item: draft an initial schema and seed script for local development.', 12.4, 18.0, 'en', 'SPEAKER_00', 0.90)
ON CONFLICT (id) DO NOTHING;

-- Word timings (a few illustrative words)
INSERT INTO noteflow.word_timings (segment_pk, word_index, word, start_time, end_time, probability)
VALUES
    (1, 0, 'Welcome', 0.00, 0.40, 0.98),
    (1, 1, 'everyone.', 0.41, 0.80, 0.97),
    (2, 0, 'We', 6.20, 6.30, 0.99),
    (2, 1, 'should', 6.31, 6.55, 0.99),
    (3, 0, 'Action', 12.40, 12.62, 0.97),
    (3, 1, 'item:', 12.63, 12.82, 0.95)
ON CONFLICT DO NOTHING;

-- Summary + points + items
INSERT INTO noteflow.summaries (
    id, meeting_id, executive_summary, provider_name, model_name, tokens_used, latency_ms, verification
) VALUES (
    1,
    '11111111-1111-1111-1111-111111111111',
    'Aligned on building a scalable schema first; UI will follow. Identified a concrete next action to draft schema + seeds.',
    'local',
    'mock',
    123,
    42.0,
    '{"seed":true}'::jsonb
)
ON CONFLICT (id) DO NOTHING;

INSERT INTO noteflow.key_points (id, summary_id, position, text, segment_ids, start_time, end_time)
VALUES
    (1, 1, 0, 'Schema-first development to accelerate UI work.', ARRAY[1], 6.2, 12.4)
ON CONFLICT (id) DO NOTHING;

INSERT INTO noteflow.action_items (id, summary_id, position, text, segment_ids, start_time, end_time, assignee, priority)
VALUES
    (1, 1, 0, 'Draft initial database schema + seed script.', ARRAY[2], 12.4, 18.0, 'Local User', 2)
ON CONFLICT (id) DO NOTHING;

-- Task derived from action item (future task workflow)
INSERT INTO noteflow.tasks (id, workspace_id, meeting_id, action_item_id, text, status, priority)
VALUES (
    '22222222-2222-2222-2222-222222222222',
    '00000000-0000-0000-0000-000000000001',
    '11111111-1111-1111-1111-111111111111',
    1,
    'Draft initial database schema + seed script.',
    'open',
    2
)
ON CONFLICT (id) DO NOTHING;

-- Annotation
INSERT INTO noteflow.annotations (id, meeting_id, annotation_type, text, start_time, end_time, segment_ids)
VALUES
    (1, '11111111-1111-1111-1111-111111111111', 'ANNOTATION_TYPE_NOTE', 'Remember to keep schema modular and future-proof.', 6.0, 10.0, ARRAY[1])
ON CONFLICT (id) DO NOTHING;

-- Speaker/person mapping
INSERT INTO noteflow.persons (id, workspace_id, display_name, email)
VALUES
    ('33333333-3333-3333-3333-333333333333', '00000000-0000-0000-0000-000000000001', 'Alex Example', 'alex@example.com')
ON CONFLICT (id) DO NOTHING;

INSERT INTO noteflow.meeting_speakers (meeting_id, speaker_id, display_name, person_id)
VALUES
    ('11111111-1111-1111-1111-111111111111', 'SPEAKER_00', 'Alex', '33333333-3333-3333-3333-333333333333'),
    ('11111111-1111-1111-1111-111111111111', 'SPEAKER_01', 'Jordan', NULL)
ON CONFLICT DO NOTHING;

-- Tags
INSERT INTO noteflow.tags (id, workspace_id, name, color)
VALUES
    ('44444444-4444-4444-4444-444444444444', '00000000-0000-0000-0000-000000000001', 'seed', '#00AEEF')
ON CONFLICT (id) DO NOTHING;

INSERT INTO noteflow.meeting_tags (meeting_id, tag_id)
VALUES
    ('11111111-1111-1111-1111-111111111111', '44444444-4444-4444-4444-444444444444')
ON CONFLICT DO NOTHING;

-- Mock integration + a calendar event (shape matches the client-side config model)
INSERT INTO noteflow.integrations (id, workspace_id, name, type, status, config, last_sync)
VALUES (
    '55555555-5555-5555-5555-555555555555',
    '00000000-0000-0000-0000-000000000001',
    'Mock Calendar',
    'calendar',
    'connected',
    '{"sync_interval_minutes":60,"calendar_ids":["primary"],"webhook_url":"https://example.invalid/webhook"}'::jsonb,
    now() - interval '1 day'
)
ON CONFLICT (id) DO NOTHING;

INSERT INTO noteflow.calendar_events (
    id, integration_id, external_id, calendar_id, calendar_name, title, start_time, end_time, attendees, meeting_link
) VALUES (
    '66666666-6666-6666-6666-666666666666',
    '55555555-5555-5555-5555-555555555555',
    'evt_seed_001',
    'primary',
    'Primary',
    'Seed Meeting: Project Kickoff',
    now() - interval '2 days' + interval '5 minutes',
    now() - interval '2 days' + interval '47 minutes',
    ARRAY['alex@example.com'],
    'https://meet.example.invalid/seed'
)
ON CONFLICT (id) DO NOTHING;

INSERT INTO noteflow.meeting_calendar_links (meeting_id, calendar_event_id)
VALUES ('11111111-1111-1111-1111-111111111111', '66666666-6666-6666-6666-666666666666')
ON CONFLICT DO NOTHING;

-- Preferences KV used by server-side logic (stored as {"value": ...})
INSERT INTO noteflow.user_preferences (key, value)
VALUES
    ('cloud_consent_granted', '{"value": false}'::jsonb),
    ('schema_seeded', '{"value": true}'::jsonb)
ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value, updated_at = now();

-- Keep sequences sane if you re-run
SELECT setval('noteflow.segments_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.segments));
SELECT setval('noteflow.summaries_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.summaries));
SELECT setval('noteflow.key_points_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.key_points));
SELECT setval('noteflow.action_items_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.action_items));
SELECT setval('noteflow.annotations_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.annotations));
SELECT setval('noteflow.word_timings_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.word_timings));
SELECT setval('noteflow.streaming_diarization_turns_id_seq', (SELECT COALESCE(MAX(id), 1) FROM noteflow.streaming_diarization_turns));
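For reference, a minimal sketch of querying the ivfflat index above from Python; it assumes psycopg 3 and is not part of this commit (pgvector accepts a `'[x,y,...]'` text literal cast to `vector`):

```python
# Hypothetical usage sketch (not part of this commit): nearest-neighbor search
# over noteflow.segments.embedding via the ivfflat index defined above.
import psycopg

def similar_segments(conninfo: str, query_embedding: list[float], limit: int = 5):
    # Serialize the embedding as a pgvector text literal.
    vec_literal = "[" + ",".join(str(x) for x in query_embedding) + "]"
    with psycopg.connect(conninfo) as conn:
        return conn.execute(
            """
            SELECT segment_id, text, embedding <=> %s::vector AS distance
            FROM noteflow.segments
            WHERE embedding IS NOT NULL
            ORDER BY embedding <=> %s::vector  -- <=> is cosine distance
            LIMIT %s
            """,
            (vec_literal, vec_literal, limit),
        ).fetchall()
```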
@@ -1,633 +0,0 @@
# Code Quality Correction Plan

This plan addresses code quality issues identified by automated testing across the NoteFlow codebase.

## Executive Summary

| Area | Failing Tests | Issues Found | Status |
|------|---------------|--------------|--------|
| Python Backend Code | 10 | 17 violations | 🔴 Thresholds tightened |
| Python Test Smells | 7 | 223 smells | 🔴 Thresholds tightened |
| React/TypeScript Frontend | 6 | 23 violations | 🔴 Already strict |
| Rust/Tauri | 0 | 4 large files | ⚪ No quality tests |

**2024-12-24 Update:** Quality test thresholds have been aggressively tightened to expose real technical debt. Previously, all tests passed because thresholds were set just above the actual violation counts.

---

## Phase 1: Python Backend (High Priority)

### 1.1 Split `NoteFlowClient` God Class

**File:** `src/noteflow/grpc/client.py` (942 lines, 32 methods)

**Problem:** A single class combines six distinct concerns: connection management, streaming, meeting CRUD, annotation CRUD, export, and diarization.

**Solution:** Apply the mixin pattern (already used successfully in `grpc/_mixins/`).

```
src/noteflow/grpc/
├── client.py                # Thin facade (~100 lines)
├── _client_mixins/
│   ├── __init__.py
│   ├── connection.py        # GrpcConnectionMixin (~100 lines)
│   ├── streaming.py         # AudioStreamingMixin (~150 lines)
│   ├── meeting.py           # MeetingClientMixin (~100 lines)
│   ├── annotation.py        # AnnotationClientMixin (~150 lines)
│   ├── export.py            # ExportClientMixin (~50 lines)
│   ├── diarization.py       # DiarizationClientMixin (~100 lines)
│   └── converters.py        # Proto conversion helpers (~100 lines)
└── ...
```

**Steps:**
1. Create the `_client_mixins/` directory structure
2. Extract `converters.py` with static proto conversion functions
3. Extract each mixin with focused responsibilities
4. Compose `NoteFlowClient` from the mixins (see the sketch below)
5. Update imports in dependent code

**Estimated Impact:** -800 lines in a single file, +750 lines across 7 focused files
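A minimal sketch of the composition in step 4; module and mixin names are taken from the proposed tree above, while the constructor shape and defaults are assumptions:

```python
# Sketch only: mixin names mirror the proposed _client_mixins/ tree above;
# the constructor signature and defaults are assumptions, not final code.
from noteflow.grpc._client_mixins.annotation import AnnotationClientMixin
from noteflow.grpc._client_mixins.connection import GrpcConnectionMixin
from noteflow.grpc._client_mixins.diarization import DiarizationClientMixin
from noteflow.grpc._client_mixins.export import ExportClientMixin
from noteflow.grpc._client_mixins.meeting import MeetingClientMixin
from noteflow.grpc._client_mixins.streaming import AudioStreamingMixin

class NoteFlowClient(
    GrpcConnectionMixin,     # channel/stub lifecycle
    AudioStreamingMixin,     # StreamTranscription plumbing
    MeetingClientMixin,      # meeting CRUD
    AnnotationClientMixin,   # annotation CRUD
    ExportClientMixin,       # export calls
    DiarizationClientMixin,  # diarization job calls
):
    """Thin facade: behavior lives in the focused mixins."""

    def __init__(self, host: str = "localhost", port: int = 50051) -> None:
        super().__init__()
        self._target = f"{host}:{port}"
```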

---

### 1.2 Reduce `StreamTranscription` Complexity

**File:** `src/noteflow/grpc/_mixins/streaming.py` (579 lines, complexity=16)

**Problem:** 11 per-meeting state dictionaries and deeply nested async generators.

**Solution:** Create a `StreamingSession` class to encapsulate per-meeting state.

```python
# New file: src/noteflow/grpc/_mixins/_streaming_session.py

@dataclass
class StreamingSession:
    """Encapsulates all per-meeting streaming state."""
    meeting_id: str
    vad: StreamingVad
    segmenter: Segmenter
    partial_state: PartialState
    diarization_state: DiarizationState | None
    audio_writer: BufferedAudioWriter | None
    next_segment_id: int
    stop_requested: bool = False

    @classmethod
    async def create(cls, meeting_id: str, host: ServicerHost, ...) -> "StreamingSession":
        """Factory method for session initialization."""
        ...
```

**Steps:**
1. Define the `StreamingSession` dataclass with all session state
2. Extract `PartialState` and `DiarizationState` as nested dataclasses
3. Replace dictionary lookups (`self._vad_instances[meeting_id]`) with session attributes
4. Move helper methods into the session class where appropriate
5. Simplify `StreamTranscription` to manage the session lifecycle

**Estimated Impact:** Complexity 16 → 10, clearer state management

---

### 1.3 Create Server Configuration Objects

**File:** `src/noteflow/grpc/server.py` (430 lines)

**Problem:** `run_server()` has 12 parameters, and `main()` has 124 lines of argument parsing.

**Solution:** Create configuration dataclasses.

```python
# New file: src/noteflow/grpc/_config.py

@dataclass(frozen=True)
class AsrConfig:
    model: str
    device: str
    compute_type: str

@dataclass(frozen=True)
class DiarizationConfig:
    enabled: bool = False
    hf_token: str | None = None
    device: str = "auto"
    streaming_latency: float | None = None
    min_speakers: int | None = None
    max_speakers: int | None = None
    refinement_enabled: bool = True

@dataclass(frozen=True)
class ServerConfig:
    port: int
    asr: AsrConfig
    database_url: str | None = None
    diarization: DiarizationConfig | None = None
```

**Steps:**
1. Create `_config.py` with the config dataclasses
2. Refactor `run_server()` to accept a `ServerConfig`
3. Extract a `_parse_arguments()` function from `main()`
4. Create `_build_config()` to construct the config from the parsed args (both sketched below)
5. Extract a `ServerBootstrap` class for the initialization phases

**Estimated Impact:** 12 params → 3, functions 146 → ~60 lines each
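A hedged sketch of steps 3 and 4, reusing the dataclasses above; flag names and defaults are assumptions:

```python
# Sketch of _parse_arguments()/_build_config(); flag names and defaults are
# assumptions. ServerConfig and AsrConfig are the dataclasses defined above.
import argparse

def _parse_arguments() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="NoteFlow gRPC server")
    parser.add_argument("--port", type=int, default=50051)
    parser.add_argument("--asr-model", default="small")
    parser.add_argument("--asr-device", default="auto")
    parser.add_argument("--asr-compute-type", default="auto")
    parser.add_argument("--database-url", default=None)
    return parser.parse_args()

def _build_config(args: argparse.Namespace) -> ServerConfig:
    # Fold the flat CLI namespace into the structured, frozen config.
    return ServerConfig(
        port=args.port,
        asr=AsrConfig(
            model=args.asr_model,
            device=args.asr_device,
            compute_type=args.asr_compute_type,
        ),
        database_url=args.database_url,
    )
```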

---

### 1.4 Simplify `parse_llm_response`

**File:** `src/noteflow/infrastructure/summarization/_parsing.py` (complexity=21)

**Problem:** Multiple parsing phases, with repeated patterns for key_points and action_items.

**Solution:** Extract helper functions for the common patterns.

```python
# Refactored structure
def _strip_markdown_fences(text: str) -> str:
    """Remove markdown code block delimiters."""
    ...

def _parse_items[T](
    raw_items: list[dict],
    valid_segment_ids: set[int],
    segments: Sequence[Segment],
    item_factory: Callable[..., T],
) -> list[T]:
    """Generic parser for key_points and action_items."""
    ...

def parse_llm_response(
    raw_response: str,
    request: SummarizationRequest,
) -> Summary:
    """Parse LLM JSON response into Summary entity."""
    text = _strip_markdown_fences(raw_response)
    data = json.loads(text)
    valid_ids = {seg.id for seg in request.segments}

    key_points = _parse_items(data.get("key_points", []), valid_ids, ...)
    action_items = _parse_items(data.get("action_items", []), valid_ids, ...)

    return Summary(...)
```

**Steps:**
1. Extract the `_strip_markdown_fences()` helper
2. Create the generic `_parse_items()` function
3. Simplify `parse_llm_response()` to use the helpers
4. Add unit tests for the extracted functions

**Estimated Impact:** Complexity 21 → 12

---

### 1.5 Update Quality Test Thresholds

The feature envy test has 39 false positives because converters and repositories legitimately work with external objects.

**File:** `tests/quality/test_code_smells.py`

**Changes:**
```python
def test_no_feature_envy() -> None:
    """Detect methods that use other objects more than self."""
    # Exclude known patterns that are NOT feature envy:
    # - Converter classes (naturally transform external objects)
    # - Repository methods (query + convert pattern)
    # - Exporter classes (transform domain to output)
    excluded_patterns = [
        "converter",
        "repo",
        "exporter",
        "_to_domain",
        "_to_proto",
        "_proto_to_",
    ]
    ...
```

---

## Phase 2: React/TypeScript Frontend (High Priority)

### 2.1 Split `Settings.tsx` into Sub-Components

**File:** `client/src/pages/Settings.tsx` (1,831 lines)

**Problem:** Monolithic page with 7+ concerns mixed together.

**Solution:** Extract into a settings module.

```
client/src/pages/settings/
├── Settings.tsx                     # Page orchestrator (~150 lines)
├── components/
│   ├── ServerConnectionPanel.tsx    # Connection settings (~150 lines)
│   ├── AudioDevicePanel.tsx         # Audio device selection (~200 lines)
│   ├── ProviderConfigPanel.tsx      # AI provider configs (~400 lines)
│   ├── AITemplatePanel.tsx          # Tone/format/verbosity (~150 lines)
│   ├── SyncPanel.tsx                # Sync settings (~100 lines)
│   ├── IntegrationsPanel.tsx        # Third-party integrations (~200 lines)
│   └── QuickActionsPanel.tsx        # Quick actions bar (~80 lines)
└── hooks/
    ├── useProviderConfig.ts         # Provider state management (~150 lines)
    └── useServerConnection.ts       # Connection state (~100 lines)
```

**Steps:**
1. Create the `settings/` directory structure
2. Extract a `useProviderConfig` hook for shared provider logic
3. Extract each accordion section into a focused component
4. Create a shared `ProviderConfigCard` component for reuse
5. Update routing to use the new `Settings.tsx`

**Estimated Impact:** 1,831 lines → ~150 lines main + 1,500 distributed

---

### 2.2 Centralize Configuration Constants

**Problem:** Hardcoded endpoints scattered across 4 files.

**Solution:** Create centralized configuration.

```typescript
// client/src/lib/config/index.ts
export * from './provider-endpoints';
export * from './defaults';
export * from './server';

// client/src/lib/config/provider-endpoints.ts
export const PROVIDER_ENDPOINTS = {
  openai: 'https://api.openai.com/v1',
  anthropic: 'https://api.anthropic.com/v1',
  google: 'https://generativelanguage.googleapis.com/v1',
  azure: 'https://{resource}.openai.azure.com',
  ollama: 'http://localhost:11434/api',
  deepgram: 'https://api.deepgram.com/v1',
  elevenlabs: 'https://api.elevenlabs.io/v1',
} as const;

// client/src/lib/config/server.ts
export const SERVER_DEFAULTS = {
  HOST: 'localhost',
  PORT: 50051,
} as const;

// client/src/lib/config/defaults.ts
export const DEFAULT_PREFERENCES = { ... };
```

**Files to Update:**
- `lib/ai-providers.ts` - Import from config
- `lib/preferences.ts` - Import defaults from config
- `pages/Settings.tsx` - Import server defaults

**Estimated Impact:** Eliminates 16 hardcoded endpoint violations

---

### 2.3 Extract Shared Adapter Utilities

**Files:** `api/mock-adapter.ts` (637 lines), `api/tauri-adapter.ts` (635 lines)

**Problem:** ~150 lines of duplicated helper code.

**Solution:** Extract shared utilities.

```typescript
// client/src/api/constants.ts
export const TauriCommands = { ... };
export const TauriEvents = { ... };

// client/src/api/helpers.ts
export function isRecord(value: unknown): value is Record<string, unknown> { ... }
export function extractStringArrayFromRecords(records: unknown[], key: string): string[] { ... }
export function getErrorMessage(value: unknown): string | undefined { ... }
export function normalizeSuccessResponse(response: boolean | { success: boolean }): boolean { ... }
export function stateToGrpcEnum(state: string): number { ... }
```

**Steps:**
1. Create `api/constants.ts` with shared command/event names
2. Create `api/helpers.ts` with type guards and converters
3. Update both adapters to import from the shared modules
4. Remove the duplicated code

**Estimated Impact:** -150 lines of duplication

---

### 2.4 Refactor `lib/preferences.ts`

**File:** `client/src/lib/preferences.ts` (670 lines)

**Problem:** 15 identical setter patterns.

**Solution:** Create a generic setter factory.

```typescript
// Before: 15 methods like this
setTranscriptionProvider(provider: TranscriptionProviderType, baseUrl: string): void {
  const prefs = loadPreferences();
  prefs.ai_config.transcription.provider = provider;
  prefs.ai_config.transcription.base_url = baseUrl;
  prefs.ai_config.transcription.test_status = 'untested';
  savePreferences(prefs);
}

// After: a single generic function
updateAIConfig<K extends keyof AIConfig>(
  configType: K,
  updates: Partial<AIConfig[K]>
): void {
  const prefs = loadPreferences();
  prefs.ai_config[configType] = {
    ...prefs.ai_config[configType],
    ...updates,
    test_status: 'untested',
  };
  savePreferences(prefs);
}
```

**Steps:**
1. Create the generic `updateAIConfig()` function
2. Deprecate the individual setter methods
3. Update Settings.tsx to use the generic setter
4. Remove the deprecated methods after migration

**Estimated Impact:** -200 lines of repetitive code

---

### 2.5 Split Type Definitions

**File:** `client/src/api/types.ts` (659 lines)

**Solution:** Organize into focused modules.

```
client/src/api/types/
├── index.ts           # Re-exports all
├── enums.ts           # All enum types (~100 lines)
├── messages.ts        # Core DTOs (Meeting, Segment, etc.) (~200 lines)
├── requests.ts        # Request/Response types (~150 lines)
├── config.ts          # Provider config types (~100 lines)
└── integrations.ts    # Integration types (~80 lines)
```

**Steps:**
1. Create the `types/` directory
2. Split types by domain (safe refactor - no logic changes)
3. Create `index.ts` with re-exports
4. Update imports across the codebase

**Estimated Impact:** Better organization, easier navigation

---

## Phase 3: Component Refactoring (Medium Priority)

### 3.1 Split `Recording.tsx`

**File:** `client/src/pages/Recording.tsx` (641 lines)

**Solution:** Extract hooks and components.

```
client/src/pages/recording/
├── Recording.tsx                    # Orchestrator (~100 lines)
├── hooks/
│   ├── useRecordingState.ts         # State machine (~150 lines)
│   ├── useTranscriptionStream.ts    # Stream handling (~120 lines)
│   └── useRecordingControls.ts      # Control actions (~80 lines)
└── components/
    ├── RecordingHeader.tsx          # Title + timer (~50 lines)
    ├── TranscriptPanel.tsx          # Transcript display (~80 lines)
    ├── NotesPanel.tsx               # Notes editor (~70 lines)
    └── RecordingControls.tsx        # Control buttons (~50 lines)
```

---

### 3.2 Split `sidebar.tsx`

**File:** `client/src/components/ui/sidebar.tsx` (639 lines)

**Solution:** Split into a sidebar module with sub-components.

```
client/src/components/ui/sidebar/
├── index.ts                 # Re-exports
├── context.ts               # SidebarContext + useSidebar (~50 lines)
├── provider.tsx             # SidebarProvider (~200 lines)
└── components/
    ├── sidebar-trigger.tsx  # (~40 lines)
    ├── sidebar-rail.tsx     # (~40 lines)
    ├── sidebar-content.tsx  # (~40 lines)
    ├── sidebar-menu.tsx     # (~60 lines)
    └── sidebar-inset.tsx    # (~20 lines)
```

---

### 3.3 Refactor `ai-providers.ts`

**File:** `client/src/lib/ai-providers.ts` (618 lines)

**Problem:** 7 provider-specific fetch functions with duplicated error handling.

**Solution:** Create provider metadata + a generic fetcher.

```typescript
// client/src/lib/ai-providers/provider-metadata.ts
interface ProviderMetadata {
  value: string;
  label: string;
  defaultUrl: string;
  authHeader: { name: string; prefix: string };
  modelsEndpoint: string | null;
  modelKey: string;
  fallbackModels: string[];
}

export const PROVIDERS: Record<string, ProviderMetadata> = {
  openai: {
    value: 'openai',
    label: 'OpenAI',
    defaultUrl: PROVIDER_ENDPOINTS.openai,
    authHeader: { name: 'Authorization', prefix: 'Bearer ' },
    modelsEndpoint: '/models',
    modelKey: 'id',
    fallbackModels: ['gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo'],
  },
  // ... other providers
};

// client/src/lib/ai-providers/model-fetcher.ts
export async function fetchModels(
  provider: string,
  baseUrl: string,
  apiKey: string
): Promise<string[]> {
  const meta = PROVIDERS[provider];
  if (!meta?.modelsEndpoint) return meta?.fallbackModels ?? [];

  const response = await fetch(`${baseUrl}${meta.modelsEndpoint}`, {
    headers: { [meta.authHeader.name]: `${meta.authHeader.prefix}${apiKey}` },
  });

  const data = await response.json();
  return extractModels(data, meta.modelKey);
}
```

---

## Phase 4: Rust/Tauri (Low Priority)

### 4.1 Add Clippy Lints

**File:** `client/src-tauri/Cargo.toml`

Add additional clippy lints:
```toml
[lints.clippy]
unwrap_used = "warn"
expect_used = "warn"
todo = "warn"
cognitive_complexity = "warn"
```

### 4.2 Review Clone Usage

Run the quality script and address files with excessive `.clone()` calls.

---

## Implementation Order

### Week 1: Configuration & Quick Wins
1. ✅ Create `lib/config/` with centralized endpoints
2. ✅ Extract `api/helpers.ts` shared utilities
3. ✅ Update quality test thresholds for false positives
4. ✅ Tighten Python quality test thresholds (2024-12-24)
5. ✅ Add test smell detection suite (15 tests) (2024-12-24)

### Week 2: Python Backend Core
6. Create the `ServerConfig` dataclasses
7. Refactor `run_server()` to use the config
8. Extract the `parse_llm_response` helpers

### Week 3: Client God Class
9. Create `_client_mixins/converters.py`
10. Extract the connection mixin
11. Extract the streaming mixin
12. Extract the remaining mixins
13. Compose `NoteFlowClient` from the mixins

### Week 4: Frontend Pages
14. Split `Settings.tsx` into sub-components
15. Create the `useProviderConfig` hook
16. Refactor `preferences.ts` with the generic setter

### Week 5: Streaming & Types
17. Create the `StreamingSession` class
18. Split `api/types.ts` into modules
19. Refactor `ai-providers.ts` with metadata

### Week 6: Component Cleanup
20. Split `Recording.tsx`
21. Split `sidebar.tsx`
22. Final quality test run & verification

---

## Current Quality Test Status (2024-12-24)

### Python Backend Tests (17 failures)

| Test | Found | Threshold | Key Offenders |
|------|-------|-----------|---------------|
| Long parameter lists | 4 | ≤2 | `run_server` (12), `add_segment` (11) |
| God classes | 3 | ≤1 | `NoteFlowClient` (32 methods, 815 lines) |
| Long methods | 7 | ≤4 | `run_server` (145 lines), `main` (123) |
| Module size (hard >750) | 1 | ≤0 | `client.py` (942 lines) |
| Module size (soft >500) | 3 | ≤1 | `streaming.py`, `diarization.py` |
| Scattered helpers | 21 | ≤10 | Helpers across unrelated modules |
| Duplicate helper signatures | 32 | ≤20 | `is_enabled` (7x), `get_by_meeting` (6x) |
| Repeated code patterns | 92 | ≤50 | Docstring blocks, method signatures |
| Magic numbers | 15 | ≤10 | `10` (20x), `1024` (14x), `5` (13x) |
| Repeated strings | 53 | ≤30 | Log messages, schema names |
| Thin wrappers | 46 | ≤25 | Passthrough functions |

### Python Test Smell Tests (7 failures)

| Test | Found | Threshold | Issue |
|------|-------|-----------|-------|
| Assertion roulette | 91 | ≤50 | Tests with naked asserts (no messages) |
| Conditional test logic | 75 | ≤40 | Loops/ifs in test bodies |
| Sleepy tests | 5 | ≤3 | Uses `time.sleep()` |
| Broad exception handling | 5 | ≤3 | Catches generic `Exception` |
| Sensitive equality | 12 | ≤10 | Comparing `str()` output |
| Duplicate test names | 26 | ≤15 | Same test name in multiple files |
| Long test methods | 5 | ≤3 | Tests exceeding 50 lines |
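For illustration, an invented before/after for the largest bucket above, assertion roulette; a message makes a failing assert self-identifying:

```python
# Invented example of the assertion-roulette fix; not taken from the test suite.

# Before: naked asserts - a failure reports only "AssertionError".
def test_summary_metrics_smelly(summary):
    assert summary.provider_name
    assert summary.tokens_used > 0
    assert summary.latency_ms > 0

# After: each assert names the field it checks.
def test_summary_metrics(summary):
    assert summary.provider_name, "provider_name missing after generation"
    assert summary.tokens_used > 0, f"tokens_used not recorded: {summary.tokens_used}"
    assert summary.latency_ms > 0, f"latency_ms not recorded: {summary.latency_ms}"
```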

### Frontend Tests (6 failures)

| Test | Found | Threshold | Key Offenders |
|------|-------|-----------|---------------|
| Overly long files | 9 | ≤3 | `Settings.tsx` (1,832!), 8 others >500 |
| Hardcoded endpoints | 4 | 0 | API URLs outside config |
| Nested ternaries | 1 | 0 | Complex conditional |
| TODO/FIXME comments | >15 | ≤15 | Technical debt markers |
| Commented-out code | >10 | ≤10 | Stale code blocks |

### Rust/Tauri (no quality tests yet)

Large files that could benefit from splitting:
- `noteflow.rs`: 1205 lines (generated proto)
- `recording.rs`: 897 lines
- `app_state.rs`: 851 lines
- `client.rs`: 681 lines

---

## Success Metrics

| Metric | Current | Target |
|--------|---------|--------|
| Python files > 750 lines | 1 | 0 |
| TypeScript files > 500 lines | 9 | 3 |
| Functions > 100 lines | 8 | 2 |
| Cyclomatic complexity > 15 | 2 | 0 |
| Functions with > 7 params | 4 | 0 |
| Hardcoded endpoints | 4 | 0 |
| Duplicated adapter code | ~150 lines | 0 |
| Python quality tests passing | 23/40 (58%) | 38/40 (95%) |
| Frontend quality tests passing | 15/21 (71%) | 20/21 (95%) |

---

## Notes

### False Positives to Ignore

The following "feature envy" detections are **correct design patterns** and should NOT be refactored:

1. **Converter classes** (`OrmConverter`, `AsrConverter`) - Inherently transform external objects
2. **Repository methods** - Query→fetch→convert is the standard pattern
3. **Exporter classes** - Transformation classes work with domain entities
4. **Proto converters in gRPC** - Proto→DTO adaptation is appropriate

### Patterns to Preserve

- Mixin architecture in `grpc/_mixins/` - Apply to client
- Repository base class helpers - Keep shared utilities
- Export formatting helpers - Already well-centralized
- Domain utilities in `domain/utils/` - Appropriate location
1098
docs/milestones.md
File diff suppressed because it is too large
@@ -1,466 +0,0 @@
|
||||
# Code Quality Analysis Report
|
||||
**Date:** 2024-12-24
|
||||
**Sprint:** Comprehensive Backend QA Scan
|
||||
**Scope:** `/home/trav/repos/noteflow/src/noteflow/`
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
**Status:** PASS ✅
|
||||
|
||||
The NoteFlow Python backend demonstrates excellent code quality with:
|
||||
- **0 type checking errors** (basedpyright clean)
|
||||
- **0 remaining lint violations** (all Ruff issues auto-fixed)
|
||||
- **0 security issues** detected
|
||||
- **3 complexity violations** requiring architectural improvements
|
||||
|
||||
### Quality Metrics
|
||||
|
||||
| Category | Status | Details |
|
||||
|----------|--------|---------|
|
||||
| Type Safety | ✅ PASS | 0 errors (basedpyright strict mode) |
|
||||
| Code Linting | ✅ PASS | 1 fix applied, 0 remaining |
|
||||
| Formatting | ⚠️ SKIP | Black not installed in venv |
|
||||
| Security | ✅ PASS | 0 vulnerabilities (Bandit rules) |
|
||||
| Complexity | ⚠️ WARN | 3 functions exceed threshold |
|
||||
| Architecture | ✅ GOOD | Modular mixin pattern, clean separation |
|
||||
|
||||
---
|
||||
|
||||
## 1. Type Safety Analysis (basedpyright)
|
||||
|
||||
### Result: PASS ✅
|
||||
|
||||
**Command:** `basedpyright --pythonversion 3.12 src/noteflow/`
|
||||
**Outcome:** `0 errors, 0 warnings, 0 notes`
|
||||
|
||||
#### Configuration Strengths
|
||||
- `typeCheckingMode = "standard"`
|
||||
- Python 3.12 target with modern type syntax
|
||||
- Appropriate exclusions for generated proto files
|
||||
- SQLAlchemy-specific overrides for known false positives
|
||||
|
||||
#### Notes
|
||||
The mypy output showed numerous errors, but these are **false positives** due to:
|
||||
1. Missing type stubs for third-party libraries (`grpc`, `pgvector`, `diart`, `sounddevice`)
|
||||
2. Generated protobuf files (excluded from analysis scope)
|
||||
3. SQLAlchemy's dynamic attribute system (correctly configured in basedpyright)
|
||||
|
||||
**Recommendation:** Basedpyright is the authoritative type checker for this project. The mypy configuration should be removed or aligned with basedpyright's exclusions.
|
||||
|
||||
---
|
||||
|
||||
## 2. Linting Analysis (Ruff)
|
||||
|
||||
### Result: PASS ✅ (1 fix applied)
|
||||
|
||||
**Command:** `ruff check --fix src/noteflow/`
|
||||
|
||||
#### Fixed Issues
|
||||
|
||||
| File | Code | Issue | Fix Applied |
|
||||
|------|------|-------|-------------|
|
||||
| `grpc/_config.py:95` | UP037 | Quoted type annotation | Removed unnecessary quotes from `GrpcServerConfig` |
|
||||
|
||||
#### Configuration Issues
|
||||
|
||||
**Deprecated settings detected:**
|
||||
```toml
|
||||
# Current (deprecated)
|
||||
[tool.ruff]
|
||||
select = [...]
|
||||
ignore = [...]
|
||||
per-file-ignores = {...}
|
||||
|
||||
# Required migration
|
||||
[tool.ruff.lint]
|
||||
select = [...]
|
||||
ignore = [...]
|
||||
per-file-ignores = {...}
|
||||
```
|
||||
|
||||
**Action Required:** Update `pyproject.toml` to use `[tool.ruff.lint]` section.
|
||||
|
||||
#### Selected Rules (Good Coverage)
|
||||
- E/W: pycodestyle errors/warnings
|
||||
- F: Pyflakes
|
||||
- I: isort (import sorting)
|
||||
- B: flake8-bugbear (bug detection)
|
||||
- C4: flake8-comprehensions
|
||||
- UP: pyupgrade (modern syntax)
|
||||
- SIM: flake8-simplify
|
||||
- RUF: Ruff-specific rules
|
||||
|
||||
---
|
||||
|
||||
## 3. Complexity Analysis
|
||||
|
||||
### Result: WARN ⚠️ (3 violations)
|
||||
|
||||
**Command:** `ruff check --select C901 src/noteflow/`
|
||||
|
||||
| File | Function | Complexity | Threshold | Severity |
|
||||
|------|----------|------------|-----------|----------|
|
||||
| `grpc/_mixins/diarization.py:102` | `_process_streaming_diarization` | 11 | ≤10 | 🟡 LOW |
|
||||
| `grpc/_mixins/streaming.py:55` | `StreamTranscription` | 14 | ≤10 | 🟠 MEDIUM |
|
||||
| `grpc/server.py:159` | `run_server_with_config` | 16 | ≤10 | 🔴 HIGH |
|
||||
|
||||
---
|
||||
|
||||
### 3.1 HIGH Priority: `run_server_with_config` (CC=16)
|
||||
|
||||
**Location:** `src/noteflow/grpc/server.py:159-254`
|
||||
|
||||
**Issues:**
|
||||
- 96 lines with multiple initialization phases
|
||||
- Deeply nested conditionals for database/diarization/consent logic
|
||||
- Mixes infrastructure setup with business logic
|
||||
|
||||
**Suggested Refactoring:**
|
||||
|
||||
```python
# Extract helper functions to reduce complexity


async def _initialize_database(
    config: GrpcServerConfig,
) -> tuple[AsyncSessionFactory | None, RecoveryResult | None]:
    """Initialize database connection and run recovery."""
    if not config.database_url:
        return None, None

    session_factory = create_async_session_factory(config.database_url)
    await ensure_schema_ready(session_factory, config.database_url)

    recovery_service = RecoveryService(
        SqlAlchemyUnitOfWork(session_factory),
        meetings_dir=get_settings().meetings_dir,
    )
    recovery_result = await recovery_service.recover_all()
    return session_factory, recovery_result


async def _initialize_consent_persistence(
    session_factory: AsyncSessionFactory,
    summarization_service: SummarizationService,
) -> None:
    """Load cloud consent from DB and set up persistence callback."""
    async with SqlAlchemyUnitOfWork(session_factory) as uow:
        cloud_consent = await uow.preferences.get_bool("cloud_consent_granted", False)
        summarization_service.settings.cloud_consent_granted = cloud_consent

    async def persist_consent(granted: bool) -> None:
        async with SqlAlchemyUnitOfWork(session_factory) as uow:
            await uow.preferences.set("cloud_consent_granted", granted)
            await uow.commit()

    summarization_service.on_consent_change = persist_consent


def _initialize_diarization(
    config: GrpcServerConfig,
) -> DiarizationEngine | None:
    """Create diarization engine if enabled and configured."""
    diarization = config.diarization
    if not diarization.enabled:
        return None

    if not diarization.hf_token:
        logger.warning("Diarization enabled but no HF token provided")
        return None

    diarization_kwargs = {
        "device": diarization.device,
        "hf_token": diarization.hf_token,
    }
    if diarization.streaming_latency is not None:
        diarization_kwargs["streaming_latency"] = diarization.streaming_latency
    if diarization.min_speakers is not None:
        diarization_kwargs["min_speakers"] = diarization.min_speakers
    if diarization.max_speakers is not None:
        diarization_kwargs["max_speakers"] = diarization.max_speakers

    return DiarizationEngine(**diarization_kwargs)


async def run_server_with_config(config: GrpcServerConfig) -> None:
    """Run the async gRPC server with structured configuration."""
    # Initialize database and recovery
    session_factory, recovery_result = await _initialize_database(config)
    if recovery_result:
        _log_recovery_results(recovery_result)

    # Initialize summarization
    summarization_service = create_summarization_service()
    if session_factory:
        await _initialize_consent_persistence(session_factory, summarization_service)

    # Initialize diarization
    diarization_engine = _initialize_diarization(config)

    # Create and start server
    server = NoteFlowServer(
        port=config.port,
        asr_model=config.asr.model,
        asr_device=config.asr.device,
        asr_compute_type=config.asr.compute_type,
        session_factory=session_factory,
        summarization_service=summarization_service,
        diarization_engine=diarization_engine,
        diarization_refinement_enabled=config.diarization.refinement_enabled,
    )
    await server.start()
    await server.wait_for_termination()
```

**Expected Impact:** CC 16 → ~6 (main function becomes orchestration only)

---

### 3.2 MEDIUM Priority: `StreamTranscription` (CC=14)

**Location:** `src/noteflow/grpc/_mixins/streaming.py:55-115`

**Issues:**
- Multiple conditional checks for stream initialization
- Nested error handling with context managers
- Mixed concerns: stream lifecycle + chunk processing

**Suggested Refactoring:**

The codebase already has `_streaming_session.py` created. Recommendation:

```python
# Use StreamingSession to encapsulate per-meeting state
async def StreamTranscription(
    self: ServicerHost,
    request_iterator: AsyncIterator[noteflow_pb2.AudioChunk],
    context: grpc.aio.ServicerContext,
) -> AsyncIterator[noteflow_pb2.TranscriptUpdate]:
    """Handle bidirectional audio streaming with persistence."""
    if self._asr_engine is None or not self._asr_engine.is_loaded:
        await abort_failed_precondition(context, "ASR engine not loaded")

    session: StreamingSession | None = None

    try:
        async for chunk in request_iterator:
            # Initialize session on first chunk
            if session is None:
                session = await StreamingSession.create(chunk.meeting_id, self, context)
                if session is None:
                    return

            # Check for stop request
            if session.should_stop():
                logger.info("Stop requested, exiting stream gracefully")
                break

            # Process chunk
            async for update in session.process_chunk(chunk):
                yield update

        # Flush remaining audio
        if session:
            async for update in session.flush():
                yield update
    finally:
        if session:
            await session.cleanup()
```

**Expected Impact:** CC 14 → ~8 (move complexity into `StreamingSession` methods)

---

### 3.3 LOW Priority: `_process_streaming_diarization` (CC=11)

**Location:** `src/noteflow/grpc/_mixins/diarization.py:102-174`

**Issues:**
- Multiple early returns (guard clauses)
- Lock-based session management
- Error handling for streaming pipeline

**Analysis:**
This function is already well-structured with clear separation:
1. Early validation checks (lines 114-119)
2. Session creation under lock (lines 124-145)
3. Chunk processing in thread pool (lines 148-164)
4. Turn persistence (lines 167-174)

**Recommendation:** Accept CC=11 as reasonable for this complex concurrent operation. The early returns are defensive programming, not incidental complexity.

---

## 4. Security Analysis (Bandit/Ruff S Rules)

### Result: PASS ✅

**Command:** `ruff check --select S src/noteflow/`
**Outcome:** 0 security issues detected

**Scanned Patterns:**
- S101: Use of assert
- S102: Use of exec
- S103: Insecure file permissions
- S104-S113: Hardcoded binds/passwords, temp paths, missing request timeouts
- S301-S324: Pickle usage, weak hashes/ciphers, unsafe XML parsing, etc.

**Notable Security Strengths:**
1. **Encryption:** `infrastructure/security/crypto.py` uses AES-GCM (authenticated encryption); see the sketch below
2. **Key Management:** `infrastructure/security/keystore.py` uses the system keyring
3. **Database:** SQLAlchemy ORM prevents SQL injection
4. **No hardcoded secrets:** Uses environment variables and keyring

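For reference, a minimal sketch of the AES-GCM pattern, assuming the `cryptography` package; this is not the actual `crypto.py` implementation, and it elides NoteFlow's keyring-backed key management:

```python
# Illustrative AES-GCM round trip; in NoteFlow the key would come from the
# system keyring rather than being generated inline.
import os

from cryptography.hazmat.primitives.ciphers.aead import AESGCM

key = AESGCM.generate_key(bit_length=256)
aesgcm = AESGCM(key)

nonce = os.urandom(12)  # 96-bit nonce, must be unique per encryption
ciphertext = aesgcm.encrypt(nonce, b"meeting transcript", None)

# Decryption also authenticates: a tampered ciphertext raises InvalidTag.
plaintext = aesgcm.decrypt(nonce, ciphertext, None)
assert plaintext == b"meeting transcript"
```
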
---

## 5. Architecture Quality

### Result: EXCELLENT ✅

**Strengths:**

#### 5.1 Hexagonal Architecture

```
infrastructure/ (adapters)
    ↓ depends on
application/ (use cases)
    ↓ depends on
domain/ (pure business logic)
```

Clean dependency direction with no circular imports.

#### 5.2 Modular gRPC Mixins

```
grpc/_mixins/
├── streaming.py      # ASR streaming
├── diarization.py    # Speaker diarization
├── summarization.py  # Summary generation
├── meeting.py        # Meeting CRUD
├── annotation.py     # Annotations
├── export.py         # Document export
└── protocols.py      # ServicerHost protocol
```

Each mixin focuses on a single responsibility, composed via the `ServicerHost` protocol.

#### 5.3 Repository Pattern with Unit of Work

```python
async with SqlAlchemyUnitOfWork(session_factory) as uow:
    meeting = await uow.meetings.get(meeting_id)
    await uow.segments.add(segment)
    await uow.commit()  # Atomic transaction
```

Proper transaction boundaries and separation of concerns.

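The Unit of Work itself is just another protocol; a minimal sketch of its shape (illustrative only — the exact NoteFlow interface may differ, and `SegmentRepository` is assumed by analogy with `MeetingRepository` below):

```python
from types import TracebackType
from typing import Protocol


class UnitOfWork(Protocol):
    """Transactional boundary over the repositories (illustrative sketch)."""

    meetings: "MeetingRepository"  # port as in section 5.4
    segments: "SegmentRepository"  # assumed analogous port

    async def __aenter__(self) -> "UnitOfWork": ...

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None: ...

    async def commit(self) -> None: ...

    async def rollback(self) -> None: ...
```
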
#### 5.4 Protocol-Based Dependency Injection

```python
from typing import Protocol

# domain/ports/
class MeetingRepository(Protocol):
    async def get(self, meeting_id: MeetingId) -> Meeting | None: ...


# infrastructure/persistence/repositories/
class SqlAlchemyMeetingRepository:
    """Concrete implementation."""
```

Testable, swappable implementations (DB vs memory).

---

## 6. File Size Analysis

### Result: GOOD ✅

| File | Lines | Status | Notes |
|------|-------|--------|-------|
| `grpc/server.py` | 489 | ✅ Good | Under 500-line soft limit |
| `grpc/_mixins/streaming.py` | 579 | ⚠️ Review | Over 500-line soft limit, approaching 750-line hard limit |
| `grpc/_mixins/diarization.py` | 578 | ⚠️ Review | Over 500-line soft limit, approaching 750-line hard limit |

**Recommendation:** Both large mixins are candidates for splitting into sub-modules once complexity is addressed.

---

## 7. Missing Quality Tools

### 7.1 Black Formatter
**Status:** Not installed in venv
**Impact:** Cannot verify formatting compliance
**Action Required:**
```bash
source .venv/bin/activate
uv pip install black
black --check src/noteflow/
```

### 7.2 Pyrefly
**Status:** Not available
**Impact:** Missing semantic bug detection
**Action:** Optional enhancement (not critical)

---

## Next Actions

### Critical (Do Before Next Commit)
1. ✅ **Fixed:** Removed quoted type annotation in `_config.py` (auto-fixed by Ruff)
2. ⚠️ **Required:** Update `pyproject.toml` to use the `[tool.ruff.lint]` section
3. ⚠️ **Required:** Install Black and verify formatting: `uv pip install black && black src/noteflow/`

### High Priority (This Sprint)
4. **Extract helpers from `run_server_with_config`** to reduce CC from 16 → ~6
   - Create `_initialize_database()`, `_initialize_consent_persistence()`, `_initialize_diarization()`
   - Target: <10 complexity per function

5. **Complete `StreamingSession` refactoring** to reduce `StreamTranscription` CC from 14 → ~8
   - File already created: `grpc/_streaming_session.py`
   - Move per-meeting state into the session class
   - Simplify the main async generator

### Medium Priority (Next Sprint)
6. **Split large mixin files** if they exceed 750 lines after complexity fixes
   - `streaming.py` (579 lines) → `streaming/` package
   - `diarization.py` (578 lines) → `diarization/` package

7. **Add mypy exclusions** to align with the basedpyright configuration
   - Exclude proto files and third-party libraries without stubs

### Low Priority (Backlog)
8. Consider adding `pyrefly` for additional semantic checks
9. Review duplication patterns from code-quality-correction-plan.md

---

## Summary

### Mechanical Fixes Applied ✅
- **Ruff:** Removed quoted type annotation in `grpc/_config.py:95`

### Configuration Issues ⚠️
- **pyproject.toml:** Migrate to the `[tool.ruff.lint]` section (deprecation warning)
- **Black:** Not installed in venv (cannot verify formatting)

### Architectural Recommendations 📋

#### Complexity Violations (3 total)
| Priority | Function | Current CC | Target | Effort |
|----------|----------|------------|--------|--------|
| 🔴 HIGH | `run_server_with_config` | 16 | ≤10 | 2-3 hours |
| 🟠 MEDIUM | `StreamTranscription` | 14 | ≤10 | 3-4 hours |
| 🟡 LOW | `_process_streaming_diarization` | 11 | Accept | N/A |

**Total Estimated Effort:** 5-7 hours to address the HIGH and MEDIUM priorities

### Pass Criteria Met ✅
- [x] Type safety (basedpyright): 0 errors
- [x] Linting (Ruff): 0 violations remaining
- [x] Security (Bandit): 0 vulnerabilities
- [x] Architecture: Clean hexagonal design
- [x] No critical issues blocking development

### Status: PASS ✅

The NoteFlow backend demonstrates **excellent code quality** with well-architected patterns, strong type safety, and zero critical issues. The complexity violations are isolated to 3 functions and have clear refactoring paths. All mechanical fixes have been applied successfully.

---

**QA Agent:** Code-Quality Agent
**Report Generated:** 2024-12-24
**Next Review:** After complexity refactoring (estimated 1 week)

@@ -1,6 +1,6 @@
# NoteFlow Feature Gap Analysis & Development Roadmap

> Generated: 2025-12-23 | Updated: 2025-12-25
> Focus: Core pipeline completion (transcription → summary → diarization → export)

---

@@ -19,10 +19,23 @@ This document identifies features not yet developed or fully connected between t
| **Export** | Partial | Markdown/HTML working, PDF missing |
| **Integrations** | Stub | UI exists, backend handlers missing |

### Sprint Overview

| Sprint | Name | Phase | Status | Prerequisites |
|--------|------|-------|--------|---------------|
| **0** | Proto & Schema Foundation | Foundation | New | — |
| 1 | AI Templates Pass-Through | Core Pipeline | Planned | Sprint 0 |
| 2 | Diarization Application Service | Core Pipeline | Planned | Sprint 0 |
| 3 | PDF Export | Core Pipeline | Planned | Sprint 0 |
| 4 | Named Entity Extraction | Intelligence | Planned | Sprint 0 |
| 5 | Calendar Sync | Integrations | Planned | Sprint 0 |
| 6 | Webhook Execution | Integrations | Planned | Sprint 0 |

### Feature Gap Summary

| Priority | Feature | Owner | Complexity | Status |
|----------|---------|-------|------------|--------|
| 0 | Proto & Schema Foundation | Backend | Medium | **NEW** - Consolidates proto/DB changes |
| 1 | AI Templates Pass-Through | Both | Low | Not connected |
| 2 | Diarization Application Service | Backend | Medium | Engine exists, service missing |
| 3 | PDF Export | Backend | Low-Medium | Not implemented |

@@ -32,6 +45,32 @@ This document identifies features not yet developed or fully connected between t

---

## Sprint 0: Proto & Schema Foundation (NEW)

> **Priority**: 0 | **Owner**: Backend | **Complexity**: Medium
> **Documentation**: `docs/sprints/phase-0-foundation/sprint-0-proto-schema/README.md`

### Objective

Consolidate all proto and database schema changes required by Sprints 1-6 into a single, atomic foundation sprint. This prevents proto version conflicts and ensures all sprints start from a consistent base.

### Key Components

1. **Consolidated Proto Definitions**: All RPCs and messages for Sprints 1-6
2. **Alembic Migrations**: `named_entities`, `webhook_configs`, `webhook_deliveries` tables
3. **Feature Flags**: Toggle experimental features (`ner_extraction_enabled`, `calendar_sync_enabled`); see the sketch after this list
4. **Docker Integration**: spaCy model downloads, database initialization
5. **Proto Regeneration Script**: Consistent stub generation

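A minimal sketch of the feature flags, assuming the project's existing Pydantic-based configuration; the flag names come from the list above, while the class name and env prefix are illustrative:

```python
# Hypothetical sketch -- flag names from this sprint, wiring assumed.
from pydantic_settings import BaseSettings, SettingsConfigDict


class FeatureFlags(BaseSettings):
    """Toggles for features shipped behind Sprint 0."""

    model_config = SettingsConfigDict(env_prefix="NOTEFLOW_")

    ner_extraction_enabled: bool = False
    calendar_sync_enabled: bool = False


flags = FeatureFlags()  # e.g. NOTEFLOW_NER_EXTRACTION_ENABLED=1 turns on NER
```
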
### Critical Fixes Included

- Resolves proto version conflicts across sprints
- Ensures the database schema exists before feature sprints
- Provides feature flags for gradual rollout
- Documents a proto changelog for sync points

---

## Ownership Guidelines

### Backend (Python gRPC) Responsibilities

@@ -70,7 +109,6 @@ This document identifies features not yet developed or fully connected between t
**Priority**: 1
**Owner**: Both (proto change + frontend wiring)
**Complexity**: Low
**Estimated Effort**: 1-2 days

#### Current State

@@ -222,7 +260,6 @@ async generateSummary(
**Priority**: 2
**Owner**: Backend
**Complexity**: Medium
**Estimated Effort**: 2-3 days

#### Current State

@@ -435,7 +472,6 @@ class DiarizationPort(Protocol):
**Priority**: 3
**Owner**: Backend
**Complexity**: Low-Medium
**Estimated Effort**: 1-2 days

#### Current State

@@ -645,7 +681,6 @@ Note: weasyprint requires system dependencies (cairo, pango). Document in README
**Priority**: 4
**Owner**: Backend
**Complexity**: Medium
**Estimated Effort**: 3-4 days

#### Current State

@@ -1012,7 +1047,6 @@ Post-install: `python -m spacy download en_core_web_sm`
**Priority**: 5
**Owner**: Backend
**Complexity**: Medium-High
**Estimated Effort**: 4-5 days

#### Current State

@@ -1317,7 +1351,6 @@ google-auth-oauthlib = "^1.1"
**Priority**: 6
**Owner**: Backend
**Complexity**: Medium
**Estimated Effort**: 2-3 days

#### Current State

@@ -1580,25 +1613,49 @@ After `GenerateSummary` completes successfully, call `webhook_service.trigger_su
## Implementation Order & Dependencies

```
Sprint 0: Foundation (MUST complete first)
└── Proto & Schema Foundation ───────────────→ All proto + DB migrations + feature flags

Phase 1: Core Pipeline (Parallel, after Sprint 0):
├── Sprint 1: AI Templates ─────────────────┐
├── Sprint 3: PDF Export ───────────────────┤─→ Use proto from Sprint 0
└── Sprint 2: Diarization Service ──────────┘ (DB persistence, application layer)

Phase 2: Intelligence (after Phase 1):
└── Sprint 4: NER ──────────────────────────→ Uses NerService application layer

Phase 3: Integrations (Sequential, after Phase 2):
├── Sprint 5: Calendar Sync ────────────────→ Complete OAuth flow with PKCE
└── Sprint 6: Webhooks ─────────────────────→ HMAC signing, retry logic
```

### Critical Path

1. **Sprint 0** is the **mandatory prerequisite** for all other sprints
2. **All proto/DB changes consolidated** in Sprint 0 - no more scattered migrations
3. **Feature flags** control feature availability before full rollout
4. **Application service layer** required for Sprints 2, 4, 5 (hexagonal architecture)
5. **Sprint 5 OAuth** now includes the complete PKCE flow, token persistence, and refresh

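Sprint 6's HMAC signing (noted in the diagram above) follows the standard webhook pattern: sign the raw request body with the shared secret so receivers can verify authenticity. A minimal sketch; the header name is illustrative:

```python
import hashlib
import hmac


def sign_webhook_payload(secret: str, payload: bytes) -> str:
    """Return the hex SHA-256 HMAC a receiver can recompute and compare."""
    return hmac.new(secret.encode(), payload, hashlib.sha256).hexdigest()


# The delivery worker would attach the signature as a header, e.g.:
# headers = {"X-NoteFlow-Signature": sign_webhook_payload(config.secret, body)}
```
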
### Architectural Decisions (Updated)

| Sprint | Key Improvement |
|--------|-----------------|
| Sprint 0 | Consolidated proto + feature flags + Docker model downloads |
| Sprint 2 | Database persistence via repository (not an in-memory `_jobs` dict) |
| Sprint 4 | `NerService` application layer (gRPC → Service → Engine) |
| Sprint 5 | Complete OAuth with PKCE, token storage, and auto-refresh |

### Quality Gates

Each sprint must pass before merge:
```bash
pytest tests/quality/    # 23+ quality checks
ruff check src/noteflow  # Linting
basedpyright             # Type checking
```

See `docs/sprints/QUALITY_STANDARDS.md` for thresholds and reduction targets.

---

575
docs/sprints/QUALITY_STANDARDS.md
Normal file

@@ -0,0 +1,575 @@
# NoteFlow Quality Standards Reference

> All sprint implementations MUST comply with these standards. Run quality gates before PR.

---

## Quick Reference: Quality Commands

```bash
# Python Backend
pytest tests/quality/    # All quality checks (23+ rules)
ruff check src/noteflow  # Linting
mypy src/noteflow        # Type checking (strict)
basedpyright             # Additional type checks

# TypeScript/React Frontend
cd client
npm run test:quality     # Frontend quality checks
npm run lint             # ESLint

# Rust/Tauri
cd client
npm run quality:rs       # Rust quality script
cargo clippy             # Rust linting

# Full Suite
npm run quality:all      # TS + Rust quality
```

---

## Python Standards (`src/noteflow/`)

### Type Safety (STRICT)

| Rule | Description | Enforcement |
|------|-------------|-------------|
| No `# type: ignore` | Forbidden without justification | mypy strict |
| No `Any` type | Use specific types always | basedpyright |
| Union syntax | Use `str \| None` over `Optional[str]` | ruff UP |
| Return annotations | All public functions must annotate their return type | mypy |

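A function that satisfies all four rows, for illustration (the names are hypothetical):

```python
# Hypothetical example: modern union syntax, no Any, explicit return annotation.
def find_speaker_label(labels: dict[int, str], segment_id: int) -> str | None:
    """Return the speaker label for a segment, if one was assigned."""
    return labels.get(segment_id)
```
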
### Code Limits

| Metric | Soft Limit | Hard Limit | Location |
|--------|------------|------------|----------|
| Module lines | 500 | 750 | `test_code_smells.py` |
| Function lines | 50 (tests), 75 (src) | — | `test_code_smells.py` |
| Function complexity | 15 | — | `test_code_smells.py` |
| Parameters | 7 | — | `test_code_smells.py` |
| Class methods | 20 | — | `test_code_smells.py` |
| Nesting depth | 5 | — | `test_code_smells.py` |

### Test Requirements

**Current thresholds** (to be reduced each sprint):

| Rule | Max Allowed | Target | File |
|------|-------------|--------|------|
| Assertion roulette (>3 assertions without msg) | 25 | 0 | `test_test_smells.py` |
| Conditional test logic | 15 | 0 | `test_test_smells.py` |
| Empty tests | 0 | 0 | `test_test_smells.py` |
| Sleepy tests (time.sleep) | 3 | 0 | `test_test_smells.py` |
| Tests without assertions | 3 | 0 | `test_test_smells.py` |
| Redundant assertions | 0 | 0 | `test_test_smells.py` |
| Print statements in tests | 3 | 0 | `test_test_smells.py` |
| Skipped tests without reason | 0 | 0 | `test_test_smells.py` |
| Exception handling (try/except) | 3 | 0 | `test_test_smells.py` |
| Magic numbers in assertions | 25 | 10 | `test_test_smells.py` |
| Duplicate test names | 5 | 0 | `test_test_smells.py` |
| Long test methods (>50 lines) | 3 | 0 | `test_test_smells.py` |
| unittest-style assertions | 0 | 0 | `test_test_smells.py` |
| Fixtures without type hints | 5 | 0 | `test_test_smells.py` |
| Unused fixture parameters | 3 | 0 | `test_test_smells.py` |
| pytest.raises without match= | 20 | 0 | `test_test_smells.py` |
| Cross-file fixture duplicates | 0 | 0 | `test_test_smells.py` |

**Reduction schedule**:
- After each sprint, reduce non-zero thresholds by 20% (rounded down); e.g. a threshold of 25 steps through 25 → 20 → 16 → 12 → 9 → 7
- Goal: All thresholds at target values by Sprint 6

### Docstring Requirements

- Write imperatively with proper punctuation
- All public functions, classes, and modules documented
- Document complex business rules and edge cases

---

## TypeScript/React Standards (`client/src/`)

### Type Safety

| Rule | Max Allowed | File |
|------|-------------|------|
| `any` type usage | 10 | `code-quality.test.ts` |
| Unsafe type assertions (`as any/unknown/never`) | 5 | `code-quality.test.ts` |
| TypeScript suppressions (@ts-ignore) | 3 | `code-quality.test.ts` |

### Code Quality

| Rule | Max Allowed | Description |
|------|-------------|-------------|
| Repeated string literals | 5 | Same string in multiple files |
| Complex JSX patterns | 10 | Repeated component structures |
| Scattered helper functions | 2 | format/parse/convert scattered |
| TODO/FIXME comments | 15 | Unaddressed tech debt |
| Commented-out code | 10 | Stale code blocks |
| Trivial wrapper components | 3 | Components that just spread props |
| Magic numbers (>3 digits) | 5 | Use named constants |
| Hardcoded colors in JSX | 3 | Use theme/CSS variables |
| Hardcoded API endpoints | 0 | Use config |
| Long files (>500 lines) | 3 | Split into modules |
| Complex inline styles | 5 | Use CSS/Tailwind |
| Deeply nested ternaries | 0 | Use if/switch |
| Excessive prop spreading | 2 | Consider context |

### Naming Conventions

- Components: PascalCase (`RecordingPanel`, not `recordingPanel`)
- Hooks: `use` prefix (`useAudioLevel`)
- Utils: camelCase (`formatDuration`)
- Constants: SCREAMING_SNAKE_CASE (`MAX_RETRIES`)

---

## Rust/Tauri Standards (`client/src-tauri/src/`)

### Code Quality Checks

| Check | Threshold | Description |
|-------|-----------|-------------|
| Magic numbers | Warning | Numbers >100 not in const |
| Repeated strings | >3 occurrences | Extract to constants |
| TODO/FIXME comments | >10 | Address or remove |
| Long functions | >100 lines | Split into helpers |
| Deep nesting | >5 levels (20 spaces) | Flatten control flow |
| unwrap() calls | >20 | Use ? or expect() |
| clone() per file | >10 | Review ownership |
| Parameters | >5 | Use struct/builder |
| Duplicate error messages | >2 | Use error enum |
| File size | >500 lines | Split module |

### Clippy Enforcement

```bash
cargo clippy -- -W unused_imports -W dead_code
```

Must pass with zero warnings for:
- Unused imports
- Dead code
- Missing docs on public items

---

## Pre-Commit Checklist

Before any PR:

```markdown
## Python
- [ ] `pytest tests/quality/` passes
- [ ] `ruff check --fix .` run
- [ ] `mypy src/noteflow` clean
- [ ] No `# type: ignore` without comment
- [ ] Docstrings on all new public functions

## TypeScript/React
- [ ] `npm run test:quality` passes
- [ ] `npm run lint` clean
- [ ] No `any` types added
- [ ] Components use PascalCase

## Rust
- [ ] `npm run quality:rs` passes
- [ ] `cargo clippy` clean
- [ ] No unwrap() in error paths
- [ ] Error types documented
```

---

## Architecture Patterns

### Hexagonal Architecture (Python)

```
domain/          → Entities, value objects, ports (interfaces)
application/     → Use cases, services, orchestration
infrastructure/  → Implementations, adapters, external services
grpc/            → Transport layer, proto definitions
```

### File Organization

| Layer | Pattern | Example |
|-------|---------|---------|
| Domain | `entities/meeting.py`, `ports/repository.py` | Pure business logic |
| Application | `services/meeting_service.py` | Orchestrates domain |
| Infrastructure | `persistence/repositories/meeting_repo.py` | Implements ports |
| gRPC | `_mixins/meeting.py` | Transport handlers |

### Naming Conventions

| Type | Convention | Example |
|------|------------|---------|
| Domain entity | Singular noun | `Meeting`, `Segment` |
| Service | NounService | `MeetingService`, `SummarizationService` |
| Repository | NounRepository | `MeetingRepository` |
| Port | NounPort (Protocol) | `SummarizationPort` |
| Mixin | NounMixin | `StreamingMixin` |
| Factory | create_noun() | `create_summarization_service()` |

---

## Testable Code Patterns

### Protocol-Based Dependency Injection

All services MUST use **constructor injection** with **Protocol-based abstractions** for testability.

**References**:
- [ArjanCodes: Python DI Best Practices](https://arjancodes.com/blog/python-dependency-injection-best-practices/)
- [Real Python: SOLID Principles](https://realpython.com/solid-principles-python/)

### Key Principles

| Principle | Description | Example |
|-----------|-------------|---------|
| Constructor injection | All dependencies passed via `__init__` | `Service(repo: RepositoryPort)` |
| Protocol abstractions | Use `typing.Protocol` for interfaces | `class RepositoryPort(Protocol)` |
| Factory functions | Create configured instances | `create_service() -> Service` |
| No global state | Avoid singletons and module-level state | Use DI instead of `get_instance()` |

### Pattern: Service with Protocol Dependencies

```python
from collections.abc import Callable
from typing import Protocol

import spacy


# 1. Define port (interface) in domain layer
class NerPort(Protocol):
    """Port for NER operations."""

    def extract(self, text: str) -> list[NamedEntity]:
        """Extract named entities from text."""
        ...


# 2. Application service depends on protocol (not concrete impl)
class NerService:
    """Application service for NER operations."""

    def __init__(
        self,
        ner_engine: NerPort,  # Protocol, not SpacyNerEngine
        uow_factory: Callable[[], UnitOfWork],
    ) -> None:
        self._ner_engine = ner_engine
        self._uow_factory = uow_factory

    async def extract_entities(self, meeting_id: MeetingId) -> list[NamedEntity]:
        """Extract entities from meeting transcript."""
        async with self._uow_factory() as uow:
            meeting = await uow.meetings.get(meeting_id)
            return self._ner_engine.extract(meeting.transcript)


# 3. Infrastructure implements the protocol
class SpacyNerEngine:
    """spaCy implementation of NerPort."""

    def __init__(self, model_name: str = "en_core_web_sm") -> None:
        self._nlp = spacy.load(model_name)

    def extract(self, text: str) -> list[NamedEntity]:
        """Extract entities using spaCy."""
        doc = self._nlp(text)
        return [NamedEntity.from_spacy(ent) for ent in doc.ents]


# 4. Factory function wires dependencies
def create_ner_service(
    model_name: str = "en_core_web_sm",
    uow_factory: Callable[[], UnitOfWork] | None = None,
) -> NerService:
    """Create NER service with dependencies."""
    engine = SpacyNerEngine(model_name)
    factory = uow_factory or SQLAlchemyUnitOfWork
    return NerService(engine, factory)
```

### Testing with Mock Protocols

```python
from collections.abc import Callable
from unittest.mock import MagicMock
from uuid import uuid4

import pytest


@pytest.fixture
def mock_ner_engine() -> MagicMock:
    """Create mock NER engine implementing NerPort."""
    engine = MagicMock(spec=NerPort)
    engine.extract.return_value = [
        NamedEntity.create("Test Person", EntityCategory.PERSON, [1], 0.9),
    ]
    return engine


@pytest.fixture
def ner_service(mock_ner_engine: MagicMock, mock_uow_factory: Callable) -> NerService:
    """Create NER service with mock dependencies."""
    return NerService(mock_ner_engine, mock_uow_factory)


@pytest.mark.asyncio  # the test awaits the service; assumes pytest-asyncio
async def test_extract_entities_calls_engine(
    ner_service: NerService,
    mock_ner_engine: MagicMock,
) -> None:
    """Extraction delegates to NER engine."""
    await ner_service.extract_entities(MeetingId(uuid4()))

    mock_ner_engine.extract.assert_called_once()
```

### Anti-Patterns (AVOID)

```python
# ❌ WRONG: Direct instantiation in service
class BadService:
    def __init__(self) -> None:
        self._engine = SpacyNerEngine()  # Untestable!


# ❌ WRONG: Module-level singleton
_engine = SpacyNerEngine()  # Global state!

def get_engine() -> SpacyNerEngine:
    return _engine


# ❌ WRONG: Concrete type dependency
class AlsoBadService:
    def __init__(self, engine: SpacyNerEngine) -> None:  # Concrete, not Protocol!
        self._engine = engine
```

### Modern Library Recommendations

| Category | Library | Rationale |
|----------|---------|-----------|
| OAuth 2.0 | **Authlib** | Built-in PKCE, async support, handles edge cases |
| HTTP Client | **httpx** | Modern async, compatible with Authlib |
| NER | **spaCy** or **GLiNER** | spaCy for production, GLiNER for zero-shot |
| Validation | **Pydantic** | Already used in project |
| Testing | **pytest** | With `pytest.mark.parametrize` |

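For the OAuth row, a sketch of how Authlib's httpx integration handles PKCE (the endpoints shown are Google's; the client ID, redirect URI, and scope are placeholders, and the exact wiring in NoteFlow may differ):

```python
from authlib.common.security import generate_token
from authlib.integrations.httpx_client import AsyncOAuth2Client


async def start_google_auth() -> tuple[str, str, str]:
    """Build a PKCE authorization URL; return (url, state, code_verifier)."""
    code_verifier = generate_token(48)
    client = AsyncOAuth2Client(
        client_id="YOUR_CLIENT_ID",
        redirect_uri="http://localhost:8765/callback",
        scope="https://www.googleapis.com/auth/calendar.readonly",
        code_challenge_method="S256",
    )
    # Authlib derives the S256 code challenge from the verifier.
    url, state = client.create_authorization_url(
        "https://accounts.google.com/o/oauth2/v2/auth",
        code_verifier=code_verifier,
    )
    return url, state, code_verifier
```
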
---

## Test Patterns

### CRITICAL: No Conditionals in Tests

**FORBIDDEN in test code:**
- `if`/`else` statements with assertions
- `for` loops with assertions
- `while` loops
- Conditional logic that determines test behavior

**USE INSTEAD: `pytest.mark.parametrize`**

```python
# ❌ WRONG: Conditional test logic
def test_entity_extraction(engine: NerEngine) -> None:
    for text, expected in test_cases:  # FORBIDDEN
        entities = engine.extract(text)
        if expected:  # FORBIDDEN
            assert entities


# ✅ CORRECT: Parametrized tests
@pytest.mark.parametrize(
    ("text", "expected_category"),
    [
        pytest.param("John Smith", EntityCategory.PERSON, id="person"),
        pytest.param("Google", EntityCategory.COMPANY, id="company"),
        pytest.param("New York", EntityCategory.LOCATION, id="location"),
    ],
)
def test_entity_extraction(
    engine: NerEngine,
    text: str,
    expected_category: EntityCategory,
) -> None:
    """Extract entity of expected category."""
    entities = engine.extract(text)
    matching = [e for e in entities if e.category == expected_category]
    assert matching, f"Expected {expected_category.value} in: {text}"
```

### Parametrization Best Practices

```python
# Use pytest.param with descriptive IDs
@pytest.mark.parametrize(
    ("input_value", "expected_output"),
    [
        pytest.param("", [], id="empty-input"),
        pytest.param("hello", ["hello"], id="single-word"),
        pytest.param("a b c", ["a", "b", "c"], id="multiple-words"),
    ],
)
def test_tokenize(input_value: str, expected_output: list[str]) -> None:
    """Tokenize input produces expected tokens."""
    assert tokenize(input_value) == expected_output


# Class-based organization for related tests
class TestAuthorizationUrl:
    """Test authorization URL generation."""

    @pytest.mark.parametrize(
        ("provider", "expected_host"),
        [
            pytest.param(OAuthProvider.GOOGLE, "accounts.google.com", id="google"),
            pytest.param(OAuthProvider.MICROSOFT, "login.microsoftonline.com", id="microsoft"),
        ],
    )
    def test_generates_valid_url(self, provider: OAuthProvider, expected_host: str) -> None:
        """Generate URL for each provider."""
        url = generate_auth_url(provider)
        assert expected_host in url
```

### Fixture Scoping for Performance

```python
# Module-scoped for expensive operations (model loading, DB setup)
@pytest.fixture(scope="module")
def ner_engine() -> SpacyNerEngine:
    """Load spaCy model once per test module."""
    return SpacyNerEngine("en_core_web_sm")


# Function-scoped for mutable state
@pytest.fixture
def mock_uow() -> AsyncMock:
    """Fresh mock for each test."""
    return AsyncMock(spec=UnitOfWork)
```

### Required Test Elements

1. **Type hints** on fixtures and test functions
2. **Docstring** explaining what's being tested
3. **AAA pattern** (Arrange/Act/Assert) with comments
4. **Specific assertions** with messages for complex checks
5. **pytest.raises with match=** for exception tests
6. **`pytest.param` with IDs** for parametrized tests
7. **No conditionals or loops** around assertions (all seven elements are illustrated in the sketch below)

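A compact test pair that exercises these elements (`parse_duration` is a hypothetical helper; arrange happens via the parametrized inputs):

```python
import pytest


@pytest.mark.parametrize(
    ("raw", "expected_seconds"),
    [
        pytest.param("1:30", 90, id="minutes-and-seconds"),
        pytest.param("0:05", 5, id="leading-zero"),
    ],
)
def test_parse_duration(raw: str, expected_seconds: int) -> None:
    """Parse mm:ss strings into whole seconds."""
    # Act
    result = parse_duration(raw)
    # Assert
    assert result == expected_seconds, f"{raw!r} should parse to {expected_seconds}s"


def test_parse_duration_rejects_garbage() -> None:
    """Reject non-numeric input with a clear error."""
    with pytest.raises(ValueError, match="invalid duration"):
        parse_duration("not-a-duration")
```
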
---

## Code Reuse Checklist

Before creating new code, check:

| Location | Contains |
|----------|----------|
| `domain/entities/` | Existing entity types |
| `domain/ports/` | Existing port interfaces |
| `infrastructure/converters/` | Entity ↔ ORM converters |
| `grpc/_mixins/converters.py` | Proto ↔ Domain converters |
| `infrastructure/*/protocols.py` | Infrastructure interfaces |
| `application/services/` | Existing service patterns |

### Shared Utilities

| File | Functions |
|------|-----------|
| `infrastructure/export/_formatting.py` | `format_timestamp()`, `format_datetime()` |
| `infrastructure/security/keystore.py` | `_generate_key()`, `_decode_and_validate_key()` |
| `infrastructure/summarization/_parsing.py` | `build_transcript_prompt()`, `parse_llm_response()` |
| `infrastructure/diarization/assigner.py` | `assign_speaker()`, `assign_speakers_batch()` |

---

## Documentation Requirements

Every new feature must include:

1. **Unit tests** covering core logic
2. **Integration tests** for end-to-end flow (where applicable)
3. **Docstrings** on all public APIs
4. **CLAUDE.md updates** if architectural patterns change
5. **Proto changes** documented in the commit message

---

## Shared Test Fixtures

### Available Fixtures (`tests/conftest.py`)

**DO NOT redefine these fixtures in test files**. Use them from conftest.py.

| Fixture | Type | Scope | Description |
|---------|------|-------|-------------|
| `crypto` | `CryptoService` | function | Encryption service for test data |
| `meetings_dir` | `Path` | function | Temporary directory for meeting assets |
| `mock_uow` | `AsyncMock` | function | Mock Unit of Work with all repositories |
| `mock_uow_factory` | `type` | function | Factory that returns mock_uow |
| `temp_db` | `Engine` | session | Temporary SQLite database |
| `async_session` | `AsyncSession` | function | Async SQLAlchemy session |
| `grpc_server` | `NoteFlowServicer` | function | Test gRPC server instance |
| `grpc_client` | `NoteFlowClient` | function | Test gRPC client |
| `sample_meeting` | `Meeting` | function | Pre-populated meeting entity |
| `sample_segments` | `list[Segment]` | function | Sample transcript segments |
| `mock_ner_engine` | `MagicMock` | function | Mock NER engine |
| `mock_oauth_manager` | `MagicMock` | function | Mock OAuth manager |
| `mock_calendar_settings` | `CalendarSettings` | function | Calendar settings with test OAuth creds |

### Usage Pattern

```python
# CORRECT: Use shared fixtures
def test_meeting_creation(mock_uow: AsyncMock, sample_meeting: Meeting) -> None:
    """Create meeting uses repository correctly."""
    mock_uow.meetings.save.return_value = None
    # ... test logic


# INCORRECT: Do not redefine fixtures
@pytest.fixture
def mock_uow():  # DON'T DO THIS - use the conftest.py fixture
    return AsyncMock()
```

### Cross-File Fixture Detection

The `test_test_smells.py` quality check detects when fixtures are redefined:

```python
# Fails quality check - fixture "mock_uow" already in conftest.py
@pytest.fixture
def mock_uow():
    ...
```

Move new shared fixtures to `tests/conftest.py` to avoid duplication.

### Adding New Shared Fixtures

When adding a fixture that could be reused:

1. Check if a similar fixture exists in `tests/conftest.py`
2. If not, add it to `tests/conftest.py` with:
   - Type annotation on the return
   - Docstring explaining the fixture
   - Appropriate scope (`function`, `class`, `module`, `session`)

```python
@pytest.fixture
def new_shared_fixture() -> SomeType:
    """Provide X for Y tests.

    Returns:
        Configured SomeType instance.
    """
    return SomeType(...)
```

---

## Sprint-Specific Quality Requirements

Each sprint must:

1. **Not increase** any quality threshold violations
2. **Reduce** at least one threshold toward target
3. **Add fixtures** to conftest.py (not test files)
4. **Run quality suite** before PR:
   ```bash
   pytest tests/quality/ -v
   ```
5. **Document** any threshold exceptions in the PR description

989
docs/sprints/phase-0-foundation/sprint-0-proto-schema/README.md
Normal file

@@ -0,0 +1,989 @@
# Sprint 0: Proto & Schema Foundation

> **Priority**: 0 | **Owner**: Backend | **Complexity**: Medium | **Prerequisite for all other sprints**

---

## Objective

Consolidate all protobuf schema changes and database migrations required by Sprints 1-6 into a single coordinated release. This prevents proto conflicts, ensures backward compatibility, and establishes the persistence foundation for all features.

---

## Rationale

Multiple sprints modify shared infrastructure:

| Sprint | Proto Changes | DB Changes |
|--------|---------------|------------|
| 1 (AI Templates) | `SummarizationOptions` message | None |
| 3 (PDF Export) | `EXPORT_FORMAT_PDF` enum | None |
| 4 (NER) | `ExtractEntities` RPC + messages | `named_entities` table |
| 5 (Calendar) | `ListCalendarEvents` RPC + messages | Uses existing tables |
| 6 (Webhooks) | None | `webhook_configs`, `webhook_deliveries` tables |

Without coordination:
- Proto regeneration conflicts between parallel sprints
- Migration ordering issues
- Client/server version mismatches

---

## Phased Implementation

Sprint 0 is split into four sub-increments to enable independent verification and reduce blast radius:

| Increment | Scope | Verification Gate |
|-----------|-------|-------------------|
| **0a** | Proto schema + stub regeneration | `python -c "from noteflow.grpc.proto import noteflow_pb2"` |
| **0b** | Database schema (schema.sql) | `psql -f docker/db/schema.sql` on fresh DB |
| **0c** | Alembic migrations | `alembic upgrade head && alembic downgrade -1` |
| **0d** | Dependencies + Docker + Feature flags | `pip install -e ".[all]" && pytest tests/` |

### Increment 0a: Proto Schema

**Files**: `noteflow.proto`, `*_pb2.py`, `*_pb2_grpc.py`, `*_pb2.pyi`

**Tasks**: Task 1, Task 7, Task 9

**Done when**:
- [ ] Proto compiles without errors
- [ ] Python stubs import cleanly
- [ ] Rust/TS stubs generate via `client/build.rs`
- [ ] PROTO_CHANGELOG.md committed

### Increment 0b: Database Schema

**Files**: `docker/db/schema.sql`

**Tasks**: Task 2

**Done when**:
- [ ] Schema applies to fresh PostgreSQL
- [ ] All tables have proper indexes
- [ ] Foreign key constraints validated
- [ ] Triggers for `updated_at` in place

### Increment 0c: Alembic Migrations

**Files**: `migrations/versions/001_*.py`, `migrations/versions/002_*.py`

**Tasks**: Task 3

**Done when**:
- [ ] Migrations apply to an existing database
- [ ] Downgrade path works for each migration
- [ ] Schema matches schema.sql output

### Increment 0d: Dependencies and Docker

**Files**: `pyproject.toml`, `Dockerfile`, `docker-compose.yml`, `settings.py`, `cli/models.py`

**Tasks**: Task 4, Task 5, Task 6, Task 8

**Done when**:
- [ ] All optional dependencies install
- [ ] Feature flags control availability
- [ ] Model download CLI works
- [ ] Docker build completes with NER support

---

## Target/Affected Code

### Files to Modify

| File | Change Type |
|------|-------------|
| `src/noteflow/grpc/proto/noteflow.proto` | All proto additions |
| `src/noteflow/grpc/proto/noteflow_pb2.py` | Regenerated |
| `src/noteflow/grpc/proto/noteflow_pb2_grpc.py` | Regenerated |
| `src/noteflow/grpc/proto/noteflow_pb2.pyi` | Regenerated |
| `docker/db/schema.sql` | All table additions |
| `pyproject.toml` | All new dependencies |
| `client/src-tauri/build.rs` | Proto path verification |

### Files to Create

| File | Purpose |
|------|---------|
| `src/noteflow/infrastructure/persistence/migrations/versions/001_add_named_entities.py` | NER tables |
| `src/noteflow/infrastructure/persistence/migrations/versions/002_add_webhooks.py` | Webhook tables |
| `docs/sprints/phase-0-foundation/PROTO_CHANGELOG.md` | Proto version history |

---

## Implementation Tasks

### Task 1: Proto Schema Consolidation

**File**: `src/noteflow/grpc/proto/noteflow.proto`

Add all new messages and RPCs in a single commit:

```protobuf
// =============================================================================
// Sprint 0: Consolidated Proto Changes
// Version: 2.0.0
// Date: 2025-XX-XX
// =============================================================================

// -----------------------------------------------------------------------------
// Sprint 1: AI Templates
// -----------------------------------------------------------------------------

// Summarization style options passed from frontend settings
message SummarizationOptions {
  // Tone: professional, casual, technical, friendly
  string tone = 1;

  // Format: bullet_points, narrative, structured, concise
  string format = 2;

  // Verbosity: minimal, balanced, detailed, comprehensive
  string verbosity = 3;
}

// Modify existing GenerateSummaryRequest (add field 3)
// message GenerateSummaryRequest {
//   string meeting_id = 1;
//   bool force_regenerate = 2;
//   SummarizationOptions options = 3;  // NEW
// }

// -----------------------------------------------------------------------------
// Sprint 3: PDF Export
// -----------------------------------------------------------------------------

// Add to existing ExportFormat enum
// enum ExportFormat {
//   EXPORT_FORMAT_UNSPECIFIED = 0;
//   EXPORT_FORMAT_MARKDOWN = 1;
//   EXPORT_FORMAT_HTML = 2;
//   EXPORT_FORMAT_PDF = 3;  // NEW
// }

// -----------------------------------------------------------------------------
// Sprint 4: Named Entity Extraction
// -----------------------------------------------------------------------------

// Add to service definition
// rpc ExtractEntities(ExtractEntitiesRequest) returns (ExtractEntitiesResponse);

message ExtractEntitiesRequest {
  string meeting_id = 1;
  bool force_refresh = 2;  // Re-extract even if entities exist
}

message ExtractedEntity {
  string id = 1;
  string text = 2;
  // Category: person, company, product, technical, acronym, location, date, other
  string category = 3;
  repeated int32 segment_ids = 4;
  float confidence = 5;
  bool is_pinned = 6;  // User-confirmed
}

message ExtractEntitiesResponse {
  repeated ExtractedEntity entities = 1;
  int32 total_count = 2;
  bool cached = 3;  // True if returning cached results
}

// -----------------------------------------------------------------------------
// Sprint 5: Calendar Sync
// -----------------------------------------------------------------------------

// Add to service definition
// rpc ListCalendarEvents(ListCalendarEventsRequest) returns (ListCalendarEventsResponse);
// rpc GetCalendarProviders(GetCalendarProvidersRequest) returns (GetCalendarProvidersResponse);
// rpc InitiateCalendarAuth(InitiateCalendarAuthRequest) returns (InitiateCalendarAuthResponse);
// rpc CompleteCalendarAuth(CompleteCalendarAuthRequest) returns (CompleteCalendarAuthResponse);

message CalendarEvent {
  string id = 1;
  string title = 2;
  int64 start_time = 3;  // Unix timestamp (seconds)
  int64 end_time = 4;    // Unix timestamp (seconds)
  repeated string attendees = 5;
  string location = 6;
  string description = 7;
  string meeting_url = 8;
  bool is_recurring = 9;
  string provider = 10;  // google, outlook
}

message ListCalendarEventsRequest {
  int32 hours_ahead = 1;  // How far ahead to look (default: 24)
  int32 limit = 2;        // Max events to return (default: 10)
  string provider = 3;    // Optional: specific provider name
}

message ListCalendarEventsResponse {
  repeated CalendarEvent events = 1;
  int32 total_count = 2;
}

message GetCalendarProvidersRequest {}

message CalendarProvider {
  string name = 1;
  bool is_authenticated = 2;
  string display_name = 3;  // "Google Calendar", "Microsoft Outlook"
}

message GetCalendarProvidersResponse {
  repeated CalendarProvider providers = 1;
}

// OAuth flow messages
message InitiateCalendarAuthRequest {
  string provider = 1;      // google, outlook
  string redirect_uri = 2;  // Where to redirect after auth
}

message InitiateCalendarAuthResponse {
  string auth_url = 1;  // URL to redirect user to
  string state = 2;     // CSRF token to verify callback
}

message CompleteCalendarAuthRequest {
  string provider = 1;
  string code = 2;   // Authorization code from OAuth callback
  string state = 3;  // CSRF token for verification
}

message CompleteCalendarAuthResponse {
  bool success = 1;
  string error_message = 2;
  string provider_email = 3;  // Email of authenticated account
}
```

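Once the stubs are regenerated, the new messages are ordinary generated classes; a quick sketch of constructing them from Python (field names from Task 1; the meeting ID value is a placeholder):

```python
from noteflow.grpc.proto import noteflow_pb2

options = noteflow_pb2.SummarizationOptions(
    tone="professional",
    format="bullet_points",
    verbosity="balanced",
)
request = noteflow_pb2.GenerateSummaryRequest(
    meeting_id="00000000-0000-0000-0000-000000000000",
    force_regenerate=False,
    options=options,  # new field 3 from this sprint
)
```
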
---

### Task 2: Database Schema Additions

**File**: `docker/db/schema.sql`

Add after existing tables (preserve insertion order for foreign keys):

```sql
--------------------------------------------------------------------------------
-- Sprint 4: Named Entities
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.named_entities (
    id              uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    meeting_id      uuid NOT NULL REFERENCES noteflow.meetings(id) ON DELETE CASCADE,
    text            text NOT NULL,
    normalized_text text NOT NULL,  -- Lowercase, trimmed for deduplication
    category        varchar(50) NOT NULL,  -- person, company, product, location, etc.
    segment_ids     integer[] NOT NULL DEFAULT '{}'::integer[],
    confidence      double precision NOT NULL DEFAULT 0.0,
    is_pinned       boolean NOT NULL DEFAULT false,
    created_at      timestamptz NOT NULL DEFAULT now(),
    updated_at      timestamptz NOT NULL DEFAULT now(),

    -- Unique constraint for deduplication within a meeting
    CONSTRAINT uq_named_entities_meeting_text UNIQUE (meeting_id, normalized_text)
);

CREATE TRIGGER trg_named_entities_updated_at
    BEFORE UPDATE ON noteflow.named_entities
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE INDEX IF NOT EXISTS idx_named_entities_meeting_id
    ON noteflow.named_entities(meeting_id);

CREATE INDEX IF NOT EXISTS idx_named_entities_category
    ON noteflow.named_entities(category);

--------------------------------------------------------------------------------
-- Sprint 6: Webhooks
--------------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS noteflow.webhook_configs (
    id           uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    workspace_id uuid NOT NULL REFERENCES noteflow.workspaces(id) ON DELETE CASCADE,
    name         varchar(255) NOT NULL DEFAULT 'Webhook',
    url          text NOT NULL,
    events       text[] NOT NULL DEFAULT '{}'::text[],
    secret       text NULL,  -- HMAC signing secret
    enabled      boolean NOT NULL DEFAULT true,
    timeout_ms   integer NOT NULL DEFAULT 10000,
    max_retries  integer NOT NULL DEFAULT 3,
    created_at   timestamptz NOT NULL DEFAULT now(),
    updated_at   timestamptz NOT NULL DEFAULT now(),

    -- Validate URL format
    CONSTRAINT chk_webhook_url_format CHECK (url ~ '^https?://')
);

CREATE TRIGGER trg_webhook_configs_updated_at
    BEFORE UPDATE ON noteflow.webhook_configs
    FOR EACH ROW EXECUTE FUNCTION noteflow.set_updated_at();

CREATE INDEX IF NOT EXISTS idx_webhook_configs_workspace_id
    ON noteflow.webhook_configs(workspace_id);

CREATE TABLE IF NOT EXISTS noteflow.webhook_deliveries (
    id            uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    webhook_id    uuid NOT NULL REFERENCES noteflow.webhook_configs(id) ON DELETE CASCADE,
    event_type    text NOT NULL,
    payload       jsonb NOT NULL DEFAULT '{}'::jsonb,
    status_code   integer NULL,
    response_body text NULL,  -- First 1KB of response for debugging
    error_message text NULL,
    attempt_count integer NOT NULL DEFAULT 1,
    duration_ms   integer NULL,  -- Request duration for monitoring
    delivered_at  timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_webhook_id
    ON noteflow.webhook_deliveries(webhook_id, delivered_at DESC);

CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_event_type
    ON noteflow.webhook_deliveries(event_type, delivered_at DESC);

-- Partition by month for large deployments (optional)
-- CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_delivered_at
--     ON noteflow.webhook_deliveries(delivered_at);
```

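The `uq_named_entities_meeting_text` constraint is what makes re-extraction idempotent: extraction can upsert instead of inserting duplicates. A sketch using SQLAlchemy's PostgreSQL dialect (`NamedEntityModel` is an assumed ORM model name):

```python
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.asyncio import AsyncSession


async def upsert_entity(session: AsyncSession, values: dict) -> None:
    """Insert an entity; on a dedup conflict, refresh the mutable fields."""
    stmt = insert(NamedEntityModel).values(**values)
    stmt = stmt.on_conflict_do_update(
        constraint="uq_named_entities_meeting_text",
        set_={
            "segment_ids": stmt.excluded.segment_ids,
            "confidence": stmt.excluded.confidence,
        },
    )
    await session.execute(stmt)
```
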

---

### Task 3: Alembic Migrations

**File**: `src/noteflow/infrastructure/persistence/migrations/versions/001_add_named_entities.py`

```python
"""Add named_entities table.

Revision ID: 001_named_entities
Revises: <previous_revision>
Create Date: 2025-XX-XX
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

revision = "001_named_entities"
down_revision = "<previous_revision>"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create named_entities table."""
    op.create_table(
        "named_entities",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "meeting_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("text", sa.Text(), nullable=False),
        sa.Column("normalized_text", sa.Text(), nullable=False),
        sa.Column("category", sa.String(50), nullable=False),
        sa.Column(
            "segment_ids",
            postgresql.ARRAY(sa.Integer()),
            nullable=False,
            server_default="{}",
        ),
        sa.Column(
            "confidence",
            sa.Float(),
            nullable=False,
            server_default="0.0",
        ),
        sa.Column(
            "is_pinned",
            sa.Boolean(),
            nullable=False,
            server_default="false",
        ),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.UniqueConstraint(
            "meeting_id",
            "normalized_text",
            name="uq_named_entities_meeting_text",
        ),
        schema="noteflow",
    )

    op.create_index(
        "idx_named_entities_meeting_id",
        "named_entities",
        ["meeting_id"],
        schema="noteflow",
    )

    op.create_index(
        "idx_named_entities_category",
        "named_entities",
        ["category"],
        schema="noteflow",
    )


def downgrade() -> None:
    """Drop named_entities table."""
    op.drop_index("idx_named_entities_category", schema="noteflow")
    op.drop_index("idx_named_entities_meeting_id", schema="noteflow")
    op.drop_table("named_entities", schema="noteflow")
```

**File**: `src/noteflow/infrastructure/persistence/migrations/versions/002_add_webhooks.py`

```python
"""Add webhook tables.

Revision ID: 002_webhooks
Revises: 001_named_entities
Create Date: 2025-XX-XX
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

revision = "002_webhooks"
down_revision = "001_named_entities"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create webhook tables."""
    # webhook_configs
    op.create_table(
        "webhook_configs",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "workspace_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("name", sa.String(255), nullable=False, server_default="Webhook"),
        sa.Column("url", sa.Text(), nullable=False),
        sa.Column(
            "events",
            postgresql.ARRAY(sa.Text()),
            nullable=False,
            server_default="{}",
        ),
        sa.Column("secret", sa.Text(), nullable=True),
        sa.Column("enabled", sa.Boolean(), nullable=False, server_default="true"),
        sa.Column("timeout_ms", sa.Integer(), nullable=False, server_default="10000"),
        sa.Column("max_retries", sa.Integer(), nullable=False, server_default="3"),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.CheckConstraint("url ~ '^https?://'", name="chk_webhook_url_format"),
        schema="noteflow",
    )

    op.create_index(
        "idx_webhook_configs_workspace_id",
        "webhook_configs",
        ["workspace_id"],
        schema="noteflow",
    )

    # webhook_deliveries
    op.create_table(
        "webhook_deliveries",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "webhook_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("noteflow.webhook_configs.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("event_type", sa.Text(), nullable=False),
        sa.Column(
            "payload",
            postgresql.JSONB(),
            nullable=False,
            server_default="{}",
        ),
        sa.Column("status_code", sa.Integer(), nullable=True),
        sa.Column("response_body", sa.Text(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column("attempt_count", sa.Integer(), nullable=False, server_default="1"),
        sa.Column("duration_ms", sa.Integer(), nullable=True),
        sa.Column(
            "delivered_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        schema="noteflow",
    )

    op.create_index(
        "idx_webhook_deliveries_webhook_id",
        "webhook_deliveries",
        ["webhook_id", "delivered_at"],
        schema="noteflow",
    )

    op.create_index(
        "idx_webhook_deliveries_event_type",
        "webhook_deliveries",
        ["event_type", "delivered_at"],
        schema="noteflow",
    )


def downgrade() -> None:
    """Drop webhook tables."""
    op.drop_index("idx_webhook_deliveries_event_type", schema="noteflow")
    op.drop_index("idx_webhook_deliveries_webhook_id", schema="noteflow")
    op.drop_table("webhook_deliveries", schema="noteflow")

    op.drop_index("idx_webhook_configs_workspace_id", schema="noteflow")
    op.drop_table("webhook_configs", schema="noteflow")
```

---

### Task 4: Dependency Consolidation

**File**: `pyproject.toml`

Add all new dependencies in a single update:

```toml
[project]
dependencies = [
    # ... existing dependencies ...

    # Sprint 0: Consolidated new dependencies
    "httpx>=0.27",  # HTTP client (webhooks, future integrations)
]

[project.optional-dependencies]
# PDF Export (Sprint 3)
pdf = [
    "weasyprint>=62.0",
]

# Named Entity Recognition (Sprint 4)
ner = [
    "spacy>=3.7",
]

# Calendar Integration (Sprint 5)
calendar = [
    "google-api-python-client>=2.100",
    "google-auth>=2.23",
    "google-auth-oauthlib>=1.1",
    # Outlook support (future)
    # "msal>=1.24",
]

# All optional features
all = [
    "noteflow[pdf,ner,calendar]",
]

[project.scripts]
# Model download helper
noteflow-download-models = "noteflow.cli.models:download_all"
```

---

### Task 5: Model Download CLI

**File**: `src/noteflow/cli/models.py`

```python
"""CLI for downloading ML models."""

from __future__ import annotations

import subprocess
import sys


def download_spacy_model(model: str = "en_core_web_sm") -> None:
    """Download spaCy model.

    Args:
        model: Model name to download.
    """
    print(f"Downloading spaCy model: {model}")
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", model],
        check=True,
    )
    print(f"Successfully downloaded: {model}")


def download_all() -> None:
    """Download all required ML models."""
    print("Downloading all NoteFlow ML models...")

    try:
        download_spacy_model("en_core_web_sm")
    except subprocess.CalledProcessError as e:
        print(f"Failed to download spaCy model: {e}")
        sys.exit(1)

    print("\nAll models downloaded successfully!")
    print("You can now use NER features.")


if __name__ == "__main__":
    download_all()
```

---

### Task 6: Docker Integration

**File**: `Dockerfile` (additions)

```dockerfile
# Stage: Download ML models (optional, for NER support)
FROM python:3.12-slim AS models

# Install spaCy and download the model (quotes keep the shell
# from treating ">=" as an output redirect)
RUN pip install "spacy>=3.7" && \
    python -m spacy download en_core_web_sm

# Stage: Runtime with models
FROM noteflow-base AS runtime-with-ner

# Copy spaCy model from models stage
COPY --from=models /usr/local/lib/python3.12/site-packages/en_core_web_sm \
    /usr/local/lib/python3.12/site-packages/en_core_web_sm

# Verify model is available
RUN python -c "import spacy; spacy.load('en_core_web_sm')"
```

**File**: `docker-compose.yml` (additions)

```yaml
services:
  noteflow:
    build:
      context: .
      target: runtime-with-ner  # Use runtime-with-ner for NER support
    environment:
      # Feature flags
      NOTEFLOW_FEATURE_NER_ENABLED: "true"
      NOTEFLOW_FEATURE_CALENDAR_ENABLED: "true"
      NOTEFLOW_FEATURE_WEBHOOKS_ENABLED: "true"
```

---

### Task 7: Proto Regeneration Script

**File**: `scripts/regenerate_proto.sh`

```bash
#!/usr/bin/env bash
set -euo pipefail

PROTO_DIR="src/noteflow/grpc/proto"
PROTO_FILE="$PROTO_DIR/noteflow.proto"

echo "Regenerating protobuf stubs..."

python -m grpc_tools.protoc \
    -I "$PROTO_DIR" \
    --python_out="$PROTO_DIR" \
    --grpc_python_out="$PROTO_DIR" \
    --pyi_out="$PROTO_DIR" \
    "$PROTO_FILE"

echo "Fixing imports for Python 3.12+ compatibility..."
# Fix relative imports in generated files (BSD sed first, GNU sed fallback)
sed -i '' 's/^import noteflow_pb2/from . import noteflow_pb2/' "$PROTO_DIR/noteflow_pb2_grpc.py" 2>/dev/null || \
    sed -i 's/^import noteflow_pb2/from . import noteflow_pb2/' "$PROTO_DIR/noteflow_pb2_grpc.py"

echo "Proto stubs regenerated successfully!"
echo ""
echo "Files updated:"
echo "  - $PROTO_DIR/noteflow_pb2.py"
echo "  - $PROTO_DIR/noteflow_pb2_grpc.py"
echo "  - $PROTO_DIR/noteflow_pb2.pyi"
echo ""
echo "Next steps:"
echo "  1. Run 'cd client && npm run build:proto' to update Rust/TS stubs"
echo "  2. Run tests: pytest tests/grpc/"
echo "  3. Commit all generated files together"
```

---

### Task 8: Feature Flags

**File**: `src/noteflow/config/settings.py` (additions)

```python
from functools import lru_cache

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class FeatureFlags(BaseSettings):
    """Feature flag settings for gradual rollout."""

    model_config = SettingsConfigDict(env_prefix="NOTEFLOW_FEATURE_")

    # Sprint 1: AI Templates
    templates_enabled: bool = Field(
        default=True,
        description="Enable summarization template options",
    )

    # Sprint 3: PDF Export
    pdf_export_enabled: bool = Field(
        default=True,
        description="Enable PDF export format",
    )

    # Sprint 4: NER
    ner_enabled: bool = Field(
        default=False,  # Disabled by default (requires model download)
        description="Enable named entity extraction",
    )

    # Sprint 5: Calendar
    calendar_enabled: bool = Field(
        default=False,  # Disabled by default (requires OAuth setup)
        description="Enable calendar integration",
    )

    # Sprint 6: Webhooks
    webhooks_enabled: bool = Field(
        default=True,
        description="Enable webhook notifications",
    )


class Settings(BaseSettings):
    """Main application settings."""

    # ... existing fields ...

    features: FeatureFlags = Field(default_factory=FeatureFlags)


@lru_cache
def get_feature_flags() -> FeatureFlags:
    """Get cached feature flags."""
    return get_settings().features
```
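
Call sites can then gate behavior on a single cached lookup. A minimal sketch of gating an RPC, assuming the `get_feature_flags()` accessor above; the use of `FAILED_PRECONDITION` is a suggestion, not an established convention in the codebase:

```python
import grpc

from noteflow.config.settings import get_feature_flags


# Inside the NER servicer mixin (method body is illustrative)
async def ExtractEntities(self, request, context):
    """Reject NER requests when the feature flag is off."""
    if not get_feature_flags().ner_enabled:
        await context.abort(
            grpc.StatusCode.FAILED_PRECONDITION,
            "NER is disabled (set NOTEFLOW_FEATURE_NER_ENABLED=true)",
        )
    # ... normal extraction path ...
```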

---

### Task 9: Proto Changelog

**File**: `docs/sprints/phase-0-foundation/PROTO_CHANGELOG.md`

```markdown
# Proto Changelog

All notable changes to `noteflow.proto` are documented here.

## [2.0.0] - 2025-XX-XX

### Added

#### Messages
- `SummarizationOptions` - AI template preferences (tone, format, verbosity)
- `ExtractEntitiesRequest` / `ExtractEntitiesResponse` - NER extraction
- `ExtractedEntity` - Named entity with category, segments, confidence
- `CalendarEvent` - Calendar event representation
- `ListCalendarEventsRequest` / `ListCalendarEventsResponse` - Calendar listing
- `CalendarProvider` - Provider info with auth status
- `GetCalendarProvidersRequest` / `GetCalendarProvidersResponse` - Provider listing
- `InitiateCalendarAuthRequest` / `InitiateCalendarAuthResponse` - OAuth initiation
- `CompleteCalendarAuthRequest` / `CompleteCalendarAuthResponse` - OAuth completion

#### RPCs
- `ExtractEntities` - Extract named entities from meeting
- `ListCalendarEvents` - List upcoming calendar events
- `GetCalendarProviders` - Get available calendar providers
- `InitiateCalendarAuth` - Start OAuth flow
- `CompleteCalendarAuth` - Complete OAuth flow

#### Enums
- `ExportFormat.EXPORT_FORMAT_PDF` - PDF export support

### Modified

#### Messages
- `GenerateSummaryRequest` - Added optional `options` field (field 3)

### Compatibility Notes

- All new fields are optional or have defaults
- Existing clients will continue to work without changes
- New features require updated clients to access

## [1.x.x] - Previous Versions

See git history for earlier changes.
```

---

## Acceptance Criteria

### Functional

- [ ] All proto messages compile without errors
- [ ] Proto stubs regenerate cleanly
- [ ] Alembic migrations apply to fresh database
- [ ] Alembic migrations apply to existing database (upgrade path)
- [ ] Feature flags control feature availability
- [ ] Model download CLI works correctly

### Technical

- [ ] Proto backward compatible (existing clients work)
- [ ] No breaking changes to existing RPCs
- [ ] All new tables have proper indexes
- [ ] Foreign key constraints correct
- [ ] Triggers for `updated_at` in place

### Quality Gates

- [ ] `pytest tests/quality/` passes
- [ ] `ruff check src/noteflow` clean
- [ ] `mypy src/noteflow` clean
- [ ] `alembic upgrade head` succeeds on fresh DB
- [ ] `alembic downgrade -1` succeeds for each migration
- [ ] Proto regeneration produces identical output (idempotent)

---

## Test Plan

### Migration Tests

**File**: `tests/infrastructure/persistence/test_migrations.py`

```python
import os

import pytest
from alembic import command
from alembic.config import Config


@pytest.fixture
def alembic_config() -> Config:
    """Create Alembic config for testing.

    The migrations use PostgreSQL-specific types (ARRAY, JSONB) and the
    `noteflow` schema, so they must run against PostgreSQL, not SQLite.
    """
    config = Config()
    config.set_main_option("script_location", "src/noteflow/infrastructure/persistence/migrations")
    # URL of a disposable test database (env var name is a suggestion)
    config.set_main_option("sqlalchemy.url", os.environ["NOTEFLOW_TEST_DATABASE_URL"])
    return config


def test_migrations_upgrade_downgrade(alembic_config: Config) -> None:
    """All migrations can upgrade and downgrade."""
    # Upgrade to head
    command.upgrade(alembic_config, "head")

    # Downgrade each migration
    command.downgrade(alembic_config, "-1")
    command.downgrade(alembic_config, "-1")

    # Upgrade again
    command.upgrade(alembic_config, "head")
```

### Proto Tests

**File**: `tests/grpc/test_proto_compilation.py`

```python
def test_proto_imports() -> None:
    """Proto stubs import without errors."""
    from noteflow.grpc.proto import noteflow_pb2, noteflow_pb2_grpc

    # Verify new messages exist
    assert hasattr(noteflow_pb2, "SummarizationOptions")
    assert hasattr(noteflow_pb2, "ExtractEntitiesRequest")
    assert hasattr(noteflow_pb2, "CalendarEvent")

    # Verify new enum values
    assert noteflow_pb2.EXPORT_FORMAT_PDF == 3


def test_proto_message_defaults() -> None:
    """New messages have correct defaults."""
    from noteflow.grpc.proto import noteflow_pb2

    # SummarizationOptions defaults
    opts = noteflow_pb2.SummarizationOptions()
    assert opts.tone == ""
    assert opts.format == ""
    assert opts.verbosity == ""

    # ExtractedEntity defaults
    entity = noteflow_pb2.ExtractedEntity()
    assert entity.confidence == 0.0
    assert entity.is_pinned is False
```

---

## Definition of Done

- [ ] All proto changes committed in single commit
- [ ] All migrations committed and tested
- [ ] Proto regeneration script works
- [ ] Feature flags documented
- [ ] PROTO_CHANGELOG.md updated
- [ ] Client proto sync verified (`cd client && npm run build:proto`)
- [ ] Integration tests pass with new schema
- [ ] CLAUDE.md updated with new proto messages
- [ ] README updated with new optional dependencies

---

## Dependencies

- None (this is the foundation sprint)

## Blocks

- All other sprints depend on Sprint 0

## Post-Sprint

- Monitor for proto compatibility issues
- Consider proto versioning strategy for future breaking changes
- Document migration rollback procedures

@@ -0,0 +1,548 @@
# Sprint 1: AI Templates Pass-Through

> **Priority**: 1 | **Owner**: Both (Backend + Frontend) | **Complexity**: Low

---

## Objective

Enable user-configured summarization style preferences (tone, format, verbosity) to flow from frontend settings through gRPC to the LLM prompt builder.

---

## Current State Analysis

### What Exists

| Component | Location | Status |
|-----------|----------|--------|
| Frontend UI | `client/src/pages/Settings.tsx` | AI template controls saved to local preferences |
| gRPC Proto | `src/noteflow/grpc/proto/noteflow.proto:291` | `GenerateSummaryRequest` lacks options field |
| Summarization Service | `src/noteflow/application/services/summarization_service.py:167` | `summarize()` has no template params |
| Prompt Builder | `src/noteflow/infrastructure/summarization/_parsing.py` | Only `build_transcript_prompt()`, no style builder |

### Gap

User preferences in Settings are never transmitted to the backend. `GenerateSummaryRequest` only contains:
```protobuf
message GenerateSummaryRequest {
  string meeting_id = 1;
  bool force_regenerate = 2;
  // Missing: SummarizationOptions options = 3;
}
```

---

## Target/Affected Code

### Files to Modify

| File | Change Type | Lines Est. |
|------|-------------|------------|
| `src/noteflow/grpc/proto/noteflow.proto` | Add message + field | +15 |
| `src/noteflow/infrastructure/summarization/_parsing.py` | Add `build_template_prompt()` | +40 |
| `src/noteflow/application/services/summarization_service.py` | Accept options param | +10 |
| `src/noteflow/grpc/_mixins/summarization.py` | Extract and pass options | +15 |
| `client/src-tauri/src/commands/summary.rs` | Accept template params | +20 |
| `client/src/api/tauri-adapter.ts` | Read prefs, pass to command | +15 |

### Files to Create

None - all changes are modifications to existing files.

---

## Implementation Tasks

### Task 1: Proto Update

**File**: `src/noteflow/grpc/proto/noteflow.proto`

```protobuf
// Add after line 288 (before GenerateSummaryRequest)
message SummarizationOptions {
  // Tone: professional, casual, technical, friendly
  string tone = 1;

  // Format: bullet_points, narrative, structured, concise
  string format = 2;

  // Verbosity: minimal, balanced, detailed, comprehensive
  string verbosity = 3;
}

// Modify existing GenerateSummaryRequest (line 291)
message GenerateSummaryRequest {
  string meeting_id = 1;
  bool force_regenerate = 2;
  SummarizationOptions options = 3;  // NEW
}
```

**Post-change**: Regenerate proto stubs:
```bash
python -m grpc_tools.protoc -I src/noteflow/grpc/proto \
    --python_out=src/noteflow/grpc/proto \
    --grpc_python_out=src/noteflow/grpc/proto \
    --pyi_out=src/noteflow/grpc/proto \
    src/noteflow/grpc/proto/noteflow.proto
```

---

### Task 2: Template Prompt Builder

**File**: `src/noteflow/infrastructure/summarization/_parsing.py`

**Insert after** `SYSTEM_PROMPT` constant:

```python
from noteflow.grpc.proto import noteflow_pb2

_TONE_INSTRUCTIONS: dict[str, str] = {
    "professional": "Use formal, business-appropriate language.",
    "casual": "Use conversational, approachable language.",
    "technical": "Use precise technical terminology.",
    "friendly": "Use warm, personable language.",
}

_FORMAT_INSTRUCTIONS: dict[str, str] = {
    "bullet_points": "Present information in bullet points.",
    "narrative": "Write in flowing paragraphs.",
    "structured": "Use headers and organized sections.",
    "concise": "Be extremely brief and to the point.",
}

_VERBOSITY_INSTRUCTIONS: dict[str, str] = {
    "minimal": "Provide only essential information.",
    "balanced": "Include moderate detail.",
    "detailed": "Include comprehensive information.",
    "comprehensive": "Include all relevant details and context.",
}


def build_template_prompt(
    options: noteflow_pb2.SummarizationOptions | None,
) -> str:
    """Build prompt prefix based on user template preferences.

    Args:
        options: User's summarization style preferences.

    Returns:
        Style instruction string to prepend to system prompt.
    """
    if not options:
        return ""

    parts: list[str] = []

    if options.tone and options.tone in _TONE_INSTRUCTIONS:
        parts.append(_TONE_INSTRUCTIONS[options.tone])
    if options.format and options.format in _FORMAT_INSTRUCTIONS:
        parts.append(_FORMAT_INSTRUCTIONS[options.format])
    if options.verbosity and options.verbosity in _VERBOSITY_INSTRUCTIONS:
        parts.append(_VERBOSITY_INSTRUCTIONS[options.verbosity])

    return " ".join(parts)
```
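
A quick check of the builder's output, run against the regenerated stubs:

```python
from noteflow.grpc.proto import noteflow_pb2
from noteflow.infrastructure.summarization._parsing import build_template_prompt

options = noteflow_pb2.SummarizationOptions(tone="professional", format="bullet_points")
print(build_template_prompt(options))
# -> "Use formal, business-appropriate language. Present information in bullet points."
```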

---

### Task 3: Service Update

**File**: `src/noteflow/application/services/summarization_service.py`

**Modify** `summarize()` signature (line 167):

```python
async def summarize(
    self,
    meeting_id: MeetingId,
    segments: Sequence[Segment],
    mode: SummarizationMode | None = None,
    max_key_points: int | None = None,
    max_action_items: int | None = None,
    style_prompt: str | None = None,  # NEW PARAMETER
) -> SummarizationServiceResult:
```

**Update** request building (around line 205):

```python
request = SummarizationRequest(
    meeting_id=meeting_id,
    segments=segments,
    max_key_points=max_key_points or self.settings.max_key_points,
    max_action_items=max_action_items or self.settings.max_action_items,
    style_prompt=style_prompt,  # NEW FIELD
)
```

**Note**: Also update the `SummarizationRequest` dataclass in the domain layer to include `style_prompt`, as sketched below.

---

### Task 4: gRPC Mixin Update

**File**: `src/noteflow/grpc/_mixins/summarization.py`

**Modify** `GenerateSummary` method:

```python
async def GenerateSummary(
    self: ServicerHost,
    request: noteflow_pb2.GenerateSummaryRequest,
    context: grpc.aio.ServicerContext,
) -> noteflow_pb2.Summary:
    """Generate AI summary for meeting."""
    from noteflow.infrastructure.summarization._parsing import build_template_prompt

    meeting_id = self._parse_meeting_id(request.meeting_id)

    # Build style prompt from options. Submessage fields are always truthy
    # in Python protobuf, so check presence explicitly with HasField.
    style_prompt = (
        build_template_prompt(request.options)
        if request.HasField("options")
        else None
    )

    # ... existing meeting fetch logic ...

    result = await self._summarization_service.summarize(
        meeting_id=meeting_id,
        segments=meeting.segments,
        style_prompt=style_prompt,  # Pass style prompt
    )
```

---

### Task 5: Rust Command Update

**File**: `client/src-tauri/src/commands/summary.rs`

```rust
#[derive(Debug, Serialize, Deserialize)]
pub struct SummarizationOptions {
    pub tone: Option<String>,
    pub format: Option<String>,
    pub verbosity: Option<String>,
}

#[tauri::command]
pub async fn generate_summary(
    meeting_id: String,
    force_regenerate: Option<bool>,
    options: Option<SummarizationOptions>, // NEW
    state: State<'_, AppState>,
) -> Result<Summary, String> {
    let client = state.grpc_client.lock().await;

    let proto_options = options.map(|o| proto::SummarizationOptions {
        tone: o.tone.unwrap_or_default(),
        format: o.format.unwrap_or_default(),
        verbosity: o.verbosity.unwrap_or_default(),
    });

    let request = proto::GenerateSummaryRequest {
        meeting_id,
        force_regenerate: force_regenerate.unwrap_or(false),
        options: proto_options,
    };

    // ... rest of gRPC call
}
```

---

### Task 6: TypeScript Adapter Update

**File**: `client/src/api/tauri-adapter.ts`

```typescript
interface SummarizationOptions {
  tone?: 'professional' | 'casual' | 'technical' | 'friendly';
  format?: 'bullet_points' | 'narrative' | 'structured' | 'concise';
  verbosity?: 'minimal' | 'balanced' | 'detailed' | 'comprehensive';
}

async generateSummary(
  meetingId: string,
  forceRegenerate?: boolean,
): Promise<Summary> {
  // Read from local preferences
  const prefs = await this.getPreferences();
  const template = prefs.ai_template;

  const options: SummarizationOptions | undefined = template ? {
    tone: template.tone,
    format: template.format,
    verbosity: template.verbosity,
  } : undefined;

  return invoke(Commands.GENERATE_SUMMARY, {
    meetingId,
    forceRegenerate,
    options,
  });
}
```

---

## Code Segments to Reuse

### Existing Prompt Building

**Location**: `src/noteflow/infrastructure/summarization/_parsing.py:20-80`

```python
SYSTEM_PROMPT = """You are an expert meeting analyst..."""

def build_transcript_prompt(segments: Sequence[Segment], ...) -> str:
    """Build transcript with segment markers."""
```

Use this pattern for `build_template_prompt()`.
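
The plan does not pin down where `style_prompt` joins the system prompt. One plausible wiring inside the provider, shown as an assumption rather than the settled design:

```python
def build_system_prompt(style_prompt: str | None) -> str:
    """Prepend user style instructions to the base system prompt."""
    if not style_prompt:
        return SYSTEM_PROMPT
    return f"{style_prompt}\n\n{SYSTEM_PROMPT}"
```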

### Existing Service Pattern

**Location**: `src/noteflow/application/services/summarization_service.py:167-249`

The `summarize()` method shows how to:
- Accept optional parameters with defaults
- Pass through to providers
- Handle verification and persistence

### Rust Command Pattern

**Location**: `client/src-tauri/src/commands/meeting.rs`

Follow the pattern for:
- Deriving `Serialize`, `Deserialize` on structs
- Using `Option<T>` for optional command params
- Converting to proto types

---

## Acceptance Criteria

### Functional

- [ ] User can select tone (professional/casual/technical/friendly) in Settings
- [ ] User can select format (bullet_points/narrative/structured/concise) in Settings
- [ ] User can select verbosity (minimal/balanced/detailed/comprehensive) in Settings
- [ ] When generating a summary, the selected options affect the output style
- [ ] Default behavior (no options) produces the same result as before

### Technical

- [ ] Proto regenerated and compiles cleanly
- [ ] No breaking changes to existing clients (options field is optional)
- [ ] Style prompt logged at DEBUG level for troubleshooting
- [ ] Unit tests cover all tone/format/verbosity combinations

### Quality Gates

- [ ] `pytest tests/quality/` passes
- [ ] `ruff check src/noteflow` clean
- [ ] `mypy src/noteflow` clean
- [ ] `npm run test:quality` passes (client)
- [ ] `cargo clippy` clean (Rust)

---

## Test Plan

### Unit Tests

**File**: `tests/infrastructure/summarization/test_parsing.py`

```python
import pytest

from noteflow.grpc.proto import noteflow_pb2
from noteflow.infrastructure.summarization._parsing import build_template_prompt


@pytest.mark.parametrize(
    "tone,expected_fragment",
    [
        ("professional", "formal, business-appropriate"),
        ("casual", "conversational, approachable"),
        ("technical", "precise technical terminology"),
        ("friendly", "warm, personable"),
    ],
)
def test_build_template_prompt_tone(tone: str, expected_fragment: str) -> None:
    """Template prompt includes correct tone instruction."""
    options = noteflow_pb2.SummarizationOptions(tone=tone)

    result = build_template_prompt(options)

    assert expected_fragment in result


def test_build_template_prompt_combines_all_options() -> None:
    """Template prompt combines tone, format, and verbosity."""
    options = noteflow_pb2.SummarizationOptions(
        tone="professional",
        format="bullet_points",
        verbosity="detailed",
    )

    result = build_template_prompt(options)

    assert "formal" in result
    assert "bullet points" in result
    # "detailed" maps to the "Include comprehensive information." instruction
    assert "Include comprehensive information." in result


def test_build_template_prompt_none_returns_empty() -> None:
    """No options returns empty string."""
    result = build_template_prompt(None)

    assert result == ""


def test_build_template_prompt_unknown_values_ignored() -> None:
    """Unknown option values are safely ignored."""
    options = noteflow_pb2.SummarizationOptions(
        tone="unknown_tone",
        format="unknown_format",
    )

    result = build_template_prompt(options)

    assert result == ""
```

### Integration Tests

**File**: `tests/integration/test_summarization_templates.py`

```python
@pytest.mark.integration
async def test_generate_summary_with_professional_tone(
    grpc_client: NoteFlowClient,
    meeting_with_segments: Meeting,
) -> None:
    """Summary generation respects professional tone setting."""
    options = noteflow_pb2.SummarizationOptions(tone="professional")

    summary = await grpc_client.generate_summary(
        meeting_id=str(meeting_with_segments.id),
        options=options,
    )

    # Verify summary was generated (content verification is model-dependent)
    assert summary.executive_summary
    assert summary.key_points
```

### Frontend Tests

**File**: `client/src/api/tauri-adapter.test.ts`

```typescript
describe('generateSummary', () => {
  it('should pass template options from preferences', async () => {
    // Mock preferences with AI template
    mockPreferences.ai_template = {
      tone: 'professional',
      format: 'bullet_points',
      verbosity: 'detailed',
    };

    await adapter.generateSummary('meeting-123');

    expect(invoke).toHaveBeenCalledWith(
      Commands.GENERATE_SUMMARY,
      expect.objectContaining({
        options: {
          tone: 'professional',
          format: 'bullet_points',
          verbosity: 'detailed',
        },
      })
    );
  });
});
```

---

## Rollback Plan

If issues arise:

1. **Proto rollback**: Remove `options` field (clients ignore unknown fields)
2. **Backend**: `build_template_prompt()` returns empty string if options invalid
3. **Frontend**: Gracefully handle missing options in existing summaries

---

## Frontend/Backend Sync Protocol

### Architecture Decision: Per-Request Transmission

Preferences are stored **only on the frontend** (local storage) and transmitted **per-request** via gRPC. The backend is stateless regarding user preferences.

```
┌─────────────────┐         ┌─────────────────┐         ┌─────────────────┐
│  Local Storage  │────────▶│  Tauri Command  │────────▶│  gRPC Request   │
│  (preferences)  │  read   │  (summary.rs)   │  proto  │ (options field) │
└─────────────────┘         └─────────────────┘         └─────────────────┘
```

### Why Per-Request (Not Persisted on Backend)

| Approach | Pros | Cons |
|----------|------|------|
| **Per-request (chosen)** | No sync conflicts; works offline; privacy-preserving | Slightly larger request payloads |
| Backend-persisted | Single source of truth | Sync complexity; requires user accounts; offline failures |

### Failure Handling

| Scenario | Behavior |
|----------|----------|
| Backend unreachable | Summary generation fails (as expected); preferences remain in local storage |
| Invalid preference value | Backend ignores unknown values; uses default behavior |
| Missing preferences | `options` field omitted; backend uses default prompts |
| Corrupted local storage | `getPreferences()` returns defaults; user re-configures in Settings |

### Implementation Notes

1. **No caching on backend**: Each `GenerateSummary` call reads `options` fresh from the request
2. **No version conflicts**: Frontend preferences are authoritative; no bidirectional sync
3. **Offline-first**: Preferences are always available locally; only summary generation requires connectivity
4. **Migration path**: If backend persistence is needed later, add a `UserPreferences` table and sync endpoint

### TypeScript Preference Loading

```typescript
// client/src/api/tauri-adapter.ts
private async getPreferences(): Promise<UserPreferences> {
  try {
    const stored = localStorage.getItem('noteflow_preferences');
    return stored ? JSON.parse(stored) : DEFAULT_PREFERENCES;
  } catch {
    // Corrupted storage: reset to defaults
    localStorage.removeItem('noteflow_preferences');
    return DEFAULT_PREFERENCES;
  }
}
```

---

## Dependencies

- None (standalone feature)

## Blocks

- None

## Post-Sprint

- Update CLAUDE.md with new proto message
- Consider adding template presets ("Meeting Notes", "Executive Brief")
File diff suppressed because it is too large

778 docs/sprints/phase-1-core-pipeline/sprint-3-pdf-export/README.md Normal file
@@ -0,0 +1,778 @@
# Sprint 3: PDF Export

> **Priority**: 3 | **Owner**: Backend | **Complexity**: Low-Medium

---

## Objective

Add PDF export capability to complement existing Markdown and HTML exports. Users expect to export transcripts as PDF for sharing and archival.

---

## Current State Analysis

### What Exists

| Component | Location | Status |
|-----------|----------|--------|
| Export Protocol | `src/noteflow/infrastructure/export/protocols.py` | `TranscriptExporter` interface |
| Markdown Exporter | `src/noteflow/infrastructure/export/markdown.py` | Working |
| HTML Exporter | `src/noteflow/infrastructure/export/html.py` | Working |
| Formatting Utils | `src/noteflow/infrastructure/export/_formatting.py` | `format_timestamp()`, `format_datetime()` |
| gRPC Mixin | `src/noteflow/grpc/_mixins/export.py` | `ExportTranscript` RPC |
| Proto Enum | `noteflow.proto:420` | `ExportFormat` (MARKDOWN, HTML only) |

### Gap

No PDF exporter exists. The `ExportFormat` proto enum lacks `EXPORT_FORMAT_PDF`.

---

## Target/Affected Code

### Files to Create

| File | Purpose | Lines Est. |
|------|---------|------------|
| `src/noteflow/infrastructure/export/pdf.py` | PDF exporter class | ~100 |
| `tests/infrastructure/export/test_pdf.py` | Unit tests | ~80 |

### Files to Modify

| File | Change Type | Lines Est. |
|------|-------------|------------|
| `src/noteflow/grpc/proto/noteflow.proto` | Add PDF enum value | +1 |
| `src/noteflow/infrastructure/export/__init__.py` | Export `PdfExporter` | +2 |
| `src/noteflow/grpc/_mixins/export.py` | Handle PDF format | +15 |
| `pyproject.toml` | Add weasyprint dependency | +1 |
| `client/src-tauri/src/commands/export.rs` | Handle PDF format | +5 |
| `client/src/pages/MeetingDetail.tsx` | Add PDF button | +5 |

---

## Implementation Tasks

### Task 1: Add Dependency

**File**: `pyproject.toml`

Add to dependencies:
```toml
dependencies = [
    # ... existing ...
    "weasyprint>=62.0",
]
```

**Note**: weasyprint requires system dependencies (cairo, pango, gdk-pixbuf). Document in README.

System packages (Ubuntu/Debian):
```bash
apt-get install libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0
```

System packages (macOS):
```bash
brew install pango cairo gdk-pixbuf
```

---

### Task 2: Proto Update

**File**: `src/noteflow/grpc/proto/noteflow.proto`

Modify `ExportFormat` enum (around line 420):

```protobuf
enum ExportFormat {
  EXPORT_FORMAT_UNSPECIFIED = 0;
  EXPORT_FORMAT_MARKDOWN = 1;
  EXPORT_FORMAT_HTML = 2;
  EXPORT_FORMAT_PDF = 3;  // NEW
}
```

Regenerate stubs after change.

---

### Task 3: Create PDF Exporter

**File**: `src/noteflow/infrastructure/export/pdf.py`

```python
"""PDF transcript exporter using weasyprint."""

from __future__ import annotations

from typing import TYPE_CHECKING

from weasyprint import HTML

from noteflow.infrastructure.export._formatting import (
    format_datetime,
    format_timestamp,
)
from noteflow.infrastructure.export.protocols import TranscriptExporter

if TYPE_CHECKING:
    from noteflow.domain.entities.meeting import Meeting

# PDF-optimized CSS
_PDF_CSS = """
@page {
    size: A4;
    margin: 2cm;
}

body {
    font-family: 'Helvetica Neue', Arial, sans-serif;
    font-size: 11pt;
    line-height: 1.6;
    color: #333;
}

h1 {
    color: #1a1a1a;
    border-bottom: 2px solid #333;
    padding-bottom: 8px;
    margin-bottom: 16px;
}

h2 {
    color: #444;
    margin-top: 24px;
    margin-bottom: 12px;
}

.metadata {
    color: #666;
    font-size: 10pt;
    margin-bottom: 20px;
    padding-bottom: 10px;
    border-bottom: 1px solid #ddd;
}

.summary {
    background-color: #f8f9fa;
    padding: 16px;
    border-radius: 4px;
    margin-bottom: 24px;
    page-break-inside: avoid;
}

.summary h2 {
    color: #2563eb;
    margin-top: 0;
}

.key-points {
    margin: 12px 0;
}

.key-points li {
    margin-bottom: 8px;
}

.action-item {
    background-color: #fef3c7;
    padding: 8px 12px;
    margin: 8px 0;
    border-left: 3px solid #f59e0b;
    page-break-inside: avoid;
}

.segment {
    margin: 12px 0;
    padding: 8px 0;
    border-bottom: 1px solid #eee;
    page-break-inside: avoid;
}

.speaker {
    font-weight: bold;
    color: #2563eb;
}

.timestamp {
    color: #888;
    font-size: 9pt;
    margin-left: 8px;
}

.text {
    margin-top: 4px;
}
"""


class PdfExporter(TranscriptExporter):
    """Export transcripts to PDF format."""

    def export(self, meeting: Meeting) -> bytes:
        """Export meeting transcript to PDF bytes.

        Args:
            meeting: Meeting entity with segments and optional summary.

        Returns:
            PDF document as bytes.
        """
        html_content = self._build_html(meeting)
        pdf_bytes: bytes = HTML(string=html_content).write_pdf()
        return pdf_bytes

    def _build_html(self, meeting: Meeting) -> str:
        """Build HTML content for PDF rendering."""
        title = meeting.title or f"Meeting {meeting.id}"
        date = format_datetime(meeting.created_at) if meeting.created_at else "Unknown"
        duration = (
            format_timestamp(meeting.duration_seconds)
            if meeting.duration_seconds
            else "Unknown"
        )

        # Build segments HTML
        segments_html = self._build_segments_html(meeting)

        # Build summary HTML
        summary_html = self._build_summary_html(meeting) if meeting.summary else ""

        return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{self._escape(title)}</title>
<style>{_PDF_CSS}</style>
</head>
<body>
<h1>{self._escape(title)}</h1>
<div class="metadata">
<strong>Date:</strong> {date} |
<strong>Duration:</strong> {duration} |
<strong>Segments:</strong> {len(meeting.segments)}
</div>
{summary_html}
<h2>Transcript</h2>
{segments_html}
</body>
</html>"""

    def _build_segments_html(self, meeting: Meeting) -> str:
        """Build HTML for transcript segments."""
        parts: list[str] = []

        for segment in meeting.segments:
            speaker = self._escape(segment.speaker_id or "Unknown")
            timestamp = format_timestamp(segment.start_time)
            text = self._escape(segment.text)

            parts.append(f"""
<div class="segment">
<span class="speaker">{speaker}</span>
<span class="timestamp">[{timestamp}]</span>
<div class="text">{text}</div>
</div>""")

        return "\n".join(parts)

    def _build_summary_html(self, meeting: Meeting) -> str:
        """Build HTML for meeting summary."""
        summary = meeting.summary
        if not summary:
            return ""

        # Executive summary
        exec_summary = self._escape(summary.executive_summary)

        # Key points
        key_points_html = ""
        if summary.key_points:
            items = "\n".join(
                f"<li>{self._escape(kp.text)}</li>"
                for kp in summary.key_points
            )
            key_points_html = f"""
<h3>Key Points</h3>
<ul class="key-points">
{items}
</ul>"""

        # Action items
        action_items_html = ""
        if summary.action_items:
            items = "\n".join(
                f'<div class="action-item">{self._escape(ai.text)}</div>'
                for ai in summary.action_items
            )
            action_items_html = f"""
<h3>Action Items</h3>
{items}"""

        return f"""
<div class="summary">
<h2>Summary</h2>
<p>{exec_summary}</p>
{key_points_html}
{action_items_html}
</div>"""

    @staticmethod
    def _escape(text: str) -> str:
        """Escape HTML special characters."""
        return (
            text.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;")
            .replace("'", "&#39;")
        )
```
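
A quick usage sketch; the output path is arbitrary and `meeting` stands in for a loaded entity:

```python
from pathlib import Path

from noteflow.infrastructure.export.pdf import PdfExporter

exporter = PdfExporter()
# `meeting` is a Meeting entity fetched from the repository
Path("meeting.pdf").write_bytes(exporter.export(meeting))
```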

---

### Task 4: Register Exporter

**File**: `src/noteflow/infrastructure/export/__init__.py`

```python
"""Export infrastructure module."""

from noteflow.infrastructure.export.html import HtmlExporter
from noteflow.infrastructure.export.markdown import MarkdownExporter
from noteflow.infrastructure.export.pdf import PdfExporter
from noteflow.infrastructure.export.protocols import TranscriptExporter

__all__ = [
    "HtmlExporter",
    "MarkdownExporter",
    "PdfExporter",
    "TranscriptExporter",
]
```

---

### Task 5: Update gRPC Mixin

**File**: `src/noteflow/grpc/_mixins/export.py`

Modify `ExportTranscript` to handle PDF:

```python
import base64

from noteflow.infrastructure.export import (
    HtmlExporter,
    MarkdownExporter,
    PdfExporter,
)
from noteflow.grpc.proto import noteflow_pb2

# Exporter registry
_EXPORTERS = {
    noteflow_pb2.EXPORT_FORMAT_MARKDOWN: (MarkdownExporter, "markdown", ".md"),
    noteflow_pb2.EXPORT_FORMAT_HTML: (HtmlExporter, "html", ".html"),
    noteflow_pb2.EXPORT_FORMAT_PDF: (PdfExporter, "pdf", ".pdf"),
}


class ExportMixin:
    """Mixin for export RPC methods."""

    async def ExportTranscript(
        self: ServicerHost,
        request: noteflow_pb2.ExportTranscriptRequest,
        context: grpc.aio.ServicerContext,
    ) -> noteflow_pb2.ExportTranscriptResponse:
        """Export meeting transcript to specified format."""
        meeting_id = self._parse_meeting_id(request.meeting_id)

        # Get exporter
        exporter_info = _EXPORTERS.get(request.format)
        if not exporter_info:
            context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
            context.set_details(f"Unsupported format: {request.format}")
            return noteflow_pb2.ExportTranscriptResponse()

        exporter_class, format_name, extension = exporter_info

        # Fetch meeting
        async with self._create_repository_provider() as provider:
            meeting = await provider.meetings.get(meeting_id)
            if not meeting:
                context.set_code(grpc.StatusCode.NOT_FOUND)
                context.set_details(f"Meeting {meeting_id} not found")
                return noteflow_pb2.ExportTranscriptResponse()

        # Export
        exporter = exporter_class()
        result = exporter.export(meeting)

        # Handle bytes vs string
        if isinstance(result, bytes):
            # PDF returns bytes - base64 encode for transport
            content = base64.b64encode(result).decode("ascii")
        else:
            content = result

        return noteflow_pb2.ExportTranscriptResponse(
            content=content,
            format_name=format_name,
            file_extension=extension,
        )
```

**Note**: For PDF, `content` is base64-encoded. The frontend must decode it before writing the file, as sketched below.

---

### Task 6: Frontend Updates

**File**: `client/src-tauri/src/commands/export.rs`

```rust
#[tauri::command]
pub async fn export_transcript(
    meeting_id: String,
    format: String,
    state: State<'_, AppState>,
) -> Result<ExportResult, String> {
    let proto_format = match format.as_str() {
        "markdown" => proto::ExportFormat::Markdown,
        "html" => proto::ExportFormat::Html,
        "pdf" => proto::ExportFormat::Pdf, // NEW
        _ => return Err(format!("Invalid format: {}", format)),
    };

    // ... gRPC call ...

    // PDF content arrives base64-encoded; pass it through unchanged and
    // let the TypeScript layer decode it before writing to disk.
    let content = response.content;

    Ok(ExportResult {
        content,
        format_name: response.format_name,
        file_extension: response.file_extension,
    })
}
```

**File**: `client/src/pages/MeetingDetail.tsx`

Add PDF export button alongside existing exports:

```tsx
<DropdownMenu>
  <DropdownMenuTrigger asChild>
    <Button variant="outline">
      <Download className="w-4 h-4 mr-2" />
      Export
    </Button>
  </DropdownMenuTrigger>
  <DropdownMenuContent>
    <DropdownMenuItem onClick={() => handleExport('markdown')}>
      Markdown (.md)
    </DropdownMenuItem>
    <DropdownMenuItem onClick={() => handleExport('html')}>
      HTML (.html)
    </DropdownMenuItem>
    <DropdownMenuItem onClick={() => handleExport('pdf')}>
      PDF (.pdf)
    </DropdownMenuItem>
  </DropdownMenuContent>
</DropdownMenu>
```

---

## Code Segments to Reuse

### Existing Formatting Utilities

**Location**: `src/noteflow/infrastructure/export/_formatting.py`

```python
def format_timestamp(seconds: float) -> str:
    """Format seconds as MM:SS or HH:MM:SS."""

def format_datetime(dt: datetime) -> str:
    """Format datetime for display."""
```

### Existing HTML Exporter Pattern

**Location**: `src/noteflow/infrastructure/export/html.py`

Follow the same structure:
- `export()` method returning string
- `_build_*` helper methods
- CSS embedded in output

### Existing Exporter Protocol

**Location**: `src/noteflow/infrastructure/export/protocols.py`

```python
class TranscriptExporter(Protocol):
    """Protocol for transcript exporters."""

    def export(self, meeting: Meeting) -> str:
        """Export meeting to string format."""
        ...
```

**Note**: PDF returns `bytes`, not `str`. Either:
1. Update the protocol to `str | bytes`
2. Create a separate `BinaryExporter` protocol (see the sketch after this list)
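
A sketch of both options; neither exists in the codebase yet, so `BinaryExporter` is a placeholder name:

```python
from typing import Protocol

from noteflow.domain.entities.meeting import Meeting


# Option 1: widen the existing protocol's return type
class TranscriptExporter(Protocol):
    """Protocol for transcript exporters (text or binary output)."""

    def export(self, meeting: Meeting) -> str | bytes: ...


# Option 2: keep text exporters as-is and add a binary variant
class BinaryExporter(Protocol):
    """Protocol for exporters that produce binary documents."""

    def export(self, meeting: Meeting) -> bytes: ...
```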

---

## Acceptance Criteria

### Functional

- [ ] Export dropdown includes PDF option
- [ ] Clicking PDF export downloads valid PDF file
- [ ] PDF contains title, date, duration, segment count
- [ ] PDF contains all transcript segments with speakers/timestamps
- [ ] PDF contains summary (if present) with key points and action items
- [ ] PDF renders cleanly on A4 paper

### Technical

- [ ] PDF generation uses weasyprint (not reportlab)
- [ ] Content properly HTML-escaped to prevent injection
- [ ] Base64 encoding/decoding works correctly
- [ ] Error handling for missing weasyprint

### Quality Gates

- [ ] `pytest tests/quality/` passes
- [ ] Module size < 200 lines
- [ ] All functions documented
- [ ] No hardcoded strings (use constants)

---

## Test Plan

### Unit Tests

**File**: `tests/infrastructure/export/test_pdf.py`

```python
from datetime import UTC, datetime
from uuid import uuid4

import pytest

from noteflow.domain.entities.meeting import Meeting, MeetingId, MeetingState
from noteflow.domain.entities.segment import Segment
from noteflow.domain.entities.summary import ActionItem, KeyPoint, Summary
from noteflow.infrastructure.export.pdf import PdfExporter


@pytest.fixture
def meeting_with_segments() -> Meeting:
    """Create meeting with segments for testing."""
    return Meeting(
        id=MeetingId(uuid4()),
        title="Test Meeting",
        state=MeetingState.COMPLETED,
        created_at=datetime.now(UTC),
        duration_seconds=3600.0,
        segments=[
            Segment(
                segment_id=1,
                text="Hello, welcome to the meeting.",
                start_time=0.0,
                end_time=5.0,
                speaker_id="Alice",
            ),
            Segment(
                segment_id=2,
                text="Thank you for joining.",
                start_time=5.0,
                end_time=10.0,
                speaker_id="Bob",
            ),
        ],
    )


@pytest.fixture
def meeting_with_summary(meeting_with_segments: Meeting) -> Meeting:
    """Add summary to meeting."""
    meeting_with_segments.summary = Summary(
        meeting_id=meeting_with_segments.id,
        executive_summary="This was a productive meeting.",
        key_points=[
            KeyPoint(text="Discussed project timeline", segment_ids=[1]),
        ],
        action_items=[
            ActionItem(text="Follow up with client", assignee="Alice", segment_ids=[2]),
        ],
        generated_at=datetime.now(UTC),
    )
    return meeting_with_segments


def test_export_returns_bytes(meeting_with_segments: Meeting) -> None:
    """PDF export returns bytes."""
    exporter = PdfExporter()

    result = exporter.export(meeting_with_segments)

    assert isinstance(result, bytes)
    assert len(result) > 0


def test_export_is_valid_pdf(meeting_with_segments: Meeting) -> None:
    """PDF export produces valid PDF file."""
    exporter = PdfExporter()

    result = exporter.export(meeting_with_segments)

    # PDF files start with %PDF-
    assert result.startswith(b"%PDF-")


def test_export_includes_title(meeting_with_segments: Meeting) -> None:
    """PDF contains meeting title."""
    exporter = PdfExporter()

    # Check HTML content (before PDF conversion)
    html = exporter._build_html(meeting_with_segments)

    assert "Test Meeting" in html


def test_export_includes_segments(meeting_with_segments: Meeting) -> None:
    """PDF contains all segments."""
    exporter = PdfExporter()

    html = exporter._build_html(meeting_with_segments)

    assert "Hello, welcome" in html
    assert "Thank you for joining" in html
    assert "Alice" in html
    assert "Bob" in html


def test_export_includes_summary(meeting_with_summary: Meeting) -> None:
    """PDF contains summary when present."""
    exporter = PdfExporter()

    html = exporter._build_html(meeting_with_summary)

    assert "productive meeting" in html
    assert "project timeline" in html
    assert "Follow up with client" in html


def test_export_escapes_html_characters(meeting_with_segments: Meeting) -> None:
    """PDF properly escapes HTML special characters."""
    meeting_with_segments.segments[0].text = "<script>alert('xss')</script>"
    exporter = PdfExporter()

    html = exporter._build_html(meeting_with_segments)

    assert "<script>" not in html
    assert "&lt;script&gt;" in html


def test_export_handles_empty_meeting() -> None:
    """PDF export handles meeting with no segments."""
    meeting = Meeting(
        id=MeetingId(uuid4()),
        title="Empty Meeting",
        state=MeetingState.COMPLETED,
        segments=[],
    )
    exporter = PdfExporter()

    result = exporter.export(meeting)

    assert isinstance(result, bytes)
    assert result.startswith(b"%PDF-")
```

### Integration Tests

**File**: `tests/integration/test_export_pdf.py`

```python
@pytest.mark.integration
async def test_export_pdf_via_grpc(
    grpc_client: NoteFlowClient,
    meeting_with_segments: Meeting,
) -> None:
    """Export PDF via gRPC."""
    import base64

    response = await grpc_client.export_transcript(
        meeting_id=str(meeting_with_segments.id),
        format=ExportFormat.PDF,
    )

    assert response.format_name == "pdf"
    assert response.file_extension == ".pdf"

    # Decode base64 content
    pdf_bytes = base64.b64decode(response.content)
    assert pdf_bytes.startswith(b"%PDF-")
```

---

## Dependencies

- **weasyprint**: PDF generation library
- **System packages**: cairo, pango (documented in README)

## Blocks

- None (can proceed independently)

## Performance Note: Base64 Encoding Overhead

The current implementation base64-encodes PDF bytes for gRPC transport, which inflates payload size by ~33%.

**Alternative for large PDFs** (future enhancement):

For transcripts > 1MB, consider streaming the PDF file directly:

```python
# Alternative: stream the PDF to a temp file and return the file path
import tempfile
from pathlib import Path

async def ExportTranscriptToFile(
    self: ServicerHost,
    request: noteflow_pb2.ExportTranscriptRequest,
    context: grpc.aio.ServicerContext,
) -> noteflow_pb2.ExportTranscriptFileResponse:
    """Export to file and return path (for large exports)."""
    # ... generate PDF ...
    temp_path = Path(tempfile.gettempdir()) / f"export_{meeting_id}.pdf"
    temp_path.write_bytes(pdf_bytes)

    return noteflow_pb2.ExportTranscriptFileResponse(
        file_path=str(temp_path),
        format_name="pdf",
    )
```

This avoids base64 overhead for large files while maintaining the current API for typical exports.
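
As a quick sanity check on the ~33% figure, here is a minimal sketch; the 4/3 ratio is inherent to base64, which emits 4 output bytes for every 3 input bytes:

```python
import base64

payload = b"\x00" * 3_000_000  # stand-in for a ~3 MB PDF
encoded = base64.b64encode(payload)
print(len(encoded) / len(payload))  # ~1.33, i.e. roughly 33% inflation
```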

---

## Post-Sprint

- Add PDF settings (page size, margins)
- Consider async PDF generation for large transcripts
- Add print-optimized CSS media query
1971  docs/sprints/phase-2-intelligence/sprint-4-ner-extraction/README.md  (new file; diff suppressed because it is too large)
2767  docs/sprints/phase-3-integrations/sprint-5-calendar-sync/README.md  (new file; diff suppressed because it is too large)
1427  docs/sprints/phase-3-integrations/sprint-6-webhooks/README.md  (new file; diff suppressed because it is too large)
@@ -1,93 +0,0 @@
# Uncommitted Changes Review (2025-12-21)

## Scope
- Reviewed uncommitted changes in Tauri playback/audio, annotations UI/commands, and preferences UI/commands.

## Resolution Status (Session 3 - 2025-12-21)

| # | Issue | Status |
|---|-------|--------|
| 1 | Playback position tracking stops after pause/resume | ✅ FIXED |
| 2 | Highlight state sticks on gaps/after seek | ✅ Already correct |
| 3 | Hard-coded 16k sample rate | ✅ Already correct |
| 4 | Sample rate validation | ✅ Already correct |
| 5 | Selecting meeting doesn't stop playback | ✅ Already correct |
| 6 | Audio device IDs unstable | ✅ FIXED |
| 7 | Preferences in-memory only | ✅ FIXED |
| 8 | Annotation UI wired to stubs | ✅ FIXED |

**Fixes Applied:**
- `playback.rs`: Position tracker now respawns on resume, accumulates samples in state
- `devices.rs`: Device IDs now use a stable hash of the device name
- `preferences.rs`: Preferences persist to a JSON file on disk
- `preferences.rs`: API keys stored securely in the system keychain
- `grpc/client.rs`: Annotation methods now make actual gRPC calls via `NoteFlowServiceClient`

---

## Findings & Recommendations

### 1) Playback position tracking stops after pause/resume (High) ✅ FIXED
Observation: `spawn_position_tracker` exits when `playing_flag` flips false, but `resume_playback` never restarts it, so position/highlight updates stop after the first pause. `pause` flips the flag to false. Evidence: `client/src-tauri/src/commands/playback.rs:146`, `client/src-tauri/src/commands/playback.rs:166`, `client/src-tauri/src/audio/playback.rs:73`.

Example: the user pauses at 00:30 and resumes; audio plays, but the playback position and highlights stop updating.

Recommendation: keep a single tracker thread alive and gate it on `playback_state`, or re-spawn the tracker inside `resume_playback` when resuming. Also consider syncing position from the playback sink instead of relying only on time math. Evidence: `client/src-tauri/src/commands/playback.rs:146`.

### 2) Highlight state can stick on gaps or after seek (Medium) ✅ Already correct
Observation: `seek` emits `HIGHLIGHT_CHANGE` only when a segment is found, and the tracker only emits when entering a segment, never clearing on gaps. Evidence: `client/src-tauri/src/commands/playback.rs:83`, `client/src-tauri/src/commands/playback.rs:86`, `client/src-tauri/src/commands/playback.rs:183`.

Example: seek into silence or between segments and the previous segment remains highlighted indefinitely.

Recommendation: emit `HIGHLIGHT_CHANGE` with `null` when `find_segment_at_position` returns `None`, and clear the highlight when leaving a segment in the tracker loop. Evidence: `client/src-tauri/src/commands/playback.rs:83`.

### 3) Hard-coded 16k sample rate ignores the actual file sample rate (High) ✅ Already correct
Observation: `load_audio_file` reads the sample rate, but `select_meeting` ignores it and playback uses `DEFAULT_SAMPLE_RATE` for both audio and position tracking. Evidence: `client/src-tauri/src/audio/loader.rs:40`, `client/src-tauri/src/commands/meeting.rs:147`, `client/src-tauri/src/commands/playback.rs:123`, `client/src-tauri/src/commands/playback.rs:160`.

Example: a 48kHz recording will play at ~3x speed and the UI highlight will drift from the audio.

Recommendation: store `sample_rate` in `AppState` when loading audio, pass it into `AudioPlayback::play_buffer`, and use it for the tracker loop. Fall back to 16k only when the value is missing. Evidence: `client/src-tauri/src/commands/meeting.rs:147`.

### 4) Missing validation for `sample_rate` can infinite-loop or divide by zero (Medium) ✅ Already correct
Observation: `samples_to_chunks` computes `chunk_samples` from `sample_rate` and loops until the offset advances; if `sample_rate` is 0 (or extremely small), `chunk_samples` becomes 0 and the loop never progresses. `play_buffer` also divides by `sample_rate`. Evidence: `client/src-tauri/src/audio/loader.rs:88`, `client/src-tauri/src/audio/loader.rs:92`, `client/src-tauri/src/audio/playback.rs:61`.

Example: a corrupted audio file with `sample_rate = 0` will hang the loader or produce invalid duration math.

Recommendation: validate `sample_rate > 0` and `chunk_samples >= 1` in `load_audio_file`, returning a clear error for invalid files. Guard divisions in playback accordingly. Evidence: `client/src-tauri/src/audio/loader.rs:40`.

### 5) Selecting a meeting doesn't stop active playback; stale position when audio is missing (Medium) ✅ Already correct
Observation: `select_meeting` only flips `playback_state` to `Stopped`; it never calls `AudioPlayback::stop` or clears the playback handle. When no audio is found, it clears duration but doesn't reset `playback_position`. Evidence: `client/src-tauri/src/commands/meeting.rs:90`, `client/src-tauri/src/commands/meeting.rs:128`.

Example: switching meetings mid-playback can continue the old audio; selecting a meeting with no audio leaves the previous playback position in the UI.

Recommendation: reuse the stop logic (or a shared helper) to stop playback and clear highlight/position when changing meetings; explicitly reset `playback_position` in the no-audio path. Evidence: `client/src-tauri/src/commands/playback.rs:53`.

### 6) Audio device IDs are unstable across runs (Medium) ✅ FIXED
Observation: device IDs are assigned from enumeration order, and `get_default_input_device` always returns `id = 0`. Preferences store that id and later match by id. Evidence: `client/src-tauri/src/audio/devices.rs:16`, `client/src-tauri/src/audio/devices.rs:57`, `client/src-tauri/src/commands/audio.rs:22`, `client/src-tauri/src/commands/audio.rs:42`.

Example: unplugging/replugging devices changes enumeration order; the stored id may point to a different mic on the next launch.

Recommendation: persist a stable identifier (device name + host, or a hashed name), and resolve by that; handle duplicate names gracefully. Evidence: `client/src-tauri/src/audio/devices.rs:21`.

### 7) Preferences are in-memory only and include sensitive fields (Low/Medium) ✅ FIXED
Observation: preferences are stored in an in-memory `HashMap`; there's no persistence or secure storage, even for API keys. The UI stores and loads these values. Evidence: `client/src-tauri/src/state/app_state.rs:262`, `client/src-tauri/src/commands/preferences.rs:120`, `client/src/components/settings/SettingsPanel.tsx:167`.

Example: restarting the app loses `serverUrl`, `dataDirectory`, and `cloudApiKey`; API keys are kept in plain memory and re-exposed to the UI.

Recommendation: persist preferences to disk (config file) and store secrets in the OS keychain/credential vault; avoid returning stored secrets to the UI unless explicitly requested. Evidence: `client/src-tauri/src/commands/preferences.rs:151`.

### 8) Annotation UI wired to stubbed gRPC methods (Medium) ✅ FIXED
Observation: the new UI calls annotation add/delete, but the Rust gRPC client methods were TODO/NotImplemented or returned empty lists.

**Fix Applied:** Replaced all 5 annotation stub methods in `grpc/client.rs` with actual gRPC calls:
- Added `tonic_client()` helper to create `NoteFlowServiceClient` from the existing `Channel`
- Added `annotation_from_proto()` converter for proto → local type mapping
- Added `impl From<i32> for AnnotationType` in `types.rs`
- `add_annotation`, `get_annotation`, `list_annotations`, `update_annotation`, `delete_annotation` now make real server calls
- Removed the dead `AnnotationInfo::new()` constructor (no longer needed)

## Suggested Tests
- Playback pause/resume keeps position/highlight updates flowing (unit/integration around playback events).
- Playback speed/duration is correct for a 48kHz `.nfaudio` fixture.
- `select_meeting` stops audio and resets position when switching meetings or when audio is missing.
- Device selection resolves the intended microphone across restarts.
9032  docs/ui.md  (diff suppressed because it is too large)
@@ -12,7 +12,7 @@
     "files": true,
     "removeComments": true,
     "removeEmptyLines": true,
-    "compress": false,
+    "compress": true,
     "topFilesLength": 5,
     "showLineNumbers": false,
     "truncateBase64": false,
@@ -26,7 +26,7 @@
       "includeLogsCount": 50
     }
   },
-  "include": ["src/"],
+  "include": ["src/", "client/"],
   "ignore": {
     "useGitignore": true,
     "useDefaultPatterns": true,
@@ -324,7 +324,8 @@ class MeetingService:
         executive_summary: str,
         key_points: list[KeyPoint] | None = None,
         action_items: list[ActionItem] | None = None,
-        model_version: str = "",
+        provider_name: str = "",
+        model_name: str = "",
     ) -> Summary:
         """Save or update a meeting summary.
@@ -333,7 +334,8 @@ class MeetingService:
             executive_summary: Executive summary text.
             key_points: List of key points.
             action_items: List of action items.
-            model_version: Model version that generated the summary.
+            provider_name: Name of the provider that generated the summary.
+            model_name: Name of the model that generated the summary.

         Returns:
             Saved summary.
@@ -344,7 +346,8 @@ class MeetingService:
             key_points=key_points or [],
             action_items=action_items or [],
             generated_at=datetime.now(UTC),
-            model_version=model_version,
+            provider_name=provider_name,
+            model_name=model_name,
         )

         async with self._uow:
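
For orientation, a sketch of the updated call site; names beyond the hunk (`meeting_service`, `meeting`, the keyword values) are assumed for illustration:

```python
# Illustrative only: callers now pass provider/model instead of model_version.
summary = await meeting_service.save_summary(
    meeting_id=meeting.id,
    executive_summary="Quarterly planning recap.",
    provider_name="openai",       # hypothetical values
    model_name="gpt-4o-mini",
)
assert summary.model_version == "openai/gpt-4o-mini"  # via the compat property below
```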
@@ -21,6 +21,7 @@ class KeyPoint:
     segment_ids: list[int] = field(default_factory=list)
     start_time: float = 0.0
     end_time: float = 0.0
+    position: int = 0  # Ordering within the summary

     # Database primary key (set after persistence)
     db_id: int | None = None
@@ -42,6 +43,9 @@ class ActionItem:
     due_date: datetime | None = None
     priority: int = 0  # 0=unspecified, 1=low, 2=medium, 3=high
     segment_ids: list[int] = field(default_factory=list)
+    start_time: float = 0.0
+    end_time: float = 0.0
+    position: int = 0  # Ordering within the summary

     # Database primary key (set after persistence)
     db_id: int | None = None
@@ -72,7 +76,15 @@ class Summary:
     key_points: list[KeyPoint] = field(default_factory=list)
     action_items: list[ActionItem] = field(default_factory=list)
     generated_at: datetime | None = None
-    model_version: str = ""
+
+    # Provider tracking
+    provider_name: str = ""
+    model_name: str = ""
+    tokens_used: int | None = None
+    latency_ms: float | None = None
+
+    # Verification/citation metadata
+    verification: dict[str, object] = field(default_factory=dict)

     # Database primary key (set after persistence)
     db_id: int | None = None
@@ -108,3 +120,13 @@ class Summary:
     def unevidenced_actions(self) -> list[ActionItem]:
         """Action items without transcript evidence."""
         return [ai for ai in self.action_items if not ai.has_evidence()]
+
+    @property
+    def model_version(self) -> str:
+        """Backward-compatible model version string.
+
+        Computes from provider_name and model_name for API compatibility.
+        """
+        if self.provider_name and self.model_name:
+            return f"{self.provider_name}/{self.model_name}"
+        return self.model_name or self.provider_name
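
A quick illustration of what the compatibility property yields; `mid` is a placeholder meeting id and other fields take their defaults:

```python
s = Summary(meeting_id=mid, executive_summary="", provider_name="openai", model_name="gpt-4o-mini")
assert s.model_version == "openai/gpt-4o-mini"  # combined form
s = Summary(meeting_id=mid, executive_summary="", model_name="v0")
assert s.model_version == "v0"  # falls back to whichever part is set
```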

@@ -114,5 +114,6 @@ class SummarizationMixin:
             executive_summary=executive,
             key_points=[],
             action_items=[],
-            model_version="placeholder-v0",
+            provider_name="placeholder",
+            model_name="v0",
         )
@@ -60,7 +60,9 @@ class OrmConverter:
         )

     @staticmethod
-    def word_timing_to_orm_kwargs(word: DomainWordTiming) -> dict[str, str | float]:
+    def word_timing_to_orm_kwargs(
+        word: DomainWordTiming, word_index: int
+    ) -> dict[str, str | float | int]:
         """Convert domain WordTiming to ORM model kwargs.

         Return a dict of kwargs rather than instantiating WordTimingModel directly
@@ -68,12 +70,14 @@ class OrmConverter:

         Args:
             word: Domain WordTiming entity.
+            word_index: Position of word in the segment.

         Returns:
-            Dict with word, start_time, end_time, probability for ORM construction.
+            Dict with word, word_index, start_time, end_time, probability for ORM construction.
         """
         return {
             "word": word.word,
+            "word_index": word_index,
             "start_time": word.start_time,
             "end_time": word.end_time,
             "probability": word.probability,

@@ -180,6 +184,7 @@ class OrmConverter:
             segment_ids=model.segment_ids,
             start_time=model.start_time,
             end_time=model.end_time,
+            position=model.position,
             db_id=model.id,
         )
@@ -199,6 +204,9 @@ class OrmConverter:
             due_date=model.due_date,
             priority=model.priority,
             segment_ids=model.segment_ids,
+            start_time=model.start_time,
+            end_time=model.end_time,
+            position=model.position,
             db_id=model.id,
         )
@@ -219,6 +227,10 @@ class OrmConverter:
             key_points=[OrmConverter.key_point_to_domain(kp) for kp in model.key_points],
             action_items=[OrmConverter.action_item_to_domain(ai) for ai in model.action_items],
             generated_at=model.generated_at,
-            model_version=model.model_version or "",
+            provider_name=model.provider_name,
+            model_name=model.model_name,
+            tokens_used=model.tokens_used,
+            latency_ms=model.latency_ms,
+            verification=dict(model.verification),
             db_id=model.id,
         )
@@ -1,396 +0,0 @@
"""SQLAlchemy ORM models for NoteFlow."""

from __future__ import annotations

from datetime import datetime
from typing import ClassVar
from uuid import uuid4

from pgvector.sqlalchemy import Vector
from sqlalchemy import (
    DateTime,
    Float,
    ForeignKey,
    Integer,
    LargeBinary,
    String,
    Text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

# Vector dimension for embeddings (OpenAI compatible)
EMBEDDING_DIM = 1536


class Base(DeclarativeBase):
    """Base class for all ORM models."""

    pass


class MeetingModel(Base):
    """SQLAlchemy model for meetings table."""

    __tablename__ = "meetings"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    title: Mapped[str] = mapped_column(String(255), nullable=False)
    state: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    started_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    ended_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    metadata_: Mapped[dict[str, str]] = mapped_column(
        "metadata",
        JSONB,
        nullable=False,
        default=dict,
    )
    wrapped_dek: Mapped[bytes | None] = mapped_column(
        LargeBinary,
        nullable=True,
    )
    asset_path: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )

    # Relationships
    segments: Mapped[list[SegmentModel]] = relationship(
        "SegmentModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
        lazy="selectin",
    )
    summary: Mapped[SummaryModel | None] = relationship(
        "SummaryModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
        uselist=False,
        lazy="selectin",
    )
    annotations: Mapped[list[AnnotationModel]] = relationship(
        "AnnotationModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
        lazy="selectin",
    )


class SegmentModel(Base):
    """SQLAlchemy model for segments table."""

    __tablename__ = "segments"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    meeting_id: Mapped[UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
    )
    segment_id: Mapped[int] = mapped_column(Integer, nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
    language_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    avg_logprob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    no_speech_prob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    embedding: Mapped[list[float] | None] = mapped_column(
        Vector(EMBEDDING_DIM),
        nullable=True,
    )
    speaker_id: Mapped[str | None] = mapped_column(String(50), nullable=True)
    speaker_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="segments",
    )
    words: Mapped[list[WordTimingModel]] = relationship(
        "WordTimingModel",
        back_populates="segment",
        cascade="all, delete-orphan",
        lazy="selectin",
    )


class WordTimingModel(Base):
    """SQLAlchemy model for word_timings table."""

    __tablename__ = "word_timings"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    segment_pk: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("noteflow.segments.id", ondelete="CASCADE"),
        nullable=False,
    )
    word: Mapped[str] = mapped_column(String(255), nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    probability: Mapped[float] = mapped_column(Float, nullable=False)

    # Relationships
    segment: Mapped[SegmentModel] = relationship(
        "SegmentModel",
        back_populates="words",
    )


class SummaryModel(Base):
    """SQLAlchemy model for summaries table."""

    __tablename__ = "summaries"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    meeting_id: Mapped[UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    executive_summary: Mapped[str | None] = mapped_column(Text, nullable=True)
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    model_version: Mapped[str | None] = mapped_column(String(50), nullable=True)

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="summary",
    )
    key_points: Mapped[list[KeyPointModel]] = relationship(
        "KeyPointModel",
        back_populates="summary",
        cascade="all, delete-orphan",
        lazy="selectin",
    )
    action_items: Mapped[list[ActionItemModel]] = relationship(
        "ActionItemModel",
        back_populates="summary",
        cascade="all, delete-orphan",
        lazy="selectin",
    )


class KeyPointModel(Base):
    """SQLAlchemy model for key_points table."""

    __tablename__ = "key_points"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    summary_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
        nullable=False,
    )
    text: Mapped[str] = mapped_column(Text, nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    segment_ids: Mapped[list[int]] = mapped_column(
        JSONB,
        nullable=False,
        default=list,
    )

    # Relationships
    summary: Mapped[SummaryModel] = relationship(
        "SummaryModel",
        back_populates="key_points",
    )


class ActionItemModel(Base):
    """SQLAlchemy model for action_items table."""

    __tablename__ = "action_items"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    summary_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
        nullable=False,
    )
    text: Mapped[str] = mapped_column(Text, nullable=False)
    assignee: Mapped[str] = mapped_column(String(255), nullable=False, default="")
    due_date: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    segment_ids: Mapped[list[int]] = mapped_column(
        JSONB,
        nullable=False,
        default=list,
    )

    # Relationships
    summary: Mapped[SummaryModel] = relationship(
        "SummaryModel",
        back_populates="action_items",
    )


class AnnotationModel(Base):
    """SQLAlchemy model for annotations table.

    User-created annotations during recording. Distinct from LLM-extracted
    ActionItem/KeyPoint which belong to Summary. Annotations belong directly
    to Meeting and are created in real-time.
    """

    __tablename__ = "annotations"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    annotation_id: Mapped[UUID] = mapped_column(
        UUID(as_uuid=True),
        nullable=False,
        unique=True,
        default=uuid4,
    )
    meeting_id: Mapped[UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
    )
    annotation_type: Mapped[str] = mapped_column(String(50), nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    segment_ids: Mapped[list[int]] = mapped_column(
        JSONB,
        nullable=False,
        default=list,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="annotations",
    )


class UserPreferencesModel(Base):
    """SQLAlchemy model for user_preferences table.

    Stores key-value user preferences for persistence across server restarts.
    Currently used for cloud consent and other settings.
    """

    __tablename__ = "user_preferences"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    key: Mapped[str] = mapped_column(String(64), unique=True, index=True, nullable=False)
    value: Mapped[dict[str, object]] = mapped_column(JSONB, nullable=False, default=dict)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )


class DiarizationJobModel(Base):
    """SQLAlchemy model for diarization_jobs table.

    Tracks background speaker diarization jobs. Persisting job state
    allows recovery after server restart and provides client visibility.
    """

    __tablename__ = "diarization_jobs"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    meeting_id: Mapped[UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    status: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    segments_updated: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    speaker_ids: Mapped[list[str]] = mapped_column(
        JSONB,
        nullable=False,
        default=list,
    )
    error_message: Mapped[str] = mapped_column(Text, nullable=False, default="")
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )


class StreamingDiarizationTurnModel(Base):
    """SQLAlchemy model for streaming_diarization_turns table.

    Stores speaker turns from real-time streaming diarization for crash
    resilience. These turns are persisted as they arrive and can be reloaded
    if the server restarts during a recording session.
    """

    __tablename__ = "streaming_diarization_turns"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    meeting_id: Mapped[UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    speaker: Mapped[str] = mapped_column(String(50), nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
100  src/noteflow/infrastructure/persistence/models/__init__.py  (new file)
@@ -0,0 +1,100 @@
"""SQLAlchemy ORM models for NoteFlow.

All models are re-exported here for backward compatibility with existing imports.
Models are organized into subdomain packages:
- core/: Meeting, segments, summaries, annotations, diarization
- identity/: Workspaces, users, settings
- entities/: Persons, speakers (knowledge graph entities)
- organization/: Tags, tasks
- integrations/: External service integrations, calendar
"""

from noteflow.infrastructure.persistence.models._base import (
    DEFAULT_USER_ID,
    DEFAULT_WORKSPACE_ID,
    EMBEDDING_DIM,
    Base,
)

# Core domain models
from noteflow.infrastructure.persistence.models.core import (
    ActionItemModel,
    AnnotationModel,
    DiarizationJobModel,
    KeyPointModel,
    MeetingModel,
    SegmentModel,
    StreamingDiarizationTurnModel,
    SummaryModel,
    WordTimingModel,
)

# Entity models (knowledge graph)
from noteflow.infrastructure.persistence.models.entities import (
    MeetingSpeakerModel,
    PersonModel,
)

# Identity and tenancy models
from noteflow.infrastructure.persistence.models.identity import (
    SettingsModel,
    UserModel,
    UserPreferencesModel,
    WorkspaceMembershipModel,
    WorkspaceModel,
)

# Integration models
from noteflow.infrastructure.persistence.models.integrations import (
    CalendarEventModel,
    ExternalRefModel,
    IntegrationModel,
    IntegrationSecretModel,
    IntegrationSyncRunModel,
    MeetingCalendarLinkModel,
)

# Organization models
from noteflow.infrastructure.persistence.models.organization import (
    MeetingTagModel,
    TagModel,
    TaskModel,
)

__all__ = [
    "DEFAULT_USER_ID",
    "DEFAULT_WORKSPACE_ID",
    "EMBEDDING_DIM",
    # Core domain
    "ActionItemModel",
    "AnnotationModel",
    # Base and constants
    "Base",
    # Integrations
    "CalendarEventModel",
    "DiarizationJobModel",
    "ExternalRefModel",
    "IntegrationModel",
    "IntegrationSecretModel",
    "IntegrationSyncRunModel",
    "KeyPointModel",
    "MeetingCalendarLinkModel",
    "MeetingModel",
    # Entities
    "MeetingSpeakerModel",
    # Organization
    "MeetingTagModel",
    "PersonModel",
    "SegmentModel",
    # Identity
    "SettingsModel",
    "StreamingDiarizationTurnModel",
    "SummaryModel",
    "TagModel",
    "TaskModel",
    "UserModel",
    "UserPreferencesModel",
    "WordTimingModel",
    "WorkspaceMembershipModel",
    "WorkspaceModel",
]
18  src/noteflow/infrastructure/persistence/models/_base.py  (new file)
@@ -0,0 +1,18 @@
"""Base class and shared constants for SQLAlchemy ORM models."""

from __future__ import annotations

from sqlalchemy.orm import DeclarativeBase

# Vector dimension for embeddings (OpenAI compatible)
EMBEDDING_DIM = 1536

# Default workspace/user UUID for single-user mode
DEFAULT_WORKSPACE_ID = "00000000-0000-0000-0000-000000000001"
DEFAULT_USER_ID = "00000000-0000-0000-0000-000000000001"


class Base(DeclarativeBase):
    """Base class for all ORM models."""

    pass
@@ -0,0 +1,29 @@
"""Core meeting domain models."""

from noteflow.infrastructure.persistence.models.core.annotation import AnnotationModel
from noteflow.infrastructure.persistence.models.core.diarization import (
    DiarizationJobModel,
    StreamingDiarizationTurnModel,
)
from noteflow.infrastructure.persistence.models.core.meeting import (
    MeetingModel,
    SegmentModel,
    WordTimingModel,
)
from noteflow.infrastructure.persistence.models.core.summary import (
    ActionItemModel,
    KeyPointModel,
    SummaryModel,
)

__all__ = [
    "ActionItemModel",
    "AnnotationModel",
    "DiarizationJobModel",
    "KeyPointModel",
    "MeetingModel",
    "SegmentModel",
    "StreamingDiarizationTurnModel",
    "SummaryModel",
    "WordTimingModel",
]
@@ -0,0 +1,64 @@
"""User annotation models."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4

from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel


class AnnotationModel(Base):
    """Represent a user-created annotation during recording.

    Distinct from LLM-extracted ActionItem/KeyPoint which belong to Summary.
    Annotations belong directly to Meeting and are created in real-time.
    """

    __tablename__ = "annotations"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    annotation_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        nullable=False,
        unique=True,
        default=uuid4,
    )
    meeting_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    annotation_type: Mapped[str] = mapped_column(String(50), nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    segment_ids: Mapped[list[int]] = mapped_column(
        JSONB,
        nullable=False,
        default=list,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="annotations",
    )
@@ -0,0 +1,96 @@
"""Speaker diarization models."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID

from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel


class DiarizationJobModel(Base):
    """Track background speaker diarization jobs.

    Persisting job state allows recovery after server restart and provides
    client visibility into job progress.
    """

    __tablename__ = "diarization_jobs"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    meeting_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    status: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    segments_updated: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    speaker_ids: Mapped[list[str]] = mapped_column(
        JSONB,
        nullable=False,
        default=list,
    )
    error_message: Mapped[str] = mapped_column(Text, nullable=False, default="")
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="diarization_jobs",
    )


class StreamingDiarizationTurnModel(Base):
    """Store speaker turns from real-time streaming diarization.

    These turns are persisted as they arrive for crash resilience
    and can be reloaded if the server restarts during a recording session.
    """

    __tablename__ = "streaming_diarization_turns"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    meeting_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    speaker: Mapped[str] = mapped_column(String(50), nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="streaming_turns",
    )
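
For context, a sketch of the crash-recovery read path this persistence enables (async session setup omitted; `meeting_id` and `session` are assumed names):

```python
# Illustrative only: reload persisted streaming turns for a meeting after a restart.
from sqlalchemy import select

stmt = (
    select(StreamingDiarizationTurnModel)
    .where(StreamingDiarizationTurnModel.meeting_id == meeting_id)
    .order_by(StreamingDiarizationTurnModel.start_time)
)
turns = (await session.execute(stmt)).scalars().all()
```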
244  src/noteflow/infrastructure/persistence/models/core/meeting.py  (new file)
@@ -0,0 +1,244 @@
"""Core meeting domain models."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4

from pgvector.sqlalchemy import Vector
from sqlalchemy import (
    DateTime,
    Float,
    ForeignKey,
    Integer,
    LargeBinary,
    String,
    Text,
    UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import DEFAULT_USER_ID, DEFAULT_WORKSPACE_ID, EMBEDDING_DIM, Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.annotation import AnnotationModel
    from noteflow.infrastructure.persistence.models.core.diarization import (
        DiarizationJobModel,
        StreamingDiarizationTurnModel,
    )
    from noteflow.infrastructure.persistence.models.core.summary import SummaryModel
    from noteflow.infrastructure.persistence.models.entities.speaker import (
        MeetingSpeakerModel,
    )
    from noteflow.infrastructure.persistence.models.identity.identity import (
        UserModel,
        WorkspaceModel,
    )
    from noteflow.infrastructure.persistence.models.integrations.integration import (
        MeetingCalendarLinkModel,
    )
    from noteflow.infrastructure.persistence.models.organization.tagging import (
        MeetingTagModel,
    )
    from noteflow.infrastructure.persistence.models.organization.task import TaskModel


class MeetingModel(Base):
    """Represent a meeting recording session."""

    __tablename__ = "meetings"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    # Forward-looking tenancy fields with safe defaults for current single-user mode
    workspace_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.workspaces.id", ondelete="RESTRICT"),
        nullable=False,
        default=lambda: PyUUID(DEFAULT_WORKSPACE_ID),
    )
    created_by_id: Mapped[PyUUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.users.id", ondelete="SET NULL"),
        nullable=True,
        default=lambda: PyUUID(DEFAULT_USER_ID),
    )

    title: Mapped[str] = mapped_column(String(255), nullable=False)
    state: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    started_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    ended_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    metadata_: Mapped[dict[str, object]] = mapped_column(
        "metadata",
        JSONB,
        nullable=False,
        default=dict,
    )
    wrapped_dek: Mapped[bytes | None] = mapped_column(
        LargeBinary,
        nullable=True,
    )
    asset_path: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )
    # Soft delete support
    deleted_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # Relationships
    workspace: Mapped[WorkspaceModel] = relationship(
        "WorkspaceModel",
        back_populates="meetings",
    )
    created_by: Mapped[UserModel | None] = relationship(
        "UserModel",
        back_populates="created_meetings",
        foreign_keys=[created_by_id],
    )
    segments: Mapped[list[SegmentModel]] = relationship(
        "SegmentModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
        lazy="selectin",
    )
    summary: Mapped[SummaryModel | None] = relationship(
        "SummaryModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
        uselist=False,
        lazy="selectin",
    )
    annotations: Mapped[list[AnnotationModel]] = relationship(
        "AnnotationModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
        lazy="selectin",
    )
    diarization_jobs: Mapped[list[DiarizationJobModel]] = relationship(
        "DiarizationJobModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
    )
    streaming_turns: Mapped[list[StreamingDiarizationTurnModel]] = relationship(
        "StreamingDiarizationTurnModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
    )
    speakers: Mapped[list[MeetingSpeakerModel]] = relationship(
        "MeetingSpeakerModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
    )
    meeting_tags: Mapped[list[MeetingTagModel]] = relationship(
        "MeetingTagModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
    )
    tasks: Mapped[list[TaskModel]] = relationship(
        "TaskModel",
        back_populates="meeting",
    )
    calendar_links: Mapped[list[MeetingCalendarLinkModel]] = relationship(
        "MeetingCalendarLinkModel",
        back_populates="meeting",
        cascade="all, delete-orphan",
    )


class SegmentModel(Base):
    """Represent a transcript segment within a meeting."""

    __tablename__ = "segments"
    __table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
        UniqueConstraint("meeting_id", "segment_id", name="segments_unique_per_meeting"),
        {"schema": "noteflow"},
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    meeting_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
    )
    segment_id: Mapped[int] = mapped_column(Integer, nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    language: Mapped[str] = mapped_column(String(10), nullable=False, default="en")
    language_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    avg_logprob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    no_speech_prob: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    embedding: Mapped[list[float] | None] = mapped_column(
        Vector(EMBEDDING_DIM),
        nullable=True,
    )
    speaker_id: Mapped[str | None] = mapped_column(String(50), nullable=True)
    speaker_confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="segments",
    )
    words: Mapped[list[WordTimingModel]] = relationship(
        "WordTimingModel",
        back_populates="segment",
        cascade="all, delete-orphan",
        lazy="selectin",
    )


class WordTimingModel(Base):
    """Represent word-level timing within a segment."""

    __tablename__ = "word_timings"
    __table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
        UniqueConstraint("segment_pk", "word_index", name="word_timings_unique_per_segment"),
        {"schema": "noteflow"},
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    segment_pk: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("noteflow.segments.id", ondelete="CASCADE"),
        nullable=False,
    )
    word_index: Mapped[int] = mapped_column(Integer, nullable=False)
    word: Mapped[str] = mapped_column(String(255), nullable=False)
    start_time: Mapped[float] = mapped_column(Float, nullable=False)
    end_time: Mapped[float] = mapped_column(Float, nullable=False)
    probability: Mapped[float] = mapped_column(Float, nullable=False)

    # Relationships
    segment: Mapped[SegmentModel] = relationship(
        "SegmentModel",
        back_populates="words",
    )
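
A sketch of what the new `segments_unique_per_meeting` constraint enforces (engine/session setup omitted; `mid` and `session` are placeholders):

```python
# Illustrative only: a second row with the same (meeting_id, segment_id)
# violates segments_unique_per_meeting and raises IntegrityError on flush.
from sqlalchemy.exc import IntegrityError

session.add(SegmentModel(meeting_id=mid, segment_id=1, text="a", start_time=0.0, end_time=1.0))
session.flush()
session.add(SegmentModel(meeting_id=mid, segment_id=1, text="b", start_time=1.0, end_time=2.0))
try:
    session.flush()
except IntegrityError:
    session.rollback()  # duplicate rejected at the database level
```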
143  src/noteflow/infrastructure/persistence/models/core/summary.py  (new file)
@@ -0,0 +1,143 @@
"""Summary and intelligence output models."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID

from sqlalchemy import DateTime, Float, ForeignKey, Integer, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
    from noteflow.infrastructure.persistence.models.organization.task import TaskModel


class SummaryModel(Base):
    """Represent an LLM-generated meeting summary."""

    __tablename__ = "summaries"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    meeting_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    executive_summary: Mapped[str] = mapped_column(Text, nullable=False, default="")
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    # Provider tracking
    provider_name: Mapped[str] = mapped_column(Text, nullable=False, default="")
    model_name: Mapped[str] = mapped_column(Text, nullable=False, default="")
    tokens_used: Mapped[int | None] = mapped_column(Integer, nullable=True)
    latency_ms: Mapped[float | None] = mapped_column(Float, nullable=True)
    # Verification/citation data
    verification: Mapped[dict[str, object]] = mapped_column(
        JSONB,
        nullable=False,
        default=dict,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="summary",
    )
    key_points: Mapped[list[KeyPointModel]] = relationship(
        "KeyPointModel",
        back_populates="summary",
        cascade="all, delete-orphan",
        lazy="selectin",
    )
    action_items: Mapped[list[ActionItemModel]] = relationship(
        "ActionItemModel",
        back_populates="summary",
        cascade="all, delete-orphan",
        lazy="selectin",
    )


class KeyPointModel(Base):
    """Represent an extracted key point from a summary."""

    __tablename__ = "key_points"
    __table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
        UniqueConstraint("summary_id", "position", name="key_points_unique_position"),
        {"schema": "noteflow"},
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    summary_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
        nullable=False,
    )
    position: Mapped[int] = mapped_column(Integer, nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    segment_ids: Mapped[list[int]] = mapped_column(
        JSONB,
        nullable=False,
        default=list,
    )
    start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)

    # Relationships
    summary: Mapped[SummaryModel] = relationship(
        "SummaryModel",
        back_populates="key_points",
    )


class ActionItemModel(Base):
    """Represent an extracted action item from a summary."""

    __tablename__ = "action_items"
    __table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
        UniqueConstraint("summary_id", "position", name="action_items_unique_position"),
        {"schema": "noteflow"},
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    summary_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
        nullable=False,
    )
    position: Mapped[int] = mapped_column(Integer, nullable=False)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    segment_ids: Mapped[list[int]] = mapped_column(
        JSONB,
        nullable=False,
        default=list,
    )
    start_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    end_time: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    assignee: Mapped[str] = mapped_column(Text, nullable=False, default="")
    due_date: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # Relationships
    summary: Mapped[SummaryModel] = relationship(
        "SummaryModel",
        back_populates="action_items",
    )
    tasks: Mapped[list[TaskModel]] = relationship(
        "TaskModel",
        back_populates="action_item",
    )
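
A sketch of assigning the position index that `key_points_unique_position` relies on (repository wiring assumed; `summary_row` and `domain_summary` are placeholder names):

```python
# Illustrative only: enumerate() provides the per-summary ordering
# that the unique constraint enforces.
orm_points = [
    KeyPointModel(
        summary_id=summary_row.id,
        position=i,
        text=kp.text,
        segment_ids=kp.segment_ids,
        start_time=kp.start_time,
        end_time=kp.end_time,
    )
    for i, kp in enumerate(domain_summary.key_points)
]
```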
@@ -0,0 +1,11 @@
"""Entity models for knowledge graph (persons, speakers, future: orgs, topics)."""

from noteflow.infrastructure.persistence.models.entities.speaker import (
    MeetingSpeakerModel,
    PersonModel,
)

__all__ = [
    "MeetingSpeakerModel",
    "PersonModel",
]
@@ -0,0 +1,115 @@
"""Speaker identity models."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4

from sqlalchemy import DateTime, ForeignKey, String, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
    from noteflow.infrastructure.persistence.models.identity.identity import (
        WorkspaceModel,
    )
    from noteflow.infrastructure.persistence.models.organization.task import TaskModel


class PersonModel(Base):
    """Represent a known person (speaker identity) in a workspace.

    Enables cross-meeting speaker recognition once voice embeddings are added.
    """

    __tablename__ = "persons"
    __table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
        UniqueConstraint("workspace_id", "email", name="persons_unique_email_per_workspace"),
        {"schema": "noteflow"},
    )

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    workspace_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
        nullable=False,
    )
    display_name: Mapped[str] = mapped_column(Text, nullable=False)
    email: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )
    metadata_: Mapped[dict[str, object]] = mapped_column(
        "metadata",
        JSONB,
        nullable=False,
        default=dict,
    )

    # Relationships
    workspace: Mapped[WorkspaceModel] = relationship(
        "WorkspaceModel",
        back_populates="persons",
    )
    meeting_speakers: Mapped[list[MeetingSpeakerModel]] = relationship(
        "MeetingSpeakerModel",
        back_populates="person",
    )
    assigned_tasks: Mapped[list[TaskModel]] = relationship(
        "TaskModel",
        back_populates="assignee_person",
    )


class MeetingSpeakerModel(Base):
    """Map speaker labels to display names and persons within a meeting."""

    __tablename__ = "meeting_speakers"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    meeting_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        primary_key=True,
    )
    speaker_id: Mapped[str] = mapped_column(String(50), primary_key=True)
    display_name: Mapped[str | None] = mapped_column(Text, nullable=True)
    person_id: Mapped[PyUUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.persons.id", ondelete="SET NULL"),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="speakers",
    )
    person: Mapped[PersonModel | None] = relationship(
        "PersonModel",
        back_populates="meeting_speakers",
    )
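A hedged usage sketch for the mapping above: resolve a diarization label such as "SPEAKER_00" to a human-readable name, preferring the meeting-level override, then the linked person (the helper name and session handling are assumptions):

from sqlalchemy.orm import selectinload

async def resolve_speaker_name(session, meeting_id, speaker_id: str) -> str:
    row = await session.get(
        MeetingSpeakerModel,
        (meeting_id, speaker_id),  # composite primary key
        options=[selectinload(MeetingSpeakerModel.person)],
    )
    if row is None:
        return speaker_id  # unknown label: fall back to the raw diarization id
    if row.display_name:
        return row.display_name
    return row.person.display_name if row.person else speaker_id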
@@ -0,0 +1,19 @@
"""Identity and tenancy models."""

from noteflow.infrastructure.persistence.models.identity.identity import (
    UserModel,
    WorkspaceMembershipModel,
    WorkspaceModel,
)
from noteflow.infrastructure.persistence.models.identity.settings import (
    SettingsModel,
    UserPreferencesModel,
)

__all__ = [
    "SettingsModel",
    "UserModel",
    "UserPreferencesModel",
    "WorkspaceMembershipModel",
    "WorkspaceModel",
]
@@ -0,0 +1,150 @@
"""Identity and tenancy models for multi-user support."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID

from sqlalchemy import DateTime, ForeignKey, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
    from noteflow.infrastructure.persistence.models.entities.speaker import PersonModel
    from noteflow.infrastructure.persistence.models.organization.tagging import TagModel
    from noteflow.infrastructure.persistence.models.organization.task import TaskModel


class WorkspaceModel(Base):
    """Represent a workspace for multi-tenant support."""

    __tablename__ = "workspaces"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[PyUUID] = mapped_column(UUID(as_uuid=True), primary_key=True)
    slug: Mapped[str | None] = mapped_column(Text, unique=True, nullable=True)
    name: Mapped[str] = mapped_column(Text, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )
    metadata_: Mapped[dict[str, object]] = mapped_column(
        "metadata",
        JSONB,
        nullable=False,
        default=dict,
    )

    # Relationships
    memberships: Mapped[list[WorkspaceMembershipModel]] = relationship(
        "WorkspaceMembershipModel",
        back_populates="workspace",
        cascade="all, delete-orphan",
    )
    meetings: Mapped[list[MeetingModel]] = relationship(
        "MeetingModel",
        back_populates="workspace",
        cascade="all, delete-orphan",
    )
    persons: Mapped[list[PersonModel]] = relationship(
        "PersonModel",
        back_populates="workspace",
        cascade="all, delete-orphan",
    )
    tags: Mapped[list[TagModel]] = relationship(
        "TagModel",
        back_populates="workspace",
        cascade="all, delete-orphan",
    )
    tasks: Mapped[list[TaskModel]] = relationship(
        "TaskModel",
        back_populates="workspace",
        cascade="all, delete-orphan",
    )


class UserModel(Base):
    """Represent a user account."""

    __tablename__ = "users"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    id: Mapped[PyUUID] = mapped_column(UUID(as_uuid=True), primary_key=True)
    email: Mapped[str | None] = mapped_column(Text, unique=True, nullable=True)
    display_name: Mapped[str] = mapped_column(Text, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )
    metadata_: Mapped[dict[str, object]] = mapped_column(
        "metadata",
        JSONB,
        nullable=False,
        default=dict,
    )

    # Relationships
    memberships: Mapped[list[WorkspaceMembershipModel]] = relationship(
        "WorkspaceMembershipModel",
        back_populates="user",
        cascade="all, delete-orphan",
    )
    created_meetings: Mapped[list[MeetingModel]] = relationship(
        "MeetingModel",
        back_populates="created_by",
        foreign_keys="MeetingModel.created_by_id",
    )


class WorkspaceMembershipModel(Base):
    """Represent workspace membership with role."""

    __tablename__ = "workspace_memberships"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    workspace_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
        primary_key=True,
    )
    user_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.users.id", ondelete="CASCADE"),
        primary_key=True,
    )
    role: Mapped[str] = mapped_column(String(50), nullable=False, default="owner")
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    workspace: Mapped[WorkspaceModel] = relationship(
        "WorkspaceModel",
        back_populates="memberships",
    )
    user: Mapped[UserModel] = relationship(
        "UserModel",
        back_populates="memberships",
    )
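Note that WorkspaceModel and UserModel declare no client-side id default (the seed SQL supplies ids), so callers must pass one. A hedged bootstrap sketch:

from uuid import uuid4

async def bootstrap_workspace(session, owner: UserModel, name: str) -> WorkspaceModel:
    workspace = WorkspaceModel(id=uuid4(), name=name)
    # role defaults to "owner" per the column default
    membership = WorkspaceMembershipModel(workspace=workspace, user=owner)
    session.add_all([workspace, membership])
    await session.flush()
    return workspace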
@@ -0,0 +1,101 @@
"""Settings and preferences models."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4

from sqlalchemy import CheckConstraint, DateTime, ForeignKey, String, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.identity.identity import (
        UserModel,
        WorkspaceModel,
    )


class SettingsModel(Base):
    """Represent scoped settings (system, workspace, or user level)."""

    __tablename__ = "settings"
    __table_args__: ClassVar[tuple[UniqueConstraint, CheckConstraint, dict[str, str]]] = (
        UniqueConstraint(
            "scope",
            "workspace_id",
            "user_id",
            "key",
            name="settings_unique_scope_key",
        ),
        CheckConstraint(
            "scope IN ('system', 'workspace', 'user')",
            name="settings_scope_chk",
        ),
        {"schema": "noteflow"},
    )

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    scope: Mapped[str] = mapped_column(Text, nullable=False)
    workspace_id: Mapped[PyUUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
        nullable=True,
    )
    user_id: Mapped[PyUUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.users.id", ondelete="CASCADE"),
        nullable=True,
    )
    key: Mapped[str] = mapped_column(Text, nullable=False)
    value: Mapped[dict[str, object]] = mapped_column(
        JSONB,
        nullable=False,
        default=dict,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )

    # Relationships
    workspace: Mapped[WorkspaceModel | None] = relationship("WorkspaceModel")
    user: Mapped[UserModel | None] = relationship("UserModel")


class UserPreferencesModel(Base):
    """Store key-value user preferences for persistence across server restarts.

    Simple KV store compatible with the current codebase pattern.
    Currently used for cloud consent and other settings.
    """

    __tablename__ = "user_preferences"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    # Using key as primary key (matching the schema.sql design for KV-store simplicity)
    key: Mapped[str] = mapped_column(String(64), primary_key=True)
    value: Mapped[dict[str, object]] = mapped_column(JSONB, nullable=False, default=dict)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )
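The (scope, workspace_id, user_id, key) unique constraint supports layered configuration. A hedged lookup sketch that resolves the most specific scope first (the cascade order is an assumption):

from sqlalchemy import select

async def get_setting(session, key: str, workspace_id=None, user_id=None):
    candidates = [
        ("user", workspace_id, user_id),
        ("workspace", workspace_id, None),
        ("system", None, None),
    ]
    for scope, ws, usr in candidates:
        if (scope == "user" and usr is None) or (scope == "workspace" and ws is None):
            continue
        stmt = select(SettingsModel.value).where(
            SettingsModel.scope == scope,
            SettingsModel.workspace_id == ws,  # None renders as IS NULL
            SettingsModel.user_id == usr,
            SettingsModel.key == key,
        )
        value = (await session.execute(stmt)).scalar_one_or_none()
        if value is not None:
            return value
    return None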
@@ -0,0 +1,19 @@
"""External integration models (calendar, auth, PKM, etc.)."""

from noteflow.infrastructure.persistence.models.integrations.integration import (
    CalendarEventModel,
    ExternalRefModel,
    IntegrationModel,
    IntegrationSecretModel,
    IntegrationSyncRunModel,
    MeetingCalendarLinkModel,
)

__all__ = [
    "CalendarEventModel",
    "ExternalRefModel",
    "IntegrationModel",
    "IntegrationSecretModel",
    "IntegrationSyncRunModel",
    "MeetingCalendarLinkModel",
]
@@ -0,0 +1,323 @@
"""Integration and calendar models."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4

from sqlalchemy import (
    CheckConstraint,
    DateTime,
    ForeignKey,
    Integer,
    LargeBinary,
    Text,
    UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
    from noteflow.infrastructure.persistence.models.identity.identity import (
        WorkspaceModel,
    )


class IntegrationModel(Base):
    """Represent an external service integration."""

    __tablename__ = "integrations"
    __table_args__: ClassVar[tuple[CheckConstraint, CheckConstraint, dict[str, str]]] = (
        CheckConstraint(
            "type IN ('auth', 'email', 'calendar', 'pkm', 'custom')",
            name="integrations_type_chk",
        ),
        CheckConstraint(
            "status IN ('disconnected', 'connected', 'error')",
            name="integrations_status_chk",
        ),
        {"schema": "noteflow"},
    )

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    workspace_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(Text, nullable=False)
    type: Mapped[str] = mapped_column(Text, nullable=False)
    status: Mapped[str] = mapped_column(Text, nullable=False, default="disconnected")
    config: Mapped[dict[str, object]] = mapped_column(
        JSONB,
        nullable=False,
        default=dict,
    )
    last_sync: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )

    # Relationships
    workspace: Mapped[WorkspaceModel] = relationship("WorkspaceModel")
    secrets: Mapped[list[IntegrationSecretModel]] = relationship(
        "IntegrationSecretModel",
        back_populates="integration",
        cascade="all, delete-orphan",
    )
    sync_runs: Mapped[list[IntegrationSyncRunModel]] = relationship(
        "IntegrationSyncRunModel",
        back_populates="integration",
        cascade="all, delete-orphan",
    )
    calendar_events: Mapped[list[CalendarEventModel]] = relationship(
        "CalendarEventModel",
        back_populates="integration",
        cascade="all, delete-orphan",
    )
    external_refs: Mapped[list[ExternalRefModel]] = relationship(
        "ExternalRefModel",
        back_populates="integration",
        cascade="all, delete-orphan",
    )


class IntegrationSecretModel(Base):
    """Store encrypted secrets for an integration."""

    __tablename__ = "integration_secrets"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    integration_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.integrations.id", ondelete="CASCADE"),
        primary_key=True,
    )
    secret_key: Mapped[str] = mapped_column(Text, primary_key=True)
    secret_value: Mapped[bytes] = mapped_column(LargeBinary, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )

    # Relationships
    integration: Mapped[IntegrationModel] = relationship(
        "IntegrationModel",
        back_populates="secrets",
    )


class IntegrationSyncRunModel(Base):
    """Track sync operation history for an integration."""

    __tablename__ = "integration_sync_runs"
    __table_args__: ClassVar[tuple[CheckConstraint, dict[str, str]]] = (
        CheckConstraint(
            "status IN ('running', 'success', 'error')",
            name="integration_sync_runs_status_chk",
        ),
        {"schema": "noteflow"},
    )

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    integration_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.integrations.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    status: Mapped[str] = mapped_column(Text, nullable=False)
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    ended_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    stats: Mapped[dict[str, object]] = mapped_column(
        JSONB,
        nullable=False,
        default=dict,
    )

    # Relationships
    integration: Mapped[IntegrationModel] = relationship(
        "IntegrationModel",
        back_populates="sync_runs",
    )


class CalendarEventModel(Base):
    """Cache calendar event data from an integration."""

    __tablename__ = "calendar_events"
    __table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
        UniqueConstraint(
            "integration_id",
            "external_id",
            name="calendar_events_unique_external_id",
        ),
        {"schema": "noteflow"},
    )

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    integration_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.integrations.id", ondelete="CASCADE"),
        nullable=False,
    )
    external_id: Mapped[str] = mapped_column(Text, nullable=False)
    calendar_id: Mapped[str] = mapped_column(Text, nullable=False)
    calendar_name: Mapped[str] = mapped_column(Text, nullable=False)
    title: Mapped[str] = mapped_column(Text, nullable=False)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    start_time: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    end_time: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    location: Mapped[str | None] = mapped_column(Text, nullable=True)
    attendees: Mapped[list[str] | None] = mapped_column(ARRAY(Text), nullable=True)
    is_all_day: Mapped[bool] = mapped_column(default=False)
    meeting_link: Mapped[str | None] = mapped_column(Text, nullable=True)
    raw: Mapped[dict[str, object]] = mapped_column(
        JSONB,
        nullable=False,
        default=dict,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )

    # Relationships
    integration: Mapped[IntegrationModel] = relationship(
        "IntegrationModel",
        back_populates="calendar_events",
    )
    meeting_links: Mapped[list[MeetingCalendarLinkModel]] = relationship(
        "MeetingCalendarLinkModel",
        back_populates="calendar_event",
        cascade="all, delete-orphan",
    )


class MeetingCalendarLinkModel(Base):
    """Junction table linking meetings to calendar events."""

    __tablename__ = "meeting_calendar_links"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    meeting_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        primary_key=True,
    )
    calendar_event_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.calendar_events.id", ondelete="CASCADE"),
        primary_key=True,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="calendar_links",
    )
    calendar_event: Mapped[CalendarEventModel] = relationship(
        "CalendarEventModel",
        back_populates="meeting_links",
    )


class ExternalRefModel(Base):
    """Track references to external entities (generic ID mapping)."""

    __tablename__ = "external_refs"
    __table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
        UniqueConstraint(
            "integration_id",
            "entity_type",
            "entity_id",
            name="external_refs_unique_entity",
        ),
        {"schema": "noteflow"},
    )

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    integration_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.integrations.id", ondelete="CASCADE"),
        nullable=False,
    )
    entity_type: Mapped[str] = mapped_column(Text, nullable=False)
    entity_id: Mapped[str] = mapped_column(Text, nullable=False)
    external_id: Mapped[str] = mapped_column(Text, nullable=False)
    external_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    integration: Mapped[IntegrationModel] = relationship(
        "IntegrationModel",
        back_populates="external_refs",
    )
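A hedged sketch of how a sync run might be recorded against these models; started_at is set explicitly so the duration can be computed before flush (the sync_fn callable and helper name are hypothetical):

from noteflow.domain.utils.time import utc_now

async def record_sync_run(session, integration: IntegrationModel, sync_fn) -> IntegrationSyncRunModel:
    run = IntegrationSyncRunModel(
        integration=integration,
        status="running",
        started_at=utc_now(),  # the column default only applies at INSERT time
    )
    session.add(run)
    try:
        run.stats = await sync_fn()
        run.status = "success"
    except Exception as exc:
        run.status = "error"
        run.error_message = str(exc)
    run.ended_at = utc_now()
    run.duration_ms = int((run.ended_at - run.started_at).total_seconds() * 1000)
    await session.flush()
    return run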
@@ -0,0 +1,13 @@
"""Organization and workflow models (tagging, tasks)."""

from noteflow.infrastructure.persistence.models.organization.tagging import (
    MeetingTagModel,
    TagModel,
)
from noteflow.infrastructure.persistence.models.organization.task import TaskModel

__all__ = [
    "MeetingTagModel",
    "TagModel",
    "TaskModel",
]
@@ -0,0 +1,89 @@
"""Tagging models for meetings."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4

from sqlalchemy import DateTime, ForeignKey, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
    from noteflow.infrastructure.persistence.models.identity.identity import (
        WorkspaceModel,
    )


class TagModel(Base):
    """Represent a tag that can be applied to meetings."""

    __tablename__ = "tags"
    __table_args__: ClassVar[tuple[UniqueConstraint, dict[str, str]]] = (
        UniqueConstraint("workspace_id", "name", name="tags_unique_name_per_workspace"),
        {"schema": "noteflow"},
    )

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    workspace_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(Text, nullable=False)
    color: Mapped[str] = mapped_column(Text, nullable=False, default="#888888")
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )

    # Relationships
    workspace: Mapped[WorkspaceModel] = relationship(
        "WorkspaceModel",
        back_populates="tags",
    )
    meeting_tags: Mapped[list[MeetingTagModel]] = relationship(
        "MeetingTagModel",
        back_populates="tag",
        cascade="all, delete-orphan",
    )


class MeetingTagModel(Base):
    """Junction table linking meetings to tags."""

    __tablename__ = "meeting_tags"
    __table_args__: ClassVar[dict[str, str]] = {"schema": "noteflow"}

    meeting_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
        primary_key=True,
    )
    tag_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.tags.id", ondelete="CASCADE"),
        primary_key=True,
    )

    # Relationships
    meeting: Mapped[MeetingModel] = relationship(
        "MeetingModel",
        back_populates="meeting_tags",
    )
    tag: Mapped[TagModel] = relationship(
        "TagModel",
        back_populates="meeting_tags",
    )
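Usage is a plain composite-key insert; duplicates surface as constraint violations, so real code would check first or upsert. A hedged sketch:

async def tag_meeting(session, meeting_id, tag_id) -> None:
    session.add(MeetingTagModel(meeting_id=meeting_id, tag_id=tag_id))
    await session.flush()  # raises IntegrityError if the link already exists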
@@ -0,0 +1,110 @@
"""Task management models."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, ClassVar
from uuid import UUID as PyUUID
from uuid import uuid4

from sqlalchemy import CheckConstraint, DateTime, ForeignKey, Integer, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship

from noteflow.domain.utils.time import utc_now

from .._base import Base

if TYPE_CHECKING:
    from noteflow.infrastructure.persistence.models.core.meeting import MeetingModel
    from noteflow.infrastructure.persistence.models.core.summary import ActionItemModel
    from noteflow.infrastructure.persistence.models.entities.speaker import PersonModel
    from noteflow.infrastructure.persistence.models.identity.identity import (
        WorkspaceModel,
    )


class TaskModel(Base):
    """Represent a user-managed task, optionally derived from an action item."""

    __tablename__ = "tasks"
    __table_args__: ClassVar[tuple[CheckConstraint, dict[str, str]]] = (
        CheckConstraint(
            "status IN ('open', 'done', 'dismissed')",
            name="tasks_status_chk",
        ),
        {"schema": "noteflow"},
    )

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid4,
    )
    workspace_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.workspaces.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    meeting_id: Mapped[PyUUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.meetings.id", ondelete="SET NULL"),
        nullable=True,
    )
    action_item_id: Mapped[int | None] = mapped_column(
        Integer,
        ForeignKey("noteflow.action_items.id", ondelete="SET NULL"),
        nullable=True,
    )
    text: Mapped[str] = mapped_column(Text, nullable=False)
    status: Mapped[str] = mapped_column(Text, nullable=False, default="open")
    assignee_person_id: Mapped[PyUUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("noteflow.persons.id", ondelete="SET NULL"),
        nullable=True,
    )
    due_date: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    priority: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utc_now,
        onupdate=utc_now,
    )
    completed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    metadata_: Mapped[dict[str, object]] = mapped_column(
        "metadata",
        JSONB,
        nullable=False,
        default=dict,
    )

    # Relationships
    workspace: Mapped[WorkspaceModel] = relationship(
        "WorkspaceModel",
        back_populates="tasks",
    )
    meeting: Mapped[MeetingModel | None] = relationship(
        "MeetingModel",
        back_populates="tasks",
    )
    action_item: Mapped[ActionItemModel | None] = relationship(
        "ActionItemModel",
        back_populates="tasks",
    )
    assignee_person: Mapped[PersonModel | None] = relationship(
        "PersonModel",
        back_populates="assigned_tasks",
    )
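Because action_item_id uses ON DELETE SET NULL, a task outlives the action item it came from. A hedged promotion sketch from an ActionItemModel (the field mapping is an assumption):

def task_from_action_item(workspace_id, meeting_id, item: ActionItemModel) -> TaskModel:
    # id defaults to uuid4 and status defaults to "open"
    return TaskModel(
        workspace_id=workspace_id,
        meeting_id=meeting_id,
        action_item_id=item.id,
        text=item.text,
        due_date=item.due_date,
        priority=item.priority,
    )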
@@ -40,9 +40,9 @@ class SqlAlchemySegmentRepository(BaseRepository):
             speaker_confidence=segment.speaker_confidence,
         )

-        # Add word timings
-        for word in segment.words:
-            word_kwargs = OrmConverter.word_timing_to_orm_kwargs(word)
+        # Add word timings with position index
+        for word_index, word in enumerate(segment.words):
+            word_kwargs = OrmConverter.word_timing_to_orm_kwargs(word, word_index)
             word_model = WordTimingModel(**word_kwargs)
             model.words.append(word_model)
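The call-site change implies the converter gained a word_index parameter. Based on the test expectations further down, its shape is presumably close to this sketch (the exact attribute names on the WordTiming value object are an assumption):

@staticmethod
def word_timing_to_orm_kwargs(word: WordTiming, word_index: int) -> dict[str, object]:
    # word_index preserves the original word order within a segment
    return {
        "word": word.word,
        "start_time": word.start_time,
        "end_time": word.end_time,
        "probability": word.probability,
        "word_index": word_index,
    }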
@@ -32,9 +32,10 @@ class SqlAlchemySummaryRepository(BaseRepository):
             key_points: Key points to add. Their db_id fields are updated in place.
         """
         models: list[tuple[KeyPointModel, KeyPoint]] = []
-        for kp in key_points:
+        for position, kp in enumerate(key_points):
             kp_model = KeyPointModel(
                 summary_id=summary_id,
+                position=position,
                 text=kp.text,
                 start_time=kp.start_time,
                 end_time=kp.end_time,
@@ -46,6 +47,7 @@ class SqlAlchemySummaryRepository(BaseRepository):
         await self._session.flush()
         for kp_model, kp in models:
             kp.db_id = kp_model.id
+            kp.position = kp_model.position

     async def _add_action_items(self, summary_id: int, action_items: Sequence[ActionItem]) -> None:
         """Add action items to a summary.
@@ -55,14 +57,17 @@ class SqlAlchemySummaryRepository(BaseRepository):
             action_items: Action items to add. Their db_id fields are updated in place.
         """
         models: list[tuple[ActionItemModel, ActionItem]] = []
-        for ai in action_items:
+        for position, ai in enumerate(action_items):
             ai_model = ActionItemModel(
                 summary_id=summary_id,
+                position=position,
                 text=ai.text,
                 assignee=ai.assignee,
                 due_date=ai.due_date,
                 priority=ai.priority,
                 segment_ids=ai.segment_ids,
                 start_time=ai.start_time,
                 end_time=ai.end_time,
             )
             self._session.add(ai_model)
             models.append((ai_model, ai))
@@ -70,6 +75,7 @@ class SqlAlchemySummaryRepository(BaseRepository):
         await self._session.flush()
         for ai_model, ai in models:
             ai.db_id = ai_model.id
+            ai.position = ai_model.position

     async def save(self, summary: Summary) -> Summary:
         """Save or update a meeting summary.
@@ -88,7 +94,11 @@ class SqlAlchemySummaryRepository(BaseRepository):
             existing.executive_summary = summary.executive_summary
             if summary.generated_at is not None:
                 existing.generated_at = summary.generated_at
-            existing.model_version = summary.model_version
+            existing.provider_name = summary.provider_name
+            existing.model_name = summary.model_name
+            existing.tokens_used = summary.tokens_used
+            existing.latency_ms = summary.latency_ms
             existing.verification = summary.verification

             # Delete old key points and action items
             await self._session.execute(
@@ -108,7 +118,11 @@ class SqlAlchemySummaryRepository(BaseRepository):
                 meeting_id=UUID(str(summary.meeting_id)),
                 executive_summary=summary.executive_summary,
                 generated_at=summary.generated_at,
-                model_version=summary.model_version,
+                provider_name=summary.provider_name,
+                model_name=summary.model_name,
+                tokens_used=summary.tokens_used,
+                latency_ms=summary.latency_ms,
                 verification=summary.verification,
             )
             self._session.add(model)
             await self._session.flush()
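Writing position back onto the domain objects after flush keeps the in-memory entities consistent with what was persisted, so a caller can re-sort without another query, e.g. (hypothetical caller):

ordered = sorted(summary.key_points, key=lambda kp: kp.position)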
@@ -119,6 +119,10 @@ class SegmentCitationVerifier:
             key_points=filtered_key_points,
             action_items=filtered_action_items,
             generated_at=summary.generated_at,
-            model_version=summary.model_version,
+            provider_name=summary.provider_name,
+            model_name=summary.model_name,
+            tokens_used=summary.tokens_used,
+            latency_ms=summary.latency_ms,
             verification=summary.verification,
             db_id=summary.db_id,
         )
@@ -157,7 +157,8 @@ class CloudSummarizer:
                 key_points=[],
                 action_items=[],
                 generated_at=datetime.now(UTC),
-                model_version=self._model,
+                provider_name=self.provider_name,
+                model_name=self._model,
             ),
             model_name=self._model,
             provider_name=self.provider_name,
@@ -173,14 +174,15 @@ class CloudSummarizer:
         content, tokens_used = await asyncio.to_thread(self._call_anthropic, user_prompt)

         # Parse into Summary
-        summary = parse_llm_response(content, request)
+        parsed = parse_llm_response(content, request)
         summary = Summary(
-            meeting_id=summary.meeting_id,
-            executive_summary=summary.executive_summary,
-            key_points=summary.key_points,
-            action_items=summary.action_items,
-            generated_at=summary.generated_at,
-            model_version=self._model,
+            meeting_id=parsed.meeting_id,
+            executive_summary=parsed.executive_summary,
+            key_points=parsed.key_points,
+            action_items=parsed.action_items,
+            generated_at=parsed.generated_at,
+            provider_name=self.provider_name,
+            model_name=self._model,
         )

         elapsed_ms = (time.monotonic() - start) * 1000
@@ -97,7 +97,8 @@ class MockSummarizer:
             key_points=key_points,
             action_items=action_items,
             generated_at=datetime.now(UTC),
-            model_version="mock-1.0",
+            provider_name=self.provider_name,
+            model_name="mock-1.0",
         )

         elapsed = (time.monotonic() - start) * 1000 + self._latency_ms
@@ -116,7 +116,8 @@ class OllamaSummarizer:
                 key_points=[],
                 action_items=[],
                 generated_at=datetime.now(UTC),
-                model_version=self._model,
+                provider_name=self.provider_name,
+                model_name=self._model,
             ),
             model_name=self._model,
             provider_name=self.provider_name,
@@ -157,14 +158,15 @@ class OllamaSummarizer:
             raise InvalidResponseError("Empty response from Ollama")

         # Parse into Summary
-        summary = parse_llm_response(content, request)
+        parsed = parse_llm_response(content, request)
         summary = Summary(
-            meeting_id=summary.meeting_id,
-            executive_summary=summary.executive_summary,
-            key_points=summary.key_points,
-            action_items=summary.action_items,
-            generated_at=summary.generated_at,
-            model_version=self._model,
+            meeting_id=parsed.meeting_id,
+            executive_summary=parsed.executive_summary,
+            key_points=parsed.key_points,
+            action_items=parsed.action_items,
+            generated_at=parsed.generated_at,
+            provider_name=self.provider_name,
+            model_name=self._model,
         )

         elapsed_ms = (time.monotonic() - start) * 1000
@@ -333,7 +333,8 @@ class TestMeetingServiceSummaries:
             meeting_id=meeting_id,
             executive_summary="Test summary",
             generated_at=datetime.now(UTC),
-            model_version="test-v1",
+            provider_name="test",
+            model_name="v1",
         )
         mock_uow.summaries.save = AsyncMock(return_value=summary)

@@ -341,7 +342,8 @@ class TestMeetingServiceSummaries:
         result = await service.save_summary(
             meeting_id=meeting_id,
             executive_summary="Test summary",
-            model_version="test-v1",
+            provider_name="test",
+            model_name="v1",
         )

         assert result.executive_summary == "Test summary"

@@ -36,7 +36,7 @@ async def test_generate_summary_uses_placeholder_when_service_missing() -> None:
     )

     assert response.executive_summary != ""
-    assert response.model_version == "placeholder-v0"
+    assert response.model_version == "placeholder/v0"
     retrieved_meeting = store.get(str(meeting.id))
     assert retrieved_meeting is not None, "Meeting should exist after creation"
     assert retrieved_meeting.summary is not None
@@ -70,4 +70,4 @@ async def test_generate_summary_falls_back_when_provider_unavailable() -> None:
     )

     assert response.executive_summary != ""
-    assert response.model_version == "placeholder-v0"
+    assert response.model_version == "placeholder/v0"
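The expected string changing from "placeholder-v0" to "placeholder/v0" here (and "test-v1" to "test/v1" below) suggests model_version is no longer stored but derived from the two new fields. A hedged sketch of the presumed domain property:

@property
def model_version(self) -> str:
    # e.g. "placeholder/v0", "test/v1"
    return f"{self.provider_name}/{self.model_name}"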
@@ -231,7 +231,7 @@ class TestFilterInvalidCitations:
             ("key_points[0].start_time", 1.0),
             ("action_items[0].assignee", "Alice"),
             ("action_items[0].priority", 2),
-            ("model_version", "test-1.0"),
+            ("model_version", "test/1.0"),
         ],
     )
     def test_filter_preserves_other_fields(
@@ -244,7 +244,8 @@ class TestFilterInvalidCitations:
             executive_summary="Important meeting",
             key_points=[KeyPoint(text="Key point", segment_ids=[0], start_time=1.0, end_time=2.0)],
             action_items=[ActionItem(text="Action", segment_ids=[0], assignee="Alice", priority=2)],
-            model_version="test-1.0",
+            provider_name="test",
+            model_name="1.0",
         )
         filtered = verifier.filter_invalid_citations(summary, segments)
         # Navigate the attribute path
@@ -100,13 +100,14 @@ class TestOrmConverterToOrmKwargs:
             probability=0.9,
         )

-        result = OrmConverter.word_timing_to_orm_kwargs(word)
+        result = OrmConverter.word_timing_to_orm_kwargs(word, word_index=0)

         assert result == {
             "word": "test",
             "start_time": 1.5,
             "end_time": 2.0,
             "probability": 0.9,
+            "word_index": 0,
         }

     def test_preserves_precision(self) -> None:
@@ -118,8 +119,9 @@ class TestOrmConverterToOrmKwargs:
             probability=0.111111,
         )

-        result = OrmConverter.word_timing_to_orm_kwargs(word)
+        result = OrmConverter.word_timing_to_orm_kwargs(word, word_index=5)

         assert result["start_time"] == 0.123456789
         assert result["end_time"] == 0.987654321
         assert result["probability"] == 0.111111
+        assert result["word_index"] == 5
@@ -75,7 +75,8 @@ class TestSummarizationGeneration:
             action_items=[
                 ActionItem(text="Action 1", assignee="Alice"),
             ],
-            model_version="test-model-v1",
+            provider_name="test-model",
+            model_name="v1",
         )

         mock_service = MagicMock()
@@ -207,7 +208,7 @@ class TestSummarizationGeneration:
         result = await servicer.GenerateSummary(request, MockContext())

         assert "Segment 0" in result.executive_summary or "Segment 1" in result.executive_summary
-        assert result.model_version == "placeholder-v0"
+        assert result.model_version == "placeholder/v0"

     async def test_generate_summary_placeholder_on_service_error(
         self, session_factory: async_sessionmaker[AsyncSession]
@@ -242,7 +243,7 @@ class TestSummarizationGeneration:
         result = await servicer.GenerateSummary(request, MockContext())

         assert "Content that should appear" in result.executive_summary
-        assert result.model_version == "placeholder-v0"
+        assert result.model_version == "placeholder/v0"


 @pytest.mark.integration
@@ -321,7 +321,8 @@ class TestSummaryRepository:
             meeting_id=meeting.id,
             executive_summary="This was a productive meeting.",
             generated_at=datetime.now(UTC),
-            model_version="test-v1",
+            provider_name="test",
+            model_name="v1",
         )
         await summary_repo.save(summary)
         await session.commit()
@@ -330,7 +331,7 @@ class TestSummaryRepository:

         assert result is not None
         assert result.executive_summary == "This was a productive meeting."
-        assert result.model_version == "test-v1"
+        assert result.model_version == "test/v1"

     async def test_save_summary_with_key_points(self, session: AsyncSession) -> None:
         """Test saving summary with key points."""