chore: update client submodule to latest commit and add post-processing documentation

- Updated the client submodule to the latest commit for improved features and bug fixes.
- Added detailed documentation for the post-processing pipeline, including implementation checklists, testing strategies, and identified gaps in the current workflow.
- Introduced new files for tracking the implementation of automatic post-processing features, enhancing user experience and system efficiency.
This commit is contained in:
2026-01-04 20:46:08 -05:00
parent 1fa25d1311
commit 135796bdb9
6 changed files with 2767 additions and 2 deletions

View File

@@ -0,0 +1,807 @@
# Implementation Checklist: SPRINT-GAP-011
Step-by-step implementation guide for the post-processing pipeline.
## Pre-Implementation
- [ ] Read and understand the full README.md
- [ ] Review existing patterns in GAP-003 and GAP-004
- [ ] Set up test environment with database running
---
## Phase 1: Quick Wins (Estimated: 1 hour)
### 1.1 Enable Auto-Extract in MeetingDetail
**File**: `client/src/pages/MeetingDetail.tsx`
```typescript
// Find this block (around line 72-76):
const { extract: extractEntities } = useEntityExtraction({
meetingId: id,
meetingTitle: meeting?.title,
meetingState: meeting?.state,
});
// Change to:
const { extract: extractEntities } = useEntityExtraction({
meetingId: id,
meetingTitle: meeting?.title,
meetingState: meeting?.state,
autoExtract: true, // ADD THIS LINE
});
```
**Verify**:
- [ ] Stop a recording
- [ ] Navigate to meeting detail
- [ ] Entity extraction should start automatically (check network tab)
### 1.2 Add Summary Progress Listener
**File**: `client/src/pages/MeetingDetail.tsx`
Add imports:
```typescript
import { listen } from '@tauri-apps/api/event';
```
Add state and effect:
```typescript
// Seconds elapsed for the in-flight summary of this meeting; null until the
// first `summary_progress` event arrives.
const [summaryProgress, setSummaryProgress] = useState<number | null>(null);
useEffect(() => {
  // `listen` resolves asynchronously, so the cleanup below can run before the
  // unsubscribe function exists. Remember that we were disposed and
  // unsubscribe as soon as the subscription resolves, otherwise it leaks.
  let disposed = false;
  let unlisten: (() => void) | undefined;
  const setupListener = async () => {
    const stop = await listen<{ meeting_id: string; elapsed_secs: number }>(
      'summary_progress',
      (event) => {
        // Events are broadcast for every meeting; keep only ours.
        if (event.payload.meeting_id === id) {
          setSummaryProgress(event.payload.elapsed_secs);
        }
      }
    );
    if (disposed) {
      stop();
      return;
    }
    unlisten = stop;
  };
  setupListener().catch((error: unknown) => {
    // Progress is cosmetic: report the failure instead of leaving an
    // unhandled promise rejection.
    console.error('Failed to subscribe to summary_progress', error);
  });
  return () => {
    disposed = true;
    unlisten?.();
  };
}, [id]);
```
**Verify**:
- [ ] Click "Generate Summary" button
- [ ] Console shows progress events (or add UI indicator)
---
## Phase 2: Processing Status Entity (Estimated: 2 hours)
### 2.1 Add ProcessingStatus to Domain
**File**: `src/noteflow/domain/entities/meeting.py`
Add dataclass:
```python
from dataclasses import dataclass, field
@dataclass
class ProcessingStatus:
"""Tracks which post-processing steps have been completed."""
summary_generated: bool = False
summary_generated_at: datetime | None = None
entities_extracted: bool = False
entities_extracted_at: datetime | None = None
diarization_refined: bool = False
diarization_refined_at: datetime | None = None
```
Update Meeting class:
```python
@dataclass
class Meeting:
# ... existing fields ...
processing_status: ProcessingStatus = field(default_factory=ProcessingStatus)
```
### 2.2 Add ProcessingStatus to Proto
**File**: `src/noteflow/grpc/proto/noteflow.proto`
Add message:
```protobuf
message ProcessingStatus {
bool summary_generated = 1;
google.protobuf.Timestamp summary_generated_at = 2;
bool entities_extracted = 3;
google.protobuf.Timestamp entities_extracted_at = 4;
bool diarization_refined = 5;
google.protobuf.Timestamp diarization_refined_at = 6;
}
```
Update Meeting message:
```protobuf
message Meeting {
// ... existing fields ...
ProcessingStatus processing_status = 20;
}
```
### 2.3 Regenerate Proto Stubs
```bash
python -m grpc_tools.protoc -I src/noteflow/grpc/proto \
--python_out=src/noteflow/grpc/proto \
--grpc_python_out=src/noteflow/grpc/proto \
src/noteflow/grpc/proto/noteflow.proto
```
### 2.4 Update ORM Model
**File**: `src/noteflow/infrastructure/persistence/models/meeting.py`
Add columns:
```python
summary_generated = Column(Boolean, default=False, nullable=False)
summary_generated_at = Column(DateTime(timezone=True), nullable=True)
entities_extracted = Column(Boolean, default=False, nullable=False)
entities_extracted_at = Column(DateTime(timezone=True), nullable=True)
diarization_refined = Column(Boolean, default=False, nullable=False)
diarization_refined_at = Column(DateTime(timezone=True), nullable=True)
```
### 2.5 Create Migration
```bash
alembic revision --autogenerate -m "add_processing_status_to_meeting"
alembic upgrade head
```
### 2.6 Update Converters
**File**: `src/noteflow/infrastructure/converters/orm_converters.py`
Update `meeting_from_orm`:
```python
processing_status=ProcessingStatus(
summary_generated=model.summary_generated,
summary_generated_at=model.summary_generated_at,
entities_extracted=model.entities_extracted,
entities_extracted_at=model.entities_extracted_at,
diarization_refined=model.diarization_refined,
diarization_refined_at=model.diarization_refined_at,
)
```
**File**: `src/noteflow/grpc/_mixins/converters.py`
Update `meeting_to_proto`:
```python
processing_status=proto.ProcessingStatus(
summary_generated=meeting.processing_status.summary_generated,
# ... etc
)
```
### 2.7 Update Processing Mixins to Set Flags
**File**: `src/noteflow/grpc/_mixins/summarization.py`
After saving summary:
```python
meeting.processing_status.summary_generated = True
meeting.processing_status.summary_generated_at = utc_now()
await repo.meetings.update(meeting)
await repo.commit()
```
**File**: `src/noteflow/grpc/_mixins/entities.py`
After saving entities:
```python
meeting.processing_status.entities_extracted = True
meeting.processing_status.entities_extracted_at = utc_now()
await repo.meetings.update(meeting)
await repo.commit()
```
**File**: `src/noteflow/grpc/_mixins/diarization/_jobs.py`
After diarization completes:
```python
meeting.processing_status.diarization_refined = True
meeting.processing_status.diarization_refined_at = utc_now()
await repo.meetings.update(meeting)
await repo.commit()
```
**Verify**:
- [ ] Generate summary → `summary_generated` is True
- [ ] Extract entities → `entities_extracted` is True
- [ ] Refine diarization → `diarization_refined` is True
---
## Phase 3: Orchestration Hook (Estimated: 4 hours)
### 3.1 Create usePostProcessing Hook
**File**: `client/src/hooks/use-post-processing.ts`
```typescript
import { useCallback, useEffect, useRef, useState } from 'react';
import { listen } from '@tauri-apps/api/event';
import { useAPI } from '@/contexts/connection-context';
import { MeetingState } from '@/types';
export type ProcessingStepName = 'summary' | 'entities' | 'diarization';
export type ProcessingStepStatus =
| 'pending'
| 'running'
| 'completed'
| 'failed'
| 'skipped';
export interface ProcessingStep {
name: ProcessingStepName;
status: ProcessingStepStatus;
progress?: number;
error?: string;
startedAt?: Date;
completedAt?: Date;
}
export interface UsePostProcessingOptions {
meetingId: string;
meetingState?: MeetingState;
processingStatus?: {
summary_generated: boolean;
entities_extracted: boolean;
diarization_refined: boolean;
};
autoStart?: boolean;
onComplete?: () => void;
onStepComplete?: (step: ProcessingStepName) => void;
onStepFailed?: (step: ProcessingStepName, error: string) => void;
}
export interface UsePostProcessingResult {
steps: ProcessingStep[];
startProcessing: () => Promise<void>;
isProcessing: boolean;
isComplete: boolean;
hasFailures: boolean;
}
/** Delay between two diarization job status polls. */
const DIARIZATION_POLL_INTERVAL_MS = 2000;

/** Stop waiting for a diarization job after this long (5 minutes). */
const DIARIZATION_MAX_WAIT_MS = 300000;

/** True once a step has reached a state it will not leave on its own. */
function isTerminalStatus(status: ProcessingStepStatus): boolean {
  return status === 'completed' || status === 'failed' || status === 'skipped';
}

/** Returns the message of a caught value, or `fallback` for non-Error throws. */
function errorMessageOf(error: unknown, fallback: string): string {
  return error instanceof Error ? error.message : fallback;
}

/** Resolves after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Orchestrates post-processing (summary, entity extraction, diarization
 * refinement) for one meeting and exposes per-step status.
 *
 * All three steps run concurrently and a failure in one does not stop the
 * others. `startProcessing` resolves only after every step has reached a
 * terminal state (including the polled diarization job), so `isProcessing`
 * stays true for the whole run.
 *
 * @param options - Meeting identity/state, already-known processing flags,
 *   the auto-start switch and optional lifecycle callbacks.
 * @returns Step list, a manual `startProcessing` trigger and derived flags.
 */
export function usePostProcessing(
  options: UsePostProcessingOptions
): UsePostProcessingResult {
  const {
    meetingId,
    meetingState,
    processingStatus,
    autoStart = false,
    onComplete,
    onStepComplete,
    onStepFailed,
  } = options;
  const api = useAPI();

  // Depend on the primitive flags rather than the `processingStatus` object,
  // whose identity changes on every meeting reload.
  const summaryDone = processingStatus?.summary_generated ?? false;
  const entitiesDone = processingStatus?.entities_extracted ?? false;
  const diarizationDone = processingStatus?.diarization_refined ?? false;

  const [steps, setSteps] = useState<ProcessingStep[]>(() => [
    { name: 'summary', status: summaryDone ? 'completed' : 'pending' },
    { name: 'entities', status: entitiesDone ? 'completed' : 'pending' },
    { name: 'diarization', status: diarizationDone ? 'completed' : 'pending' },
  ]);
  const [isProcessing, setIsProcessing] = useState(false);

  // Synchronous re-entrancy guard: `isProcessing` state is only visible after
  // the next render, which is too late to stop a second call made from the
  // same render (e.g. a double-invoked effect).
  const runInFlightRef = useRef(false);
  // Lets the diarization poll loop stop once the component is gone.
  const mountedRef = useRef(true);
  // Ensures `onComplete` fires once per completion, not once per render.
  const completionNotifiedRef = useRef(false);

  useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
    };
  }, []);

  const updateStep = useCallback(
    (name: ProcessingStepName, update: Partial<ProcessingStep>) => {
      setSteps((prev) =>
        prev.map((step) =>
          step.name === name ? { ...step, ...update } : step
        )
      );
    },
    []
  );

  // The meeting usually loads after the first render, so the initial state
  // above often sees no `processingStatus`. Promote still-pending steps once
  // the server reports them as done.
  useEffect(() => {
    const done: Record<ProcessingStepName, boolean> = {
      summary: summaryDone,
      entities: entitiesDone,
      diarization: diarizationDone,
    };
    setSteps((prev) =>
      prev.some((step) => step.status === 'pending' && done[step.name])
        ? prev.map(
            (step): ProcessingStep =>
              step.status === 'pending' && done[step.name]
                ? { ...step, status: 'completed' }
                : step
          )
        : prev
    );
  }, [summaryDone, entitiesDone, diarizationDone]);

  // Summary processing
  const runSummary = useCallback(async () => {
    if (summaryDone) {
      updateStep('summary', { status: 'skipped' });
      return;
    }
    updateStep('summary', {
      status: 'running',
      startedAt: new Date(),
      error: undefined, // clear the message left by a previous failed run
    });
    try {
      await api.generateSummary(meetingId, false);
      updateStep('summary', { status: 'completed', completedAt: new Date() });
      onStepComplete?.('summary');
    } catch (error) {
      const message = errorMessageOf(error, 'Unknown error');
      updateStep('summary', { status: 'failed', error: message });
      onStepFailed?.('summary', message);
    }
  }, [api, meetingId, summaryDone, updateStep, onStepComplete, onStepFailed]);

  // Entity extraction
  const runEntities = useCallback(async () => {
    if (entitiesDone) {
      updateStep('entities', { status: 'skipped' });
      return;
    }
    updateStep('entities', {
      status: 'running',
      startedAt: new Date(),
      error: undefined,
    });
    try {
      await api.extractEntities(meetingId, false);
      updateStep('entities', { status: 'completed', completedAt: new Date() });
      onStepComplete?.('entities');
    } catch (error) {
      const message = errorMessageOf(error, 'Unknown error');
      updateStep('entities', { status: 'failed', error: message });
      onStepFailed?.('entities', message);
    }
  }, [api, meetingId, entitiesDone, updateStep, onStepComplete, onStepFailed]);

  // Diarization refinement: start the background job, then poll until it
  // finishes, fails or times out. The returned promise settles only then.
  const runDiarization = useCallback(async () => {
    if (diarizationDone) {
      updateStep('diarization', { status: 'skipped' });
      return;
    }
    updateStep('diarization', {
      status: 'running',
      startedAt: new Date(),
      error: undefined,
    });

    const fail = (message: string) => {
      updateStep('diarization', { status: 'failed', error: message });
      onStepFailed?.('diarization', message);
    };

    let jobId: string;
    try {
      ({ jobId } = await api.refineSpeakers(meetingId));
    } catch (error) {
      fail(errorMessageOf(error, 'Unknown error'));
      return;
    }

    const deadline = Date.now() + DIARIZATION_MAX_WAIT_MS;
    try {
      // Sequential loop instead of setInterval: a slow status call can never
      // overlap with the next one, and there is no timer left to clean up.
      while (mountedRef.current) {
        await delay(DIARIZATION_POLL_INTERVAL_MS);
        if (!mountedRef.current) {
          return;
        }
        const status = await api.getDiarizationJobStatus(jobId);
        if (status.status === 'completed') {
          updateStep('diarization', {
            status: 'completed',
            completedAt: new Date(),
          });
          onStepComplete?.('diarization');
          return;
        }
        if (status.status === 'failed') {
          fail(status.error || 'Diarization failed');
          return;
        }
        if (status.progress !== undefined) {
          updateStep('diarization', { progress: status.progress });
        }
        if (Date.now() >= deadline) {
          // Surface the timeout instead of leaving the step in `running`.
          fail('Diarization timed out');
          return;
        }
      }
    } catch (pollError) {
      fail(errorMessageOf(pollError, 'Poll failed'));
    }
  }, [
    api,
    meetingId,
    diarizationDone,
    updateStep,
    onStepComplete,
    onStepFailed,
  ]);

  // Main orchestration function
  const startProcessing = useCallback(async () => {
    if (runInFlightRef.current) return;
    runInFlightRef.current = true;
    setIsProcessing(true);
    try {
      // All three steps are independent, so run them concurrently; each one
      // records its own failure and never rejects.
      await Promise.allSettled([
        runSummary(),
        runEntities(),
        runDiarization(),
      ]);
    } finally {
      runInFlightRef.current = false;
      setIsProcessing(false);
    }
  }, [runSummary, runEntities, runDiarization]);

  // Auto-start when the meeting is completed and at least one step still
  // needs work. Requiring *every* step to be pending would never start a
  // meeting that is only partially processed.
  useEffect(() => {
    if (!autoStart || meetingState !== 'completed' || isProcessing) {
      return;
    }
    const done: Record<ProcessingStepName, boolean> = {
      summary: summaryDone,
      entities: entitiesDone,
      diarization: diarizationDone,
    };
    const hasWork = steps.some(
      (step) => step.status === 'pending' && !done[step.name]
    );
    if (hasWork) {
      void startProcessing();
    }
  }, [
    autoStart,
    meetingState,
    isProcessing,
    steps,
    summaryDone,
    entitiesDone,
    diarizationDone,
    startProcessing,
  ]);

  // Listen for summary progress events
  useEffect(() => {
    // `listen` resolves asynchronously; if cleanup runs first, unsubscribe as
    // soon as the subscription resolves so the listener does not leak.
    let disposed = false;
    let unlisten: (() => void) | undefined;
    const setup = async () => {
      const stop = await listen<{ meeting_id: string; elapsed_secs: number }>(
        'summary_progress',
        (event) => {
          if (event.payload.meeting_id === meetingId) {
            // Estimate progress (assume 60 seconds typical); cap at 99 so the
            // bar only completes when the step itself does.
            const progress = Math.min(
              (event.payload.elapsed_secs / 60) * 100,
              99
            );
            updateStep('summary', { progress });
          }
        }
      );
      if (disposed) {
        stop();
        return;
      }
      unlisten = stop;
    };
    setup().catch((error: unknown) => {
      console.error('Failed to subscribe to summary_progress', error);
    });
    return () => {
      disposed = true;
      unlisten?.();
    };
  }, [meetingId, updateStep]);

  const isComplete = steps.every((step) => isTerminalStatus(step.status));
  const hasFailures = steps.some((step) => step.status === 'failed');

  // Notify once each time the step list settles. Callers typically pass an
  // inline `onComplete`, whose identity changes every render; without the
  // ref this effect would call it again on each render.
  useEffect(() => {
    if (!isComplete || isProcessing) {
      completionNotifiedRef.current = false;
      return;
    }
    if (completionNotifiedRef.current) {
      return;
    }
    completionNotifiedRef.current = true;
    onComplete?.();
  }, [isComplete, isProcessing, onComplete]);

  return {
    steps,
    startProcessing,
    isProcessing,
    isComplete,
    hasFailures,
  };
}
```
### 3.2 Create ProcessingStatus Component
**File**: `client/src/components/meeting/processing-status.tsx`
```typescript
import { Check, Loader2, X, SkipForward } from 'lucide-react';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
import { Progress } from '@/components/ui/progress';
import { cn } from '@/lib/utils';
import type { ProcessingStep } from '@/hooks/use-post-processing';
const stepLabels: Record<string, string> = {
summary: 'Generate Summary',
entities: 'Extract Entities',
diarization: 'Refine Speakers',
};
/** Renders the icon for a step's status; unknown/pending gets a hollow circle. */
function StatusIcon({ status }: { status: ProcessingStep['status'] }) {
  if (status === 'completed') {
    return <Check className="h-4 w-4 text-green-500" />;
  }
  if (status === 'failed') {
    return <X className="h-4 w-4 text-red-500" />;
  }
  if (status === 'running') {
    return <Loader2 className="h-4 w-4 text-blue-500 animate-spin" />;
  }
  if (status === 'skipped') {
    return <SkipForward className="h-4 w-4 text-gray-400" />;
  }
  // 'pending' (and anything unexpected) falls through to the placeholder.
  return <div className="h-4 w-4 rounded-full border-2 border-gray-300" />;
}
interface ProcessingStatusProps {
steps: ProcessingStep[];
className?: string;
}
/**
 * Card listing each post-processing step with its status icon, a progress bar
 * while running, and the error message when it failed.
 *
 * The card hides itself only when every step finished without a failure.
 * It stays visible while anything is pending or running, and also after the
 * run when a step failed; otherwise the error text below would disappear the
 * moment the last step settles and the user would never see it.
 */
export function ProcessingStatus({ steps, className }: ProcessingStatusProps) {
  const allSettled = steps.every(
    (s) =>
      s.status === 'completed' || s.status === 'failed' || s.status === 'skipped'
  );
  const hasFailure = steps.some((s) => s.status === 'failed');
  if (allSettled && !hasFailure) {
    return null; // Hide when everything finished cleanly
  }
  return (
    <Card className={cn('', className)}>
      <CardHeader className="pb-2">
        <CardTitle className="text-sm font-medium">
          {allSettled ? 'Processing Finished With Errors' : 'Processing Meeting'}
        </CardTitle>
      </CardHeader>
      <CardContent className="space-y-3">
        {steps.map((step) => (
          <div key={step.name} className="space-y-1">
            <div className="flex items-center gap-2">
              <StatusIcon status={step.status} />
              <span
                className={cn(
                  'text-sm',
                  step.status === 'skipped' && 'text-gray-400',
                  step.status === 'failed' && 'text-red-500'
                )}
              >
                {stepLabels[step.name]}
              </span>
            </div>
            {step.status === 'running' && step.progress !== undefined && (
              <Progress value={step.progress} className="h-1" />
            )}
            {step.status === 'failed' && step.error && (
              <p className="text-xs text-red-500 ml-6">{step.error}</p>
            )}
          </div>
        ))}
      </CardContent>
    </Card>
  );
}
```
### 3.3 Wire into MeetingDetail
**File**: `client/src/pages/MeetingDetail.tsx`
Add import:
```typescript
import { usePostProcessing } from '@/hooks/use-post-processing';
import { ProcessingStatus } from '@/components/meeting/processing-status';
```
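Add hook usage (once this hook is wired in, remove `autoExtract: true` from step 1.1 — otherwise both `useEntityExtraction` and `usePostProcessing` trigger entity extraction when the meeting completes):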
```typescript
const { steps, isProcessing, isComplete } = usePostProcessing({
meetingId: id,
meetingState: meeting?.state,
processingStatus: meeting?.processing_status,
autoStart: true,
onComplete: () => {
// Refresh meeting data to get updated content
loadMeeting();
},
});
```
Add component to JSX:
```tsx
{/* Keep the card mounted after the run when a step failed, so its error stays visible. */}
{(!isComplete || steps.some((step) => step.status === 'failed')) && (
  <ProcessingStatus steps={steps} className="mb-4" />
)}
```
**Verify**:
- [ ] Stop a recording
- [ ] Navigate to meeting detail
- [ ] ProcessingStatus component shows progress
- [ ] All three steps complete
- [ ] Meeting data refreshes with summary/entities
---
## Phase 4: Webhook Events (Estimated: 2 hours)
### 4.1 Add New Event Types
**File**: `src/noteflow/domain/webhooks/events.py`
```python
class WebhookEventType(str, Enum):
RECORDING_STARTED = "recording.started"
RECORDING_STOPPED = "recording.stopped"
MEETING_COMPLETED = "meeting.completed"
SUMMARY_GENERATED = "summary.generated"
ENTITIES_EXTRACTED = "entities.extracted" # ADD
DIARIZATION_COMPLETED = "diarization.completed" # ADD
```
### 4.2 Add Payload Dataclasses
**File**: `src/noteflow/domain/webhooks/events.py`
```python
@dataclass
class EntitiesExtractedPayload:
meeting_id: str
entity_count: int
categories: dict[str, int] # e.g., {"person": 5, "company": 3}
@dataclass
class DiarizationCompletedPayload:
meeting_id: str
job_id: str
speaker_count: int
segments_updated: int
```
### 4.3 Add Service Methods
**File**: `src/noteflow/application/services/webhook_service.py`
```python
async def trigger_entities_extracted(
self,
meeting_id: str,
entities: list[NamedEntity],
) -> None:
"""Fire webhook for entities.extracted event."""
categories: dict[str, int] = {}
for entity in entities:
categories[entity.category] = categories.get(entity.category, 0) + 1
payload = EntitiesExtractedPayload(
meeting_id=meeting_id,
entity_count=len(entities),
categories=categories,
)
await self._dispatch(WebhookEventType.ENTITIES_EXTRACTED, payload)
async def trigger_diarization_completed(
self,
meeting_id: str,
job_id: str,
speaker_count: int,
segments_updated: int,
) -> None:
"""Fire webhook for diarization.completed event."""
payload = DiarizationCompletedPayload(
meeting_id=meeting_id,
job_id=job_id,
speaker_count=speaker_count,
segments_updated=segments_updated,
)
await self._dispatch(WebhookEventType.DIARIZATION_COMPLETED, payload)
```
### 4.4 Fire Events in Mixins
**File**: `src/noteflow/grpc/_mixins/entities.py`
After saving entities:
```python
await self._fire_webhooks_safely(
lambda: self._webhook_service.trigger_entities_extracted(
meeting_id=meeting_id,
entities=entities,
)
)
```
**File**: `src/noteflow/grpc/_mixins/diarization/_jobs.py`
After diarization completes:
```python
await self._fire_webhooks_safely(
lambda: self._webhook_service.trigger_diarization_completed(
meeting_id=meeting_id,
job_id=job.id,
speaker_count=len(speakers),
segments_updated=segments_updated_count,
)
)
```
---
## Phase 5: Error Surfacing (Estimated: 1 hour)
### 5.1 Add ASR Logging
**File**: `src/noteflow/grpc/_mixins/streaming/_asr.py`
```python
async def process_audio_segment(
host: "ServicerHost",
audio: np.ndarray,
meeting_id: str,
# ...
) -> AsyncIterator[TranscriptUpdate]:
if audio.size == 0:
logger.warning(
"Empty audio segment received",
meeting_id=meeting_id,
extra={"meeting_id": meeting_id},
)
return
if not host.asr_engine:
logger.error(
"ASR engine not available",
meeting_id=meeting_id,
extra={"meeting_id": meeting_id},
)
# Consider emitting error event to client
return
# ... rest of processing
```
---
## Final Verification
### Integration Test
1. [ ] Start fresh recording
2. [ ] Record for 30+ seconds with speech
3. [ ] Stop recording
4. [ ] Verify ProcessingStatus component appears
5. [ ] Verify summary generation starts automatically
6. [ ] Verify entity extraction runs
7. [ ] Verify diarization refinement runs
8. [ ] Verify all steps complete
9. [ ] Verify meeting detail shows summary, entities, speaker labels
10. [ ] Verify webhooks fire (if configured)
### Regression Test
1. [ ] Manual "Generate Summary" button still works
2. [ ] Manual "Extract Entities" button still works
3. [ ] Manual "Refine Speakers" button still works
4. [ ] Existing meetings without processing_status still load
5. [ ] Force regenerate still works
---
## Rollback Instructions
If issues are found:
1. **Disable auto-start**: Set `autoStart: false` in MeetingDetail.tsx
2. **Hide ProcessingStatus**: Remove component from JSX
3. **Revert entity auto-extract**: Remove `autoExtract: true`
The backend changes (processing status fields) are backward compatible and don't need rollback.

View File

@@ -0,0 +1,770 @@
# SPRINT-GAP-011: Post-Processing Pipeline Gaps
| Attribute | Value |
|-----------|-------|
| **Sprint** | GAP-011 |
| **Size** | L (Large) |
| **Owner** | TBD |
| **Phase** | Hardening |
| **Prerequisites** | GAP-003 (Error Handling), GAP-004 (Diarization Lifecycle) |
## Executive Summary
After a meeting recording completes, the system fails to automatically trigger post-processing workflows (summarization, entity extraction, diarization refinement). Users see only the raw recording and its streaming transcript, without summaries, extracted entities, or refined speaker labels, until they trigger each step manually. The architecture has all the components but lacks orchestration to connect them.
## Open Issues
- [ ] Define post-processing trigger strategy (server-side vs client-side orchestration)
- [ ] Determine parallel vs sequential execution of post-processing steps
- [ ] Decide on failure handling for individual processing steps
- [ ] Define retry policy for failed processing
- [ ] Establish processing completion signals
## Validation Status
| Component | Exists | Status |
|-----------|--------|--------|
| Streaming transcription | Yes | Working |
| GenerateSummary RPC | Yes | Manual trigger only |
| ExtractEntities RPC | Yes | Manual trigger only |
| RefineSpeakerDiarization RPC | Yes | Manual trigger only |
| Auto-trigger on meeting stop | No | **Gap - needs implementation** |
| Processing completion signals | No | **Gap - needs implementation** |
| Progress tracking UI | Partial | Summary has events, others missing |
| Client-side orchestration | No | **Gap - needs implementation** |
## Objective
Implement automatic post-processing orchestration that triggers summarization, entity extraction, and diarization refinement after a meeting recording stops, with proper progress tracking, error handling, and completion signals.
## Key Decisions
| Decision | Choice | Rationale |
|----------|--------|-----------|
| Orchestration location | Client-side | Server should remain stateless; client knows user preferences |
| Processing order | Parallel where possible | Summarization and NER can run concurrently; diarization can start immediately |
| Failure handling | Continue on failure | One failed step shouldn't block others |
| Retry policy | User-initiated | Auto-retry risks resource exhaustion |
| Completion tracking | Per-step status | Enable partial success states |
## What Already Exists
### Backend Infrastructure
#### Summarization Service (`src/noteflow/grpc/_mixins/summarization.py`)
- `GenerateSummary` RPC fully functional
- Returns cached summary if exists (unless `force_regenerate=True`)
- Fires `SUMMARY_GENERATED` webhook on completion
- Supports multiple providers (Cloud, Ollama, Mock)
#### Entity Extraction (`src/noteflow/grpc/_mixins/entities.py`)
- `ExtractEntities` RPC fully functional
- Feature flag gated (`NOTEFLOW_FEATURE_NER_ENABLED`)
- Returns cached entities if exist (unless `force_refresh=True`)
- Uses spaCy NER engine
#### Diarization Refinement (`src/noteflow/grpc/_mixins/diarization/_mixin.py`)
- `RefineSpeakerDiarization` RPC launches background job
- `GetDiarizationJobStatus` for polling
- Persisted to database for recovery
#### Webhook Events (`src/noteflow/domain/webhooks/events.py`)
```python
class WebhookEventType(str, Enum):
RECORDING_STARTED = "recording.started"
RECORDING_STOPPED = "recording.stopped"
MEETING_COMPLETED = "meeting.completed"
SUMMARY_GENERATED = "summary.generated"
# Missing: ENTITIES_EXTRACTED, DIARIZATION_COMPLETED
```
### Client Infrastructure
#### Tauri Adapter (`client/src/api/tauri-adapter.ts`)
```typescript
// All RPCs available but require manual invocation
generateSummary(meetingId: string, forceRegenerate?: boolean): Promise<Summary>
extractEntities(meetingId: string, forceRefresh?: boolean): Promise<ExtractedEntity[]>
refineSpeakers(meetingId: string, numSpeakers?: number): Promise<{ jobId: string }>
getDiarizationJobStatus(jobId: string): Promise<DiarizationJobStatus>
```
#### Diarization Hook (`client/src/hooks/use-diarization.ts`)
- Polling pattern with exponential backoff
- Max duration aligned to server timeout
- Auto-recovery on mount
- Progress event handling
#### Entity Extraction Hook (`client/src/hooks/use-entity-extraction.ts`)
```typescript
// Auto-extract feature EXISTS but is DISABLED
useEffect(() => {
if (autoExtract && meetingId && meetingState === 'completed') {
extract(false); // This would work!
}
}, [autoExtract, meetingId, meetingState, extract]);
```
#### Summary Progress Events (`client/src-tauri/src/commands/summary.rs`)
```rust
// The Tauri backend emits progress events every second
emit_summary_progress(app.clone(), meeting_id.clone(), elapsed_s);
// But no React component listens for these
```
## Identified Issues
### Gap 1: No Automatic Post-Meeting Workflows (Critical)
**Severity**: Critical
**Impact**: Core feature missing - users get no processed output
**Location**: `src/noteflow/grpc/_mixins/meeting.py:246-285`
```python
async def StopMeeting(self, request, context):
# ... state transition logic ...
# Webhooks fire, but no processing triggers
await self._fire_webhooks_safely(
lambda: self._webhook_service.trigger_recording_stopped(...)
)
await self._fire_webhooks_safely(
lambda: self._webhook_service.trigger_meeting_completed(...)
)
# GAP: No call to:
# - GenerateSummary
# - ExtractEntities
# - RefineSpeakerDiarization
return response
```
**Problem**: After `StopMeeting()` completes:
- Webhooks fire for external integrations
- But no internal processing is triggered
- All processing requires explicit client RPC calls
### Gap 2: Client Immediately Navigates Away (Critical)
**Severity**: Critical
**Impact**: No opportunity for client-side orchestration
**Location**: `client/src/pages/Recording.tsx:313-346`
```typescript
const stopRecording = async () => {
setIsRecording(false);
streamRef.current?.close();
const stoppedMeeting = await api.stopMeeting(meeting.id);
setMeeting(stoppedMeeting);
// GAP: Immediately navigates away - no processing triggered
navigate(
projectId
? `/projects/${projectId}/meetings/${meeting.id}`
: '/projects'
);
};
```
**Problem**:
- Stop button triggers navigation immediately
- No post-processing orchestration before navigation
- User lands on detail page with empty summary/entities
### Gap 3: MeetingDetail Only Fetches, Doesn't Process (Critical)
**Severity**: Critical
**Impact**: Viewing a meeting doesn't trigger missing processing
**Location**: `client/src/pages/MeetingDetail.tsx:79-98`
```typescript
useEffect(() => {
const loadMeeting = async () => {
const data = await getAPI().getMeeting({
meeting_id: id,
include_segments: true,
include_summary: true, // Fetches existing, doesn't generate
});
setMeeting(data.meeting);
setSegments(data.segments || []);
setSummary(data.summary); // null if not generated
};
loadMeeting();
}, [id]);
```
**Problem**:
- `include_summary: true` only includes existing summary
- Does not trigger generation if missing
- User must manually click "Generate Summary" button
### Gap 4: Auto-Extract Feature Disabled (Medium)
**Severity**: Medium
**Impact**: Working feature not utilized
**Location**: `client/src/hooks/use-entity-extraction.ts:116-121`
```typescript
// Feature exists but is never enabled
useEffect(() => {
if (autoExtract && meetingId && meetingState === 'completed') {
extract(false);
}
}, [autoExtract, meetingId, meetingState, extract]);
```
**Location**: `client/src/pages/MeetingDetail.tsx:72-76`
```typescript
const { extract: extractEntities } = useEntityExtraction({
meetingId: id,
meetingTitle: meeting?.title,
meetingState: meeting?.state,
// autoExtract: true <-- MISSING
});
```
**Problem**:
- Hook has auto-extract capability
- Never enabled in consuming components
- Simple fix: pass `autoExtract: true`
### Gap 5: No Processing Completion Signals (High)
**Severity**: High
**Impact**: Client cannot track what processing is done
**Location**: `src/noteflow/domain/entities/meeting.py`
```python
class Meeting:
id: MeetingId
title: str
state: MeetingState
# ... other fields ...
# GAP: No processing status fields
# Missing:
# - transcription_complete: bool
# - summary_generated: bool
# - entities_extracted: bool
# - diarization_refined: bool
```
**Problem**:
- Meeting entity has no processing status tracking
- Client must query each subsystem separately
- No way to show "processing complete" badge
### Gap 6: Summary Progress Events Ignored (Medium)
**Severity**: Medium
**Impact**: No progress UI during summarization
**Location**: `client/src-tauri/src/commands/summary.rs:96-134`
```rust
tauri::async_runtime::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(1));
loop {
interval.tick().await;
let elapsed_s = start.elapsed().as_secs();
// Emits event but nothing listens
emit_summary_progress(app.clone(), meeting_id.clone(), elapsed_s);
if elapsed_s >= 300 { break; }
}
});
```
**Problem**:
- The Tauri backend (`client/src-tauri`) emits `summary_progress` events every second
- No React component subscribes to these events
- User sees no progress during potentially long summarization
### Gap 7: No Diarization Auto-Trigger (Medium)
**Severity**: Medium
**Impact**: Speaker labels never refined automatically
**Location**: `client/src/hooks/use-diarization.ts`
```typescript
// Hook provides start() function but nothing calls it automatically
export function useDiarization(options: UseDiarizationOptions = {}) {
const start = useCallback(async (meetingId: string, numSpeakers?: number) => {
// ... implementation
}, [api]);
// No auto-start logic exists
return { state, start, cancel, poll, recover };
}
```
**Location**: `client/src/pages/MeetingDetail.tsx`
- No call to `start(meetingId)` on mount or meeting completion
- Diarization button exists but requires manual click
**Problem**:
- Diarization hook is well-designed with polling
- But never triggered automatically after recording
- User must manually click "Refine Speakers" button
### Gap 8: Silent ASR Error Handling (Medium)
**Severity**: Medium
**Impact**: Transcription failures invisible to user
**Location**: `src/noteflow/grpc/_mixins/streaming/_asr.py:45-60`
```python
async def process_audio_segment(
host: "ServicerHost",
audio: np.ndarray,
# ...
) -> AsyncIterator[TranscriptUpdate]:
if audio.size == 0:
return # Silent return - no logging
if not host.asr_engine:
return # Silent return - user not informed
# ... processing
```
**Problem**:
- Empty audio segments silently ignored
- ASR engine unavailability silently skipped
- No user notification of processing failures
- Only visible in server logs
## Architecture
### Current State (Broken)
```
Recording.tsx Server MeetingDetail.tsx
│ │ │
│ stopRecording() │ │
├────────────────────────────>│ │
│ │ StopMeeting() │
│ │ ├─ State → STOPPED │
│ │ ├─ Webhooks fired │
│ │ └─ [NO PROCESSING] │
│<────────────────────────────┤ │
│ │ │
│ navigate(/meeting/id) ──────┼──────────────────────────────────>
│ │ │
│ │ getMeeting() │
│ │<─────────────────────────────────┤
│ │ include_summary: true │
│ │ (fetches null - not generated) │
│ ├─────────────────────────────────>│
│ │ │
│ │ Shows empty summary │
```
### Target State (Fixed)
```
Recording.tsx Server MeetingDetail.tsx
│ │ │
│ stopRecording() │ │
├────────────────────────────>│ │
│ │ StopMeeting() │
│ │ ├─ State → STOPPED │
│ │ └─ Webhooks fired │
│<────────────────────────────┤ │
│ │ │
│ navigate(/meeting/id) ──────┼──────────────────────────────────>
│ │ │
│ │ getMeeting() │
│ │<─────────────────────────────────┤
│ ├─────────────────────────────────>│
│ │ │
│ │ ┌─────────────────────────────┐│
│ │ │ usePostProcessing() hook ││
│ │ │ ├─ Check processing status ││
│ │ │ ├─ Trigger if needed: ││
│ │ │ │ ├─ generateSummary() ││
│ │ │ │ ├─ extractEntities() ││
│ │ │ │ └─ refineSpeakers() ││
│ │ │ └─ Track progress ││
│ │ └─────────────────────────────┘│
│ │ │
│ │<── generateSummary() ────────────┤
│ │────────────────────────────────>│
│ │ Progress events... │
│ │<── extractEntities() ────────────┤
│ │────────────────────────────────>│
│ │<── refineSpeakers() ─────────────┤
│ │────────────────────────────────>│
│ │ Poll status... │
│ │ │
│ │ All complete │
│ │ Show full meeting │
```
## Scope
### Task Breakdown
| Task | Effort | Priority | Description |
|------|--------|----------|-------------|
| Create `usePostProcessing` hook | M | P0 | Orchestrates all post-processing with progress tracking |
| Add processing status to Meeting entity | S | P0 | Track what processing has been done |
| Enable auto-extract in MeetingDetail | S | P0 | Pass `autoExtract: true` to entity hook |
| Add summary progress listener | S | P1 | Subscribe to `summary_progress` events |
| Create auto-diarization trigger | S | P1 | Trigger refinement on meeting completion |
| Add `ProcessingStatus` component | M | P1 | Shows progress for all processing steps |
| Add missing webhook events | S | P2 | ENTITIES_EXTRACTED, DIARIZATION_COMPLETED |
| Add ASR error surfacing | S | P2 | Emit events for ASR failures |
| Add processing status to proto | S | P2 | Include in GetMeeting response |
### Files to Create
**Client:**
- `client/src/hooks/use-post-processing.ts` - Orchestration hook
- `client/src/components/meeting/processing-status.tsx` - Progress UI component
### Files to Modify
**Client:**
- `client/src/pages/MeetingDetail.tsx` - Add `usePostProcessing` hook, enable auto-extract
- `client/src/hooks/use-entity-extraction.ts` - Minor fixes if needed
- `client/src/hooks/use-diarization.ts` - Add auto-start capability
- `client/src/api/tauri-adapter.ts` - Add event listeners for summary progress
**Backend:**
- `src/noteflow/domain/entities/meeting.py` - Add processing status fields
- `src/noteflow/domain/webhooks/events.py` - Add new event types
- `src/noteflow/grpc/_mixins/streaming/_asr.py` - Add error event emission
- `src/noteflow/grpc/proto/noteflow.proto` - Add ProcessingStatus message
## Implementation Plan
### Phase 1: Quick Wins (Low Risk)
**Goal**: Enable existing but disabled functionality
1. **Enable auto-extract in MeetingDetail**
```typescript
// client/src/pages/MeetingDetail.tsx
const { extract: extractEntities } = useEntityExtraction({
meetingId: id,
meetingTitle: meeting?.title,
meetingState: meeting?.state,
autoExtract: true, // ADD THIS
});
```
2. **Add summary progress listener**
```typescript
// In MeetingDetail or new useSummaryProgress hook
useEffect(() => {
const unlisten = listen<{ meeting_id: string; elapsed_secs: number }>(
'summary_progress',
(event) => setSummaryProgress(event.payload.elapsed_secs)
);
return () => { unlisten.then(fn => fn()); };
}, []);
```
### Phase 2: Processing Status (Low Risk)
**Goal**: Track what processing has been done
1. **Add processing status to Meeting entity**
```python
# src/noteflow/domain/entities/meeting.py
@dataclass
class ProcessingStatus:
summary_generated: bool = False
entities_extracted: bool = False
diarization_refined: bool = False
@dataclass
class Meeting:
# ... existing fields ...
processing_status: ProcessingStatus = field(default_factory=ProcessingStatus)
```
2. **Add ProcessingStatus to proto**
```protobuf
message ProcessingStatus {
bool summary_generated = 1;
bool entities_extracted = 2;
bool diarization_refined = 3;
}
message Meeting {
// ... existing fields ...
ProcessingStatus processing_status = 20;
}
```
### Phase 3: Orchestration Hook (Medium Risk)
**Goal**: Create unified post-processing orchestration
1. **Create `usePostProcessing` hook**
```typescript
// client/src/hooks/use-post-processing.ts
interface UsePostProcessingOptions {
meetingId: string;
meetingState: MeetingState;
autoStart?: boolean;
onComplete?: () => void;
}
interface ProcessingStep {
name: 'summary' | 'entities' | 'diarization';
status: 'pending' | 'running' | 'completed' | 'failed' | 'skipped';
progress?: number;
error?: string;
}
export function usePostProcessing(options: UsePostProcessingOptions) {
const { meetingId, meetingState, autoStart } = options;
const [steps, setSteps] = useState<ProcessingStep[]>([
{ name: 'summary', status: 'pending' },
{ name: 'entities', status: 'pending' },
{ name: 'diarization', status: 'pending' },
]);
const api = useAPI();
const startProcessing = useCallback(async () => {
// Run summary and entities in parallel
const summaryPromise = runSummary();
const entitiesPromise = runEntities();
// Start diarization (polling-based)
const diarizationPromise = runDiarization();
await Promise.allSettled([
summaryPromise,
entitiesPromise,
diarizationPromise,
]);
}, [meetingId]);
// Auto-start when meeting becomes completed
useEffect(() => {
if (autoStart && meetingState === 'completed') {
startProcessing();
}
}, [autoStart, meetingState, startProcessing]);
return { steps, startProcessing, isProcessing, isComplete };
}
```
2. **Create `ProcessingStatus` component**
```typescript
// client/src/components/meeting/processing-status.tsx
export function ProcessingStatus({ steps }: { steps: ProcessingStep[] }) {
return (
<Card>
<CardHeader>
<CardTitle>Processing</CardTitle>
</CardHeader>
<CardContent>
{steps.map(step => (
<div key={step.name} className="flex items-center gap-2">
<StatusIcon status={step.status} />
<span>{stepLabels[step.name]}</span>
{step.status === 'running' && step.progress !== undefined && (
<Progress value={step.progress} />
)}
</div>
))}
</CardContent>
</Card>
);
}
```
### Phase 4: Webhook Events (Low Risk)
**Goal**: Add missing webhook events for external integrations
1. **Add new event types**
```python
# src/noteflow/domain/webhooks/events.py
class WebhookEventType(str, Enum):
RECORDING_STARTED = "recording.started"
RECORDING_STOPPED = "recording.stopped"
MEETING_COMPLETED = "meeting.completed"
SUMMARY_GENERATED = "summary.generated"
ENTITIES_EXTRACTED = "entities.extracted" # NEW
DIARIZATION_COMPLETED = "diarization.completed" # NEW
```
2. **Fire events on completion**
```python
# In entities mixin
await self._webhook_service.trigger_entities_extracted(meeting_id, entities)
# In diarization mixin
await self._webhook_service.trigger_diarization_completed(meeting_id, job_id)
```
### Phase 5: Error Surfacing (Low Risk)
**Goal**: Make ASR and processing errors visible
1. **Add ASR error events**
```python
# src/noteflow/grpc/_mixins/streaming/_asr.py
if audio.size == 0:
logger.warning("Empty audio segment received", meeting_id=meeting_id)
# Optionally emit metric
return
if not host.asr_engine:
logger.error("ASR engine not available", meeting_id=meeting_id)
# Could emit error event to client
return
```
## Deliverables
### Backend
- [ ] Add `ProcessingStatus` to Meeting entity
- [ ] Add `ProcessingStatus` to proto schema
- [ ] Add `ENTITIES_EXTRACTED` webhook event type
- [ ] Add `DIARIZATION_COMPLETED` webhook event type
- [ ] Fire webhook events on processing completion
- [ ] Add ASR error logging with meeting context
- [ ] Update GetMeeting to include processing status
### Client
- [ ] Create `usePostProcessing` orchestration hook
- [ ] Create `ProcessingStatus` UI component
- [ ] Enable `autoExtract: true` in MeetingDetail
- [ ] Add summary progress event listener
- [ ] Add auto-diarization trigger
- [ ] Wire `usePostProcessing` into MeetingDetail
### Tests
- [ ] Unit test: `usePostProcessing` hook state transitions
- [ ] Unit test: `ProcessingStatus` component rendering
- [ ] Integration test: Full post-processing flow
- [ ] E2E test: Recording → Processing → Complete flow
## Test Strategy
### Fixtures
- Mock meeting with `state: 'completed'`
- Mock API responses for each processing step
- Mock Tauri events for progress
### Test Cases
| Case | Input | Expected |
|------|-------|----------|
| Auto-trigger on mount | Meeting with state=completed, no summary | Summary generation starts |
| Parallel processing | Meeting ready for processing | Summary + entities run in parallel |
| Individual failure | Summary fails, others succeed | Shows summary as failed, others complete |
| Progress tracking | Summary in progress | Shows progress percentage |
| Skip if done | Meeting with existing summary | Skips summary, runs others |
| Processing complete | All steps done | Shows complete badge, fires callback |
### E2E Test Plan
```typescript
// client/e2e/post-processing.spec.ts
test('automatically processes meeting after recording stops', async ({ page }) => {
// 1. Start recording
await page.click('[data-testid="start-recording"]');
// 2. Record for a few seconds
await page.waitForTimeout(3000);
// 3. Stop recording
await page.click('[data-testid="stop-recording"]');
// 4. Should navigate to detail page
await expect(page).toHaveURL(/\/meetings\/.+/);
// 5. Should show processing status
await expect(page.locator('[data-testid="processing-status"]')).toBeVisible();
// 6. Wait for processing to complete
await expect(page.locator('[data-testid="summary-status"]')).toHaveAttribute(
'data-status',
'completed',
{ timeout: 60000 }
);
// 7. Summary should be visible
await expect(page.locator('[data-testid="summary-content"]')).toBeVisible();
});
```
## Quality Gates
- [ ] All processing triggers automatically on meeting completion
- [ ] Progress visible for all processing steps
- [ ] Failures don't block other processing steps
- [ ] Processing status persisted and queryable
- [ ] Webhook events fire for all processing completions
- [ ] E2E test validates full flow
- [ ] No regression in manual trigger paths
## Migration Strategy
### Rollout Order
1. **Phase 1** (Day 1): Enable auto-extract and add the summary progress listener - immediate improvement, minimal new code
2. **Phase 2** (Day 2-3): Add processing status tracking - backend changes
3. **Phase 3** (Day 4-5): Create orchestration hook - main feature
4. **Phase 4** (Day 6): Add webhook events - external integrations
5. **Phase 5** (Day 7): Add error surfacing - observability
### Rollback Plan
Each phase is independently reversible:
- Phase 1: Set `autoExtract: false` and remove the summary progress listener
- Phase 2: Processing status optional, can ignore
- Phase 3: Hook is additive, can disable
- Phase 4: Webhook events are additive
- Phase 5: Error logging is additive
## Dependencies
### External Dependencies
- GAP-003 (Error Handling) - Proper error classification for processing failures
- GAP-004 (Diarization Lifecycle) - Diarization polling pattern
### Feature Flags
- `NOTEFLOW_FEATURE_NER_ENABLED` - Controls entity extraction availability
- Consider: `NOTEFLOW_FEATURE_AUTO_PROCESSING` - Enable/disable auto-processing
## Risks and Mitigations
| Risk | Likelihood | Impact | Mitigation |
|------|------------|--------|------------|
| Processing overload on server | Medium | High | Rate limit concurrent processing per user |
| Long summarization blocks UI | Medium | Medium | Progress events + async processing |
| Failed processing not visible | High | Medium | ProcessingStatus component |
| Diarization timeout | Low | Low | Already handled by GAP-004 |
## References
- `src/noteflow/grpc/_mixins/summarization.py` - Summary generation RPC
- `src/noteflow/grpc/_mixins/entities.py` - Entity extraction RPC
- `src/noteflow/grpc/_mixins/diarization/` - Diarization refinement
- `client/src/hooks/use-diarization.ts` - Polling pattern to reuse
- `client/src/hooks/use-entity-extraction.ts` - Auto-extract pattern
- GAP-003 - Error handling patterns
- GAP-004 - Diarization lifecycle patterns

View File

@@ -0,0 +1,895 @@
# Testing Guide: SPRINT-GAP-011
Comprehensive testing strategy for the post-processing pipeline.
## Unit Tests
### usePostProcessing Hook (`client/src/hooks/use-post-processing.test.ts`)
```typescript
import { renderHook, act, waitFor } from '@testing-library/react';
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { usePostProcessing } from './use-post-processing';
// Mock the API
const mockApi = {
generateSummary: vi.fn(),
extractEntities: vi.fn(),
refineSpeakers: vi.fn(),
getDiarizationJobStatus: vi.fn(),
};
vi.mock('@/contexts/connection-context', () => ({
useAPI: () => mockApi,
}));
// Mock Tauri event listener
vi.mock('@tauri-apps/api/event', () => ({
listen: vi.fn(() => Promise.resolve(() => {})),
}));
describe('usePostProcessing', () => {
beforeEach(() => {
vi.clearAllMocks();
});
describe('initial state', () => {
it('should initialize all steps as pending when no processing done', () => {
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
})
);
expect(result.current.steps).toEqual([
{ name: 'summary', status: 'pending' },
{ name: 'entities', status: 'pending' },
{ name: 'diarization', status: 'pending' },
]);
expect(result.current.isProcessing).toBe(false);
expect(result.current.isComplete).toBe(false);
});
it('should mark steps as completed if already processed', () => {
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
processingStatus: {
summary_generated: true,
entities_extracted: false,
diarization_refined: true,
},
})
);
expect(result.current.steps[0].status).toBe('completed'); // summary
expect(result.current.steps[1].status).toBe('pending'); // entities
expect(result.current.steps[2].status).toBe('completed'); // diarization
});
});
describe('auto-start', () => {
it('should not auto-start when autoStart is false', () => {
renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
autoStart: false,
})
);
expect(mockApi.generateSummary).not.toHaveBeenCalled();
expect(mockApi.extractEntities).not.toHaveBeenCalled();
expect(mockApi.refineSpeakers).not.toHaveBeenCalled();
});
it('should auto-start when autoStart is true and meeting is completed', async () => {
mockApi.generateSummary.mockResolvedValue({ id: 'summary-1' });
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
autoStart: true,
})
);
await waitFor(() => {
expect(mockApi.generateSummary).toHaveBeenCalledWith('test-meeting', false);
expect(mockApi.extractEntities).toHaveBeenCalledWith('test-meeting', false);
expect(mockApi.refineSpeakers).toHaveBeenCalledWith('test-meeting');
});
});
it('should not auto-start when meeting is not completed', () => {
renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'recording',
autoStart: true,
})
);
expect(mockApi.generateSummary).not.toHaveBeenCalled();
});
});
describe('manual start', () => {
it('should start all processing when startProcessing is called', async () => {
mockApi.generateSummary.mockResolvedValue({ id: 'summary-1' });
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
})
);
await act(async () => {
await result.current.startProcessing();
});
expect(mockApi.generateSummary).toHaveBeenCalled();
expect(mockApi.extractEntities).toHaveBeenCalled();
expect(mockApi.refineSpeakers).toHaveBeenCalled();
});
it('should not start if already processing', async () => {
mockApi.generateSummary.mockImplementation(
() => new Promise((resolve) => setTimeout(resolve, 1000))
);
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
})
);
// Start processing
act(() => {
result.current.startProcessing();
});
// Try to start again immediately
await act(async () => {
await result.current.startProcessing();
});
// Should only have been called once
expect(mockApi.generateSummary).toHaveBeenCalledTimes(1);
});
});
describe('step status updates', () => {
it('should update step status to running then completed', async () => {
mockApi.generateSummary.mockResolvedValue({ id: 'summary-1' });
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
})
);
await act(async () => {
await result.current.startProcessing();
});
await waitFor(() => {
expect(result.current.steps[0].status).toBe('completed');
expect(result.current.steps[1].status).toBe('completed');
expect(result.current.steps[2].status).toBe('completed');
});
expect(result.current.isComplete).toBe(true);
});
it('should mark step as failed on error', async () => {
mockApi.generateSummary.mockRejectedValue(new Error('Summary failed'));
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
})
);
await act(async () => {
await result.current.startProcessing();
});
await waitFor(() => {
expect(result.current.steps[0].status).toBe('failed');
expect(result.current.steps[0].error).toBe('Summary failed');
});
// Other steps should still complete
expect(result.current.steps[1].status).toBe('completed');
expect(result.current.hasFailures).toBe(true);
});
it('should skip already processed steps', async () => {
mockApi.generateSummary.mockResolvedValue({ id: 'summary-1' });
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
processingStatus: {
summary_generated: true,
entities_extracted: false,
diarization_refined: false,
},
})
);
await act(async () => {
await result.current.startProcessing();
});
await waitFor(() => {
expect(result.current.steps[0].status).toBe('skipped');
});
// Should not have called generateSummary
expect(mockApi.generateSummary).not.toHaveBeenCalled();
});
});
describe('callbacks', () => {
it('should call onStepComplete when step finishes', async () => {
mockApi.generateSummary.mockResolvedValue({ id: 'summary-1' });
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
const onStepComplete = vi.fn();
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
onStepComplete,
})
);
await act(async () => {
await result.current.startProcessing();
});
await waitFor(() => {
expect(onStepComplete).toHaveBeenCalledWith('summary');
expect(onStepComplete).toHaveBeenCalledWith('entities');
expect(onStepComplete).toHaveBeenCalledWith('diarization');
});
});
it('should call onComplete when all steps finish', async () => {
mockApi.generateSummary.mockResolvedValue({ id: 'summary-1' });
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
const onComplete = vi.fn();
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
onComplete,
})
);
await act(async () => {
await result.current.startProcessing();
});
await waitFor(() => {
expect(onComplete).toHaveBeenCalled();
});
});
it('should call onStepFailed when step fails', async () => {
mockApi.generateSummary.mockRejectedValue(new Error('API error'));
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({ status: 'completed' });
const onStepFailed = vi.fn();
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
onStepFailed,
})
);
await act(async () => {
await result.current.startProcessing();
});
await waitFor(() => {
expect(onStepFailed).toHaveBeenCalledWith('summary', 'API error');
});
});
});
describe('diarization polling', () => {
it('should poll until diarization completes', async () => {
mockApi.generateSummary.mockResolvedValue({ id: 'summary-1' });
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
let pollCount = 0;
mockApi.getDiarizationJobStatus.mockImplementation(() => {
pollCount++;
if (pollCount < 3) {
return Promise.resolve({ status: 'running', progress: pollCount * 30 });
}
return Promise.resolve({ status: 'completed' });
});
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
})
);
await act(async () => {
await result.current.startProcessing();
});
// Wait for polling to complete
await waitFor(
() => {
expect(result.current.steps[2].status).toBe('completed');
},
{ timeout: 10000 }
);
expect(mockApi.getDiarizationJobStatus).toHaveBeenCalledTimes(3);
});
it('should handle diarization failure', async () => {
mockApi.generateSummary.mockResolvedValue({ id: 'summary-1' });
mockApi.extractEntities.mockResolvedValue([]);
mockApi.refineSpeakers.mockResolvedValue({ jobId: 'job-1' });
mockApi.getDiarizationJobStatus.mockResolvedValue({
status: 'failed',
error: 'Speaker detection failed',
});
const { result } = renderHook(() =>
usePostProcessing({
meetingId: 'test-meeting',
meetingState: 'completed',
})
);
await act(async () => {
await result.current.startProcessing();
});
await waitFor(() => {
expect(result.current.steps[2].status).toBe('failed');
expect(result.current.steps[2].error).toBe('Speaker detection failed');
});
});
});
});
```
### ProcessingStatus Component (`client/src/components/meeting/processing-status.test.tsx`)
```typescript
import { render, screen } from '@testing-library/react';
import { describe, it, expect } from 'vitest';
import { ProcessingStatus } from './processing-status';
import type { ProcessingStep } from '@/hooks/use-post-processing';
describe('ProcessingStatus', () => {
const defaultSteps: ProcessingStep[] = [
{ name: 'summary', status: 'pending' },
{ name: 'entities', status: 'pending' },
{ name: 'diarization', status: 'pending' },
];
it('should render all steps', () => {
render(<ProcessingStatus steps={defaultSteps} />);
expect(screen.getByText('Generate Summary')).toBeInTheDocument();
expect(screen.getByText('Extract Entities')).toBeInTheDocument();
expect(screen.getByText('Refine Speakers')).toBeInTheDocument();
});
it('should show spinner for running steps', () => {
const steps: ProcessingStep[] = [
{ name: 'summary', status: 'running' },
{ name: 'entities', status: 'pending' },
{ name: 'diarization', status: 'pending' },
];
render(<ProcessingStatus steps={steps} />);
// Check for spinner (animate-spin class)
const spinner = document.querySelector('.animate-spin');
expect(spinner).toBeInTheDocument();
});
it('should show check for completed steps', () => {
const steps: ProcessingStep[] = [
{ name: 'summary', status: 'completed' },
{ name: 'entities', status: 'pending' },
{ name: 'diarization', status: 'pending' },
];
render(<ProcessingStatus steps={steps} />);
// Check icon should be visible
const checkIcon = document.querySelector('.text-green-500');
expect(checkIcon).toBeInTheDocument();
});
it('should show X for failed steps with error message', () => {
const steps: ProcessingStep[] = [
{ name: 'summary', status: 'failed', error: 'LLM unavailable' },
{ name: 'entities', status: 'pending' },
{ name: 'diarization', status: 'pending' },
];
render(<ProcessingStatus steps={steps} />);
// X icon should be visible
const xIcon = document.querySelector('.text-red-500');
expect(xIcon).toBeInTheDocument();
// Error message should be shown
expect(screen.getByText('LLM unavailable')).toBeInTheDocument();
});
it('should show progress bar for running steps with progress', () => {
const steps: ProcessingStep[] = [
{ name: 'summary', status: 'running', progress: 50 },
{ name: 'entities', status: 'pending' },
{ name: 'diarization', status: 'pending' },
];
render(<ProcessingStatus steps={steps} />);
// Progress bar should be visible
const progressBar = document.querySelector('[role="progressbar"]');
expect(progressBar).toBeInTheDocument();
});
it('should not render when all steps are complete', () => {
const steps: ProcessingStep[] = [
{ name: 'summary', status: 'completed' },
{ name: 'entities', status: 'completed' },
{ name: 'diarization', status: 'completed' },
];
const { container } = render(<ProcessingStatus steps={steps} />);
expect(container.firstChild).toBeNull();
});
it('should not render when all steps are skipped', () => {
const steps: ProcessingStep[] = [
{ name: 'summary', status: 'skipped' },
{ name: 'entities', status: 'skipped' },
{ name: 'diarization', status: 'skipped' },
];
const { container } = render(<ProcessingStatus steps={steps} />);
expect(container.firstChild).toBeNull();
});
});
```
## Integration Tests
### Backend Processing Status (`tests/integration/test_processing_status.py`)
```python
import pytest
from noteflow.domain.entities.meeting import Meeting, MeetingState, ProcessingStatus
from noteflow.domain.value_objects import MeetingId
class TestProcessingStatus:
"""Tests for processing status tracking on meetings."""
@pytest.fixture
def meeting_with_status(self) -> Meeting:
return Meeting(
id=MeetingId.generate(),
title="Test Meeting",
state=MeetingState.COMPLETED,
processing_status=ProcessingStatus(
summary_generated=False,
entities_extracted=False,
diarization_refined=False,
),
)
async def test_summary_generated_updates_status(
self,
mock_uow,
meeting_with_status,
):
"""Generating summary should update processing_status."""
# Arrange
async with mock_uow as uow:
await uow.meetings.create(meeting_with_status)
await uow.commit()
# Act - simulate summary generation
async with mock_uow as uow:
meeting = await uow.meetings.get(meeting_with_status.id)
assert meeting is not None
meeting.processing_status.summary_generated = True
await uow.meetings.update(meeting)
await uow.commit()
# Assert
async with mock_uow as uow:
meeting = await uow.meetings.get(meeting_with_status.id)
assert meeting is not None
assert meeting.processing_status.summary_generated is True
async def test_entities_extracted_updates_status(
self,
mock_uow,
meeting_with_status,
):
"""Extracting entities should update processing_status."""
async with mock_uow as uow:
await uow.meetings.create(meeting_with_status)
await uow.commit()
async with mock_uow as uow:
meeting = await uow.meetings.get(meeting_with_status.id)
assert meeting is not None
meeting.processing_status.entities_extracted = True
await uow.meetings.update(meeting)
await uow.commit()
async with mock_uow as uow:
meeting = await uow.meetings.get(meeting_with_status.id)
assert meeting is not None
assert meeting.processing_status.entities_extracted is True
async def test_diarization_refined_updates_status(
self,
mock_uow,
meeting_with_status,
):
"""Refining diarization should update processing_status."""
async with mock_uow as uow:
await uow.meetings.create(meeting_with_status)
await uow.commit()
async with mock_uow as uow:
meeting = await uow.meetings.get(meeting_with_status.id)
assert meeting is not None
meeting.processing_status.diarization_refined = True
await uow.meetings.update(meeting)
await uow.commit()
async with mock_uow as uow:
meeting = await uow.meetings.get(meeting_with_status.id)
assert meeting is not None
assert meeting.processing_status.diarization_refined is True
async def test_get_meeting_includes_processing_status(
self,
mock_uow,
meeting_with_status,
):
"""GetMeeting should include processing_status in response."""
meeting_with_status.processing_status.summary_generated = True
async with mock_uow as uow:
await uow.meetings.create(meeting_with_status)
await uow.commit()
async with mock_uow as uow:
meeting = await uow.meetings.get(meeting_with_status.id)
assert meeting is not None
assert meeting.processing_status is not None
assert meeting.processing_status.summary_generated is True
assert meeting.processing_status.entities_extracted is False
assert meeting.processing_status.diarization_refined is False
```
### Webhook Events (`tests/integration/test_webhook_events.py`)
```python
import pytest
from unittest.mock import AsyncMock, MagicMock
from noteflow.domain.webhooks.events import WebhookEventType
from noteflow.application.services.webhook_service import WebhookService
class TestNewWebhookEvents:
"""Tests for new webhook event types."""
@pytest.fixture
def webhook_service(self, mock_uow) -> WebhookService:
executor = MagicMock()
executor.deliver = AsyncMock()
return WebhookService(
unit_of_work=mock_uow,
executor=executor,
)
async def test_entities_extracted_event(self, webhook_service):
"""Should fire entities.extracted webhook event."""
entities = [
MagicMock(category="person"),
MagicMock(category="person"),
MagicMock(category="company"),
]
await webhook_service.trigger_entities_extracted(
meeting_id="meeting-123",
entities=entities,
)
webhook_service._executor.deliver.assert_called()
call_args = webhook_service._executor.deliver.call_args
assert call_args[0][0] == WebhookEventType.ENTITIES_EXTRACTED
async def test_diarization_completed_event(self, webhook_service):
"""Should fire diarization.completed webhook event."""
await webhook_service.trigger_diarization_completed(
meeting_id="meeting-123",
job_id="job-456",
speaker_count=3,
segments_updated=42,
)
webhook_service._executor.deliver.assert_called()
call_args = webhook_service._executor.deliver.call_args
assert call_args[0][0] == WebhookEventType.DIARIZATION_COMPLETED
```
## E2E Tests
### Full Processing Flow (`client/e2e/post-processing.spec.ts`)
```typescript
import { test, expect } from '@playwright/test';
test.describe('Post-Processing Pipeline', () => {
test.beforeEach(async ({ page }) => {
// Navigate to app and ensure connected
await page.goto('/');
await expect(page.locator('[data-testid="connection-status"]')).toHaveText(
'Connected'
);
});
test('shows processing status after recording stops', async ({ page }) => {
// Start a new recording
await page.click('[data-testid="new-meeting-button"]');
await page.fill('[data-testid="meeting-title-input"]', 'E2E Test Meeting');
await page.click('[data-testid="start-recording-button"]');
// Wait for recording to start
await expect(page.locator('[data-testid="recording-indicator"]')).toBeVisible();
// Record for 5 seconds
await page.waitForTimeout(5000);
// Stop recording
await page.click('[data-testid="stop-recording-button"]');
// Should navigate to meeting detail
await expect(page).toHaveURL(/\/meetings\/.+/);
// Processing status should be visible
await expect(
page.locator('[data-testid="processing-status"]')
).toBeVisible();
// Should show steps
await expect(page.getByText('Generate Summary')).toBeVisible();
await expect(page.getByText('Extract Entities')).toBeVisible();
await expect(page.getByText('Refine Speakers')).toBeVisible();
});
test('completes all processing steps', async ({ page }) => {
// This test requires a longer timeout due to LLM calls
test.setTimeout(120000);
// Start and complete a recording
await page.click('[data-testid="new-meeting-button"]');
await page.fill('[data-testid="meeting-title-input"]', 'Processing Test');
await page.click('[data-testid="start-recording-button"]');
await page.waitForTimeout(10000);
await page.click('[data-testid="stop-recording-button"]');
// Wait for all processing to complete
await expect(
page.locator('[data-testid="summary-status"][data-status="completed"]')
).toBeVisible({ timeout: 60000 });
await expect(
page.locator('[data-testid="entities-status"][data-status="completed"]')
).toBeVisible({ timeout: 60000 });
await expect(
page.locator('[data-testid="diarization-status"][data-status="completed"]')
).toBeVisible({ timeout: 60000 });
// Processing status card should disappear after completion
await expect(
page.locator('[data-testid="processing-status"]')
).not.toBeVisible();
// Summary should now be visible
await expect(page.locator('[data-testid="summary-content"]')).toBeVisible();
});
test('handles processing failure gracefully', async ({ page }) => {
// This test simulates a failure scenario
// Requires mocking or a specific test mode
await page.click('[data-testid="new-meeting-button"]');
await page.fill('[data-testid="meeting-title-input"]', 'Failure Test');
await page.click('[data-testid="start-recording-button"]');
await page.waitForTimeout(3000);
await page.click('[data-testid="stop-recording-button"]');
// If a step fails, it should show error state
// Other steps should still complete
await page.waitForSelector(
'[data-testid="processing-status"]',
{ state: 'visible' }
);
// Wait for processing to finish (success or failure)
await page.waitForFunction(
() => {
const steps = document.querySelectorAll('[data-status]');
return Array.from(steps).every(
(s) =>
s.getAttribute('data-status') === 'completed' ||
s.getAttribute('data-status') === 'failed' ||
s.getAttribute('data-status') === 'skipped'
);
},
{ timeout: 60000 }
);
// If summary failed, error should be shown
const summaryStep = page.locator('[data-testid="summary-status"]');
const status = await summaryStep.getAttribute('data-status');
if (status === 'failed') {
await expect(
page.locator('[data-testid="summary-error"]')
).toBeVisible();
}
});
test('manual processing buttons still work', async ({ page }) => {
// Navigate to an existing meeting without processing
await page.goto('/meetings/existing-meeting-id');
// Manual buttons should be visible
await expect(
page.locator('[data-testid="generate-summary-button"]')
).toBeVisible();
await expect(
page.locator('[data-testid="extract-entities-button"]')
).toBeVisible();
await expect(
page.locator('[data-testid="refine-speakers-button"]')
).toBeVisible();
// Click generate summary
await page.click('[data-testid="generate-summary-button"]');
// Should show loading state
await expect(
page.locator('[data-testid="generate-summary-button"]')
).toBeDisabled();
// Wait for completion
await expect(page.locator('[data-testid="summary-content"]')).toBeVisible({
timeout: 60000,
});
});
});
```
## Test Data Fixtures
### Sample Meeting for Testing
```typescript
// client/src/__fixtures__/meetings.ts
export const meetingWithoutProcessing = {
id: 'test-meeting-1',
title: 'Test Meeting',
state: 'completed',
created_at: '2024-01-15T10:00:00Z',
processing_status: {
summary_generated: false,
entities_extracted: false,
diarization_refined: false,
},
};
export const meetingWithSummary = {
...meetingWithoutProcessing,
id: 'test-meeting-2',
processing_status: {
summary_generated: true,
summary_generated_at: '2024-01-15T10:05:00Z',
entities_extracted: false,
diarization_refined: false,
},
};
export const fullyProcessedMeeting = {
...meetingWithoutProcessing,
id: 'test-meeting-3',
processing_status: {
summary_generated: true,
summary_generated_at: '2024-01-15T10:05:00Z',
entities_extracted: true,
entities_extracted_at: '2024-01-15T10:05:30Z',
diarization_refined: true,
diarization_refined_at: '2024-01-15T10:06:00Z',
},
};
```
## Running Tests
```bash
# Unit tests (TypeScript)
cd client
npm run test
# Unit tests (Python)
pytest tests/domain/ tests/application/ -v
# Integration tests
pytest tests/integration/ -v
# E2E tests (requires running server)
cd client
npm run e2e
# All quality checks
make quality
```