From ea0e8ee1e43c1ca0bc8af07459207819ea7f633c Mon Sep 17 00:00:00 2001
From: Travis Vasceannie
Date: Thu, 22 Jan 2026 16:15:56 +0000
Subject: [PATCH] recovery

---
 .../sprint-25-foundation/README.md            | 590 ++++++++++++++++++
 .../sprint-26-meeting-qa/README.md            | 530 ++++++++++++++++
 .../sprint-27-cross-meeting/README.md         |  87 +++
 .../sprint-28-advanced/README.md              | 146 +++++
 src/noteflow/domain/ai/__init__.py            |  38 ++
 src/noteflow/domain/ai/citations.py           |  43 ++
 src/noteflow/domain/ai/interrupts.py          | 202 ++++++
 src/noteflow/domain/ai/ports.py               |  26 +
 src/noteflow/domain/ai/state.py               |  39 ++
 src/noteflow/infrastructure/ai/__init__.py    |  45 ++
 src/noteflow/infrastructure/ai/cache.py       | 212 +++++++
 .../infrastructure/ai/checkpointer.py         |  56 ++
 .../infrastructure/ai/graphs/__init__.py      |  51 ++
 .../infrastructure/ai/graphs/meeting_qa.py    | 193 ++++++
 .../infrastructure/ai/graphs/summarization.py | 103 +++
 .../infrastructure/ai/graphs/workspace_qa.py  | 198 ++++++
 src/noteflow/infrastructure/ai/guardrails.py  | 312 +++++++++
 src/noteflow/infrastructure/ai/interrupts.py  | 231 +++++++
 .../infrastructure/ai/nodes/__init__.py       |  39 ++
 .../ai/nodes/annotation_suggester.py          | 121 ++++
 .../infrastructure/ai/nodes/verification.py   |  61 ++
 .../infrastructure/ai/nodes/web_search.py     | 226 +++++++
 .../infrastructure/ai/tools/__init__.py       |  27 +
 .../infrastructure/ai/tools/retrieval.py      | 237 +++++++
 .../infrastructure/ai/tools/synthesis.py      |  60 ++
 tests/domain/ai/__init__.py                   |   1 +
 tests/domain/ai/test_citations.py             | 118 ++++
 tests/infrastructure/ai/__init__.py           |   1 +
 tests/infrastructure/ai/test_retrieval.py     | 268 ++++++++
 tests/infrastructure/ai/test_synthesis.py     | 149 +++++
 typings/langgraph/__init__.pyi                |   1 +
 .../checkpoint/postgres/__init__.pyi          |   1 +
 typings/langgraph/checkpoint/postgres/aio.pyi |  10 +
 typings/langgraph/graph/__init__.pyi          |  41 ++
 34 files changed, 4463 insertions(+)
 create mode 100644 docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-25-foundation/README.md
 create mode 100644 docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-26-meeting-qa/README.md
 create mode 100644 docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-27-cross-meeting/README.md
 create mode 100644 docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-28-advanced/README.md
 create mode 100644 src/noteflow/domain/ai/__init__.py
 create mode 100644 src/noteflow/domain/ai/citations.py
 create mode 100644 src/noteflow/domain/ai/interrupts.py
 create mode 100644 src/noteflow/domain/ai/ports.py
 create mode 100644 src/noteflow/domain/ai/state.py
 create mode 100644 src/noteflow/infrastructure/ai/__init__.py
 create mode 100644 src/noteflow/infrastructure/ai/cache.py
 create mode 100644 src/noteflow/infrastructure/ai/checkpointer.py
 create mode 100644 src/noteflow/infrastructure/ai/graphs/__init__.py
 create mode 100644 src/noteflow/infrastructure/ai/graphs/meeting_qa.py
 create mode 100644 src/noteflow/infrastructure/ai/graphs/summarization.py
 create mode 100644 src/noteflow/infrastructure/ai/graphs/workspace_qa.py
 create mode 100644 src/noteflow/infrastructure/ai/guardrails.py
 create mode 100644 src/noteflow/infrastructure/ai/interrupts.py
 create mode 100644 src/noteflow/infrastructure/ai/nodes/__init__.py
 create mode 100644 src/noteflow/infrastructure/ai/nodes/annotation_suggester.py
 create mode 100644 src/noteflow/infrastructure/ai/nodes/verification.py
 create mode 100644 src/noteflow/infrastructure/ai/nodes/web_search.py
 create mode 100644 src/noteflow/infrastructure/ai/tools/__init__.py
 create mode 100644 src/noteflow/infrastructure/ai/tools/retrieval.py
 create mode 100644 src/noteflow/infrastructure/ai/tools/synthesis.py
 create mode 100644 tests/domain/ai/__init__.py
 create mode 100644 tests/domain/ai/test_citations.py
 create mode 100644 tests/infrastructure/ai/__init__.py
 create mode 100644 tests/infrastructure/ai/test_retrieval.py
 create mode 100644 tests/infrastructure/ai/test_synthesis.py
 create mode 100644 typings/langgraph/__init__.pyi
 create mode 100644 typings/langgraph/checkpoint/postgres/__init__.pyi
 create mode 100644 typings/langgraph/checkpoint/postgres/aio.pyi
 create mode 100644 typings/langgraph/graph/__init__.pyi

diff --git a/docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-25-foundation/README.md b/docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-25-foundation/README.md
new file mode 100644
index 0000000..a4de47d
--- /dev/null
+++ b/docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-25-foundation/README.md
@@ -0,0 +1,590 @@
+# Sprint 25: LangGraph Foundation
+
+> **Size**: L | **Owner**: Backend | **Phase**: 5 - Platform Evolution
+> **Effort**: ~1 sprint | **Prerequisites**: Sprint 19 (Embeddings)
+
+---
+
+## Objective
+
+Establish LangGraph infrastructure and wrap existing summarization as proof of pattern.
+
+---
+
+## Current State Analysis
+
+### What Exists
+
+| Component | Location | Status |
+|-----------|----------|--------|
+| Summarization service | `application/services/summarization/` | ✅ Working |
+| Segment semantic search | `infrastructure/persistence/repositories/segment_repo.py` | ✅ Working |
+| Cloud consent pattern | `application/services/summarization/_consent_manager.py` | ✅ Working |
+| Usage event tracking | `application/observability/ports.py` | ✅ Working |
+| gRPC mixin pattern | `grpc/_mixins/` | ✅ Working |
+
+### What's Missing
+
+| Component | Target Location | Sprint Task |
+|-----------|-----------------|-------------|
+| LangGraph dependencies | `pyproject.toml` | Task 1 |
+| State schemas | `domain/ai/state.py` | Task 2 |
+| Checkpointer factory | `infrastructure/ai/checkpointer.py` | Task 3 |
+| Retrieval tools | `infrastructure/ai/tools/retrieval.py` | Task 4 |
+| Synthesis tools | `infrastructure/ai/tools/synthesis.py` | Task 5 |
+| Summarization graph | `infrastructure/ai/graphs/summarization.py` | Task 6 |
+| AssistantService | `application/services/assistant/` | Task 7 |
+
+---
+
+## Implementation Tasks
+
+### Task 1: Add LangGraph Dependencies
+
+**File**: `pyproject.toml`
+
+Add new optional dependency group:
+
+```toml
+langgraph = [
+    "langgraph>=0.2",
+    "langgraph-checkpoint-postgres>=2.0",
+    "langchain-core>=0.3",
+]
+```
+
+Also add to `optional` and `all` groups.
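+
+For reference, the aggregate groups can pull in the new extra by self-reference.
+This is a sketch only: the existing contents of `optional` and `all` are not shown
+here, and the `noteflow` distribution name is assumed from the repository layout.
+
+```toml
+[project.optional-dependencies]
+# ... existing groups ...
+optional = [
+    # ... existing optional extras ...
+    "noteflow[langgraph]",
+]
+all = [
+    # ... everything else ...
+    "noteflow[langgraph]",
+]
+```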
+
+**Verification**: `uv pip install -e ".[langgraph]"` succeeds
+
+---
+
+### Task 2: Create State Schemas
+
+**Files to create**:
+- `src/noteflow/domain/ai/__init__.py`
+- `src/noteflow/domain/ai/state.py`
+- `src/noteflow/domain/ai/citations.py`
+- `src/noteflow/domain/ai/ports.py`
+
+#### `state.py` - Input/Output/Internal Separation
+
+```python
+from __future__ import annotations
+
+import operator
+from typing import Annotated, TypedDict
+from uuid import UUID
+
+
+class AssistantInputState(TypedDict):
+    """Public API input - what clients send."""
+    question: str
+    meeting_id: UUID | None
+    thread_id: str | None
+    allow_web: bool
+    top_k: int
+
+
+class AssistantOutputState(TypedDict):
+    """Public API output - what clients receive."""
+    answer: str
+    citations: list[dict]
+    suggested_annotations: list[dict]
+    thread_id: str
+
+
+class AssistantInternalState(AssistantInputState):
+    """Internal graph state - can evolve without breaking API."""
+    # Retrieval
+    retrieved_segment_ids: Annotated[list[int], operator.add]
+    retrieved_segments: list[dict]
+
+    # Synthesis
+    draft_answer: str
+    verification_passed: bool
+
+    # Tracking
+    loop_count: int
+```
+
+#### `citations.py` - Value Object
+
+```python
+from dataclasses import dataclass
+from uuid import UUID
+
+
+@dataclass(frozen=True)
+class SegmentCitation:
+    """Reference to transcript segment used as evidence."""
+    meeting_id: UUID
+    segment_id: int
+    start_time: float
+    end_time: float
+    text: str
+    score: float = 0.0
+
+    def __post_init__(self) -> None:
+        # Invariant relied on by the unit tests below: end must not precede start.
+        if self.end_time < self.start_time:
+            raise ValueError("end_time must be >= start_time")
+
+    @property
+    def duration(self) -> float:
+        """Length of the cited span in seconds."""
+        return self.end_time - self.start_time
+```
+
+#### `ports.py` - Protocol Definition
+
+```python
+from typing import Protocol
+from uuid import UUID
+
+from noteflow.domain.ai.state import AssistantOutputState
+
+
+class AssistantPort(Protocol):
+    """Protocol for AI assistant operations."""
+
+    async def ask(
+        self,
+        question: str,
+        meeting_id: UUID | None = None,
+        thread_id: str | None = None,
+        allow_web: bool = False,
+        top_k: int = 8,
+    ) -> AssistantOutputState:
+        ...
+```
+
+**Verification**: `basedpyright src/noteflow/domain/ai/` passes
+
+---
+
+### Task 3: Create Checkpointer Factory
+
+**File**: `src/noteflow/infrastructure/ai/checkpointer.py`
+
+```python
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Final
+
+if TYPE_CHECKING:
+    from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
+
+CHECKPOINTER_POOL_SIZE: Final[int] = 5
+
+
+async def create_checkpointer(
+    database_url: str,
+    pool_size: int = CHECKPOINTER_POOL_SIZE,
+) -> AsyncPostgresSaver:
+    """Create async Postgres checkpointer for LangGraph state persistence."""
+    from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
+    from psycopg_pool import AsyncConnectionPool
+
+    pool = AsyncConnectionPool(
+        conninfo=database_url,
+        max_size=pool_size,
+        open=False,  # open explicitly below; implicit open in the constructor is deprecated in psycopg_pool
+        kwargs={"autocommit": True},
+    )
+    await pool.open()
+    checkpointer = AsyncPostgresSaver(pool)
+    await checkpointer.setup()
+    return checkpointer
+```
+
+**Verification**: Unit test with mock pool
+
+---
+
+### Task 4: Create Retrieval Tools
+
+**File**: `src/noteflow/infrastructure/ai/tools/retrieval.py`
+
+```python
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Protocol
+from uuid import UUID
+
+if TYPE_CHECKING:
+    from noteflow.domain.entities import Segment
+
+
+class EmbedderProtocol(Protocol):
+    """Protocol for text embedding."""
+    async def embed(self, text: str) -> list[float]: ...
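+    # Implementations should return a fixed-dimension vector matching the
+    # pgvector column that search_semantic (below) queries; the concrete
+    # embedder comes from Sprint 19. Dimension handling is an assumption of
+    # this sketch; nothing here enforces it.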
+
+
+class SegmentSearchProtocol(Protocol):
+    """Protocol for semantic segment search."""
+    async def search_semantic(
+        self,
+        query_embedding: list[float],
+        meeting_id: UUID | None,
+        limit: int,
+    ) -> list[tuple[Segment, float]]: ...
+
+
+@dataclass
+class RetrievalResult:
+    """Result from segment retrieval."""
+    segment_id: int
+    meeting_id: UUID
+    text: str
+    start_time: float
+    end_time: float
+    score: float
+
+
+async def retrieve_segments(
+    query: str,
+    embedder: EmbedderProtocol,
+    segment_repo: SegmentSearchProtocol,
+    meeting_id: UUID | None = None,
+    top_k: int = 8,
+) -> list[RetrievalResult]:
+    """Retrieve relevant transcript segments via semantic search."""
+    query_embedding = await embedder.embed(query)
+    results = await segment_repo.search_semantic(
+        query_embedding=query_embedding,
+        meeting_id=meeting_id,
+        limit=top_k,
+    )
+    return [
+        RetrievalResult(
+            segment_id=segment.segment_id,
+            meeting_id=segment.meeting_id,
+            text=segment.text,
+            start_time=segment.start_time,
+            end_time=segment.end_time,
+            score=score,
+        )
+        for segment, score in results
+    ]
+```
+
+**Verification**: Unit test with mock embedder and repo
+
+---
+
+### Task 5: Create Synthesis Tools
+
+**File**: `src/noteflow/infrastructure/ai/tools/synthesis.py`
+
+```python
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Protocol
+
+if TYPE_CHECKING:
+    from noteflow.infrastructure.ai.tools.retrieval import RetrievalResult
+
+
+class LLMProtocol(Protocol):
+    """Protocol for LLM completion."""
+    async def complete(self, prompt: str) -> str: ...
+
+
+@dataclass
+class SynthesisResult:
+    """Result from answer synthesis."""
+    answer: str
+    cited_segment_ids: list[int]
+
+
+SYNTHESIS_PROMPT_TEMPLATE = '''Answer the question based on the following transcript segments.
+Cite specific segments by their ID when making claims.
+
+Question: {question}
+
+Segments:
+{segments}
+
+Answer (cite segment IDs in brackets like [1], [3]):'''
+
+
+async def synthesize_answer(
+    question: str,
+    segments: list[RetrievalResult],
+    llm: LLMProtocol,
+) -> SynthesisResult:
+    """Generate answer with segment citations."""
+    segment_text = "\n".join(
+        f"[{s.segment_id}] ({s.start_time:.1f}s-{s.end_time:.1f}s): {s.text}"
+        for s in segments
+    )
+    prompt = SYNTHESIS_PROMPT_TEMPLATE.format(
+        question=question,
+        segments=segment_text,
+    )
+    answer = await llm.complete(prompt)
+    cited_ids = _extract_cited_ids(answer, [s.segment_id for s in segments])
+    return SynthesisResult(answer=answer, cited_segment_ids=cited_ids)
+
+
+def _extract_cited_ids(answer: str, valid_ids: list[int]) -> list[int]:
+    """Extract segment IDs cited in the answer."""
+    pattern = r'\[(\d+)\]'
+    matches = re.findall(pattern, answer)
+    cited = [int(m) for m in matches if int(m) in valid_ids]
+    return list(dict.fromkeys(cited))  # Dedupe preserving order
+```
+
+**Verification**: Unit test with mock LLM
+
+---
+
+### Task 6: Create Summarization Graph Wrapper
+
+**File**: `src/noteflow/infrastructure/ai/graphs/summarization.py`
+
+This wraps the existing SummarizationService in a LangGraph graph as proof of pattern.
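+
+For orientation, invoking the compiled wrapper might look like the sketch below;
+`ainvoke` is LangGraph's standard async entrypoint, and the service and segment
+objects are assumed to come from the existing application wiring. The build
+function itself follows.
+
+```python
+# Hypothetical call site: build once at startup, invoke per meeting.
+graph = build_summarization_graph(summarization_service)
+result = await graph.ainvoke(
+    {"meeting_id": meeting_id, "segments": segments},
+)
+summary_text = result["summary_text"]  # populated by the summarize node
+key_points = result["key_points"]
+```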
+
+```python
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, TypedDict
+
+from langgraph.graph import END, START, StateGraph
+
+if TYPE_CHECKING:
+    from uuid import UUID
+    from collections.abc import Sequence
+    from noteflow.domain.entities import Segment
+    from noteflow.application.services.summarization import SummarizationService
+
+
+class SummarizationState(TypedDict):
+    """State for summarization graph."""
+    meeting_id: UUID
+    segments: Sequence[Segment]
+    summary_text: str
+    key_points: list[dict]
+    action_items: list[dict]
+
+
+def build_summarization_graph(
+    summarization_service: SummarizationService,
+) -> StateGraph:
+    """Build LangGraph wrapper around existing summarization service."""
+
+    async def summarize_node(state: SummarizationState) -> dict:
+        result = await summarization_service.summarize(
+            meeting_id=state["meeting_id"],
+            segments=state["segments"],
+        )
+        summary = result.summary
+        return {
+            "summary_text": summary.executive_summary,
+            "key_points": [
+                {"text": kp.text, "segment_ids": kp.segment_ids}
+                for kp in summary.key_points
+            ],
+            "action_items": [
+                {"text": ai.text, "segment_ids": ai.segment_ids, "assignee": ai.assignee}
+                for ai in summary.action_items
+            ],
+        }
+
+    builder = StateGraph(SummarizationState)
+    builder.add_node("summarize", summarize_node)
+    builder.add_edge(START, "summarize")
+    builder.add_edge("summarize", END)
+
+    return builder.compile()
+```
+
+**Verification**: Integration test with mock summarization service
+
+---
+
+### Task 7: Create AssistantService Shell
+
+**Files**:
+- `src/noteflow/application/services/assistant/__init__.py`
+- `src/noteflow/application/services/assistant/assistant_service.py`
+
+#### `__init__.py`
+
+```python
+from noteflow.application.services.assistant.assistant_service import AssistantService
+
+__all__ = ["AssistantService"]
+```
+
+#### `assistant_service.py`
+
+```python
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Final
+from uuid import UUID
+
+from noteflow.application.observability.ports import NullUsageEventSink, UsageEventSink
+from noteflow.domain.ai.state import AssistantOutputState
+from noteflow.infrastructure.logging import get_logger
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from noteflow.domain.ports.unit_of_work import UnitOfWork
+
+logger = get_logger(__name__)
+
+DEFAULT_TOP_K: Final[int] = 8
+THREAD_ID_PREFIX: Final[str] = "meeting"
+
+
+def build_thread_id(meeting_id: UUID | None, user_id: UUID, graph_name: str) -> str:
+    """Build deterministic thread_id for checkpointing."""
+    meeting_part = str(meeting_id) if meeting_id else "workspace"
+    return f"{THREAD_ID_PREFIX}:{meeting_part}:user:{user_id}:graph:{graph_name}:v1"
+
+
+@dataclass
+class AssistantService:
+    """Orchestrates AI assistant workflows via LangGraph."""
+
+    uow_factory: Callable[[], UnitOfWork]
+    usage_events: UsageEventSink = field(default_factory=NullUsageEventSink)
+
+    async def ask(
+        self,
+        question: str,
+        user_id: UUID,
+        meeting_id: UUID | None = None,
+        thread_id: str | None = None,
+        allow_web: bool = False,
+        top_k: int = DEFAULT_TOP_K,
+    ) -> AssistantOutputState:
+        """Ask a question about meeting transcript(s)."""
+        effective_thread_id = thread_id or build_thread_id(
+            meeting_id, user_id, "meeting_qa"
+        )
+        logger.info(
+            "assistant_ask",
+            question_length=len(question),
+            meeting_id=str(meeting_id) if meeting_id else None,
+            thread_id=effective_thread_id,
+        )
+        # TODO: Implement in Sprint 26
+        return AssistantOutputState(
+            answer="Not implemented yet",
+            citations=[],
+            suggested_annotations=[],
+            thread_id=effective_thread_id,
+        )
+```
+
+**Verification**: Unit test for thread_id generation
+
+---
+
+## Test Plan
+
+### Unit Tests (`tests/domain/ai/`)
+
+```python
+# test_citations.py
+from uuid import uuid4
+
+import pytest
+
+from noteflow.domain.ai.citations import SegmentCitation
+
+
+def test_segment_citation_creation():
+    citation = SegmentCitation(
+        meeting_id=uuid4(),
+        segment_id=1,
+        start_time=0.0,
+        end_time=5.0,
+        text="Test segment",
+        score=0.95,
+    )
+    assert citation.duration == 5.0
+
+
+def test_segment_citation_invalid_times():
+    with pytest.raises(ValueError):
+        SegmentCitation(
+            meeting_id=uuid4(),
+            segment_id=1,
+            start_time=10.0,
+            end_time=5.0,  # Invalid: end < start
+            text="Test",
+        )
+```
+
+### Unit Tests (`tests/infrastructure/ai/`)
+
+```python
+# test_retrieval.py
+from noteflow.infrastructure.ai.tools.retrieval import retrieve_segments
+
+
+async def test_retrieve_segments_success(mock_embedder, mock_segment_repo):
+    mock_embedder.embed.return_value = [0.1, 0.2, 0.3]
+    mock_segment_repo.search_semantic.return_value = [
+        (sample_segment, 0.95),
+    ]
+
+    results = await retrieve_segments(
+        query="test query",
+        embedder=mock_embedder,
+        segment_repo=mock_segment_repo,
+        top_k=5,
+    )
+
+    assert len(results) == 1
+    assert results[0].score == 0.95
+
+
+# test_synthesis.py
+from noteflow.infrastructure.ai.tools.synthesis import synthesize_answer
+
+
+async def test_synthesize_answer_extracts_citations(mock_llm):
+    mock_llm.complete.return_value = "The answer is X [1] and Y [3]."
+
+    result = await synthesize_answer(
+        question="What happened?",
+        segments=[...],
+        llm=mock_llm,
+    )
+
+    assert result.cited_segment_ids == [1, 3]
+```
+
+---
+
+## Acceptance Criteria
+
+- [ ] `uv pip install -e ".[langgraph]"` succeeds
+- [ ] `basedpyright src/noteflow/domain/ai/` passes with 0 errors
+- [ ] `basedpyright src/noteflow/infrastructure/ai/` passes with 0 errors
+- [ ] `pytest tests/domain/ai/` passes
+- [ ] `pytest tests/infrastructure/ai/` passes
+- [ ] Existing summarization behavior unchanged (`pytest tests/application/services/test_summarization*`)
+- [ ] `make quality` passes
+
+---
+
+## Rollback Plan
+
+If issues arise:
+1. Remove langgraph from dependencies
+2. Delete `domain/ai/` and `infrastructure/ai/` directories
+3. AssistantService is not wired to gRPC yet, so no API impact
+
+---
+
+## Files Created/Modified
+
+| Action | Path |
+|--------|------|
+| Modified | `pyproject.toml` |
+| Created | `src/noteflow/domain/ai/__init__.py` |
+| Created | `src/noteflow/domain/ai/state.py` |
+| Created | `src/noteflow/domain/ai/citations.py` |
+| Created | `src/noteflow/domain/ai/ports.py` |
+| Created | `src/noteflow/infrastructure/ai/__init__.py` |
+| Created | `src/noteflow/infrastructure/ai/checkpointer.py` |
+| Created | `src/noteflow/infrastructure/ai/tools/__init__.py` |
+| Created | `src/noteflow/infrastructure/ai/tools/retrieval.py` |
+| Created | `src/noteflow/infrastructure/ai/tools/synthesis.py` |
+| Created | `src/noteflow/infrastructure/ai/graphs/__init__.py` |
+| Created | `src/noteflow/infrastructure/ai/graphs/summarization.py` |
+| Created | `src/noteflow/application/services/assistant/__init__.py` |
+| Created | `src/noteflow/application/services/assistant/assistant_service.py` |
+| Created | `tests/domain/ai/test_citations.py` |
+| Created | `tests/domain/ai/test_state.py` |
+| Created | `tests/infrastructure/ai/test_retrieval.py` |
+| Created | `tests/infrastructure/ai/test_synthesis.py` |
+| Created | `tests/infrastructure/ai/test_checkpointer.py` |

diff --git a/docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-26-meeting-qa/README.md b/docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-26-meeting-qa/README.md
new file mode 100644
index 0000000..716817f
--- /dev/null
+++ b/docs/sprints/phase-5-evolution/sprint-25-langgraph/sprint-26-meeting-qa/README.md
@@ -0,0 +1,530 @@
+# Sprint 26: Meeting Q&A MVP
+
+> **Size**: L | **Owner**: Backend + Client | **Phase**: 5 - Platform Evolution
+> **Effort**: ~1 sprint | **Prerequisites**: Sprint 25 (Foundation)
+
+---
+
+## Objective
+
+Implement single-meeting Q&A with segment citations via gRPC API and React UI.
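+
+Concretely, the sprint is done when a single call like the one below returns a
+grounded answer (a sketch against the Sprint 25 service shell; `assistant_service`,
+`current_user_id`, and `meeting_id` are placeholders):
+
+```python
+# Hypothetical round-trip once the meeting-QA graph is wired in.
+result = await assistant_service.ask(
+    question="What did we decide about the launch date?",
+    user_id=current_user_id,
+    meeting_id=meeting_id,
+)
+print(result["answer"])
+for citation in result["citations"]:  # dicts, per AssistantOutputState
+    print(citation["segment_id"], citation["start_time"], citation["text"])
+```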
+
+---
+
+## Current State (After Sprint 25)
+
+| Component | Status |
+|-----------|--------|
+| LangGraph infrastructure | ✅ Ready |
+| State schemas | ✅ Ready |
+| Retrieval tools | ✅ Ready |
+| Synthesis tools | ✅ Ready |
+| AssistantService shell | ✅ Ready |
+
+---
+
+## Implementation Tasks
+
+### Task 1: Define MeetingQA Graph
+
+**File**: `src/noteflow/infrastructure/ai/graphs/meeting_qa.py`
+
+Graph flow: `retrieve → verify → synthesize`
+
+```python
+from __future__ import annotations
+
+from typing import TypedDict
+from uuid import UUID
+
+from langgraph.graph import StateGraph, START, END
+
+from noteflow.domain.ai.citations import SegmentCitation
+from noteflow.infrastructure.ai.tools.retrieval import (
+    EmbedderProtocol,
+    RetrievalResult,
+    SegmentSearchProtocol,
+    retrieve_segments,
+)
+from noteflow.infrastructure.ai.tools.synthesis import LLMProtocol, synthesize_answer
+
+
+class MeetingQAState(TypedDict):
+    # Input
+    question: str
+    meeting_id: UUID
+    top_k: int
+
+    # Internal
+    retrieved_segments: list[RetrievalResult]
+    verification_passed: bool
+
+    # Output
+    answer: str
+    citations: list[SegmentCitation]
+
+
+def build_meeting_qa_graph(
+    embedder: EmbedderProtocol,
+    segment_repo: SegmentSearchProtocol,
+    llm: LLMProtocol,
+) -> StateGraph:
+
+    async def retrieve_node(state: MeetingQAState) -> dict:
+        results = await retrieve_segments(
+            query=state["question"],
+            embedder=embedder,
+            segment_repo=segment_repo,
+            meeting_id=state["meeting_id"],
+            top_k=state["top_k"],
+        )
+        return {"retrieved_segments": results}
+
+    async def verify_node(state: MeetingQAState) -> dict:
+        # Verify segments exist and are relevant
+        valid = len(state["retrieved_segments"]) > 0
+        return {"verification_passed": valid}
+
+    async def synthesize_node(state: MeetingQAState) -> dict:
+        if not state["verification_passed"]:
+            return {
+                "answer": "I couldn't find relevant information in this meeting.",
+                "citations": [],
+            }
+
+        result = await synthesize_answer(
+            question=state["question"],
+            segments=state["retrieved_segments"],
+            llm=llm,
+        )
+
+        citations = [
+            SegmentCitation(
+                meeting_id=state["meeting_id"],
+                segment_id=seg.segment_id,
+                start_time=seg.start_time,
+                end_time=seg.end_time,
+                text=seg.text,
+                score=seg.score,
+            )
+            for seg in state["retrieved_segments"]
+            if seg.segment_id in result.cited_segment_ids
+        ]
+
+        return {"answer": result.answer, "citations": citations}
+
+    builder = StateGraph(MeetingQAState)
+    builder.add_node("retrieve", retrieve_node)
+    builder.add_node("verify", verify_node)
+    builder.add_node("synthesize", synthesize_node)
+
+    builder.add_edge(START, "retrieve")
+    builder.add_edge("retrieve", "verify")
+    builder.add_edge("verify", "synthesize")
+    builder.add_edge("synthesize", END)
+
+    return builder.compile()
+```
+
+---
+
+### Task 2: Create Citation Verifier Node
+
+**File**: `src/noteflow/infrastructure/ai/nodes/verification.py`
+
+```python
+from dataclasses import dataclass
+
+
+@dataclass
+class VerificationResult:
+    is_valid: bool
+    invalid_citation_indices: list[int]
+    reason: str | None = None
+
+
+def verify_citations(
+    answer: str,
+    cited_ids: list[int],
+    available_ids: set[int],
+) -> VerificationResult:
+    """Verify all cited segment IDs exist in available segments."""
+    invalid = [i for i, cid in enumerate(cited_ids) if cid not in available_ids]
+    return VerificationResult(
+        is_valid=len(invalid) == 0,
+        invalid_citation_indices=invalid,
+        reason=f"Invalid citations: {invalid}" if invalid else None,
+    )
+```
+
+---
+
+### Task 3: Add Proto Messages
+
+**File**: `src/noteflow/grpc/proto/noteflow.proto`
+
+```protobuf
+// Add to existing proto
+
+message SegmentCitation {
+  string meeting_id = 1;
+  int32 segment_id = 2;
+  float start_time = 3;
+  float end_time = 4;
+  string text = 5;
+  float score = 6;
+}
+
+message AskAssistantRequest {
+  string question = 1;
+  optional string meeting_id = 2;
+  optional string thread_id = 3;
+  bool allow_web = 4;
+  int32 top_k = 5;
+}
+
+message AskAssistantResponse {
+  string answer = 1;
+  repeated SegmentCitation citations = 2;
+  repeated SuggestedAnnotation suggested_annotations = 3;
+  string thread_id = 4;
+}
+
+message SuggestedAnnotation {
+  string text = 1;
+  AnnotationType type = 2;
+  repeated int32 segment_ids = 3;
+}
+
+// Add to NoteFlowService
+rpc AskAssistant(AskAssistantRequest) returns (AskAssistantResponse);
+```
+
+After modifying proto:
+```bash
+python -m grpc_tools.protoc -I src/noteflow/grpc/proto \
+    --python_out=src/noteflow/grpc/proto \
+    --grpc_python_out=src/noteflow/grpc/proto \
+    src/noteflow/grpc/proto/noteflow.proto
+
+python scripts/patch_grpc_stubs.py
+```
+
+---
+
+### Task 4: Add gRPC Mixin
+
+**File**: `src/noteflow/grpc/_mixins/assistant.py`
+
+```python
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from noteflow.grpc.proto import noteflow_pb2 as pb
+from noteflow.grpc._mixins.protocols import ServicerHost
+
+if TYPE_CHECKING:
+    from grpc.aio import ServicerContext
+
+
+class AssistantMixin:
+    """gRPC mixin for AI assistant operations."""
+
+    async def AskAssistant(
+        self: ServicerHost,
+        request: pb.AskAssistantRequest,
+        context: ServicerContext,
+    ) -> pb.AskAssistantResponse:
+        from uuid import UUID
+
+        meeting_id = UUID(request.meeting_id) if request.meeting_id else None
+        op_context = await self.get_operation_context(context)
+
+        result = await self.assistant_service.ask(
+            question=request.question,
+            user_id=op_context.user.id,
+            meeting_id=meeting_id,
+            thread_id=request.thread_id or None,
+            allow_web=request.allow_web,
+            top_k=request.top_k or 8,
+        )
+
+        return pb.AskAssistantResponse(
+            answer=result["answer"],
+            citations=[
+                pb.SegmentCitation(
+                    meeting_id=str(c["meeting_id"]),
+                    segment_id=c["segment_id"],
+                    start_time=c["start_time"],
+                    end_time=c["end_time"],
+                    text=c["text"],
+                    score=c["score"],
+                )
+                for c in result["citations"]
+            ],
+            thread_id=result["thread_id"],
+        )
+```
+
+---
+
+### Task 5: Add Rust Command
+
+**File**: `client/src-tauri/src/commands/assistant.rs`
+
+```rust
+use crate::grpc::client::GrpcClient;
+use crate::grpc::types::assistant::{AskAssistantRequest, AskAssistantResponse};
+use tauri::State;
+
+#[tauri::command]
+pub async fn ask_assistant(
+    client: State<'_, GrpcClient>,
+    question: String,
+    meeting_id: Option<String>,
+    thread_id: Option<String>,
+    allow_web: bool,
+    top_k: i32,
+) -> Result<AskAssistantResponse, String> {
+    let request = AskAssistantRequest {
+        question,
+        meeting_id,
+        thread_id,
+        allow_web,
+        top_k,
+    };
+
+    client
+        .ask_assistant(request)
+        .await
+        .map_err(|e| e.to_string())
+}
+```
+
+---
+
+### Task 6: Add TypeScript Adapter
+
+**File**: `client/src/api/tauri-adapter.ts` (add method)
+
+```typescript
+async askAssistant(params: AskAssistantParams): Promise<AskAssistantResponse> {
+  return invoke<AskAssistantResponse>('ask_assistant', {
+    question: params.question,
+    meetingId: params.meetingId,
+    threadId: params.threadId,
+    allowWeb: params.allowWeb ?? false,
+    topK: params.topK ?? 8,
+  });
+}
+```
+
+**File**: `client/src/api/types/assistant.ts`
+
+```typescript
+export interface SegmentCitation {
+  meetingId: string;
+  segmentId: number;
+  startTime: number;
+  endTime: number;
+  text: string;
+  score: number;
+}
+
+export interface AskAssistantParams {
+  question: string;
+  meetingId?: string;
+  threadId?: string;
+  allowWeb?: boolean;
+  topK?: number;
+}
+
+export interface AskAssistantResponse {
+  answer: string;
+  citations: SegmentCitation[];
+  suggestedAnnotations: SuggestedAnnotation[];
+  threadId: string;
+}
+
+export interface SuggestedAnnotation {
+  text: string;
+  type: AnnotationType;
+  segmentIds: number[];
+}
+```
+
+---
+
+### Task 7: Create Ask UI Component
+
+**File**: `client/src/components/meeting/AskPanel.tsx`
+
+```tsx
+import { useState } from 'react';
+import { useAssistant } from '@/hooks/use-assistant';
+import { Button } from '@/components/ui/button';
+import { Textarea } from '@/components/ui/textarea';
+import { Card } from '@/components/ui/card';
+
+interface AskPanelProps {
+  meetingId: string;
+  onCitationClick?: (segmentId: number) => void;
+}
+
+export function AskPanel({ meetingId, onCitationClick }: AskPanelProps) {
+  const [question, setQuestion] = useState('');
+  const { ask, isLoading, response, error } = useAssistant();
+
+  const handleAsk = async () => {
+    if (!question.trim()) return;
+    await ask({ question, meetingId });
+  };
+
+  return (
+