This commit is contained in:
2026-01-22 16:15:56 +00:00
parent 19e39bed5a
commit ea0e8ee1e4
34 changed files with 4463 additions and 0 deletions

View File

@@ -0,0 +1,590 @@
# Sprint 25: LangGraph Foundation
> **Size**: L | **Owner**: Backend | **Phase**: 5 - Platform Evolution
> **Effort**: ~1 sprint | **Prerequisites**: Sprint 19 (Embeddings)
---
## Objective
Establish LangGraph infrastructure and wrap the existing summarization service as a proof of the pattern.
---
## Current State Analysis
### What Exists
| Component | Location | Status |
|-----------|----------|--------|
| Summarization service | `application/services/summarization/` | ✅ Working |
| Segment semantic search | `infrastructure/persistence/repositories/segment_repo.py` | ✅ Working |
| Cloud consent pattern | `application/services/summarization/_consent_manager.py` | ✅ Working |
| Usage event tracking | `application/observability/ports.py` | ✅ Working |
| gRPC mixin pattern | `grpc/_mixins/` | ✅ Working |
### What's Missing
| Component | Target Location | Sprint Task |
|-----------|-----------------|-------------|
| LangGraph dependencies | `pyproject.toml` | Task 1 |
| State schemas | `domain/ai/state.py` | Task 2 |
| Checkpointer factory | `infrastructure/ai/checkpointer.py` | Task 3 |
| Retrieval tools | `infrastructure/ai/tools/retrieval.py` | Task 4 |
| Synthesis tools | `infrastructure/ai/tools/synthesis.py` | Task 5 |
| Summarization graph | `infrastructure/ai/graphs/summarization.py` | Task 6 |
| AssistantService | `application/services/assistant/` | Task 7 |
---
## Implementation Tasks
### Task 1: Add LangGraph Dependencies
**File**: `pyproject.toml`
Add new optional dependency group:
```toml
langgraph = [
"langgraph>=0.2",
"langgraph-checkpoint-postgres>=2.0",
"langchain-core>=0.3",
]
```
Also add to `optional` and `all` groups.
**Verification**: `uv pip install -e ".[langgraph]"` succeeds
---
### Task 2: Create State Schemas
**Files to create**:
- `src/noteflow/domain/ai/__init__.py`
- `src/noteflow/domain/ai/state.py`
- `src/noteflow/domain/ai/citations.py`
- `src/noteflow/domain/ai/ports.py`
#### `state.py` - Input/Output/Internal Separation
```python
from __future__ import annotations
from typing import Annotated, TypedDict
from uuid import UUID
import operator
class AssistantInputState(TypedDict):
"""Public API input - what clients send."""
question: str
meeting_id: UUID | None
thread_id: str | None
allow_web: bool
top_k: int
class AssistantOutputState(TypedDict):
"""Public API output - what clients receive."""
answer: str
citations: list[dict]
suggested_annotations: list[dict]
thread_id: str
class AssistantInternalState(AssistantInputState):
"""Internal graph state - can evolve without breaking API."""
# Retrieval
retrieved_segment_ids: Annotated[list[int], operator.add]
retrieved_segments: list[dict]
# Synthesis
draft_answer: str
verification_passed: bool
# Tracking
loop_count: int
```
#### `citations.py` - Value Object
```python
from dataclasses import dataclass
from uuid import UUID
@dataclass(frozen=True)
class SegmentCitation:
"""Reference to transcript segment used as evidence."""
meeting_id: UUID
segment_id: int
start_time: float
end_time: float
text: str
    score: float = 0.0

    @property
    def duration(self) -> float:
        """Duration of the cited segment in seconds."""
        return self.end_time - self.start_time

    def __post_init__(self) -> None:
        if self.end_time < self.start_time:
            msg = "end_time must be >= start_time"
            raise ValueError(msg)
```
#### `ports.py` - Protocol Definition
```python
from typing import Protocol
from uuid import UUID
from noteflow.domain.ai.state import AssistantOutputState
class AssistantPort(Protocol):
"""Protocol for AI assistant operations."""
async def ask(
self,
question: str,
meeting_id: UUID | None = None,
thread_id: str | None = None,
allow_web: bool = False,
top_k: int = 8,
) -> AssistantOutputState:
...
```
**Verification**: `basedpyright src/noteflow/domain/ai/` passes
---
### Task 3: Create Checkpointer Factory
**File**: `src/noteflow/infrastructure/ai/checkpointer.py`
```python
from __future__ import annotations
from typing import TYPE_CHECKING, Final
if TYPE_CHECKING:
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from psycopg_pool import AsyncConnectionPool
CHECKPOINTER_POOL_SIZE: Final[int] = 5
async def create_checkpointer(
database_url: str,
pool_size: int = CHECKPOINTER_POOL_SIZE,
) -> AsyncPostgresSaver:
"""Create async Postgres checkpointer for LangGraph state persistence."""
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from psycopg_pool import AsyncConnectionPool
pool = AsyncConnectionPool(
conninfo=database_url,
max_size=pool_size,
kwargs={"autocommit": True},
)
checkpointer = AsyncPostgresSaver(pool)
await checkpointer.setup()
return checkpointer
```
**Verification**: Unit test with mock pool
---
### Task 4: Create Retrieval Tools
**File**: `src/noteflow/infrastructure/ai/tools/retrieval.py`
```python
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Protocol
from uuid import UUID
if TYPE_CHECKING:
from noteflow.domain.entities import Segment
class EmbedderProtocol(Protocol):
"""Protocol for text embedding."""
async def embed(self, text: str) -> list[float]: ...
class SegmentSearchProtocol(Protocol):
"""Protocol for semantic segment search."""
async def search_semantic(
self,
query_embedding: list[float],
meeting_id: UUID | None,
limit: int,
) -> list[tuple[Segment, float]]: ...
@dataclass
class RetrievalResult:
"""Result from segment retrieval."""
segment_id: int
meeting_id: UUID
text: str
start_time: float
end_time: float
score: float
async def retrieve_segments(
query: str,
embedder: EmbedderProtocol,
segment_repo: SegmentSearchProtocol,
meeting_id: UUID | None = None,
top_k: int = 8,
) -> list[RetrievalResult]:
"""Retrieve relevant transcript segments via semantic search."""
query_embedding = await embedder.embed(query)
results = await segment_repo.search_semantic(
query_embedding=query_embedding,
meeting_id=meeting_id,
limit=top_k,
)
return [
RetrievalResult(
segment_id=segment.segment_id,
meeting_id=segment.meeting_id,
text=segment.text,
start_time=segment.start_time,
end_time=segment.end_time,
score=score,
)
for segment, score in results
]
```
**Verification**: Unit test with mock embedder and repo
---
### Task 5: Create Synthesis Tools
**File**: `src/noteflow/infrastructure/ai/tools/synthesis.py`
```python
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from noteflow.infrastructure.ai.tools.retrieval import RetrievalResult
class LLMProtocol(Protocol):
"""Protocol for LLM completion."""
async def complete(self, prompt: str) -> str: ...
@dataclass
class SynthesisResult:
"""Result from answer synthesis."""
answer: str
cited_segment_ids: list[int]
SYNTHESIS_PROMPT_TEMPLATE = '''Answer the question based on the following transcript segments.
Cite specific segments by their ID when making claims.
Question: {question}
Segments:
{segments}
Answer (cite segment IDs in brackets like [1], [3]):'''
async def synthesize_answer(
question: str,
segments: list[RetrievalResult],
llm: LLMProtocol,
) -> SynthesisResult:
"""Generate answer with segment citations."""
segment_text = "\n".join(
f"[{s.segment_id}] ({s.start_time:.1f}s-{s.end_time:.1f}s): {s.text}"
for s in segments
)
prompt = SYNTHESIS_PROMPT_TEMPLATE.format(
question=question,
segments=segment_text,
)
answer = await llm.complete(prompt)
cited_ids = _extract_cited_ids(answer, [s.segment_id for s in segments])
return SynthesisResult(answer=answer, cited_segment_ids=cited_ids)
def _extract_cited_ids(answer: str, valid_ids: list[int]) -> list[int]:
"""Extract segment IDs cited in the answer."""
import re
pattern = r'\[(\d+)\]'
matches = re.findall(pattern, answer)
cited = [int(m) for m in matches if int(m) in valid_ids]
return list(dict.fromkeys(cited)) # Dedupe preserving order
```
**Verification**: Unit test with mock LLM
---
### Task 6: Create Summarization Graph Wrapper
**File**: `src/noteflow/infrastructure/ai/graphs/summarization.py`
This wraps the existing SummarizationService in a LangGraph graph as a proof of the pattern.
```python
from __future__ import annotations
from typing import TYPE_CHECKING, TypedDict
from langgraph.graph import END, START, CompiledStateGraph, StateGraph
if TYPE_CHECKING:
from uuid import UUID
from collections.abc import Sequence
from noteflow.domain.entities import Segment
from noteflow.application.services.summarization import SummarizationService
class SummarizationState(TypedDict):
"""State for summarization graph."""
meeting_id: UUID
segments: Sequence[Segment]
summary_text: str
key_points: list[dict]
action_items: list[dict]
def build_summarization_graph(
summarization_service: SummarizationService,
) -> CompiledStateGraph:
"""Build LangGraph wrapper around existing summarization service."""
async def summarize_node(state: SummarizationState) -> dict:
result = await summarization_service.summarize(
meeting_id=state["meeting_id"],
segments=state["segments"],
)
summary = result.summary
return {
"summary_text": summary.executive_summary,
"key_points": [
{"text": kp.text, "segment_ids": kp.segment_ids}
for kp in summary.key_points
],
"action_items": [
{"text": ai.text, "segment_ids": ai.segment_ids, "assignee": ai.assignee}
for ai in summary.action_items
],
}
builder = StateGraph(SummarizationState)
builder.add_node("summarize", summarize_node)
builder.add_edge(START, "summarize")
builder.add_edge("summarize", END)
return builder.compile()
```
**Verification**: Integration test with mock summarization service
---
### Task 7: Create AssistantService Shell
**Files**:
- `src/noteflow/application/services/assistant/__init__.py`
- `src/noteflow/application/services/assistant/assistant_service.py`
#### `__init__.py`
```python
from noteflow.application.services.assistant.assistant_service import AssistantService
__all__ = ["AssistantService"]
```
#### `assistant_service.py`
```python
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Final
from uuid import UUID
from noteflow.application.observability.ports import NullUsageEventSink, UsageEventSink
from noteflow.domain.ai.state import AssistantOutputState
from noteflow.infrastructure.logging import get_logger
if TYPE_CHECKING:
from collections.abc import Callable
from noteflow.domain.ports.unit_of_work import UnitOfWork
logger = get_logger(__name__)
DEFAULT_TOP_K: Final[int] = 8
THREAD_ID_PREFIX: Final[str] = "meeting"
def build_thread_id(meeting_id: UUID | None, user_id: UUID, graph_name: str) -> str:
"""Build deterministic thread_id for checkpointing."""
meeting_part = str(meeting_id) if meeting_id else "workspace"
return f"{THREAD_ID_PREFIX}:{meeting_part}:user:{user_id}:graph:{graph_name}:v1"
@dataclass
class AssistantService:
"""Orchestrates AI assistant workflows via LangGraph."""
uow_factory: Callable[[], UnitOfWork]
usage_events: UsageEventSink = field(default_factory=NullUsageEventSink)
async def ask(
self,
question: str,
user_id: UUID,
meeting_id: UUID | None = None,
thread_id: str | None = None,
allow_web: bool = False,
top_k: int = DEFAULT_TOP_K,
) -> AssistantOutputState:
"""Ask a question about meeting transcript(s)."""
effective_thread_id = thread_id or build_thread_id(
meeting_id, user_id, "meeting_qa"
)
logger.info(
"assistant_ask",
question_length=len(question),
meeting_id=str(meeting_id) if meeting_id else None,
thread_id=effective_thread_id,
)
# TODO: Implement in Sprint 26
return AssistantOutputState(
answer="Not implemented yet",
citations=[],
suggested_annotations=[],
thread_id=effective_thread_id,
)
```
**Verification**: Unit test for thread_id generation
---
## Test Plan
### Unit Tests (`tests/domain/ai/`)
```python
# test_citations.py
import pytest
from uuid import uuid4

from noteflow.domain.ai.citations import SegmentCitation
def test_segment_citation_creation():
citation = SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=0.0,
end_time=5.0,
text="Test segment",
score=0.95,
)
assert citation.duration == 5.0
def test_segment_citation_invalid_times():
with pytest.raises(ValueError):
SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=10.0,
end_time=5.0, # Invalid: end < start
text="Test",
)
```
### Unit Tests (`tests/infrastructure/ai/`)
```python
# test_retrieval.py
async def test_retrieve_segments_success(mock_embedder, mock_segment_repo):
mock_embedder.embed.return_value = [0.1, 0.2, 0.3]
mock_segment_repo.search_semantic.return_value = [
(sample_segment, 0.95),
]
results = await retrieve_segments(
query="test query",
embedder=mock_embedder,
segment_repo=mock_segment_repo,
top_k=5,
)
assert len(results) == 1
assert results[0].score == 0.95
# test_synthesis.py
async def test_synthesize_answer_extracts_citations(mock_llm):
mock_llm.complete.return_value = "The answer is X [1] and Y [3]."
result = await synthesize_answer(
question="What happened?",
segments=[...],
llm=mock_llm,
)
assert result.cited_segment_ids == [1, 3]
```
---
## Acceptance Criteria
- [ ] `uv pip install -e ".[langgraph]"` succeeds
- [ ] `basedpyright src/noteflow/domain/ai/` passes with 0 errors
- [ ] `basedpyright src/noteflow/infrastructure/ai/` passes with 0 errors
- [ ] `pytest tests/domain/ai/` passes
- [ ] `pytest tests/infrastructure/ai/` passes
- [ ] Existing summarization behavior unchanged (`pytest tests/application/services/test_summarization*`)
- [ ] `make quality` passes
---
## Rollback Plan
If issues arise:
1. Remove langgraph from dependencies
2. Delete `domain/ai/` and `infrastructure/ai/` directories
3. AssistantService is not wired to gRPC yet, so no API impact
---
## Files Created/Modified
| Action | Path |
|--------|------|
| Modified | `pyproject.toml` |
| Created | `src/noteflow/domain/ai/__init__.py` |
| Created | `src/noteflow/domain/ai/state.py` |
| Created | `src/noteflow/domain/ai/citations.py` |
| Created | `src/noteflow/domain/ai/ports.py` |
| Created | `src/noteflow/infrastructure/ai/__init__.py` |
| Created | `src/noteflow/infrastructure/ai/checkpointer.py` |
| Created | `src/noteflow/infrastructure/ai/tools/__init__.py` |
| Created | `src/noteflow/infrastructure/ai/tools/retrieval.py` |
| Created | `src/noteflow/infrastructure/ai/tools/synthesis.py` |
| Created | `src/noteflow/infrastructure/ai/graphs/__init__.py` |
| Created | `src/noteflow/infrastructure/ai/graphs/summarization.py` |
| Created | `src/noteflow/application/services/assistant/__init__.py` |
| Created | `src/noteflow/application/services/assistant/assistant_service.py` |
| Created | `tests/domain/ai/test_citations.py` |
| Created | `tests/domain/ai/test_state.py` |
| Created | `tests/infrastructure/ai/test_retrieval.py` |
| Created | `tests/infrastructure/ai/test_synthesis.py` |
| Created | `tests/infrastructure/ai/test_checkpointer.py` |

View File

@@ -0,0 +1,530 @@
# Sprint 26: Meeting Q&A MVP
> **Size**: L | **Owner**: Backend + Client | **Phase**: 5 - Platform Evolution
> **Effort**: ~1 sprint | **Prerequisites**: Sprint 25 (Foundation)
---
## Objective
Implement single-meeting Q&A with segment citations via gRPC API and React UI.
---
## Current State (After Sprint 25)
| Component | Status |
|-----------|--------|
| LangGraph infrastructure | ✅ Ready |
| State schemas | ✅ Ready |
| Retrieval tools | ✅ Ready |
| Synthesis tools | ✅ Ready |
| AssistantService shell | ✅ Ready |
---
## Implementation Tasks
### Task 1: Define MeetingQA Graph
**File**: `src/noteflow/infrastructure/ai/graphs/meeting_qa.py`
Graph flow: `retrieve → verify → synthesize`
```python
from __future__ import annotations

from typing import TypedDict
from uuid import UUID

from langgraph.graph import END, START, CompiledStateGraph, StateGraph

from noteflow.domain.ai.citations import SegmentCitation
from noteflow.infrastructure.ai.tools.retrieval import (
    EmbedderProtocol,
    RetrievalResult,
    SegmentSearchProtocol,
    retrieve_segments,
)
from noteflow.infrastructure.ai.tools.synthesis import LLMProtocol, synthesize_answer
class MeetingQAState(TypedDict):
# Input
question: str
meeting_id: UUID
top_k: int
# Internal
retrieved_segments: list[RetrievalResult]
verification_passed: bool
# Output
answer: str
citations: list[SegmentCitation]
def build_meeting_qa_graph(
embedder: EmbedderProtocol,
segment_repo: SegmentSearchProtocol,
llm: LLMProtocol,
verifier: CitationVerifier,
) -> CompiledStateGraph:
async def retrieve_node(state: MeetingQAState) -> dict:
results = await retrieve_segments(
query=state["question"],
embedder=embedder,
segment_repo=segment_repo,
meeting_id=state["meeting_id"],
top_k=state["top_k"],
)
return {"retrieved_segments": results}
async def verify_node(state: MeetingQAState) -> dict:
# Verify segments exist and are relevant
valid = len(state["retrieved_segments"]) > 0
return {"verification_passed": valid}
async def synthesize_node(state: MeetingQAState) -> dict:
if not state["verification_passed"]:
return {
"answer": "I couldn't find relevant information in this meeting.",
"citations": [],
}
result = await synthesize_answer(
question=state["question"],
segments=state["retrieved_segments"],
llm=llm,
)
citations = [
SegmentCitation(
meeting_id=state["meeting_id"],
segment_id=seg.segment_id,
start_time=seg.start_time,
end_time=seg.end_time,
text=seg.text,
score=seg.score,
)
for seg in state["retrieved_segments"]
if seg.segment_id in result.cited_segment_ids
]
return {"answer": result.answer, "citations": citations}
builder = StateGraph(MeetingQAState)
builder.add_node("retrieve", retrieve_node)
builder.add_node("verify", verify_node)
builder.add_node("synthesize", synthesize_node)
builder.add_edge(START, "retrieve")
builder.add_edge("retrieve", "verify")
builder.add_edge("verify", "synthesize")
builder.add_edge("synthesize", END)
return builder.compile()
```
---
### Task 2: Create Citation Verifier Node
**File**: `src/noteflow/infrastructure/ai/nodes/verification.py`
```python
from dataclasses import dataclass
@dataclass
class VerificationResult:
is_valid: bool
invalid_citation_indices: list[int]
reason: str | None = None
def verify_citations(
answer: str,
cited_ids: list[int],
available_ids: set[int],
) -> VerificationResult:
"""Verify all cited segment IDs exist in available segments."""
invalid = [i for i, cid in enumerate(cited_ids) if cid not in available_ids]
return VerificationResult(
is_valid=len(invalid) == 0,
invalid_citation_indices=invalid,
reason=f"Invalid citations: {invalid}" if invalid else None,
)
```
---
### Task 3: Add Proto Messages
**File**: `src/noteflow/grpc/proto/noteflow.proto`
```protobuf
// Add to existing proto
message SegmentCitation {
string meeting_id = 1;
int32 segment_id = 2;
float start_time = 3;
float end_time = 4;
string text = 5;
float score = 6;
}
message AskAssistantRequest {
string question = 1;
optional string meeting_id = 2;
optional string thread_id = 3;
bool allow_web = 4;
int32 top_k = 5;
}
message AskAssistantResponse {
string answer = 1;
repeated SegmentCitation citations = 2;
repeated SuggestedAnnotation suggested_annotations = 3;
string thread_id = 4;
}
message SuggestedAnnotation {
string text = 1;
AnnotationType type = 2;
repeated int32 segment_ids = 3;
}
// Add to NoteFlowService
rpc AskAssistant(AskAssistantRequest) returns (AskAssistantResponse);
```
After modifying proto:
```bash
python -m grpc_tools.protoc -I src/noteflow/grpc/proto \
--python_out=src/noteflow/grpc/proto \
--grpc_python_out=src/noteflow/grpc/proto \
src/noteflow/grpc/proto/noteflow.proto
python scripts/patch_grpc_stubs.py
```
---
### Task 4: Add gRPC Mixin
**File**: `src/noteflow/grpc/_mixins/assistant.py`
```python
from __future__ import annotations
from typing import TYPE_CHECKING
from noteflow.grpc.proto import noteflow_pb2 as pb
from noteflow.grpc._mixins.protocols import ServicerHost
if TYPE_CHECKING:
from grpc.aio import ServicerContext
class AssistantMixin:
"""gRPC mixin for AI assistant operations."""
async def AskAssistant(
self: ServicerHost,
request: pb.AskAssistantRequest,
context: ServicerContext,
) -> pb.AskAssistantResponse:
from uuid import UUID
meeting_id = UUID(request.meeting_id) if request.meeting_id else None
op_context = await self.get_operation_context(context)
result = await self.assistant_service.ask(
question=request.question,
user_id=op_context.user.id,
meeting_id=meeting_id,
thread_id=request.thread_id or None,
allow_web=request.allow_web,
top_k=request.top_k or 8,
)
return pb.AskAssistantResponse(
answer=result["answer"],
citations=[
pb.SegmentCitation(
meeting_id=str(c["meeting_id"]),
segment_id=c["segment_id"],
start_time=c["start_time"],
end_time=c["end_time"],
text=c["text"],
score=c["score"],
)
for c in result["citations"]
],
thread_id=result["thread_id"],
)
```
---
### Task 5: Add Rust Command
**File**: `client/src-tauri/src/commands/assistant.rs`
```rust
use crate::grpc::client::GrpcClient;
use crate::grpc::types::assistant::{AskAssistantRequest, AskAssistantResponse};
use tauri::State;
#[tauri::command]
pub async fn ask_assistant(
client: State<'_, GrpcClient>,
question: String,
meeting_id: Option<String>,
thread_id: Option<String>,
allow_web: bool,
top_k: i32,
) -> Result<AskAssistantResponse, String> {
let request = AskAssistantRequest {
question,
meeting_id,
thread_id,
allow_web,
top_k,
};
client
.ask_assistant(request)
.await
.map_err(|e| e.to_string())
}
```
---
### Task 6: Add TypeScript Adapter
**File**: `client/src/api/tauri-adapter.ts` (add method)
```typescript
async askAssistant(params: AskAssistantParams): Promise<AskAssistantResponse> {
return invoke('ask_assistant', {
question: params.question,
meetingId: params.meetingId,
threadId: params.threadId,
allowWeb: params.allowWeb ?? false,
topK: params.topK ?? 8,
});
}
```
**File**: `client/src/api/types/assistant.ts`
```typescript
export interface SegmentCitation {
meetingId: string;
segmentId: number;
startTime: number;
endTime: number;
text: string;
score: number;
}
export interface AskAssistantParams {
question: string;
meetingId?: string;
threadId?: string;
allowWeb?: boolean;
topK?: number;
}
export interface AskAssistantResponse {
answer: string;
citations: SegmentCitation[];
suggestedAnnotations: SuggestedAnnotation[];
threadId: string;
}
export interface SuggestedAnnotation {
text: string;
type: AnnotationType;
segmentIds: number[];
}
```
---
### Task 7: Create Ask UI Component
**File**: `client/src/components/meeting/AskPanel.tsx`
```tsx
import { useState } from 'react';
import { useAssistant } from '@/hooks/use-assistant';
import { Button } from '@/components/ui/button';
import { Textarea } from '@/components/ui/textarea';
import { Card } from '@/components/ui/card';
interface AskPanelProps {
meetingId: string;
onCitationClick?: (segmentId: number) => void;
}
export function AskPanel({ meetingId, onCitationClick }: AskPanelProps) {
const [question, setQuestion] = useState('');
const { ask, isLoading, response, error } = useAssistant();
const handleAsk = async () => {
if (!question.trim()) return;
await ask({ question, meetingId });
};
return (
<div className="flex flex-col gap-4 p-4">
<Textarea
placeholder="Ask a question about this meeting..."
value={question}
onChange={(e) => setQuestion(e.target.value)}
disabled={isLoading}
/>
<Button onClick={handleAsk} disabled={isLoading || !question.trim()}>
{isLoading ? 'Thinking...' : 'Ask'}
</Button>
{response && (
<Card className="p-4">
<p className="whitespace-pre-wrap">{response.answer}</p>
{response.citations.length > 0 && (
<div className="mt-4 border-t pt-2">
<p className="text-sm text-muted-foreground mb-2">Sources:</p>
{response.citations.map((citation) => (
<button
key={citation.segmentId}
onClick={() => onCitationClick?.(citation.segmentId)}
className="text-sm text-blue-600 hover:underline block"
>
[{citation.startTime.toFixed(1)}s] {citation.text.slice(0, 50)}...
</button>
))}
</div>
)}
</Card>
)}
{error && (
<p className="text-sm text-red-600">{error}</p>
)}
</div>
);
}
```
**File**: `client/src/hooks/use-assistant.ts`
```typescript
import { useState, useCallback } from 'react';
import { api } from '@/api';
import type { AskAssistantParams, AskAssistantResponse } from '@/api/types/assistant';
export function useAssistant() {
const [isLoading, setIsLoading] = useState(false);
const [response, setResponse] = useState<AskAssistantResponse | null>(null);
const [error, setError] = useState<string | null>(null);
const ask = useCallback(async (params: AskAssistantParams) => {
setIsLoading(true);
setError(null);
try {
const result = await api.askAssistant(params);
setResponse(result);
return result;
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to get answer');
throw err;
} finally {
setIsLoading(false);
}
}, []);
const reset = useCallback(() => {
setResponse(null);
setError(null);
}, []);
return { ask, isLoading, response, error, reset };
}
```
---
### Task 8: Implement AssistantService.ask()
Complete the implementation in `application/services/assistant/assistant_service.py`:
```python
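# Sketch of the completed method. Assumes AssistantService has gained
# `_embedder`, `_llm`, and `_verifier` fields (not in the Sprint 25 shell)
# plus imports for `asdict` (dataclasses) and `build_meeting_qa_graph`.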
async def ask(
self,
question: str,
user_id: UUID,
meeting_id: UUID | None = None,
thread_id: str | None = None,
allow_web: bool = False,
top_k: int = DEFAULT_TOP_K,
) -> AssistantOutputState:
"""Ask a question about meeting transcript(s)."""
effective_thread_id = thread_id or build_thread_id(
meeting_id, user_id, "meeting_qa"
)
async with self.uow_factory() as uow:
# Build and run graph
graph = build_meeting_qa_graph(
embedder=self._embedder,
segment_repo=uow.segments,
llm=self._llm,
verifier=self._verifier,
)
config = {"configurable": {"thread_id": effective_thread_id}}
result = await graph.ainvoke(
{
"question": question,
"meeting_id": meeting_id,
"top_k": top_k,
},
config,
)
# Record usage
self.usage_events.record_simple(
"assistant.ask",
meeting_id=str(meeting_id) if meeting_id else None,
question_length=len(question),
citation_count=len(result.get("citations", [])),
)
return AssistantOutputState(
answer=result["answer"],
citations=[asdict(c) for c in result["citations"]],
suggested_annotations=[],
thread_id=effective_thread_id,
)
```
---
## Acceptance Criteria
- [ ] Q&A returns answers with valid segment citations
- [ ] Citations link to correct timestamps in transcript
- [ ] Feature hidden when `rag_enabled=false` in project rules
- [ ] Thread ID persists conversation context
- [ ] `make quality` passes
- [ ] `pytest tests/grpc/test_assistant.py` passes
- [ ] UI component displays answer and clickable citations
---
## Files Created/Modified
| Action | Path |
|--------|------|
| Created | `src/noteflow/infrastructure/ai/graphs/meeting_qa.py` |
| Created | `src/noteflow/infrastructure/ai/nodes/verification.py` |
| Modified | `src/noteflow/grpc/proto/noteflow.proto` |
| Created | `src/noteflow/grpc/_mixins/assistant.py` |
| Modified | `src/noteflow/grpc/service.py` (add mixin) |
| Created | `client/src-tauri/src/commands/assistant.rs` |
| Modified | `client/src/api/tauri-adapter.ts` |
| Created | `client/src/api/types/assistant.ts` |
| Created | `client/src/components/meeting/AskPanel.tsx` |
| Created | `client/src/hooks/use-assistant.ts` |
| Modified | `src/noteflow/application/services/assistant/assistant_service.py` |

View File

@@ -0,0 +1,87 @@
# Sprint 27: Cross-Meeting RAG
> **Size**: M | **Owner**: Backend + Client | **Phase**: 5 - Platform Evolution
> **Effort**: ~1 sprint | **Prerequisites**: Sprint 26 (Meeting Q&A MVP)
---
## Objective
Enable workspace-scoped Q&A and annotation suggestions across multiple meetings.
---
## Key Tasks
### Task 1: Workspace-Scoped Semantic Search
Extend `SegmentRepository` with workspace-scoped search:
```python
async def search_semantic_workspace(
self,
query_embedding: list[float],
workspace_id: UUID,
project_id: UUID | None = None,
limit: int = 20,
) -> list[tuple[Segment, float]]:
"""Search segments across all meetings in workspace/project."""
```
### Task 2: WorkspaceQA Graph
Create `infrastructure/ai/graphs/workspace_qa.py`:
- Similar to MeetingQA but omits `meeting_id` filter
- Groups results by meeting for citation display
- Returns cross-meeting citations (see the grouping sketch below)
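A minimal sketch of the grouping step, assuming `SegmentCitation` from `domain/ai/citations.py`; the helper name is illustrative:
```python
from collections import defaultdict
from uuid import UUID

from noteflow.domain.ai.citations import SegmentCitation


def group_citations_by_meeting(
    citations: list[SegmentCitation],
) -> dict[UUID, list[SegmentCitation]]:
    """Group cross-meeting citations so the UI can render one block per meeting."""
    grouped: dict[UUID, list[SegmentCitation]] = defaultdict(list)
    for citation in citations:
        grouped[citation.meeting_id].append(citation)
    # Sort each meeting's citations by transcript position for stable display.
    return {
        meeting_id: sorted(group, key=lambda c: c.start_time)
        for meeting_id, group in grouped.items()
    }
```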
### Task 3: Annotation Suggester
Add annotation suggestion output to graph:
```python
from dataclasses import dataclass

@dataclass
class SuggestedAnnotation:
text: str
annotation_type: AnnotationType
segment_ids: list[int]
confidence: float
```
### Task 4: Conversation History
Implement thread persistence with the checkpointer (see the sketch after this list):
- Store conversation turns in graph state
- Support follow-up questions
- Maintain context across requests
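A wiring sketch, assuming the graph was compiled with the Sprint 25 checkpointer; `build_thread_id` comes from the assistant service, and the question strings are illustrative:
```python
from noteflow.application.services.assistant.assistant_service import build_thread_id


async def ask_with_history(graph, meeting_id, user_id) -> None:
    """Two turns on one thread_id: the checkpointer restores state between them."""
    config = {"configurable": {"thread_id": build_thread_id(meeting_id, user_id, "workspace_qa")}}
    # The first turn is checkpointed under the thread_id after the run.
    first = await graph.ainvoke({"question": "What did we decide on pricing?", "top_k": 8}, config)
    # Same config on the follow-up, so prior turns are restored and the graph
    # can resolve references like "that decision".
    followup = await graph.ainvoke({"question": "Who owns following up on that?", "top_k": 8}, config)
    print(first["answer"], followup["answer"])
```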
### Task 5: Apply Annotation Flow
UI flow to apply suggested annotations (a backend-side sketch follows the list):
1. Display suggested annotations in AskPanel
2. User clicks "Apply" on suggestion
3. Call existing `AddAnnotation` RPC
4. Update UI to show applied status
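On the backend, applying a suggestion can reuse the existing annotation path. A sketch with a hypothetical `AnnotationWriterProtocol` seam over the existing `AddAnnotation` RPC:
```python
from __future__ import annotations

from typing import Protocol
from uuid import UUID


class AnnotationWriterProtocol(Protocol):  # hypothetical seam over the AddAnnotation RPC
    async def add_annotation(
        self,
        meeting_id: UUID,
        text: str,
        annotation_type: str,
        segment_ids: list[int],
    ) -> None: ...


async def apply_suggestion(
    writer: AnnotationWriterProtocol,
    meeting_id: UUID,
    suggestion: SuggestedAnnotation,  # from Task 3
) -> None:
    """Persist an accepted suggestion through the existing annotation flow."""
    await writer.add_annotation(
        meeting_id=meeting_id,
        text=suggestion.text,
        annotation_type=str(suggestion.annotation_type),
        segment_ids=suggestion.segment_ids,
    )
```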
---
## Acceptance Criteria
- [ ] Cross-meeting queries return results from multiple meetings
- [ ] Suggested annotations can be applied with one click
- [ ] Conversation history persists across requests
- [ ] Follow-up questions reference previous context
- [ ] `make quality` passes
---
## Files Created/Modified
| Action | Path |
|--------|------|
| Modified | `src/noteflow/infrastructure/persistence/repositories/segment_repo.py` |
| Created | `src/noteflow/infrastructure/ai/graphs/workspace_qa.py` |
| Modified | `src/noteflow/application/services/assistant/assistant_service.py` |
| Modified | `client/src/components/meeting/AskPanel.tsx` |

View File

@@ -0,0 +1,146 @@
# Sprint 28: Advanced Capabilities
> **Size**: L | **Owner**: Backend + Client | **Phase**: 5 - Platform Evolution
> **Effort**: ~1 sprint | **Prerequisites**: Sprint 27 (Cross-Meeting RAG)
---
## Objective
Production hardening with streaming responses, caching, guardrails, and optional web search.
---
## Key Tasks
### Task 1: Streaming Responses
Implement `StreamAssistant` RPC for progressive answer generation:
```protobuf
rpc StreamAssistant(AskAssistantRequest) returns (stream AskAssistantResponse);
```
Use LangGraph's `astream` with `stream_mode="messages"`:
```python
# stream_mode="messages" yields (message_chunk, metadata) tuples
async for chunk, _metadata in graph.astream(state, config, stream_mode="messages"):
    yield pb.AskAssistantResponse(answer=chunk.content, partial=True)
    # assumes a new `bool partial = 5;` field on AskAssistantResponse
```
### Task 2: Embedding Cache
Create `infrastructure/ai/cache.py`:
- LRU cache for query embeddings
- Reduce redundant embedding calls
- Configurable TTL and max size
```python
@dataclass
class EmbeddingCache:
max_size: int = 1000
ttl_seconds: int = 3600
async def get_or_compute(
self,
text: str,
embedder: EmbedderProtocol,
) -> list[float]:
...
```
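Intended usage, based on the `CachedEmbedder` wrapper exported from `infrastructure/ai/__init__.py` (`base_embedder` is any `EmbedderProtocol` implementation):
```python
from noteflow.infrastructure.ai import CachedEmbedder

cached = CachedEmbedder(base_embedder, max_size=500, ttl_seconds=1800)
vec = await cached.embed("quarterly pricing decision")        # computed once
vec_again = await cached.embed("quarterly pricing decision")  # served from cache
print(cached.cache.get_stats().hit_rate)
```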
### Task 3: Content Guardrails
Create `infrastructure/ai/guardrails.py`:
- Input validation (length, content)
- Output filtering (PII, harmful content)
- Configurable rules per workspace
```python
from dataclasses import dataclass

@dataclass
class GuardrailResult:
allowed: bool
reason: str | None
filtered_content: str | None
async def check_input(text: str, rules: GuardrailRules) -> GuardrailResult:
...
async def filter_output(text: str, rules: GuardrailRules) -> GuardrailResult:
...
```
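A usage sketch built on the factories exported from `infrastructure/ai/__init__.py`; `question` and `raw_answer` are assumed local variables, and the fallback semantics of `filtered_content` are an assumption:
```python
from noteflow.infrastructure.ai import check_input, create_default_rules, filter_output

rules = create_default_rules()

checked = await check_input(question, rules)
if not checked.allowed:
    raise ValueError(checked.reason or "input blocked by guardrails")

filtered = await filter_output(raw_answer, rules)
answer = filtered.filtered_content if filtered.filtered_content is not None else raw_answer
```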
### Task 4: Web Search Node
Create `infrastructure/ai/nodes/web_search.py`:
- Optional node triggered by `allow_web=true`
- Integrate with web search API
- Merge web results with transcript evidence (see the sketch below)
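A minimal sketch of the provider seam, reusing the `WebSearchProvider` and `format_results_for_context` names the meeting-QA graph already imports; the result shape is an assumption:
```python
from __future__ import annotations

from dataclasses import dataclass
from typing import Protocol


@dataclass(frozen=True)
class WebSearchResult:  # assumed result shape
    title: str
    url: str
    snippet: str


class WebSearchProvider(Protocol):
    """Pluggable search backend (Tavily, SearxNG, ...)."""

    async def search(self, query: str, max_results: int) -> list[WebSearchResult]: ...


def format_results_for_context(results: list[WebSearchResult]) -> str:
    """Render web results as an extra evidence block for the synthesis prompt."""
    return "\n".join(f"[web] {r.title} ({r.url}): {r.snippet}" for r in results)
```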
### Task 5: AGENT_PROGRESS Tauri Event
Emit progress events for UI feedback:
```rust
// Emit during graph execution
app.emit_all("AGENT_PROGRESS", AgentProgressPayload {
stage: "retrieving",
progress: 0.3,
message: "Searching transcript...",
})?;
```
### Task 6: Interrupts for Approval
Implement LangGraph interrupts (see the sketch after this list) for:
- Web search approval
- Annotation creation approval
- Sensitive action confirmation
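A sketch of the approval gate, assuming a langgraph version that ships `langgraph.types.interrupt` and that the client resumes with `Command(resume={"action": ...})`; it reuses the domain types from `domain/ai/interrupts.py`:
```python
from langgraph.types import interrupt

from noteflow.domain.ai.interrupts import (
    InterruptAction,
    InterruptResponse,
    create_web_search_interrupt,
)


def web_search_approval_node(state: dict) -> dict:
    """Pause the run until the client approves or rejects the web search."""
    request = create_web_search_interrupt(query=state["question"], request_id="ws-1")
    # interrupt() suspends execution; the payload surfaces to the client, and the
    # value passed back via Command(resume=...) becomes interrupt()'s return value.
    raw = interrupt(request.to_dict())
    response = InterruptResponse(action=InterruptAction(raw["action"]))
    return {"web_search_approved": response.is_approved}
```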
### Task 7: Performance Optimization
- Batch embedding requests (sketch below)
- Parallel segment retrieval
- Connection pool tuning
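For the batching item, a sketch that fans embedding calls out concurrently; `EmbedderProtocol` is the existing retrieval protocol, so this composes with `EmbeddingCache` deduplication:
```python
import asyncio

from noteflow.infrastructure.ai.tools.retrieval import EmbedderProtocol


async def embed_batch(texts: list[str], embedder: EmbedderProtocol) -> list[list[float]]:
    """Embed many texts concurrently; pairs well with EmbeddingCache deduplication."""
    return list(await asyncio.gather(*(embedder.embed(text) for text in texts)))
```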
---
## Acceptance Criteria
- [ ] Streaming shows progressive answer generation in UI
- [ ] Cache reduces embedding latency by >50% for repeated queries
- [ ] Guardrails block inappropriate content
- [ ] Web search gated by `allow_web` flag
- [ ] Progress events update UI during long operations
- [ ] `make quality` passes
- [ ] E2E tests pass (`client/e2e/assistant.spec.ts`)
---
## Success Metrics
| Metric | Target |
| ------------------ | ------ |
| Q&A latency (p95) | < 3s |
| Citation accuracy | > 90% |
| Cache hit rate | > 60% |
| Hallucination rate | < 5% |
---
## Files Created/Modified
| Action | Path |
| -------- | ---------------------------------------------------- |
| Modified | `src/noteflow/grpc/proto/noteflow.proto` |
| Created | `src/noteflow/grpc/_mixins/assistant_streaming.py` |
| Created | `src/noteflow/infrastructure/ai/cache.py` |
| Created | `src/noteflow/infrastructure/ai/guardrails.py` |
| Created | `src/noteflow/infrastructure/ai/nodes/web_search.py` |
| Modified | `client/src-tauri/src/commands/assistant.rs` |
| Modified | `client/src/components/meeting/AskPanel.tsx` |
| Created | `client/e2e/assistant.spec.ts` |

View File

@@ -0,0 +1,38 @@
"""AI domain types for LangGraph workflows.
State schemas, citations, interrupts, and protocols for AI assistant functionality.
"""
from noteflow.domain.ai.citations import SegmentCitation
from noteflow.domain.ai.interrupts import (
InterruptAction,
InterruptConfig,
InterruptRequest,
InterruptResponse,
InterruptType,
create_annotation_interrupt,
create_sensitive_action_interrupt,
create_web_search_interrupt,
)
from noteflow.domain.ai.ports import AssistantPort
from noteflow.domain.ai.state import (
AssistantInputState,
AssistantInternalState,
AssistantOutputState,
)
__all__ = [
"AssistantInputState",
"AssistantInternalState",
"AssistantOutputState",
"AssistantPort",
"InterruptAction",
"InterruptConfig",
"InterruptRequest",
"InterruptResponse",
"InterruptType",
"SegmentCitation",
"create_annotation_interrupt",
"create_sensitive_action_interrupt",
"create_web_search_interrupt",
]

View File

@@ -0,0 +1,43 @@
"""Citation value objects for AI-generated responses."""
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from uuid import UUID
@dataclass(frozen=True)
class SegmentCitation:
"""Reference to a transcript segment used as evidence.
Links AI-generated claims to source transcript segments for verification.
"""
meeting_id: UUID
segment_id: int
start_time: float
end_time: float
text: str
score: float = 0.0
@property
def duration(self) -> float:
"""Duration of the cited segment in seconds."""
return self.end_time - self.start_time
def __post_init__(self) -> None:
if self.segment_id < 0:
msg = "segment_id must be non-negative"
raise ValueError(msg)
if self.start_time < 0:
msg = "start_time must be non-negative"
raise ValueError(msg)
if self.end_time < self.start_time:
msg = "end_time must be >= start_time"
raise ValueError(msg)
if self.score < 0 or self.score > 1:
msg = "score must be between 0 and 1"
raise ValueError(msg)

View File

@@ -0,0 +1,202 @@
"""Domain types for LangGraph human-in-the-loop interrupts.
Defines interrupt request/response types for approval workflows:
- Web search approval
- Annotation creation approval
- Sensitive action confirmation
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import StrEnum
from typing import Final
class InterruptType(StrEnum):
"""Types of human-in-the-loop interrupts."""
WEB_SEARCH_APPROVAL = "web_search_approval"
ANNOTATION_APPROVAL = "annotation_approval"
SENSITIVE_ACTION = "sensitive_action"
class InterruptAction(StrEnum):
"""Possible user actions for an interrupt."""
APPROVE = "approve"
REJECT = "reject"
MODIFY = "modify"
DEFAULT_WEB_SEARCH_OPTIONS: Final[tuple[str, ...]] = ("approve", "reject")
DEFAULT_ANNOTATION_OPTIONS: Final[tuple[str, ...]] = ("approve", "reject", "modify")
DEFAULT_SENSITIVE_OPTIONS: Final[tuple[str, ...]] = ("approve", "reject")
@dataclass(frozen=True)
class InterruptConfig:
"""Configuration for interrupt behavior."""
allow_ignore: bool = False
allow_modify: bool = False
timeout_seconds: float | None = None
@dataclass(frozen=True)
class InterruptRequest:
"""A request for human approval during graph execution.
Sent to the client when the graph hits an interrupt point.
Attributes:
interrupt_type: Category of interrupt (web_search, annotation, etc.)
message: Human-readable description of what needs approval.
context: Additional context data for the decision (query, entities, etc.)
options: Available response options.
config: Interrupt configuration (allow_ignore, timeout, etc.)
request_id: Unique identifier for this interrupt request.
"""
interrupt_type: InterruptType
message: str
context: dict[str, object] = field(default_factory=dict)
options: tuple[str, ...] = field(default_factory=lambda: ("approve", "reject"))
config: InterruptConfig = field(default_factory=InterruptConfig)
request_id: str = ""
def to_dict(self) -> dict[str, object]:
"""Convert to dictionary for serialization."""
return {
"interrupt_type": self.interrupt_type,
"message": self.message,
"context": self.context,
"options": list(self.options),
"config": {
"allow_ignore": self.config.allow_ignore,
"allow_modify": self.config.allow_modify,
"timeout_seconds": self.config.timeout_seconds,
},
"request_id": self.request_id,
}
@dataclass(frozen=True)
class InterruptResponse:
"""User's response to an interrupt request.
Returned from the client to resume graph execution.
Attributes:
action: The action taken (approve, reject, modify).
request_id: ID of the interrupt request being responded to.
modified_value: If action is MODIFY, the modified value.
user_message: Optional message from the user.
"""
action: InterruptAction
request_id: str = ""
modified_value: dict[str, object] | None = None
user_message: str | None = None
@property
def is_approved(self) -> bool:
"""Check if the action was approved."""
return self.action == InterruptAction.APPROVE
@property
def is_rejected(self) -> bool:
"""Check if the action was rejected."""
return self.action == InterruptAction.REJECT
@property
def is_modified(self) -> bool:
"""Check if the action was modified."""
return self.action == InterruptAction.MODIFY
def to_dict(self) -> dict[str, object]:
"""Convert to dictionary for serialization."""
result: dict[str, object] = {
"action": self.action,
"request_id": self.request_id,
}
if self.modified_value is not None:
result["modified_value"] = self.modified_value
if self.user_message is not None:
result["user_message"] = self.user_message
return result
def create_web_search_interrupt(
query: str,
request_id: str,
*,
allow_modify: bool = False,
) -> InterruptRequest:
"""Create an interrupt request for web search approval.
Args:
query: The search query to be executed.
request_id: Unique identifier for this request.
allow_modify: Whether the user can modify the query.
Returns:
InterruptRequest configured for web search approval.
"""
return InterruptRequest(
interrupt_type=InterruptType.WEB_SEARCH_APPROVAL,
message=f"Allow web search for additional context? Query: {query[:100]}",
context={"query": query},
options=("approve", "reject", "modify") if allow_modify else DEFAULT_WEB_SEARCH_OPTIONS,
config=InterruptConfig(allow_modify=allow_modify),
request_id=request_id,
)
def create_annotation_interrupt(
annotations: list[dict[str, object]],
request_id: str,
) -> InterruptRequest:
"""Create an interrupt request for annotation approval.
Args:
annotations: List of suggested annotations to approve.
request_id: Unique identifier for this request.
Returns:
InterruptRequest configured for annotation approval.
"""
count = len(annotations)
return InterruptRequest(
interrupt_type=InterruptType.ANNOTATION_APPROVAL,
message=f"Apply {count} suggested annotation(s)?",
context={"annotations": annotations, "count": count},
options=DEFAULT_ANNOTATION_OPTIONS,
config=InterruptConfig(allow_modify=True, allow_ignore=True),
request_id=request_id,
)
def create_sensitive_action_interrupt(
action_name: str,
action_description: str,
request_id: str,
) -> InterruptRequest:
"""Create an interrupt request for sensitive action confirmation.
Args:
action_name: Name of the sensitive action.
action_description: Description of what the action does.
request_id: Unique identifier for this request.
Returns:
InterruptRequest configured for sensitive action confirmation.
"""
return InterruptRequest(
interrupt_type=InterruptType.SENSITIVE_ACTION,
message=f"Confirm action: {action_name}",
context={"action_name": action_name, "description": action_description},
options=DEFAULT_SENSITIVE_OPTIONS,
config=InterruptConfig(allow_ignore=False),
request_id=request_id,
)

View File

@@ -0,0 +1,26 @@
"""Protocol definitions for AI assistant operations."""
from __future__ import annotations
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from uuid import UUID
from noteflow.domain.ai.state import AssistantOutputState
class AssistantPort(Protocol):
"""Protocol for AI assistant operations."""
async def ask(
self,
question: str,
user_id: UUID,
meeting_id: UUID | None = None,
thread_id: str | None = None,
allow_web: bool = False,
top_k: int = 8,
) -> AssistantOutputState:
"""Ask a question about meeting transcript(s)."""
...

View File

@@ -0,0 +1,39 @@
"""State schemas for LangGraph AI workflows.
Separates Input/Output (public API) from Internal (can evolve freely).
"""
from __future__ import annotations
import operator
from typing import Annotated, TypedDict
from uuid import UUID
class AssistantInputState(TypedDict):
"""Public API input - what clients send."""
question: str
meeting_id: UUID | None
thread_id: str | None
allow_web: bool
top_k: int
class AssistantOutputState(TypedDict):
"""Public API output - what clients receive."""
answer: str
citations: list[dict[str, object]]
suggested_annotations: list[dict[str, object]]
thread_id: str
class AssistantInternalState(AssistantInputState):
"""Internal graph state - can evolve without breaking API."""
retrieved_segment_ids: Annotated[list[int], operator.add]
retrieved_segments: list[dict[str, object]]
draft_answer: str
verification_passed: bool
loop_count: int

View File

@@ -0,0 +1,45 @@
"""AI infrastructure components for LangGraph workflows."""
from noteflow.infrastructure.ai.cache import (
CachedEmbedder,
EmbeddingCache,
EmbeddingCacheStats,
)
from noteflow.infrastructure.ai.checkpointer import create_checkpointer
from noteflow.infrastructure.ai.guardrails import (
GuardrailResult,
GuardrailRules,
GuardrailViolation,
check_input,
create_default_rules,
create_strict_rules,
filter_output,
)
from noteflow.infrastructure.ai.interrupts import (
InterruptHandler,
check_annotation_approval,
check_web_search_approval,
create_resume_command,
request_annotation_approval,
request_web_search_approval,
)
__all__ = [
"CachedEmbedder",
"EmbeddingCache",
"EmbeddingCacheStats",
"GuardrailResult",
"GuardrailRules",
"GuardrailViolation",
"InterruptHandler",
"check_annotation_approval",
"check_input",
"check_web_search_approval",
"create_checkpointer",
"create_default_rules",
"create_resume_command",
"create_strict_rules",
"filter_output",
"request_annotation_approval",
"request_web_search_approval",
]

View File

@@ -0,0 +1,212 @@
"""Embedding cache with LRU eviction and TTL expiration."""
from __future__ import annotations
import asyncio
import hashlib
import time
from collections import OrderedDict
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Final
from noteflow.infrastructure.logging import get_logger
if TYPE_CHECKING:
from noteflow.infrastructure.ai.tools.retrieval import EmbedderProtocol
logger = get_logger(__name__)
DEFAULT_MAX_SIZE: Final[int] = 1000
DEFAULT_TTL_SECONDS: Final[int] = 3600
HASH_ALGORITHM: Final[str] = "sha256"
@dataclass(frozen=True)
class CacheEntry:
"""Cached embedding with creation timestamp."""
embedding: tuple[float, ...]
created_at: float
def is_expired(self, ttl_seconds: float, current_time: float) -> bool:
"""Check if entry has expired based on TTL."""
return (current_time - self.created_at) > ttl_seconds
@dataclass
class EmbeddingCacheStats:
"""Statistics for cache performance monitoring."""
hits: int = 0
misses: int = 0
evictions: int = 0
expirations: int = 0
@property
def hit_rate(self) -> float:
"""Calculate cache hit rate."""
total = self.hits + self.misses
return self.hits / total if total > 0 else 0.0
@dataclass
class EmbeddingCache:
"""LRU cache for text embeddings with TTL expiration and deduplication."""
max_size: int = DEFAULT_MAX_SIZE
ttl_seconds: int = DEFAULT_TTL_SECONDS
_cache: OrderedDict[str, CacheEntry] = field(default_factory=OrderedDict)
_lock: asyncio.Lock = field(default_factory=asyncio.Lock)
_stats: EmbeddingCacheStats = field(default_factory=EmbeddingCacheStats)
_in_flight: dict[str, asyncio.Future[list[float]]] = field(default_factory=dict)
def _compute_key(self, text: str) -> str:
"""Compute cache key from text using hash."""
return hashlib.new(HASH_ALGORITHM, text.encode("utf-8")).hexdigest()
async def get_or_compute(
self,
text: str,
embedder: EmbedderProtocol,
) -> list[float]:
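        """Return a cached embedding, computing it once and deduplicating concurrent requests."""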
key = self._compute_key(text)
current_time = time.monotonic()
existing_future: asyncio.Future[list[float]] | None = None
async with self._lock:
if key in self._cache:
entry = self._cache[key]
if not entry.is_expired(self.ttl_seconds, current_time):
self._cache.move_to_end(key)
self._stats.hits += 1
logger.debug("cache_hit", key=key[:16])
return list(entry.embedding)
del self._cache[key]
self._stats.expirations += 1
logger.debug("cache_expired", key=key[:16])
if key in self._in_flight:
logger.debug("cache_in_flight_join", key=key[:16])
existing_future = self._in_flight[key]
if existing_future is not None:
return list(await existing_future)
new_future: asyncio.Future[list[float]] = asyncio.get_running_loop().create_future()
async with self._lock:
if key in self._in_flight:
existing_future = self._in_flight[key]
else:
self._stats.misses += 1
self._in_flight[key] = new_future
if existing_future is not None:
return list(await existing_future)
try:
embedding = await embedder.embed(text)
except Exception:
async with self._lock:
_ = self._in_flight.pop(key, None)
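        # Waiters joined on this future see CancelledError; unlike the original
        # error, it is not logged as "exception was never retrieved" if unawaited.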
new_future.set_exception(asyncio.CancelledError())
raise
async with self._lock:
_ = self._in_flight.pop(key, None)
while len(self._cache) >= self.max_size:
evicted_key, _ = self._cache.popitem(last=False)
self._stats.evictions += 1
logger.debug("cache_eviction", evicted_key=evicted_key[:16])
self._cache[key] = CacheEntry(
embedding=tuple(embedding),
created_at=current_time,
)
logger.debug("cache_store", key=key[:16])
new_future.set_result(embedding)
return embedding
async def get(self, text: str) -> list[float] | None:
"""Get embedding from cache without computing.
Args:
text: Text to look up.
Returns:
Embedding if cached and not expired, None otherwise.
"""
key = self._compute_key(text)
current_time = time.monotonic()
async with self._lock:
if key in self._cache:
entry = self._cache[key]
if not entry.is_expired(self.ttl_seconds, current_time):
self._cache.move_to_end(key)
return list(entry.embedding)
else:
del self._cache[key]
self._stats.expirations += 1
return None
async def clear(self) -> int:
"""Clear all cached entries.
Returns:
Number of entries cleared.
"""
async with self._lock:
count = len(self._cache)
self._cache.clear()
logger.info("cache_cleared", entries_cleared=count)
return count
async def size(self) -> int:
"""Get current number of cached entries."""
async with self._lock:
return len(self._cache)
def get_stats(self) -> EmbeddingCacheStats:
"""Get cache statistics (not async - reads are atomic)."""
return EmbeddingCacheStats(
hits=self._stats.hits,
misses=self._stats.misses,
evictions=self._stats.evictions,
expirations=self._stats.expirations,
)
class CachedEmbedder:
"""Wrapper that adds caching to any EmbedderProtocol implementation.
Example:
base_embedder = MyEmbedder()
cached = CachedEmbedder(base_embedder, max_size=500, ttl_seconds=1800)
embedding = await cached.embed("hello world")
"""
_embedder: EmbedderProtocol
_cache: EmbeddingCache
def __init__(
self,
embedder: EmbedderProtocol,
max_size: int = DEFAULT_MAX_SIZE,
ttl_seconds: int = DEFAULT_TTL_SECONDS,
) -> None:
self._embedder = embedder
self._cache = EmbeddingCache(max_size=max_size, ttl_seconds=ttl_seconds)
async def embed(self, text: str) -> list[float]:
"""Embed text with caching."""
return await self._cache.get_or_compute(text, self._embedder)
@property
def cache(self) -> EmbeddingCache:
"""Access underlying cache for stats/management."""
return self._cache

View File

@@ -0,0 +1,56 @@
"""PostgreSQL checkpointer factory for LangGraph state persistence."""
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Final
if TYPE_CHECKING:
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from psycopg_pool import AsyncConnectionPool
CHECKPOINTER_POOL_SIZE: Final[int] = 5
@dataclass
class CheckpointerResult:
"""Wraps checkpointer with pool lifecycle management to prevent connection leaks."""
checkpointer: AsyncPostgresSaver
_pool: AsyncConnectionPool
async def close(self) -> None:
# psycopg_pool.AsyncConnectionPool.close() exists at runtime but type stubs
# are incomplete. Use getattr to satisfy the type checker.
close_fn = getattr(self._pool, "close", None)
if close_fn is not None:
await close_fn()
async def __aenter__(self) -> AsyncPostgresSaver:
return self.checkpointer
async def __aexit__(
self,
exc_type: type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: object,
) -> None:
await self.close()
async def create_checkpointer(
database_url: str,
pool_size: int = CHECKPOINTER_POOL_SIZE,
) -> CheckpointerResult:
"""Create async Postgres checkpointer for LangGraph state persistence."""
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from psycopg_pool import AsyncConnectionPool
pool = AsyncConnectionPool(
conninfo=database_url,
max_size=pool_size,
kwargs={"autocommit": True},
)
checkpointer = AsyncPostgresSaver(pool)
await checkpointer.setup()
return CheckpointerResult(checkpointer=checkpointer, _pool=pool)

View File

@@ -0,0 +1,51 @@
"""LangGraph workflow definitions."""
from noteflow.infrastructure.ai.graphs.meeting_qa import (
MEETING_QA_GRAPH_NAME,
MEETING_QA_GRAPH_VERSION,
MeetingQAConfig,
MeetingQAInputState,
MeetingQAInternalState,
MeetingQAOutputState,
build_meeting_qa_graph,
)
from noteflow.infrastructure.ai.graphs.summarization import (
SUMMARIZATION_GRAPH_NAME,
SUMMARIZATION_GRAPH_VERSION,
SummarizationInputState,
SummarizationOutputState,
SummarizationState,
build_summarization_graph,
)
from noteflow.infrastructure.ai.graphs.workspace_qa import (
WORKSPACE_QA_GRAPH_NAME,
WORKSPACE_QA_GRAPH_VERSION,
WorkspaceQAConfig,
WorkspaceQAInputState,
WorkspaceQAInternalState,
WorkspaceQAOutputState,
build_workspace_qa_graph,
)
__all__ = [
"MEETING_QA_GRAPH_NAME",
"MEETING_QA_GRAPH_VERSION",
"MeetingQAConfig",
"MeetingQAInputState",
"MeetingQAInternalState",
"MeetingQAOutputState",
"SUMMARIZATION_GRAPH_NAME",
"SUMMARIZATION_GRAPH_VERSION",
"SummarizationInputState",
"SummarizationOutputState",
"SummarizationState",
"WORKSPACE_QA_GRAPH_NAME",
"WORKSPACE_QA_GRAPH_VERSION",
"WorkspaceQAConfig",
"WorkspaceQAInputState",
"WorkspaceQAInternalState",
"WorkspaceQAOutputState",
"build_meeting_qa_graph",
"build_summarization_graph",
"build_workspace_qa_graph",
]

View File

@@ -0,0 +1,193 @@
"""Meeting Q&A graph for single-meeting question answering with citations."""
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Final, TypedDict
if TYPE_CHECKING:
from langgraph.graph import CompiledStateGraph
from noteflow.domain.ai.citations import SegmentCitation
from noteflow.domain.value_objects import MeetingId
from noteflow.infrastructure.ai.nodes.annotation_suggester import SuggestedAnnotation
from noteflow.infrastructure.ai.nodes.web_search import WebSearchProvider
from noteflow.infrastructure.ai.tools.retrieval import (
EmbedderProtocol,
RetrievalResult,
SegmentSearchProtocol,
)
from noteflow.infrastructure.ai.tools.synthesis import LLMProtocol
MEETING_QA_GRAPH_NAME: Final[str] = "meeting_qa"
MEETING_QA_GRAPH_VERSION: Final[int] = 2
NO_INFORMATION_ANSWER: Final[str] = "I couldn't find relevant information in this meeting."
@dataclass(frozen=True)
class MeetingQAConfig:
enable_web_search: bool = False
require_web_approval: bool = True
require_annotation_approval: bool = False
class MeetingQAInputState(TypedDict):
question: str
meeting_id: MeetingId
top_k: int
class MeetingQAOutputState(TypedDict):
answer: str
citations: list[SegmentCitation]
suggested_annotations: list[SuggestedAnnotation]
class MeetingQAInternalState(MeetingQAInputState, MeetingQAOutputState):
retrieved_segments: list[RetrievalResult]
verification_passed: bool
web_search_approved: bool
web_context: str
annotations_approved: bool
def build_meeting_qa_graph(
embedder: EmbedderProtocol,
segment_repo: SegmentSearchProtocol,
llm: LLMProtocol,
*,
web_search_provider: WebSearchProvider | None = None,
config: MeetingQAConfig | None = None,
checkpointer: object | None = None,
) -> CompiledStateGraph[MeetingQAInternalState]:
"""Build a Q&A graph for single-meeting questions with segment citations.
Graph flow (with web search): retrieve -> verify -> [web_search_approval] -> [web_search] -> synthesize
Graph flow (without): retrieve -> verify -> synthesize
Args:
embedder: Protocol for generating text embeddings.
segment_repo: Protocol for semantic segment search.
llm: Protocol for LLM text completion.
web_search_provider: Optional web search provider for augmentation.
config: Graph configuration for features/interrupts.
checkpointer: Optional checkpointer for interrupt support.
Returns:
Compiled graph that accepts question/meeting_id and returns answer/citations.
"""
from langgraph.graph import END, START, StateGraph
from noteflow.domain.ai.citations import SegmentCitation
from noteflow.infrastructure.ai.interrupts import check_web_search_approval
from noteflow.infrastructure.ai.nodes.annotation_suggester import (
extract_annotations_from_answer,
)
from noteflow.infrastructure.ai.nodes.web_search import (
WebSearchConfig,
derive_search_query,
execute_web_search,
format_results_for_context,
)
from noteflow.infrastructure.ai.tools.retrieval import retrieve_segments
from noteflow.infrastructure.ai.tools.synthesis import synthesize_answer
effective_config = config or MeetingQAConfig()
async def retrieve_node(state: MeetingQAInternalState) -> dict[str, object]:
results = await retrieve_segments(
query=state["question"],
embedder=embedder,
segment_repo=segment_repo,
meeting_id=state["meeting_id"],
top_k=state["top_k"],
)
return {"retrieved_segments": results}
async def verify_node(state: MeetingQAInternalState) -> dict[str, object]:
has_segments = len(state["retrieved_segments"]) > 0
return {"verification_passed": has_segments}
def web_search_approval_node(state: MeetingQAInternalState) -> dict[str, object]:
if not effective_config.enable_web_search or web_search_provider is None:
return {"web_search_approved": False}
if not effective_config.require_web_approval:
return {"web_search_approved": True}
query = derive_search_query(state["question"])
approved = check_web_search_approval(query, require_approval=True)
return {"web_search_approved": approved}
async def web_search_node(state: MeetingQAInternalState) -> dict[str, object]:
if not state.get("web_search_approved", False) or web_search_provider is None:
return {"web_context": ""}
query = derive_search_query(state["question"])
search_config = WebSearchConfig(enabled=True, require_approval=False)
response = await execute_web_search(query, web_search_provider, search_config)
context = format_results_for_context(response.results)
return {"web_context": context}
async def synthesize_node(state: MeetingQAInternalState) -> dict[str, object]:
if not state["verification_passed"]:
return {
"answer": NO_INFORMATION_ANSWER,
"citations": [],
"suggested_annotations": [],
}
result = await synthesize_answer(
question=state["question"],
segments=state["retrieved_segments"],
llm=llm,
)
citations = [
SegmentCitation(
meeting_id=state["meeting_id"],
segment_id=seg.segment_id,
start_time=seg.start_time,
end_time=seg.end_time,
text=seg.text,
score=seg.score,
)
for seg in state["retrieved_segments"]
if seg.segment_id in result.cited_segment_ids
]
suggested_annotations = extract_annotations_from_answer(
answer=result.answer,
cited_segment_ids=tuple(result.cited_segment_ids),
)
return {
"answer": result.answer,
"citations": citations,
"suggested_annotations": suggested_annotations,
}
builder: StateGraph[MeetingQAInternalState] = StateGraph(MeetingQAInternalState)
builder.add_node("retrieve", retrieve_node)
builder.add_node("verify", verify_node)
builder.add_node("synthesize", synthesize_node)
if effective_config.enable_web_search and web_search_provider is not None:
builder.add_node("web_search_approval", web_search_approval_node)
builder.add_node("web_search", web_search_node)
builder.add_edge(START, "retrieve")
builder.add_edge("retrieve", "verify")
builder.add_edge("verify", "web_search_approval")
builder.add_edge("web_search_approval", "web_search")
builder.add_edge("web_search", "synthesize")
builder.add_edge("synthesize", END)
else:
builder.add_edge(START, "retrieve")
builder.add_edge("retrieve", "verify")
builder.add_edge("verify", "synthesize")
builder.add_edge("synthesize", END)
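    # Fetch compile() via getattr to sidestep strict typing on the checkpointer
    # argument (deliberately passed as `object` here).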
compile_method = getattr(builder, "compile")
compiled: CompiledStateGraph[MeetingQAInternalState] = compile_method(checkpointer=checkpointer)
return compiled

View File

@@ -0,0 +1,103 @@
"""LangGraph wrapper for existing SummarizationService.
Demonstrates the LangGraph integration pattern by wrapping the existing
summarization infrastructure in a StateGraph.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Final, TypedDict, cast
if TYPE_CHECKING:
from collections.abc import Sequence
from uuid import UUID
from langgraph.graph import CompiledStateGraph
from noteflow.application.services.summarization import SummarizationService
from noteflow.domain.entities import Segment
from noteflow.domain.value_objects import MeetingId
SUMMARIZATION_GRAPH_NAME: Final[str] = "summarization"
SUMMARIZATION_GRAPH_VERSION: Final[int] = 1
class SummarizationInputState(TypedDict):
"""Input state for summarization graph."""
meeting_id: UUID
segments: Sequence[Segment]
class SummarizationOutputState(TypedDict):
"""Output state from summarization graph."""
summary_text: str
key_points: list[dict[str, object]]
action_items: list[dict[str, object]]
provider_used: str
tokens_used: int | None
latency_ms: float | None
class SummarizationState(SummarizationInputState, SummarizationOutputState):
"""Full internal state for summarization graph."""
def build_summarization_graph(
summarization_service: SummarizationService,
) -> CompiledStateGraph[SummarizationState]:
"""Build LangGraph wrapper around existing SummarizationService.
This demonstrates the pattern of wrapping existing services in LangGraph
graphs, enabling future expansion (checkpointing, conditional branching,
human-in-the-loop, etc.) while maintaining backward compatibility.
Args:
summarization_service: The existing summarization service to wrap.
Returns:
A compiled StateGraph that can be invoked with meeting_id and segments.
"""
from langgraph.graph import END, START, StateGraph
async def summarize_node(state: SummarizationState) -> dict[str, object]:
# MeetingId is a NewType of UUID, so the cast below is safe
meeting_id = cast("MeetingId", state["meeting_id"])
result = await summarization_service.summarize(
meeting_id=meeting_id,
segments=state["segments"],
)
summary = result.summary
return {
"summary_text": summary.executive_summary,
"key_points": [
{
"text": kp.text,
"segment_ids": kp.segment_ids,
"start_time": kp.start_time,
"end_time": kp.end_time,
}
for kp in summary.key_points
],
"action_items": [
{
"text": ai.text,
"segment_ids": ai.segment_ids,
"assignee": ai.assignee,
"start_time": ai.start_time,
"end_time": ai.end_time,
}
for ai in summary.action_items
],
"provider_used": result.provider_used,
"tokens_used": summary.tokens_used,
"latency_ms": summary.latency_ms,
}
builder = StateGraph(SummarizationState)
builder.add_node("summarize", summarize_node)
builder.add_edge(START, "summarize")
builder.add_edge("summarize", END)
return builder.compile()
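
A hedged usage sketch for the wrapper above: `service` is assumed to be a wired `SummarizationService` and `segments` a sequence of persisted `Segment` entities; neither is defined here, and the call runs inside an async context.

```python
from uuid import uuid4

from noteflow.infrastructure.ai.graphs.summarization import build_summarization_graph

graph = build_summarization_graph(summarization_service=service)

# ainvoke() runs the single summarize node and returns the merged state,
# so the output keys sit alongside the input keys.
result = await graph.ainvoke({"meeting_id": uuid4(), "segments": segments})
print(result["summary_text"], result["provider_used"])
```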

View File

@@ -0,0 +1,198 @@
"""Workspace Q&A graph for cross-meeting question answering with citations."""
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING, Final, TypedDict
if TYPE_CHECKING:
from uuid import UUID
from langgraph.checkpoint.base import BaseCheckpointSaver
from langgraph.graph import CompiledStateGraph
from noteflow.domain.ai.citations import SegmentCitation
from noteflow.infrastructure.ai.nodes.annotation_suggester import SuggestedAnnotation
from noteflow.infrastructure.ai.nodes.web_search import WebSearchProvider
from noteflow.infrastructure.ai.tools.retrieval import (
EmbedderProtocol,
RetrievalResult,
WorkspaceSegmentSearchProtocol,
)
from noteflow.infrastructure.ai.tools.synthesis import LLMProtocol
WORKSPACE_QA_GRAPH_NAME: Final[str] = "workspace_qa"
WORKSPACE_QA_GRAPH_VERSION: Final[int] = 2
NO_INFORMATION_ANSWER: Final[str] = "I couldn't find relevant information across your meetings."
@dataclass(frozen=True)
class WorkspaceQAConfig:
enable_web_search: bool = False
require_web_approval: bool = True
require_annotation_approval: bool = False
class WorkspaceQAInputState(TypedDict):
question: str
workspace_id: UUID
project_id: UUID | None
top_k: int
class WorkspaceQAOutputState(TypedDict):
answer: str
citations: list[SegmentCitation]
suggested_annotations: list[SuggestedAnnotation]
class WorkspaceQAInternalState(WorkspaceQAInputState, WorkspaceQAOutputState):
retrieved_segments: list[RetrievalResult]
verification_passed: bool
web_search_approved: bool
web_context: str
def build_workspace_qa_graph(
embedder: EmbedderProtocol,
segment_repo: WorkspaceSegmentSearchProtocol,
llm: LLMProtocol,
*,
web_search_provider: WebSearchProvider | None = None,
config: WorkspaceQAConfig | None = None,
checkpointer: BaseCheckpointSaver[str] | None = None,
) -> CompiledStateGraph[WorkspaceQAInternalState]:
"""Build Q&A graph for cross-meeting questions with segment citations.
Graph flow (with web search): retrieve -> verify -> [web_search_approval] -> [web_search] -> synthesize
Graph flow (without): retrieve -> verify -> synthesize
Args:
embedder: Protocol for generating text embeddings.
segment_repo: Protocol for workspace-scoped semantic segment search.
llm: Protocol for LLM text completion.
web_search_provider: Optional web search provider for augmentation.
config: Graph configuration for features/interrupts.
checkpointer: Optional checkpointer for interrupt support.
Returns:
Compiled graph that accepts question/workspace_id and returns answer/citations.
"""
from langgraph.graph import END, START, StateGraph
from noteflow.domain.ai.citations import SegmentCitation
from noteflow.infrastructure.ai.interrupts import check_web_search_approval
from noteflow.infrastructure.ai.nodes.annotation_suggester import (
extract_annotations_from_answer,
)
from noteflow.infrastructure.ai.nodes.web_search import (
WebSearchConfig,
derive_search_query,
execute_web_search,
format_results_for_context,
)
from noteflow.infrastructure.ai.tools.retrieval import retrieve_segments_workspace
from noteflow.infrastructure.ai.tools.synthesis import synthesize_answer
effective_config = config or WorkspaceQAConfig()
async def retrieve_node(state: WorkspaceQAInternalState) -> dict[str, object]:
results = await retrieve_segments_workspace(
query=state["question"],
embedder=embedder,
segment_repo=segment_repo,
workspace_id=state["workspace_id"],
project_id=state["project_id"],
top_k=state["top_k"],
)
return {"retrieved_segments": results}
async def verify_node(state: WorkspaceQAInternalState) -> dict[str, object]:
has_segments = len(state["retrieved_segments"]) > 0
return {"verification_passed": has_segments}
def web_search_approval_node(state: WorkspaceQAInternalState) -> dict[str, object]:
if not effective_config.enable_web_search or web_search_provider is None:
return {"web_search_approved": False}
if not effective_config.require_web_approval:
return {"web_search_approved": True}
query = derive_search_query(state["question"])
approved = check_web_search_approval(query, require_approval=True)
return {"web_search_approved": approved}
async def web_search_node(state: WorkspaceQAInternalState) -> dict[str, object]:
if not state.get("web_search_approved", False) or web_search_provider is None:
return {"web_context": ""}
query = derive_search_query(state["question"])
search_config = WebSearchConfig(enabled=True, require_approval=False)
response = await execute_web_search(query, web_search_provider, search_config)
context = format_results_for_context(response.results)
return {"web_context": context}
async def synthesize_node(state: WorkspaceQAInternalState) -> dict[str, object]:
if not state["verification_passed"]:
return {
"answer": NO_INFORMATION_ANSWER,
"citations": [],
"suggested_annotations": [],
}
result = await synthesize_answer(
question=state["question"],
segments=state["retrieved_segments"],
llm=llm,
)
citations = [
SegmentCitation(
meeting_id=seg.meeting_id,
segment_id=seg.segment_id,
start_time=seg.start_time,
end_time=seg.end_time,
text=seg.text,
score=seg.score,
)
for seg in state["retrieved_segments"]
if seg.segment_id in result.cited_segment_ids
]
suggested_annotations = extract_annotations_from_answer(
answer=result.answer,
cited_segment_ids=tuple(result.cited_segment_ids),
)
return {
"answer": result.answer,
"citations": citations,
"suggested_annotations": suggested_annotations,
}
builder: StateGraph[WorkspaceQAInternalState] = StateGraph(WorkspaceQAInternalState)
builder.add_node("retrieve", retrieve_node)
builder.add_node("verify", verify_node)
builder.add_node("synthesize", synthesize_node)
if effective_config.enable_web_search and web_search_provider is not None:
builder.add_node("web_search_approval", web_search_approval_node)
builder.add_node("web_search", web_search_node)
builder.add_edge(START, "retrieve")
builder.add_edge("retrieve", "verify")
builder.add_edge("verify", "web_search_approval")
builder.add_edge("web_search_approval", "web_search")
builder.add_edge("web_search", "synthesize")
builder.add_edge("synthesize", END)
else:
builder.add_edge(START, "retrieve")
builder.add_edge("retrieve", "verify")
builder.add_edge("verify", "synthesize")
builder.add_edge("synthesize", END)
compiled: CompiledStateGraph[WorkspaceQAInternalState] = builder.compile(checkpointer=checkpointer)
return compiled
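
A hedged wiring sketch for the graph above: `embedder`, `segment_repo`, `llm`, and `workspace_id` are assumed to exist as concrete adapters and values, and the module path is assumed to mirror the summarization graph's location.

```python
from noteflow.infrastructure.ai.graphs.workspace_qa import (
    WorkspaceQAConfig,
    build_workspace_qa_graph,
)

graph = build_workspace_qa_graph(
    embedder=embedder,
    segment_repo=segment_repo,
    llm=llm,
    config=WorkspaceQAConfig(enable_web_search=False),
)

# Run inside an async context; top_k is a required input-state key.
result = await graph.ainvoke(
    {
        "question": "What did we decide about the Q3 launch?",
        "workspace_id": workspace_id,
        "project_id": None,
        "top_k": 20,
    }
)
for citation in result["citations"]:
    print(citation.meeting_id, citation.segment_id, citation.score)
```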

View File

@@ -0,0 +1,312 @@
"""Content guardrails for AI input validation and output filtering."""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Final
from noteflow.infrastructure.logging import get_logger
logger = get_logger(__name__)
# Input validation limits
DEFAULT_MIN_INPUT_LENGTH: Final[int] = 3
DEFAULT_MAX_INPUT_LENGTH: Final[int] = 4000
DEFAULT_MAX_OUTPUT_LENGTH: Final[int] = 10000
# PII patterns (simplified - production would use more comprehensive detection)
EMAIL_PATTERN: Final[re.Pattern[str]] = re.compile(
r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
)
PHONE_PATTERN: Final[re.Pattern[str]] = re.compile(
r"\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b"
)
SSN_PATTERN: Final[re.Pattern[str]] = re.compile(r"\b\d{3}-\d{2}-\d{4}\b")
CREDIT_CARD_PATTERN: Final[re.Pattern[str]] = re.compile(r"\b(?:\d{4}[-\s]?){3}\d{4}\b")
PII_PATTERNS: Final[tuple[tuple[str, re.Pattern[str]], ...]] = (
("email", EMAIL_PATTERN),
("phone", PHONE_PATTERN),
("ssn", SSN_PATTERN),
("credit_card", CREDIT_CARD_PATTERN),
)
# Redaction placeholder
PII_REDACTION: Final[str] = "[REDACTED]"
class GuardrailViolation(str, Enum):
"""Types of guardrail violations."""
INPUT_TOO_SHORT = "input_too_short"
INPUT_TOO_LONG = "input_too_long"
OUTPUT_TOO_LONG = "output_too_long"
CONTAINS_PII = "contains_pii"
BLOCKED_CONTENT = "blocked_content"
INJECTION_ATTEMPT = "injection_attempt"
@dataclass(frozen=True)
class GuardrailResult:
"""Result of a guardrail check."""
allowed: bool
violation: GuardrailViolation | None = None
reason: str | None = None
filtered_content: str | None = None
@staticmethod
def ok(content: str | None = None) -> GuardrailResult:
"""Create a passing result."""
return GuardrailResult(allowed=True, filtered_content=content)
@staticmethod
def blocked(
violation: GuardrailViolation,
reason: str,
) -> GuardrailResult:
"""Create a blocking result."""
return GuardrailResult(allowed=False, violation=violation, reason=reason)
@staticmethod
def filtered(
content: str,
violation: GuardrailViolation,
reason: str,
) -> GuardrailResult:
"""Create a result with filtered content."""
return GuardrailResult(
allowed=True,
violation=violation,
reason=reason,
filtered_content=content,
)
@dataclass
class GuardrailRules:
"""Configurable guardrail rules.
Attributes:
min_input_length: Minimum allowed input length.
max_input_length: Maximum allowed input length.
max_output_length: Maximum allowed output length.
block_pii: Whether to block content containing PII.
redact_pii: Whether to redact PII instead of blocking.
blocked_phrases: Phrases that should block content entirely.
detect_injection: Whether to detect prompt injection attempts.
"""
min_input_length: int = DEFAULT_MIN_INPUT_LENGTH
max_input_length: int = DEFAULT_MAX_INPUT_LENGTH
max_output_length: int = DEFAULT_MAX_OUTPUT_LENGTH
block_pii: bool = False
redact_pii: bool = True
blocked_phrases: frozenset[str] = field(default_factory=frozenset)
detect_injection: bool = True
# Common injection patterns
INJECTION_PATTERNS: Final[tuple[re.Pattern[str], ...]] = (
re.compile(r"ignore\s+(?:all\s+)?(?:previous|above)\s+instructions", re.IGNORECASE),
re.compile(r"disregard\s+(?:all\s+)?(?:previous|prior)\s+", re.IGNORECASE),
re.compile(r"you\s+are\s+now\s+(?:a|an)\s+", re.IGNORECASE),
re.compile(r"forget\s+(?:everything|all)\s+(?:you|and)\s+", re.IGNORECASE),
re.compile(r"new\s+(?:system\s+)?instructions?:", re.IGNORECASE),
)
def _check_length(
text: str,
min_length: int,
max_length: int,
is_input: bool,
) -> GuardrailResult | None:
"""Check text length constraints."""
if is_input and len(text) < min_length:
return GuardrailResult.blocked(
GuardrailViolation.INPUT_TOO_SHORT,
f"Input must be at least {min_length} characters",
)
if is_input and len(text) > max_length:
return GuardrailResult.blocked(
GuardrailViolation.INPUT_TOO_LONG,
f"Input must be at most {max_length} characters",
)
if not is_input and len(text) > max_length:
return GuardrailResult.blocked(
GuardrailViolation.OUTPUT_TOO_LONG,
f"Output exceeds {max_length} characters",
)
return None
def _check_blocked_phrases(
text: str,
blocked_phrases: frozenset[str],
) -> GuardrailResult | None:
"""Check for blocked phrases."""
text_lower = text.lower()
for phrase in blocked_phrases:
if phrase.lower() in text_lower:
logger.warning("blocked_phrase_detected", phrase=phrase[:20])
return GuardrailResult.blocked(
GuardrailViolation.BLOCKED_CONTENT,
"Content contains blocked phrase",
)
return None
def _check_injection(text: str) -> GuardrailResult | None:
"""Check for prompt injection attempts."""
for pattern in INJECTION_PATTERNS:
if pattern.search(text):
logger.warning("injection_attempt_detected")
return GuardrailResult.blocked(
GuardrailViolation.INJECTION_ATTEMPT,
"Potential prompt injection detected",
)
return None
def _detect_pii(text: str) -> list[tuple[str, str]]:
"""Detect PII in text.
Returns:
List of (pii_type, matched_text) tuples.
"""
findings: list[tuple[str, str]] = []
for pii_type, pattern in PII_PATTERNS:
for match in pattern.finditer(text):
findings.append((pii_type, match.group()))
return findings
def _redact_pii(text: str) -> str:
"""Redact all PII in text."""
result = text
for _, pattern in PII_PATTERNS:
result = pattern.sub(PII_REDACTION, result)
return result
async def check_input(text: str, rules: GuardrailRules) -> GuardrailResult:
"""Validate input text against guardrail rules.
Args:
text: Input text to validate.
rules: Guardrail rules to apply.
Returns:
GuardrailResult indicating if input is allowed.
"""
# Length checks
length_result = _check_length(
text,
rules.min_input_length,
rules.max_input_length,
is_input=True,
)
if length_result is not None:
return length_result
# Blocked phrases
phrase_result = _check_blocked_phrases(text, rules.blocked_phrases)
if phrase_result is not None:
return phrase_result
# Injection detection
if rules.detect_injection:
injection_result = _check_injection(text)
if injection_result is not None:
return injection_result
# PII checks
if rules.block_pii or rules.redact_pii:
pii_findings = _detect_pii(text)
if pii_findings:
pii_types = [f[0] for f in pii_findings]
logger.info("pii_detected_in_input", pii_types=pii_types)
if rules.block_pii:
return GuardrailResult.blocked(
GuardrailViolation.CONTAINS_PII,
f"Input contains PII: {', '.join(pii_types)}",
)
# Redact instead of block
redacted = _redact_pii(text)
return GuardrailResult.filtered(
redacted,
GuardrailViolation.CONTAINS_PII,
f"PII redacted: {', '.join(pii_types)}",
)
return GuardrailResult.ok(text)
async def filter_output(text: str, rules: GuardrailRules) -> GuardrailResult:
"""Filter output text, redacting sensitive content.
Args:
text: Output text to filter.
rules: Guardrail rules to apply.
Returns:
GuardrailResult with potentially filtered content.
"""
# Length check
length_result = _check_length(
text,
min_length=0, # No minimum for output
max_length=rules.max_output_length,
is_input=False,
)
if length_result is not None:
# Truncate instead of blocking for output
truncated = text[: rules.max_output_length]
return GuardrailResult.filtered(
truncated,
GuardrailViolation.OUTPUT_TOO_LONG,
f"Output truncated to {rules.max_output_length} characters",
)
# Blocked phrases in output
phrase_result = _check_blocked_phrases(text, rules.blocked_phrases)
if phrase_result is not None:
return phrase_result
# PII redaction in output (always redact, never block output)
if rules.redact_pii:
pii_findings = _detect_pii(text)
if pii_findings:
pii_types = [f[0] for f in pii_findings]
logger.info("pii_detected_in_output", pii_types=pii_types)
redacted = _redact_pii(text)
return GuardrailResult.filtered(
redacted,
GuardrailViolation.CONTAINS_PII,
f"PII redacted: {', '.join(pii_types)}",
)
return GuardrailResult.ok(text)
def create_default_rules() -> GuardrailRules:
"""Create default guardrail rules."""
return GuardrailRules()
def create_strict_rules() -> GuardrailRules:
"""Create strict guardrail rules with PII blocking."""
return GuardrailRules(
block_pii=True,
redact_pii=False,
detect_injection=True,
max_input_length=2000,
)
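
A minimal end-to-end sketch of the guardrail flow, assuming this module is importable as `noteflow.infrastructure.ai.guardrails` (the import path is not shown in this hunk). Default rules redact PII rather than block, so the email survives in masked form.

```python
import asyncio

from noteflow.infrastructure.ai.guardrails import (
    check_input,
    create_default_rules,
    filter_output,
)

async def main() -> None:
    rules = create_default_rules()
    checked = await check_input("Email jane@example.com about next steps", rules)
    if not checked.allowed:
        raise ValueError(checked.reason)
    # filtered_content carries the redacted text when PII was found.
    print(checked.filtered_content)  # Email [REDACTED] about next steps
    answer = await filter_output("Reply sent to jane@example.com", rules)
    print(answer.filtered_content)  # Reply sent to [REDACTED]

asyncio.run(main())
```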

View File

@@ -0,0 +1,231 @@
"""Infrastructure utilities for LangGraph human-in-the-loop interrupts.
Wraps LangGraph's interrupt() and Command APIs for consistent usage across graphs.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Final
from uuid import uuid4
from langgraph.types import Command, interrupt
from noteflow.domain.ai.interrupts import (
InterruptAction,
InterruptResponse,
create_annotation_interrupt,
create_web_search_interrupt,
)
from noteflow.infrastructure.logging import get_logger
if TYPE_CHECKING:
from noteflow.infrastructure.ai.nodes.annotation_suggester import SuggestedAnnotation
logger = get_logger(__name__)
INTERRUPT_RESPONSE_KEY: Final[str] = "response"
INTERRUPT_APPROVED_VALUE: Final[str] = "approved"
def request_web_search_approval(
query: str,
*,
allow_modify: bool = False,
) -> InterruptResponse:
"""Request user approval for web search via LangGraph interrupt.
Args:
query: The search query to be executed.
allow_modify: Whether the user can modify the query.
Returns:
InterruptResponse with user's decision.
"""
request_id = str(uuid4())
interrupt_request = create_web_search_interrupt(
query=query,
request_id=request_id,
allow_modify=allow_modify,
)
logger.info(
"interrupt_web_search_requested",
request_id=request_id,
query_preview=query[:50],
)
response_data = interrupt(interrupt_request.to_dict())
return _parse_interrupt_response(response_data, request_id)
def request_annotation_approval(
annotations: list[SuggestedAnnotation],
) -> InterruptResponse:
"""Request user approval for suggested annotations via LangGraph interrupt.
Args:
annotations: List of suggested annotations to approve.
Returns:
InterruptResponse with user's decision.
"""
request_id = str(uuid4())
annotation_dicts = [ann.to_dict() for ann in annotations]
interrupt_request = create_annotation_interrupt(
annotations=annotation_dicts,
request_id=request_id,
)
logger.info(
"interrupt_annotation_requested",
request_id=request_id,
annotation_count=len(annotations),
)
response_data = interrupt(interrupt_request.to_dict())
return _parse_interrupt_response(response_data, request_id)
def _parse_interrupt_response(
response_data: object,
request_id: str,
) -> InterruptResponse:
"""Parse LangGraph interrupt response into domain type.
Args:
response_data: Raw response from LangGraph interrupt.
request_id: ID of the original request.
Returns:
Parsed InterruptResponse.
"""
if isinstance(response_data, str):
action = _string_to_action(response_data)
return InterruptResponse(action=action, request_id=request_id)
if isinstance(response_data, dict):
action_str = str(response_data.get("action", "reject"))
action = _string_to_action(action_str)
modified_value = response_data.get("modified_value")
if modified_value is not None and not isinstance(modified_value, dict):
modified_value = None
user_message = response_data.get("user_message")
if user_message is not None:
user_message = str(user_message)
return InterruptResponse(
action=action,
request_id=request_id,
modified_value=modified_value,
user_message=user_message,
)
logger.warning(
"interrupt_response_unknown_format",
request_id=request_id,
response_type=type(response_data).__name__,
)
return InterruptResponse(action=InterruptAction.REJECT, request_id=request_id)
def _string_to_action(value: str) -> InterruptAction:
"""Convert string response to InterruptAction."""
normalized = value.lower().strip()
if normalized in ("approve", "yes", "approved", "accept"):
return InterruptAction.APPROVE
if normalized in ("modify", "edit", "change"):
return InterruptAction.MODIFY
return InterruptAction.REJECT
def create_resume_command(response: InterruptResponse) -> Command[None]:
"""Create a LangGraph Command to resume execution with user response.
Args:
response: User's interrupt response.
Returns:
Command to resume graph execution.
"""
return Command(resume=response.to_dict())
class InterruptHandler:
"""Handles interrupt requests and responses for a graph execution."""
_require_web_approval: bool
def __init__(self, require_web_approval: bool = True) -> None:
self._require_web_approval = require_web_approval
def should_interrupt_for_web_search(self) -> bool:
return self._require_web_approval
def request_web_search(self, query: str) -> InterruptResponse:
return request_web_search_approval(query)
def request_annotation_approval(
self,
annotations: list[SuggestedAnnotation],
) -> InterruptResponse:
return request_annotation_approval(annotations)
def check_web_search_approval(
query: str,
require_approval: bool,
) -> bool:
"""Check if web search should proceed (with optional interrupt).
Args:
query: Search query to execute.
require_approval: Whether to interrupt for user approval.
Returns:
True if search should proceed, False if rejected.
"""
if not require_approval:
return True
response = request_web_search_approval(query)
return response.is_approved
def check_annotation_approval(
annotations: list[SuggestedAnnotation],
) -> tuple[bool, list[SuggestedAnnotation]]:
"""Check if annotations should be applied (with interrupt).
Args:
annotations: Suggested annotations to approve.
Returns:
Tuple of (should_apply, possibly_modified_annotations).
"""
if not annotations:
return False, []
response = request_annotation_approval(annotations)
if response.is_rejected:
return False, []
if response.is_modified and response.modified_value:
modified_list_raw = response.modified_value.get("annotations", [])
if isinstance(modified_list_raw, list):
from noteflow.infrastructure.ai.nodes.annotation_suggester import (
SuggestedAnnotation,
)
modified_annotations: list[SuggestedAnnotation] = []
for item in modified_list_raw:
if isinstance(item, dict):
item_dict: dict[str, object] = {str(k): v for k, v in item.items()}
modified_annotations.append(SuggestedAnnotation.from_dict(item_dict))
return True, modified_annotations
return response.is_approved, annotations
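
A hedged sketch of the resume path: after a graph pauses on `request_web_search_approval`, a client collects the user's decision, wraps it in the domain `InterruptResponse`, and feeds the resulting `Command` back into the paused run. The `request_id` value and the graph invocation shown in the comment are assumptions.

```python
from noteflow.domain.ai.interrupts import InterruptAction, InterruptResponse
from noteflow.infrastructure.ai.interrupts import create_resume_command

# Decision gathered from the UI for the interrupt with this request_id.
decision = InterruptResponse(action=InterruptAction.APPROVE, request_id="req-123")
resume = create_resume_command(decision)

# The command is then passed back into the checkpointed run, e.g.:
# await graph.ainvoke(resume, config={"configurable": {"thread_id": thread_id}})
```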

View File

@@ -0,0 +1,39 @@
"""LangGraph node implementations."""
from noteflow.infrastructure.ai.nodes.annotation_suggester import (
SuggestedAnnotation,
SuggestedAnnotationType,
extract_annotations_from_answer,
)
from noteflow.infrastructure.ai.nodes.verification import (
VerificationResult,
verify_citations,
)
from noteflow.infrastructure.ai.nodes.web_search import (
DisabledWebSearchProvider,
WebSearchConfig,
WebSearchProvider,
WebSearchResponse,
WebSearchResult,
derive_search_query,
execute_web_search,
format_results_for_context,
merge_contexts,
)
__all__ = [
"DisabledWebSearchProvider",
"SuggestedAnnotation",
"SuggestedAnnotationType",
"VerificationResult",
"WebSearchConfig",
"WebSearchProvider",
"WebSearchResponse",
"WebSearchResult",
"derive_search_query",
"execute_web_search",
"extract_annotations_from_answer",
"format_results_for_context",
"merge_contexts",
"verify_citations",
]

View File

@@ -0,0 +1,121 @@
"""Annotation suggester for extracting action items and decisions from answers."""
from __future__ import annotations
import re
from dataclasses import dataclass
from enum import Enum
from typing import Final
class SuggestedAnnotationType(str, Enum):
ACTION_ITEM = "action_item"
DECISION = "decision"
NOTE = "note"
@dataclass(frozen=True)
class SuggestedAnnotation:
text: str
annotation_type: SuggestedAnnotationType
segment_ids: tuple[int, ...]
confidence: float = 0.8
def to_dict(self) -> dict[str, object]:
return {
"text": self.text,
"type": self.annotation_type.value,
"segment_ids": list(self.segment_ids),
"confidence": self.confidence,
}
@classmethod
def from_dict(cls, data: dict[str, object]) -> SuggestedAnnotation:
text = str(data.get("text", ""))
type_str = str(data.get("type", "note"))
segment_ids_raw = data.get("segment_ids", [])
if isinstance(segment_ids_raw, list):
segment_ids = tuple(
int(sid) for sid in segment_ids_raw if isinstance(sid, (int, float))
)
else:
segment_ids = ()
confidence_raw = data.get("confidence", 0.8)
confidence = float(confidence_raw) if isinstance(confidence_raw, (int, float)) else 0.8
try:
annotation_type = SuggestedAnnotationType(type_str)
except ValueError:
annotation_type = SuggestedAnnotationType.NOTE
return cls(
text=text,
annotation_type=annotation_type,
segment_ids=segment_ids,
confidence=confidence,
)
ACTION_ITEM_PATTERNS: Final[tuple[re.Pattern[str], ...]] = (
re.compile(r"(?:need to|should|must|will|going to|has to)\s+(.+?)(?:\.|$)", re.IGNORECASE),
re.compile(r"(?:action item|TODO|task):\s*(.+?)(?:\.|$)", re.IGNORECASE),
re.compile(r"(?:follow[- ]up|next step):\s*(.+?)(?:\.|$)", re.IGNORECASE),
)
DECISION_PATTERNS: Final[tuple[re.Pattern[str], ...]] = (
re.compile(r"(?:decided to|agreed to|will go with|chose to)\s+(.+?)(?:\.|$)", re.IGNORECASE),
re.compile(r"(?:decision|resolution):\s*(.+?)(?:\.|$)", re.IGNORECASE),
re.compile(r"(?:the team|we|they) (?:decided|agreed|chose)\s+(.+?)(?:\.|$)", re.IGNORECASE),
)
MIN_TEXT_LENGTH: Final[int] = 10
MAX_TEXT_LENGTH: Final[int] = 200
def extract_annotations_from_answer(
answer: str,
cited_segment_ids: tuple[int, ...],
) -> list[SuggestedAnnotation]:
"""Extract action items and decisions from synthesized answer."""
suggestions: list[SuggestedAnnotation] = []
for pattern in ACTION_ITEM_PATTERNS:
for match in pattern.finditer(answer):
text = match.group(1).strip()
if MIN_TEXT_LENGTH <= len(text) <= MAX_TEXT_LENGTH:
suggestions.append(
SuggestedAnnotation(
text=text,
annotation_type=SuggestedAnnotationType.ACTION_ITEM,
segment_ids=cited_segment_ids,
confidence=0.7,
)
)
for pattern in DECISION_PATTERNS:
for match in pattern.finditer(answer):
text = match.group(1).strip()
if MIN_TEXT_LENGTH <= len(text) <= MAX_TEXT_LENGTH:
suggestions.append(
SuggestedAnnotation(
text=text,
annotation_type=SuggestedAnnotationType.DECISION,
segment_ids=cited_segment_ids,
confidence=0.75,
)
)
return _dedupe_suggestions(suggestions)
def _dedupe_suggestions(suggestions: list[SuggestedAnnotation]) -> list[SuggestedAnnotation]:
seen_texts: set[str] = set()
deduped: list[SuggestedAnnotation] = []
for suggestion in suggestions:
normalized = suggestion.text.lower().strip()
if normalized not in seen_texts:
seen_texts.add(normalized)
deduped.append(suggestion)
return deduped
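
A quick illustration of the extractor above; the answer text and segment id are made up.

```python
from noteflow.infrastructure.ai.nodes.annotation_suggester import (
    extract_annotations_from_answer,
)

answer = (
    "The team decided to ship the beta next sprint [4]. "
    "Action item: update the rollout checklist before Friday."
)
for suggestion in extract_annotations_from_answer(answer, cited_segment_ids=(4,)):
    print(suggestion.annotation_type.value, "-", suggestion.text)
# action_item - update the rollout checklist before Friday
# decision - ship the beta next sprint [4]
```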

View File

@@ -0,0 +1,61 @@
"""Citation verification for AI-generated answers."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Final
@dataclass(frozen=True)
class VerificationResult:
"""Result of citation verification.
Attributes:
is_valid: True if all cited segment IDs exist in available segments.
invalid_citation_indices: Indices of citations that failed validation.
reason: Human-readable explanation if validation failed.
"""
is_valid: bool
invalid_citation_indices: tuple[int, ...]
reason: str | None = None
NO_SEGMENTS_REASON: Final[str] = "No segments retrieved for question"
INVALID_CITATIONS_PREFIX: Final[str] = "Invalid citation indices: "
def verify_citations(
cited_ids: list[int],
available_ids: set[int],
) -> VerificationResult:
"""Verify all cited segment IDs exist in available segments.
Args:
cited_ids: List of segment IDs cited in the answer.
available_ids: Set of valid segment IDs from retrieval.
Returns:
VerificationResult with validation status and any invalid indices.
"""
if not available_ids:
return VerificationResult(
is_valid=False,
invalid_citation_indices=(),
reason=NO_SEGMENTS_REASON,
)
invalid_indices = tuple(i for i, cid in enumerate(cited_ids) if cid not in available_ids)
if invalid_indices:
return VerificationResult(
is_valid=False,
invalid_citation_indices=invalid_indices,
reason=f"{INVALID_CITATIONS_PREFIX}{invalid_indices}",
)
return VerificationResult(
is_valid=True,
invalid_citation_indices=(),
reason=None,
)
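
A small worked example of the checker: `99` is not among the retrieved segment ids, so its index in the citation list is flagged.

```python
from noteflow.infrastructure.ai.nodes.verification import verify_citations

result = verify_citations(cited_ids=[1, 3, 99], available_ids={1, 2, 3})
assert not result.is_valid
assert result.invalid_citation_indices == (2,)
print(result.reason)  # Invalid citation indices: (2,)
```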

View File

@@ -0,0 +1,226 @@
"""Web search node for augmenting RAG with external knowledge."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Final, Protocol
from noteflow.infrastructure.logging import get_logger
logger = get_logger(__name__)
DEFAULT_MAX_RESULTS: Final[int] = 5
DEFAULT_TIMEOUT_SECONDS: Final[float] = 10.0
@dataclass(frozen=True)
class WebSearchResult:
"""A single web search result."""
title: str
url: str
snippet: str
score: float = 1.0
def to_dict(self) -> dict[str, object]:
"""Convert to dictionary for serialization."""
return {
"title": self.title,
"url": self.url,
"snippet": self.snippet,
"score": self.score,
}
@dataclass(frozen=True)
class WebSearchResponse:
"""Response from a web search query."""
query: str
results: tuple[WebSearchResult, ...]
total_results: int
search_time_ms: float
@property
def has_results(self) -> bool:
"""Check if search returned any results."""
return len(self.results) > 0
class WebSearchProvider(Protocol):
"""Protocol for web search providers.
Implementations can integrate with:
- Exa AI
- SerpAPI
- Brave Search API
- Bing Web Search API
- Google Custom Search
"""
async def search(
self,
query: str,
max_results: int = DEFAULT_MAX_RESULTS,
) -> WebSearchResponse:
"""Execute a web search query.
Args:
query: Search query string.
max_results: Maximum number of results to return.
Returns:
WebSearchResponse with search results.
"""
...
class DisabledWebSearchProvider:
"""Stub provider that returns empty results.
Used when web search is not configured or disabled.
"""
async def search(
self,
query: str,
max_results: int = DEFAULT_MAX_RESULTS,
) -> WebSearchResponse:
"""Return empty results - web search disabled."""
logger.debug("web_search_disabled", query=query[:50])
return WebSearchResponse(
query=query,
results=(),
total_results=0,
search_time_ms=0.0,
)
@dataclass
class WebSearchConfig:
"""Configuration for web search node."""
enabled: bool = False
max_results: int = DEFAULT_MAX_RESULTS
timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS
require_approval: bool = True
def format_results_for_context(results: tuple[WebSearchResult, ...]) -> str:
"""Format web search results for LLM context.
Args:
results: Web search results to format.
Returns:
Formatted string suitable for LLM context.
"""
if not results:
return ""
formatted_parts: list[str] = ["## Web Search Results\n"]
for i, result in enumerate(results, 1):
formatted_parts.append(f"### [{i}] {result.title}")
formatted_parts.append(f"Source: {result.url}")
formatted_parts.append(f"{result.snippet}\n")
return "\n".join(formatted_parts)
async def execute_web_search(
query: str,
provider: WebSearchProvider,
config: WebSearchConfig,
) -> WebSearchResponse:
"""Execute web search with configuration.
Args:
query: Search query derived from user question.
provider: Web search provider implementation.
config: Search configuration.
Returns:
WebSearchResponse with results (empty if disabled).
"""
if not config.enabled:
logger.debug("web_search_skipped_disabled")
return WebSearchResponse(
query=query,
results=(),
total_results=0,
search_time_ms=0.0,
)
logger.info("web_search_executing", query=query[:100], max_results=config.max_results)
response = await provider.search(
query=query,
max_results=config.max_results,
)
logger.info(
"web_search_completed",
query=query[:50],
result_count=len(response.results),
search_time_ms=response.search_time_ms,
)
return response
def merge_contexts(
transcript_context: str,
web_results: WebSearchResponse,
) -> str:
"""Merge transcript segments with web search results.
Args:
transcript_context: Context from transcript segments.
web_results: Web search response.
Returns:
Combined context for LLM synthesis.
"""
if not web_results.has_results:
return transcript_context
web_context = format_results_for_context(web_results.results)
return f"""## Meeting Transcript Context
{transcript_context}
{web_context}
Note: Web search results are provided as supplementary context.
Prioritize information from the meeting transcript when answering questions about the meeting.
"""
def derive_search_query(question: str, meeting_context: str | None = None) -> str:
"""Derive a web search query from user question and context.
Args:
question: User's original question.
meeting_context: Optional context about the meeting topic.
Returns:
Optimized search query.
"""
# Simple approach: use the question directly
# A more sophisticated approach would use an LLM to generate the query
query = question.strip()
# Add meeting context keywords if available
if meeting_context:
# Extract key terms from context (simplified)
context_terms = meeting_context[:100].strip()
if context_terms:
query = f"{query} {context_terms}"
# Limit query length
max_query_length = 256
if len(query) > max_query_length:
query = query[:max_query_length].rsplit(" ", 1)[0]
return query
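
A minimal in-memory provider satisfying `WebSearchProvider`, useful for tests and local wiring; real adapters (Exa, Brave, etc.) would issue HTTP calls inside `search()`. It reuses the types defined above and is a sketch, not a real integration.

```python
import time

class StaticWebSearchProvider:
    """Returns a fixed result set; a test double, not a production adapter."""

    def __init__(self, results: tuple[WebSearchResult, ...]) -> None:
        self._results = results

    async def search(
        self,
        query: str,
        max_results: int = DEFAULT_MAX_RESULTS,
    ) -> WebSearchResponse:
        started = time.perf_counter()
        return WebSearchResponse(
            query=query,
            results=self._results[:max_results],
            total_results=len(self._results),
            search_time_ms=(time.perf_counter() - started) * 1000.0,
        )
```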

View File

@@ -0,0 +1,27 @@
"""Tool adapters for LangGraph workflows."""
from noteflow.infrastructure.ai.tools.retrieval import (
BatchEmbedderProtocol,
EmbedderProtocol,
RetrievalResult,
retrieve_segments,
retrieve_segments_batch,
retrieve_segments_workspace,
retrieve_segments_workspace_batch,
)
from noteflow.infrastructure.ai.tools.synthesis import (
SynthesisResult,
synthesize_answer,
)
__all__ = [
"BatchEmbedderProtocol",
"EmbedderProtocol",
"RetrievalResult",
"SynthesisResult",
"retrieve_segments",
"retrieve_segments_batch",
"retrieve_segments_workspace",
"retrieve_segments_workspace_batch",
"synthesize_answer",
]

View File

@@ -0,0 +1,237 @@
"""Segment retrieval tools for LangGraph workflows."""
from __future__ import annotations
import asyncio
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Final, Protocol, runtime_checkable
from uuid import UUID
from noteflow.domain.value_objects import MeetingId
# Limit concurrent parallel operations to prevent resource exhaustion
MAX_CONCURRENT_OPERATIONS: Final[int] = 10
@runtime_checkable
class EmbedderProtocol(Protocol):
"""Protocol for text embedding providers."""
async def embed(self, text: str) -> list[float]:
"""Embed a single text string."""
...
@runtime_checkable
class BatchEmbedderProtocol(EmbedderProtocol, Protocol):
"""Extended protocol for embedders supporting batch operations."""
async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
"""Embed multiple texts in a single batch operation.
More efficient than calling embed() multiple times for providers
that support batching (reduces API calls, leverages GPU batching).
"""
...
class SegmentLike(Protocol):
segment_id: int
meeting_id: MeetingId | None
text: str
start_time: float
end_time: float
class SegmentSearchProtocol(Protocol):
async def search_semantic(
self,
query_embedding: list[float],
limit: int,
meeting_id: MeetingId | None,
) -> Sequence[tuple[SegmentLike, float]]: ...
class WorkspaceSegmentSearchProtocol(Protocol):
async def search_semantic_workspace(
self,
query_embedding: list[float],
workspace_id: UUID,
project_id: UUID | None,
limit: int,
) -> Sequence[tuple[SegmentLike, float]]: ...
@dataclass(frozen=True)
class RetrievalResult:
segment_id: int
meeting_id: UUID
text: str
start_time: float
end_time: float
score: float
def _meeting_id_to_uuid(mid: MeetingId | None) -> UUID:
if mid is None:
msg = "meeting_id is required for RetrievalResult"
raise ValueError(msg)
return UUID(str(mid))
async def retrieve_segments(
query: str,
embedder: EmbedderProtocol,
segment_repo: SegmentSearchProtocol,
meeting_id: MeetingId | None = None,
top_k: int = 8,
) -> list[RetrievalResult]:
"""Retrieve relevant transcript segments via semantic search."""
query_embedding = await embedder.embed(query)
results = await segment_repo.search_semantic(
query_embedding=query_embedding,
limit=top_k,
meeting_id=meeting_id,
)
return [
RetrievalResult(
segment_id=segment.segment_id,
meeting_id=_meeting_id_to_uuid(segment.meeting_id),
text=segment.text,
start_time=segment.start_time,
end_time=segment.end_time,
score=score,
)
for segment, score in results
]
async def retrieve_segments_workspace(
query: str,
embedder: EmbedderProtocol,
segment_repo: WorkspaceSegmentSearchProtocol,
workspace_id: UUID,
project_id: UUID | None = None,
top_k: int = 20,
) -> list[RetrievalResult]:
"""Retrieve relevant transcript segments across workspace/project via semantic search."""
query_embedding = await embedder.embed(query)
results = await segment_repo.search_semantic_workspace(
query_embedding=query_embedding,
workspace_id=workspace_id,
project_id=project_id,
limit=top_k,
)
return [
RetrievalResult(
segment_id=segment.segment_id,
meeting_id=_meeting_id_to_uuid(segment.meeting_id),
text=segment.text,
start_time=segment.start_time,
end_time=segment.end_time,
score=score,
)
for segment, score in results
]
async def _embed_batch_fallback(
texts: Sequence[str],
embedder: EmbedderProtocol,
) -> list[list[float]]:
"""Embed multiple texts, using batch API if available or parallel fallback."""
if isinstance(embedder, BatchEmbedderProtocol):
return await embedder.embed_batch(texts)
semaphore = asyncio.Semaphore(MAX_CONCURRENT_OPERATIONS)
async def _bounded_embed(text: str) -> list[float]:
async with semaphore:
return await embedder.embed(text)
return list(await asyncio.gather(*(_bounded_embed(t) for t in texts)))
async def retrieve_segments_batch(
queries: Sequence[str],
embedder: EmbedderProtocol,
segment_repo: SegmentSearchProtocol,
meeting_id: MeetingId | None = None,
top_k: int = 8,
) -> list[list[RetrievalResult]]:
"""Retrieve segments for multiple queries in parallel.
Uses batch embedding when available, then parallel search execution.
Returns results in the same order as input queries.
"""
if not queries:
return []
embeddings = await _embed_batch_fallback(list(queries), embedder)
semaphore = asyncio.Semaphore(MAX_CONCURRENT_OPERATIONS)
async def _search(emb: list[float]) -> list[RetrievalResult]:
async with semaphore:
results = await segment_repo.search_semantic(
query_embedding=emb,
limit=top_k,
meeting_id=meeting_id,
)
return [
RetrievalResult(
segment_id=seg.segment_id,
meeting_id=_meeting_id_to_uuid(seg.meeting_id),
text=seg.text,
start_time=seg.start_time,
end_time=seg.end_time,
score=score,
)
for seg, score in results
]
search_results = await asyncio.gather(*(_search(emb) for emb in embeddings))
return list(search_results)
async def retrieve_segments_workspace_batch(
queries: Sequence[str],
embedder: EmbedderProtocol,
segment_repo: WorkspaceSegmentSearchProtocol,
workspace_id: UUID,
project_id: UUID | None = None,
top_k: int = 20,
) -> list[list[RetrievalResult]]:
"""Retrieve workspace segments for multiple queries in parallel.
Uses batch embedding when available, then parallel search execution.
Returns results in the same order as input queries.
"""
if not queries:
return []
embeddings = await _embed_batch_fallback(list(queries), embedder)
semaphore = asyncio.Semaphore(MAX_CONCURRENT_OPERATIONS)
async def _search(emb: list[float]) -> list[RetrievalResult]:
async with semaphore:
results = await segment_repo.search_semantic_workspace(
query_embedding=emb,
workspace_id=workspace_id,
project_id=project_id,
limit=top_k,
)
return [
RetrievalResult(
segment_id=seg.segment_id,
meeting_id=_meeting_id_to_uuid(seg.meeting_id),
text=seg.text,
start_time=seg.start_time,
end_time=seg.end_time,
score=score,
)
for seg, score in results
]
search_results = await asyncio.gather(*(_search(emb) for emb in embeddings))
return list(search_results)
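
A hedged adapter sketch for `EmbedderProtocol`/`BatchEmbedderProtocol` wrapping a synchronous sentence-transformers model; the library, the model name, and the `encode()` call are assumptions about sentence-transformers, not part of this repo.

```python
import asyncio
from collections.abc import Sequence

from sentence_transformers import SentenceTransformer  # assumed dependency

class SentenceTransformerEmbedder:
    def __init__(self, model_name: str = "all-MiniLM-L6-v2") -> None:
        self._model = SentenceTransformer(model_name)

    async def embed(self, text: str) -> list[float]:
        # encode() is blocking; run it off the event loop.
        vectors = await asyncio.to_thread(self._model.encode, [text])
        return [float(x) for x in vectors[0]]

    async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
        vectors = await asyncio.to_thread(self._model.encode, list(texts))
        return [[float(x) for x in row] for row in vectors]
```

Because this class defines `embed_batch`, `_embed_batch_fallback` above would take the batch path instead of issuing one `embed()` call per query.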

View File

@@ -0,0 +1,60 @@
"""Answer synthesis tools for LangGraph workflows."""
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING, Final, Protocol
if TYPE_CHECKING:
from noteflow.infrastructure.ai.tools.retrieval import RetrievalResult
class LLMProtocol(Protocol):
async def complete(self, prompt: str) -> str: ...
@dataclass(frozen=True)
class SynthesisResult:
answer: str
cited_segment_ids: list[int]
SYNTHESIS_PROMPT_TEMPLATE: Final[
str
] = """Answer the question based on the following transcript segments.
Cite specific segments by their ID when making claims.
Question: {question}
Segments:
{segments}
Answer (cite segment IDs in brackets like [1], [3]):"""
CITATION_PATTERN: Final[re.Pattern[str]] = re.compile(r"\[(\d+)\]")
async def synthesize_answer(
question: str,
segments: list[RetrievalResult],
llm: LLMProtocol,
) -> SynthesisResult:
"""Generate answer with segment citations using LLM."""
segment_text = "\n".join(
f"[{s.segment_id}] ({s.start_time:.1f}s-{s.end_time:.1f}s): {s.text}" for s in segments
)
prompt = SYNTHESIS_PROMPT_TEMPLATE.format(
question=question,
segments=segment_text,
)
answer = await llm.complete(prompt)
valid_ids = {s.segment_id for s in segments}
cited_ids = extract_cited_ids(answer, valid_ids)
return SynthesisResult(answer=answer, cited_segment_ids=cited_ids)
def extract_cited_ids(answer: str, valid_ids: set[int]) -> list[int]:
matches = CITATION_PATTERN.findall(answer)
cited = [int(m) for m in matches if int(m) in valid_ids]
return list(dict.fromkeys(cited))
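
A hedged `LLMProtocol` adapter using the OpenAI v1 async client; the client wiring and model name are assumptions, not project code.

```python
from openai import AsyncOpenAI  # assumed dependency

class OpenAILLM:
    def __init__(self, model: str = "gpt-4o-mini") -> None:
        self._client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
        self._model = model

    async def complete(self, prompt: str) -> str:
        response = await self._client.chat.completions.create(
            model=self._model,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content or ""
```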

View File

@@ -0,0 +1 @@
"""Tests for domain/ai/ module."""

View File

@@ -0,0 +1,118 @@
from uuid import uuid4
import pytest
from noteflow.domain.ai.citations import SegmentCitation
class TestSegmentCitation:
def test_creation_with_valid_values(self) -> None:
meeting_id = uuid4()
citation = SegmentCitation(
meeting_id=meeting_id,
segment_id=1,
start_time=0.0,
end_time=5.0,
text="Test segment text",
score=0.95,
)
assert citation.meeting_id == meeting_id
assert citation.segment_id == 1
assert citation.start_time == 0.0
assert citation.end_time == 5.0
assert citation.text == "Test segment text"
assert citation.score == 0.95
def test_duration_property(self) -> None:
citation = SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=10.0,
end_time=25.0,
text="Test",
)
assert citation.duration == 15.0
def test_default_score_is_zero(self) -> None:
citation = SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=0.0,
end_time=1.0,
text="Test",
)
assert citation.score == 0.0
def test_rejects_negative_segment_id(self) -> None:
with pytest.raises(ValueError, match="segment_id must be non-negative"):
SegmentCitation(
meeting_id=uuid4(),
segment_id=-1,
start_time=0.0,
end_time=5.0,
text="Test",
)
def test_rejects_negative_start_time(self) -> None:
with pytest.raises(ValueError, match="start_time must be non-negative"):
SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=-1.0,
end_time=5.0,
text="Test",
)
def test_rejects_end_time_before_start_time(self) -> None:
with pytest.raises(ValueError, match="end_time must be >= start_time"):
SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=10.0,
end_time=5.0,
text="Test",
)
@pytest.mark.parametrize(
"invalid_score",
[
pytest.param(-0.1, id="negative"),
pytest.param(1.1, id="above_one"),
],
)
def test_rejects_invalid_score(self, invalid_score: float) -> None:
with pytest.raises(ValueError, match="score must be between 0 and 1"):
SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=0.0,
end_time=5.0,
text="Test",
score=invalid_score,
)
def test_accepts_zero_duration(self) -> None:
citation = SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=5.0,
end_time=5.0,
text="Instant moment",
)
assert citation.duration == 0.0
def test_is_frozen(self) -> None:
citation = SegmentCitation(
meeting_id=uuid4(),
segment_id=1,
start_time=0.0,
end_time=5.0,
text="Test",
)
with pytest.raises(AttributeError):
citation.text = "Modified" # type: ignore[misc]

View File

@@ -0,0 +1 @@
"""Tests for infrastructure/ai/ module."""

View File

@@ -0,0 +1,268 @@
from collections.abc import Sequence
from dataclasses import dataclass
from unittest.mock import AsyncMock
from uuid import uuid4
import pytest
from noteflow.infrastructure.ai.tools.retrieval import (
BatchEmbedderProtocol,
RetrievalResult,
retrieve_segments,
retrieve_segments_batch,
)
@dataclass
class MockSegment:
segment_id: int
meeting_id: object
text: str
start_time: float
end_time: float
class TestRetrieveSegments:
@pytest.fixture
def mock_embedder(self) -> AsyncMock:
embedder = AsyncMock()
embedder.embed.return_value = [0.1, 0.2, 0.3]
return embedder
@pytest.fixture
def mock_segment_repo(self) -> AsyncMock:
return AsyncMock()
@pytest.fixture
def sample_meeting_id(self) -> object:
return uuid4()
async def test_retrieve_segments_success(
self,
mock_embedder: AsyncMock,
mock_segment_repo: AsyncMock,
sample_meeting_id: object,
) -> None:
segment = MockSegment(
segment_id=1,
meeting_id=sample_meeting_id,
text="Test segment",
start_time=0.0,
end_time=5.0,
)
mock_segment_repo.search_semantic.return_value = [(segment, 0.95)]
results = await retrieve_segments(
query="test query",
embedder=mock_embedder,
segment_repo=mock_segment_repo,
meeting_id=sample_meeting_id, # type: ignore[arg-type]
top_k=5,
)
assert len(results) == 1
assert results[0].segment_id == 1
assert results[0].text == "Test segment"
assert results[0].score == 0.95
async def test_retrieve_segments_calls_embedder_with_query(
self,
mock_embedder: AsyncMock,
mock_segment_repo: AsyncMock,
) -> None:
mock_segment_repo.search_semantic.return_value = []
await retrieve_segments(
query="what happened in the meeting",
embedder=mock_embedder,
segment_repo=mock_segment_repo,
)
mock_embedder.embed.assert_called_once_with("what happened in the meeting")
async def test_retrieve_segments_passes_embedding_to_repo(
self,
mock_embedder: AsyncMock,
mock_segment_repo: AsyncMock,
) -> None:
mock_embedder.embed.return_value = [1.0, 2.0, 3.0]
mock_segment_repo.search_semantic.return_value = []
await retrieve_segments(
query="test",
embedder=mock_embedder,
segment_repo=mock_segment_repo,
top_k=10,
)
mock_segment_repo.search_semantic.assert_called_once_with(
query_embedding=[1.0, 2.0, 3.0],
meeting_id=None,
limit=10,
)
async def test_retrieve_segments_empty_result(
self,
mock_embedder: AsyncMock,
mock_segment_repo: AsyncMock,
) -> None:
mock_segment_repo.search_semantic.return_value = []
results = await retrieve_segments(
query="test",
embedder=mock_embedder,
segment_repo=mock_segment_repo,
)
assert results == []
async def test_retrieval_result_is_frozen(self) -> None:
result = RetrievalResult(
segment_id=1,
meeting_id=uuid4(),
text="Test",
start_time=0.0,
end_time=5.0,
score=0.9,
)
with pytest.raises(AttributeError):
result.text = "Modified" # type: ignore[misc]
class MockBatchEmbedder:
def __init__(self, embedding: list[float]) -> None:
self._embedding = embedding
self.embed_calls: list[str] = []
self.embed_batch_calls: list[Sequence[str]] = []
async def embed(self, text: str) -> list[float]:
self.embed_calls.append(text)
return self._embedding
async def embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
self.embed_batch_calls.append(texts)
return [self._embedding for _ in texts]
class TestRetrieveSegmentsBatch:
@pytest.fixture
def mock_embedder(self) -> AsyncMock:
embedder = AsyncMock()
embedder.embed.return_value = [0.1, 0.2, 0.3]
return embedder
@pytest.fixture
def batch_embedder(self) -> MockBatchEmbedder:
return MockBatchEmbedder([0.1, 0.2, 0.3])
@pytest.fixture
def mock_segment_repo(self) -> AsyncMock:
return AsyncMock()
@pytest.fixture
def sample_meeting_id(self) -> object:
return uuid4()
async def test_batch_returns_empty_for_no_queries(
self,
mock_embedder: AsyncMock,
mock_segment_repo: AsyncMock,
) -> None:
results = await retrieve_segments_batch(
queries=[],
embedder=mock_embedder,
segment_repo=mock_segment_repo,
)
assert results == []
mock_embedder.embed.assert_not_called()
async def test_batch_uses_embed_batch_when_available(
self,
batch_embedder: MockBatchEmbedder,
mock_segment_repo: AsyncMock,
sample_meeting_id: object,
) -> None:
segment = MockSegment(
segment_id=1,
meeting_id=sample_meeting_id,
text="Test",
start_time=0.0,
end_time=5.0,
)
mock_segment_repo.search_semantic.return_value = [(segment, 0.9)]
assert isinstance(batch_embedder, BatchEmbedderProtocol)
results = await retrieve_segments_batch(
queries=["query1", "query2"],
embedder=batch_embedder,
segment_repo=mock_segment_repo,
meeting_id=sample_meeting_id, # type: ignore[arg-type]
)
assert len(results) == 2
assert len(batch_embedder.embed_batch_calls) == 1
assert list(batch_embedder.embed_batch_calls[0]) == ["query1", "query2"]
assert batch_embedder.embed_calls == []
async def test_batch_falls_back_to_parallel_embed(
self,
mock_embedder: AsyncMock,
mock_segment_repo: AsyncMock,
sample_meeting_id: object,
) -> None:
segment = MockSegment(
segment_id=1,
meeting_id=sample_meeting_id,
text="Test",
start_time=0.0,
end_time=5.0,
)
mock_segment_repo.search_semantic.return_value = [(segment, 0.9)]
results = await retrieve_segments_batch(
queries=["query1", "query2"],
embedder=mock_embedder,
segment_repo=mock_segment_repo,
meeting_id=sample_meeting_id, # type: ignore[arg-type]
)
assert len(results) == 2
assert mock_embedder.embed.call_count == 2
async def test_batch_preserves_query_order(
self,
mock_segment_repo: AsyncMock,
sample_meeting_id: object,
) -> None:
segment1 = MockSegment(1, sample_meeting_id, "First", 0.0, 5.0)
segment2 = MockSegment(2, sample_meeting_id, "Second", 5.0, 10.0)
call_count = 0
async def side_effect(
query_embedding: list[float],
limit: int,
meeting_id: object,
) -> list[tuple[MockSegment, float]]:
nonlocal call_count
call_count += 1
if call_count == 1:
return [(segment1, 0.9)]
return [(segment2, 0.8)]
mock_segment_repo.search_semantic.side_effect = side_effect
embedder = MockBatchEmbedder([0.1, 0.2])
results = await retrieve_segments_batch(
queries=["first", "second"],
embedder=embedder,
segment_repo=mock_segment_repo,
meeting_id=sample_meeting_id, # type: ignore[arg-type]
)
assert len(results) == 2
assert results[0][0].text == "First"
assert results[1][0].text == "Second"

View File

@@ -0,0 +1,149 @@
from unittest.mock import AsyncMock
from uuid import uuid4
import pytest
from noteflow.infrastructure.ai.tools.retrieval import RetrievalResult
from noteflow.infrastructure.ai.tools.synthesis import (
SynthesisResult,
extract_cited_ids,
synthesize_answer,
)
class TestSynthesizeAnswer:
@pytest.fixture
def mock_llm(self) -> AsyncMock:
return AsyncMock()
@pytest.fixture
def sample_segments(self) -> list[RetrievalResult]:
meeting_id = uuid4()
return [
RetrievalResult(
segment_id=1,
meeting_id=meeting_id,
text="John discussed the project timeline",
start_time=0.0,
end_time=5.0,
score=0.95,
),
RetrievalResult(
segment_id=3,
meeting_id=meeting_id,
text="The deadline is next Friday",
start_time=10.0,
end_time=15.0,
score=0.85,
),
]
async def test_synthesize_answer_returns_result(
self,
mock_llm: AsyncMock,
sample_segments: list[RetrievalResult],
) -> None:
mock_llm.complete.return_value = "The project deadline is next Friday [3]."
result = await synthesize_answer(
question="What is the deadline?",
segments=sample_segments,
llm=mock_llm,
)
assert isinstance(result, SynthesisResult)
assert "deadline" in result.answer.lower()
async def test_synthesize_answer_extracts_citations(
self,
mock_llm: AsyncMock,
sample_segments: list[RetrievalResult],
) -> None:
mock_llm.complete.return_value = "John discussed timelines [1] and the deadline [3]."
result = await synthesize_answer(
question="What happened?",
segments=sample_segments,
llm=mock_llm,
)
assert result.cited_segment_ids == [1, 3]
async def test_synthesize_answer_filters_invalid_citations(
self,
mock_llm: AsyncMock,
sample_segments: list[RetrievalResult],
) -> None:
mock_llm.complete.return_value = "Found [1], [99], and [3]."
result = await synthesize_answer(
question="What happened?",
segments=sample_segments,
llm=mock_llm,
)
assert 99 not in result.cited_segment_ids
assert result.cited_segment_ids == [1, 3]
async def test_synthesize_answer_builds_prompt_with_segments(
self,
mock_llm: AsyncMock,
sample_segments: list[RetrievalResult],
) -> None:
mock_llm.complete.return_value = "Answer."
await synthesize_answer(
question="What is happening?",
segments=sample_segments,
llm=mock_llm,
)
call_args = mock_llm.complete.call_args
prompt = call_args[0][0]
assert "What is happening?" in prompt
assert "[1]" in prompt
assert "[3]" in prompt
assert "John discussed" in prompt
class TestExtractCitedIds:
def test_extracts_single_citation(self) -> None:
result = extract_cited_ids("The answer is here [5].", {1, 3, 5})
assert result == [5]
def test_extracts_multiple_citations(self) -> None:
result = extract_cited_ids("See [1] and [3] for details.", {1, 3, 5})
assert result == [1, 3]
def test_filters_invalid_ids(self) -> None:
result = extract_cited_ids("See [1] and [99].", {1, 3, 5})
assert result == [1]
def test_deduplicates_citations(self) -> None:
result = extract_cited_ids("See [1] and then [1] again.", {1, 3})
assert result == [1]
def test_preserves_order(self) -> None:
result = extract_cited_ids("[3] comes first, then [1].", {1, 3})
assert result == [3, 1]
def test_empty_for_no_citations(self) -> None:
result = extract_cited_ids("No citations here.", {1, 3})
assert result == []
class TestSynthesisResult:
def test_is_frozen(self) -> None:
result = SynthesisResult(
answer="Test answer",
cited_segment_ids=[1, 2],
)
with pytest.raises(AttributeError):
result.answer = "Modified" # type: ignore[misc]

View File

@@ -0,0 +1 @@
# Type stubs for langgraph

View File

@@ -0,0 +1 @@
# Type stubs for langgraph-checkpoint-postgres

View File

@@ -0,0 +1,10 @@
# Type stubs for langgraph-checkpoint-postgres async module
class AsyncPostgresSaver:
"""Async PostgreSQL checkpointer for LangGraph.
This stub provides typing for the langgraph-checkpoint-postgres package.
"""
def __init__(self, pool: object) -> None: ...
async def setup(self) -> None: ...
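
A minimal construction sketch matching the stub above: build the saver from an async psycopg pool and run `setup()` once at startup to create the checkpoint tables. The pool handling and import paths are assumptions about langgraph-checkpoint-postgres and psycopg_pool.

```python
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from psycopg_pool import AsyncConnectionPool  # assumed transitive dependency

async def create_checkpointer(dsn: str) -> AsyncPostgresSaver:
    pool = AsyncConnectionPool(conninfo=dsn, open=False)
    await pool.open()
    saver = AsyncPostgresSaver(pool)
    await saver.setup()  # idempotent schema creation
    return saver
```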

View File

@@ -0,0 +1,41 @@
# Type stubs for langgraph.graph
from collections.abc import Callable, Coroutine
from typing import Generic, TypeVar
_StateT = TypeVar("_StateT")
class START:
"""Sentinel for graph start node."""
pass
class END:
"""Sentinel for graph end node."""
pass
class CompiledStateGraph(Generic[_StateT]):
"""Compiled state graph that can be invoked."""
async def ainvoke(self, input: _StateT) -> _StateT: ...
def invoke(self, input: _StateT) -> _StateT: ...
class StateGraph(Generic[_StateT]):
"""State graph builder.
This stub provides typing for langgraph StateGraph.
"""
def __init__(self, state_schema: type[_StateT]) -> None: ...
def add_node(
self,
name: str,
action: Callable[[_StateT], dict[str, object]]
| Callable[[_StateT], Coroutine[object, object, dict[str, object]]],
) -> None: ...
def add_edge(
self,
start_key: str | type[START],
end_key: str | type[END],
) -> None: ...
def compile(
self,
checkpointer: object | None = None,
) -> CompiledStateGraph[_StateT]: ...