Merge pull request #3 from lfglabs-dev/fixes

Fixes
This commit is contained in:
Thomas Marchand
2025-12-23 00:40:42 +03:00
committed by GitHub
47 changed files with 2363 additions and 5155 deletions

View File

@@ -43,10 +43,11 @@ headers: { 'Authorization': `Bearer ${token}` }
```
dashboard/src/
├── app/ # Next.js App Router pages
│ ├── control/ # Mission control panel (chat interface)
│ ├── history/ # Agents page - mission history + tree modal
│ ├── console/ # SSH terminal + file explorer
│ ├── control/ # Task control panel
│ ├── agents/ # Agent tree visualization
│ ├── modules/ # MCP module management
│ ├── files/ # File explorer
│ └── settings/ # Configuration
├── components/
│ ├── ui/ # Generic UI components
@@ -56,6 +57,17 @@ dashboard/src/
└── lib/ # Utilities (api, auth, settings)
```
## Navigation Structure
| Nav Item | Route | Description |
|----------|-------|-------------|
| Overview | `/` | Global stats and recent missions |
| Mission | `/control` | Active mission chat interface |
| Agents | `/history` | Mission history with tree visualization modal |
| Console | `/console` | SSH terminal access |
| Modules | `/modules` | MCP server management |
| Settings | `/settings` | Configuration |
## Refresh Resilience Pattern
The dashboard maintains state snapshots on the backend so users can refresh or navigate away without losing visual state.
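A minimal sketch of the pattern, assuming the `getAgentTree` and `streamControl` helpers from `@/lib/api` (the hook shape is illustrative, not the exact page code):
```tsx
// Sketch: snapshot first, then live updates. Survives page refreshes because
// the backend snapshot restores the last known tree before any event arrives.
useEffect(() => {
  let mounted = true;
  // 1. Restore the backend snapshot (refresh resilience).
  getAgentTree()
    .then((snapshot) => { if (mounted && snapshot) setTree(snapshot); })
    .catch(() => { /* no snapshot yet; wait for live events */ });
  // 2. Subscribe to live SSE updates to keep the view current.
  const cleanup = streamControl((event) => {
    if (event.type === 'agent_tree') setTree(event.data.tree);
  });
  return () => { mounted = false; cleanup(); };
}, []);
```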
@@ -96,7 +108,11 @@ useEffect(() => {
## Agent Tree Visualization
Dynamic, animated tree visualization for hierarchical agent execution. Uses SVG + framer-motion for smooth animations.
The tree is accessed via a **modal** from the Agents page (formerly History):
- Click the tree icon on any mission row to open the full-screen modal
- Modal shows the agent execution tree with pan/zoom controls
### Component Structure
@@ -114,48 +130,45 @@ components/agent-tree/
```tsx
import {
AgentTreeCanvas,
generateComplexTree,
simulateTreeUpdates,
type AgentNode
} from '@/components/agent-tree';
// With real data from backend
// Full mode (in Control page panel)
<AgentTreeCanvas
tree={agentTree}
selectedNodeId={selectedId}
onSelectNode={(node) => setSelectedId(node?.id ?? null)}
/>
// Demo mode (for testing without API)
const [tree, setTree] = useState(generateComplexTree());
useEffect(() => simulateTreeUpdates(tree, setTree), []);
<AgentTreeCanvas tree={tree} />
// Compact mode (in History page preview, side panels)
<AgentTreeCanvas tree={agentTree} compact className="w-full h-full" />
```
### Props
| Prop | Type | Description |
|------|------|-------------|
| `tree` | `AgentNode \| null` | Tree data to visualize |
| `onSelectNode` | `(node: AgentNode \| null) => void` | Node selection callback |
| `selectedNodeId` | `string \| null` | Currently selected node ID |
| `compact` | `boolean` | Compact mode - hides minimap and details panel |
| `className` | `string` | Additional CSS classes |
### Node Display
Each node shows:
- **Icon**: Agent type (Bot, Brain, Cpu, Zap, Target, GitBranch)
- **Name**: Truncated agent name
- **Model**: LLM model used (e.g., `claude-sonnet-4.5`)
- **Model**: LLM model used (e.g., `gemini-3-flash`)
- **Status**: Running (pulse), Completed (✓), Failed (✗), Pending (clock)
- **Budget**: Spent / Allocated (e.g., `$0.35 / $9.00`)
### Demo Modes
Three demo tree generators for testing:
- `generateSimpleTree()`: Basic orchestrator (5 nodes)
- `generateComplexTree()`: Subtask decomposition (10-15 nodes)
- `generateDeepTree(depth)`: Recursive nesting (50+ nodes)
Use `simulateTreeUpdates(tree, setTree)` to simulate live status changes.
### Interactions
- **Pan**: Click and drag to move the tree
- **Zoom**: Mouse wheel or +/- buttons
- **Select**: Click a node to show details panel
- **Reset**: Reset view button restores initial position
- **Select**: Click a node to show details panel (full mode only)
- **Fit**: Reset view button fits tree to viewport
## Environment Variables

View File

@@ -1,5 +1,5 @@
---
description: Core Open Agent architecture - hierarchical agent system with full machine access
description: Core Open Agent architecture - SimpleAgent system with full machine access
alwaysApply: true
---
@@ -13,26 +13,30 @@ Minimal autonomous coding agent in Rust with **full machine access** (not sandbo
|-----------|----------|---------|
| Backend (Rust) | `src/` | HTTP API + agent system |
| Dashboard (Next.js) | `dashboard/` | Web UI (Bun, not npm) |
| iOS Dashboard | `ios_dashboard/` | Native iOS app (Swift/SwiftUI) |
| MCP configs | `.open_agent/mcp/config.json` | Model Context Protocol servers |
| Tuning | `.open_agent/tuning.json` | Calibration data |
## Architecture
```
RootAgent (orchestrator)
├── ComplexityEstimator (leaf) → estimates task difficulty 0-1
├── ModelSelector (leaf) → U-curve cost optimization
├── TaskExecutor (leaf) → runs tools in a loop
└── Verifier (leaf) → hybrid programmatic + LLM verification
SimpleAgent
└── TaskExecutor → runs tools in a loop with auto-upgrade
```
The agent system was simplified from a complex hierarchical orchestrator to a single `SimpleAgent` that:
- Automatically upgrades outdated model names via `ModelResolver`
- Uses `TaskExecutor` for tool-based execution
- Supports model overrides per mission/message
- Handles parallel mission execution
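A hedged TypeScript sketch of the auto-upgrade idea (the real `ModelResolver` lives in the Rust backend; the map entries below are hypothetical):
```ts
// Illustrative only: maps stale model IDs to current replacements
// before a mission runs. Entries are invented for the example.
const MODEL_UPGRADES: Record<string, string> = {
  'anthropic/claude-3.5-sonnet': 'anthropic/claude-sonnet-4.5', // hypothetical
  'openai/gpt-4o': 'openai/gpt-5.2',                            // hypothetical
};

function resolveModel(requested: string): string {
  // Known-stale IDs are upgraded; anything else passes through.
  return MODEL_UPGRADES[requested] ?? requested;
}
```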
### Module Map
```
src/
├── agents/ # Hierarchical agent system
│ ├── orchestrator/ # RootAgent, NodeAgent
│ └── leaf/ # ComplexityEstimator, ModelSelector, TaskExecutor, Verifier
├── agents/ # Agent system
│ ├── simple.rs # SimpleAgent (main entry point)
│ └── leaf/ # TaskExecutor
├── budget/ # Cost tracking, pricing, smart retry
│ ├── benchmarks.rs # Model capability scores from llm-stats.com
│ ├── pricing.rs # OpenRouter pricing + model allowlist
@@ -79,19 +83,44 @@ src/
| Method | Path | Purpose |
|--------|------|---------|
| `POST` | `/api/control/message` | Send message to agent |
| `POST` | `/api/control/tool_result` | Submit frontend tool result |
| `GET` | `/api/control/stream` | SSE event stream |
| `POST` | `/api/control/cancel` | Cancel current execution |
| `GET` | `/api/control/tree` | Get agent tree snapshot (refresh resilience) |
| `GET` | `/api/control/progress` | Get execution progress ("Subtask X/Y") |
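For example, consuming the SSE stream from the dashboard could look like the sketch below (plain browser `EventSource` rather than the `streamControl` helper; the payload shape mirrors the `status`/`agent_tree`/`progress` events handled in the page code further down):
```ts
// Sketch: subscribe to /api/control/stream and react to typed events.
const source = new EventSource('/api/control/stream');
source.onmessage = (e) => {
  const event = JSON.parse(e.data) as { type: string; data: unknown };
  if (event.type === 'progress') {
    // Same shape as GET /api/control/progress ("Subtask X/Y").
    console.log('progress', event.data);
  }
};
// Call source.close() when the view unmounts.
```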
### Mission Endpoints
| Method | Path | Purpose |
|--------|------|---------|
| `GET` | `/api/control/missions` | List all missions |
| `POST` | `/api/control/missions` | Create new mission |
| `GET` | `/api/control/missions/current` | Get current mission |
| `POST` | `/api/control/missions` | Create new mission (optional: title, model_override) |
| `GET` | `/api/control/missions/current` | Get current active mission |
| `GET` | `/api/control/missions/:id` | Get specific mission |
| `GET` | `/api/control/missions/:id/tree` | Get mission's agent tree |
| `POST` | `/api/control/missions/:id/load` | Switch to mission |
| `POST` | `/api/control/missions/:id/status` | Set mission status |
| `POST` | `/api/control/missions/:id/cancel` | Cancel specific mission |
| `POST` | `/api/control/missions/:id/resume` | Resume interrupted mission |
| `POST` | `/api/control/missions/:id/parallel` | Start mission in parallel |
### Parallel Execution Endpoints
| Method | Path | Purpose |
|--------|------|---------|
| `GET` | `/api/control/running` | List running missions |
| `GET` | `/api/control/parallel/config` | Get parallel execution config |
### Mission Statuses
Missions can be in one of these states:
- `active` - Currently being worked on
- `completed` - Successfully finished
- `failed` - Failed with errors
- `interrupted` - Stopped due to server shutdown/cancellation (resumable)
- `blocked` - Blocked by external factors (resumable)
- `not_feasible` - Cannot be completed as specified
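In TypeScript terms (mirroring the Swift `MissionStatus` enum later in this diff; `canResume` matches its Swift counterpart):
```ts
// Mission lifecycle; `interrupted` and `blocked` are the resumable states.
type MissionStatus =
  | 'active'
  | 'completed'
  | 'failed'
  | 'interrupted'
  | 'blocked'
  | 'not_feasible';

function canResume(status: MissionStatus): boolean {
  return status === 'interrupted' || status === 'blocked';
}
```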
## Model Selection (U-Curve)
- **Cheap models**: low token cost, high failure rate, more retries
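The U-curve intuition: with geometric retries, the expected cost of solving a task is the per-attempt cost divided by the success probability, so very cheap models lose on retries while frontier models lose on price, and the optimum sits in between. A sketch with invented numbers:
```ts
// Sketch with made-up pricing and success rates, not real benchmark data.
// E[attempts] = 1 / p for geometric retries, so E[cost] = c / p.
function expectedCost(costPerAttempt: number, successProb: number): number {
  return costPerAttempt / successProb;
}

expectedCost(0.01, 0.05); // cheap model, rarely succeeds -> ~$0.20
expectedCost(0.10, 0.90); // mid-tier model               -> ~$0.11 (minimum)
expectedCost(0.50, 0.98); // frontier model               -> ~$0.51
```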

View File

@@ -1,594 +0,0 @@
'use client';
/**
* Agent Tree Page
*
* Dynamic visualization of the hierarchical agent execution tree.
* Shows real-time updates as agents are created, run, and complete.
*/
import { useEffect, useMemo, useState, useRef, useCallback } from 'react';
import Link from 'next/link';
import { toast } from 'sonner';
import { cn } from '@/lib/utils';
import { listMissions, getCurrentMission, streamControl, getAgentTree, getProgress, getMissionTree, Mission, ControlRunState, ExecutionProgress } from '@/lib/api';
import { ShimmerSidebarItem } from '@/components/ui/shimmer';
import {
AgentTreeCanvas,
generateSimpleTree,
generateComplexTree,
generateDeepTree,
simulateTreeUpdates,
type AgentNode,
} from '@/components/agent-tree';
import {
Bot,
CheckCircle,
XCircle,
Loader,
Clock,
Search,
Layers,
FlaskConical,
Play,
Pause,
MessageSquare,
} from 'lucide-react';
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === 'object' && value !== null;
}
type DemoMode = 'off' | 'simple' | 'complex' | 'deep';
export default function AgentsPage() {
const [missions, setMissions] = useState<Mission[]>([]);
const [currentMission, setCurrentMission] = useState<Mission | null>(null);
const [controlState, setControlState] = useState<ControlRunState>('idle');
const [selectedMissionId, setSelectedMissionId] = useState<string | null>(null);
const [searchQuery, setSearchQuery] = useState('');
const [loading, setLoading] = useState(true);
const [realTree, setRealTree] = useState<AgentNode | null>(null);
const [selectedNodeId, setSelectedNodeId] = useState<string | null>(null);
const [progress, setProgress] = useState<ExecutionProgress | null>(null);
// Demo mode state
const [demoMode, setDemoMode] = useState<DemoMode>('off');
const [demoTree, setDemoTree] = useState<AgentNode | null>(null);
const [demoRunning, setDemoRunning] = useState(false);
const demoCleanupRef = useRef<(() => void) | null>(null);
const fetchedRef = useRef(false);
const streamCleanupRef = useRef<null | (() => void)>(null);
const selectedMission = useMemo(
() => missions.find((m) => m.id === selectedMissionId) ?? currentMission,
[missions, selectedMissionId, currentMission]
);
// Convert backend tree node to frontend AgentNode
const convertTreeNode = useCallback((node: Record<string, unknown>): AgentNode => {
const children = (node['children'] as Record<string, unknown>[] | undefined) ?? [];
return {
id: String(node['id'] ?? ''),
type: (String(node['node_type'] ?? 'Node') as AgentNode['type']),
status: (String(node['status'] ?? 'pending') as AgentNode['status']),
name: String(node['name'] ?? ''),
description: String(node['description'] ?? ''),
model: node['selected_model'] != null ? String(node['selected_model']) : undefined,
budgetAllocated: Number(node['budget_allocated'] ?? 0),
budgetSpent: Number(node['budget_spent'] ?? 0),
complexity: node['complexity'] != null ? Number(node['complexity']) : undefined,
children: children.map((c) => convertTreeNode(c)),
};
}, []);
// Stream control events for real-time status and tree updates
// First fetch snapshot, then subscribe to live updates
useEffect(() => {
streamCleanupRef.current?.();
let mounted = true;
// Fetch initial snapshot for refresh resilience
const fetchSnapshot = async () => {
try {
const [treeSnapshot, progressSnapshot] = await Promise.all([
getAgentTree().catch(() => null),
getProgress().catch(() => null),
]);
if (!mounted) return;
if (treeSnapshot) {
setRealTree(convertTreeNode(treeSnapshot as unknown as Record<string, unknown>));
}
if (progressSnapshot) {
setProgress(progressSnapshot);
}
} catch (e) {
console.error('Failed to fetch snapshot:', e);
}
};
fetchSnapshot();
const cleanup = streamControl((event) => {
const data: unknown = event.data;
if (event.type === 'status' && isRecord(data)) {
const st = data['state'];
setControlState(typeof st === 'string' ? (st as ControlRunState) : 'idle');
// Clear real tree and progress when idle
if (st === 'idle') {
setRealTree(null);
setProgress(null);
}
}
// Handle real-time tree updates
if (event.type === 'agent_tree' && isRecord(data)) {
const tree = data['tree'];
if (isRecord(tree)) {
const converted = convertTreeNode(tree);
setRealTree(converted);
}
}
// Handle progress updates
if (event.type === 'progress' && isRecord(data)) {
setProgress({
total_subtasks: Number(data['total_subtasks'] ?? 0),
completed_subtasks: Number(data['completed_subtasks'] ?? 0),
current_subtask: data['current_subtask'] as string | null,
current_depth: Number(data['depth'] ?? 0),
});
}
});
streamCleanupRef.current = cleanup;
return () => {
mounted = false;
streamCleanupRef.current?.();
streamCleanupRef.current = null;
};
}, [convertTreeNode]);
useEffect(() => {
let cancelled = false;
let hasShownError = false;
const fetchData = async () => {
try {
const [missionsData, currentMissionData] = await Promise.all([
listMissions().catch(() => []),
getCurrentMission().catch(() => null),
]);
if (cancelled) return;
fetchedRef.current = true;
setMissions(missionsData);
setCurrentMission(currentMissionData);
if (!selectedMissionId && currentMissionData) {
setSelectedMissionId(currentMissionData.id);
}
hasShownError = false;
} catch (error) {
if (!hasShownError) {
toast.error('Failed to fetch missions');
hasShownError = true;
}
console.error('Failed to fetch data:', error);
} finally {
if (!cancelled) {
setLoading(false);
}
}
};
fetchData();
const interval = setInterval(fetchData, 5000);
return () => {
cancelled = true;
clearInterval(interval);
};
}, [selectedMissionId]);
const filteredMissions = useMemo(() => {
if (!searchQuery.trim()) return missions;
const query = searchQuery.toLowerCase();
return missions.filter((m) =>
m.title?.toLowerCase().includes(query) ||
m.id.toLowerCase().includes(query)
);
}, [missions, searchQuery]);
const controlStateToStatus = (state: ControlRunState, missionStatus?: string): AgentNode['status'] => {
if (state === 'running' || state === 'waiting_for_tool') return 'running';
if (missionStatus === 'completed') return 'completed';
if (missionStatus === 'failed') return 'failed';
if (missionStatus === 'interrupted') return 'pending'; // Show as pending (resumable)
return 'pending';
};
// Build a basic agent tree from mission data when no real tree is available
const buildFallbackTree = useCallback((): AgentNode | null => {
if (!selectedMission) return null;
const rootStatus = controlStateToStatus(controlState, selectedMission.status);
return {
id: 'root',
type: 'Root',
status: rootStatus,
name: 'Root Agent',
description: selectedMission.title?.slice(0, 50) || 'Mission ' + selectedMission.id.slice(0, 8),
model: 'claude-sonnet-4.5',
budgetAllocated: 1000,
budgetSpent: 50,
children: [
{
id: 'complexity',
type: 'ComplexityEstimator',
status: 'completed',
name: 'Complexity Estimator',
description: 'Estimate task difficulty',
model: 'claude-3.5-haiku',
budgetAllocated: 10,
budgetSpent: 5,
complexity: 0.7,
},
{
id: 'model-selector',
type: 'ModelSelector',
status: 'completed',
name: 'Model Selector',
description: 'Select optimal model for task',
model: 'claude-3.5-haiku',
budgetAllocated: 10,
budgetSpent: 3,
},
{
id: 'executor',
type: 'TaskExecutor',
status: rootStatus,
name: 'Task Executor',
description: 'Execute task using tools',
model: 'claude-sonnet-4.5',
budgetAllocated: 900,
budgetSpent: 35,
},
{
id: 'verifier',
type: 'Verifier',
status: selectedMission.status === 'completed' ? 'completed' :
selectedMission.status === 'failed' ? 'failed' : 'pending',
name: 'Verifier',
description: 'Verify task completion',
model: 'claude-3.5-haiku',
budgetAllocated: 80,
budgetSpent: selectedMission.status === 'completed' ? 7 : 0,
},
] as AgentNode[],
};
}, [selectedMission, controlState]);
// Load tree for a specific mission
const loadMissionTree = useCallback(async (missionId: string) => {
try {
const tree = await getMissionTree(missionId);
if (tree) {
setRealTree(convertTreeNode(tree as unknown as Record<string, unknown>));
} else {
setRealTree(null);
}
} catch (e) {
console.error('Failed to load mission tree:', e);
setRealTree(null);
}
}, [convertTreeNode]);
// Demo mode handlers
const startDemo = useCallback((mode: DemoMode) => {
// Cleanup previous demo
demoCleanupRef.current?.();
if (mode === 'off') {
setDemoMode('off');
setDemoTree(null);
setDemoRunning(false);
return;
}
// Generate demo tree
let tree: AgentNode;
switch (mode) {
case 'simple':
tree = generateSimpleTree();
break;
case 'complex':
tree = generateComplexTree();
break;
case 'deep':
tree = generateDeepTree(4);
break;
default:
return;
}
setDemoMode(mode);
setDemoTree(tree);
setDemoRunning(true);
// Start simulation
const cleanup = simulateTreeUpdates(tree, setDemoTree);
demoCleanupRef.current = cleanup;
}, []);
const toggleDemoRunning = useCallback(() => {
if (demoRunning) {
demoCleanupRef.current?.();
demoCleanupRef.current = null;
setDemoRunning(false);
} else if (demoTree) {
const cleanup = simulateTreeUpdates(demoTree, setDemoTree);
demoCleanupRef.current = cleanup;
setDemoRunning(true);
}
}, [demoRunning, demoTree]);
// Load tree when selected mission changes (e.g., on initial page load)
// Track which mission's tree we last loaded to avoid redundant fetches
const lastLoadedMissionRef = useRef<string | null>(null);
useEffect(() => {
if (selectedMissionId && selectedMissionId !== lastLoadedMissionRef.current) {
lastLoadedMissionRef.current = selectedMissionId;
loadMissionTree(selectedMissionId);
}
}, [selectedMissionId, loadMissionTree]);
// Cleanup on unmount
useEffect(() => {
return () => {
demoCleanupRef.current?.();
};
}, []);
// Use demo tree when in demo mode, otherwise use real tree or fallback
const displayTree = useMemo(() => {
if (demoMode !== 'off' && demoTree) {
return demoTree;
}
return realTree ?? buildFallbackTree();
}, [demoMode, demoTree, realTree, buildFallbackTree]);
const isActive = controlState !== 'idle';
return (
<div className="flex h-screen">
{/* Mission selector sidebar */}
<div className="w-64 border-r border-white/[0.06] glass-panel p-4 flex flex-col">
<h2 className="mb-3 text-sm font-medium text-white">Missions</h2>
<div className="relative mb-4">
<Search className="absolute left-2.5 top-1/2 h-3.5 w-3.5 -translate-y-1/2 text-white/30" />
<input
type="text"
placeholder="Search missions..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
className="w-full rounded-lg border border-white/[0.06] bg-white/[0.02] py-2 pl-8 pr-3 text-xs text-white placeholder-white/30 focus:border-indigo-500/50 focus:outline-none transition-colors"
/>
</div>
{isActive && currentMission && (
<div className="mb-4 p-3 rounded-xl bg-indigo-500/10 border border-indigo-500/30">
<div className="flex items-center gap-2">
<Loader className="h-3 w-3 animate-spin text-indigo-400" />
<span className="text-xs font-medium text-indigo-400">Active</span>
</div>
<p className="mt-1 text-xs text-white/60 truncate">
{currentMission.title || 'Mission ' + currentMission.id.slice(0, 8)}
</p>
</div>
)}
<div className="flex-1 overflow-y-auto space-y-2">
{loading ? (
<>
<ShimmerSidebarItem />
<ShimmerSidebarItem />
<ShimmerSidebarItem />
</>
) : filteredMissions.length === 0 && !currentMission ? (
<p className="text-xs text-white/40 py-2">
{searchQuery ? 'No missions found' : 'No missions yet'}
</p>
) : (
<>
{currentMission && (!searchQuery || currentMission.title?.toLowerCase().includes(searchQuery.toLowerCase())) && (
<button
key={currentMission.id}
onClick={() => {
setSelectedMissionId(currentMission.id);
// Load tree for this mission (either live or saved)
if (selectedMissionId !== currentMission.id) {
loadMissionTree(currentMission.id);
}
if (demoMode !== 'off') startDemo('off');
}}
className={cn(
'w-full rounded-xl p-3 text-left transition-all',
selectedMissionId === currentMission.id && demoMode === 'off'
? 'bg-white/[0.08] border border-indigo-500/50'
: 'bg-white/[0.02] border border-white/[0.04] hover:bg-white/[0.04] hover:border-white/[0.08]'
)}
>
<div className="flex items-center gap-2">
{controlState !== 'idle' ? (
<Loader className="h-3 w-3 animate-spin text-indigo-400" />
) : currentMission.status === 'completed' ? (
<CheckCircle className="h-3 w-3 text-emerald-400" />
) : currentMission.status === 'failed' ? (
<XCircle className="h-3 w-3 text-red-400" />
) : (
<Clock className="h-3 w-3 text-indigo-400" />
)}
<span className="truncate text-sm text-white/80">
{currentMission.title?.slice(0, 25) || 'Current Mission'}
</span>
</div>
</button>
)}
{filteredMissions.filter(m => m.id !== currentMission?.id).map((mission) => (
<button
key={mission.id}
onClick={() => {
// Load tree for this mission (either live or saved from database)
if (selectedMissionId !== mission.id) {
loadMissionTree(mission.id);
}
setSelectedMissionId(mission.id);
if (demoMode !== 'off') startDemo('off');
}}
className={cn(
'w-full rounded-xl p-3 text-left transition-all',
selectedMissionId === mission.id && demoMode === 'off'
? 'bg-white/[0.08] border border-indigo-500/50'
: 'bg-white/[0.02] border border-white/[0.04] hover:bg-white/[0.04] hover:border-white/[0.08]'
)}
>
<div className="flex items-center gap-2">
{mission.status === 'active' ? (
<Clock className="h-3 w-3 text-indigo-400" />
) : mission.status === 'completed' ? (
<CheckCircle className="h-3 w-3 text-emerald-400" />
) : (
<XCircle className="h-3 w-3 text-red-400" />
)}
<span className="truncate text-sm text-white/80">
{mission.title?.slice(0, 25) || 'Mission ' + mission.id.slice(0, 8)}
</span>
</div>
</button>
))}
</>
)}
</div>
{/* Demo mode controls */}
<div className="mt-4 pt-4 border-t border-white/[0.06]">
<div className="flex items-center gap-2 mb-3">
<FlaskConical className="h-4 w-4 text-amber-400" />
<span className="text-xs font-medium text-white/60">Demo Mode</span>
</div>
<div className="space-y-2">
<div className="flex gap-1.5">
{(['simple', 'complex', 'deep'] as const).map((mode) => (
<button
key={mode}
onClick={() => startDemo(mode)}
className={cn(
'flex-1 px-2 py-1.5 rounded-lg text-xs font-medium transition-all capitalize',
demoMode === mode
? 'bg-amber-500/20 text-amber-400 border border-amber-500/30'
: 'bg-white/[0.02] text-white/50 border border-white/[0.04] hover:bg-white/[0.04]'
)}
>
{mode}
</button>
))}
</div>
{demoMode !== 'off' && (
<div className="flex gap-2">
<button
onClick={toggleDemoRunning}
className={cn(
'flex-1 flex items-center justify-center gap-1.5 px-3 py-1.5 rounded-lg text-xs font-medium transition-all',
demoRunning
? 'bg-amber-500/20 text-amber-400 border border-amber-500/30'
: 'bg-white/[0.02] text-white/50 border border-white/[0.04]'
)}
>
{demoRunning ? (
<>
<Pause className="h-3 w-3" />
Pause
</>
) : (
<>
<Play className="h-3 w-3" />
Resume
</>
)}
</button>
<button
onClick={() => startDemo('off')}
className="px-3 py-1.5 rounded-lg text-xs font-medium bg-white/[0.02] text-white/50 border border-white/[0.04] hover:bg-white/[0.04] transition-all"
>
Stop
</button>
</div>
)}
</div>
</div>
</div>
{/* Agent tree visualization */}
<div className="flex-1 flex flex-col overflow-hidden">
{/* Header */}
<div className="shrink-0 p-6 pb-0">
<div className="flex items-center gap-3">
<div className="flex h-10 w-10 items-center justify-center rounded-xl bg-indigo-500/10">
<Layers className="h-5 w-5 text-indigo-400" />
</div>
<div>
<div className="flex items-center gap-2">
<h1 className="text-xl font-semibold text-white">Agent Tree</h1>
{demoMode !== 'off' && (
<span className="px-2 py-0.5 rounded-full text-xs font-medium bg-amber-500/20 text-amber-400 border border-amber-500/30">
Demo: {demoMode}
</span>
)}
</div>
<p className="text-sm text-white/50">
{demoMode !== 'off'
? 'Simulated agent tree with live updates'
: 'Hierarchical agent execution visualization'
}
</p>
</div>
</div>
</div>
{/* Tree canvas */}
<div className="flex-1 p-6 min-h-0">
{!displayTree && (missions.length === 0 && !currentMission) ? (
<div className="flex flex-col items-center justify-center h-full">
<div className="flex h-16 w-16 items-center justify-center rounded-2xl bg-white/[0.02] mb-4">
<MessageSquare className="h-8 w-8 text-white/30" />
</div>
<p className="text-white/80">No active missions</p>
<p className="mt-2 text-sm text-white/40 text-center max-w-sm">
Start a conversation in the{' '}
<Link href="/control" className="text-indigo-400 hover:text-indigo-300">
Control
</Link>{' '}
page or try <span className="text-amber-400">Demo Mode</span> in the sidebar
</p>
</div>
) : (
<AgentTreeCanvas
tree={displayTree}
selectedNodeId={selectedNodeId}
onSelectNode={(node) => setSelectedNodeId(node?.id ?? null)}
className="w-full h-full rounded-2xl border border-white/[0.06]"
/>
)}
</div>
</div>
</div>
);
}

View File

@@ -14,3 +14,6 @@ export function ConsoleWrapper() {

View File

@@ -20,6 +20,8 @@ import {
getProgress,
getRunningMissions,
cancelMission,
listModels,
getModelDisplayName,
type ControlRunState,
type Mission,
type MissionStatus,
@@ -403,6 +405,9 @@ export default function ControlClient() {
>([]);
const [uploadQueue, setUploadQueue] = useState<string[]>([]);
// Model selection state
const [availableModels, setAvailableModels] = useState<string[]>([]);
// Check if the mission we're viewing is actually running (not just any mission)
const viewingMissionIsRunning = useMemo(() => {
if (!viewingMissionId) return runState !== "idle";
@@ -489,6 +494,31 @@ export default function ControlClient() {
return () => document.removeEventListener("mousedown", handleClickOutside);
}, []);
// Handle file upload - wrapped in useCallback to avoid stale closures
const handleFileUpload = useCallback(async (file: File) => {
setUploadQueue((prev) => [...prev, file.name]);
try {
// Upload to mission-specific context folder if we have a mission
const contextPath = currentMission?.id
? `/root/context/${currentMission.id}/`
: "/root/context/";
const result = await uploadFile(file, contextPath);
toast.success(`Uploaded ${result.name}`);
// Add a message about the upload
setInput((prev) => {
const uploadNote = `[Uploaded: ${result.name}]`;
return prev ? `${prev}\n${uploadNote}` : uploadNote;
});
} catch (error) {
console.error("Upload failed:", error);
toast.error(`Failed to upload ${file.name}`);
} finally {
setUploadQueue((prev) => prev.filter((name) => name !== file.name));
}
}, [currentMission?.id]);
// Handle paste to upload files
useEffect(() => {
const textarea = textareaRef.current;
@@ -519,28 +549,7 @@ export default function ControlClient() {
textarea.addEventListener("paste", handlePaste);
return () => textarea.removeEventListener("paste", handlePaste);
}, []);
// Handle file upload
const handleFileUpload = async (file: File) => {
setUploadQueue((prev) => [...prev, file.name]);
try {
const result = await uploadFile(file, "/root/context/");
toast.success(`Uploaded ${result.name} to /root/context/`);
// Add a message about the upload
setInput((prev) => {
const uploadNote = `[Uploaded: ${result.name}]`;
return prev ? `${prev}\n${uploadNote}` : uploadNote;
});
} catch (error) {
console.error("Upload failed:", error);
toast.error(`Failed to upload ${file.name}`);
} finally {
setUploadQueue((prev) => prev.filter((name) => name !== file.name));
}
};
}, [handleFileUpload]);
// Handle file input change
const handleFileChange = async (
@@ -629,6 +638,17 @@ export default function ControlClient() {
return () => clearInterval(interval);
}, []);
// Fetch available models for mission creation
useEffect(() => {
listModels()
.then((data) => {
setAvailableModels(data.models);
})
.catch((err) => {
console.error("Failed to fetch models:", err);
});
}, []);
// Handle cancelling a parallel mission
const handleCancelMission = async (missionId: string) => {
try {
@@ -643,21 +663,37 @@ export default function ControlClient() {
}
};
// Track the mission ID being fetched to prevent race conditions
const fetchingMissionIdRef = useRef<string | null>(null);
// Handle switching which mission we're viewing
const handleViewMission = useCallback(
async (missionId: string) => {
setViewingMissionId(missionId);
fetchingMissionIdRef.current = missionId;
// Always load fresh history from API when switching missions
// This ensures we don't show stale cached events
try {
const mission = await getMission(missionId);
// Race condition guard: only update if this is still the mission we want
if (fetchingMissionIdRef.current !== missionId) {
return; // Another mission was requested, discard this response
}
const historyItems = missionHistoryToItems(mission);
setItems(historyItems);
// Update cache with fresh data
setMissionItems((prev) => ({ ...prev, [missionId]: historyItems }));
} catch (err) {
console.error("Failed to load mission:", err);
// Race condition guard: only update if this is still the mission we want
if (fetchingMissionIdRef.current !== missionId) {
return;
}
// Fallback to cached items if API fails
if (missionItems[missionId]) {
setItems(missionItems[missionId]);
@@ -1141,27 +1177,41 @@ export default function ControlClient() {
<span className="hidden sm:inline">New</span> Mission
</button>
{showNewMissionDialog && (
<div className="absolute right-0 top-full mt-1 w-72 rounded-lg border border-white/[0.06] bg-[#1a1a1a] p-4 shadow-xl z-10">
<div className="absolute right-0 top-full mt-1 w-80 rounded-lg border border-white/[0.06] bg-[#1a1a1a] p-4 shadow-xl z-10">
<h3 className="text-sm font-medium text-white mb-3">
Create New Mission
</h3>
<div className="space-y-3">
<div>
<label className="block text-xs text-white/50 mb-1">
Model Override (optional)
<label className="block text-xs text-white/50 mb-1.5">
Model
</label>
<input
type="text"
<select
value={newMissionModel}
onChange={(e) => setNewMissionModel(e.target.value)}
placeholder="e.g., deepseek/deepseek-v3.2"
className="w-full rounded-lg border border-white/[0.06] bg-white/[0.02] px-3 py-2 text-sm text-white placeholder-white/30 focus:border-indigo-500/50 focus:outline-none"
/>
<p className="text-xs text-white/30 mt-1">
Leave empty to use default model
className="w-full rounded-lg border border-white/[0.06] bg-white/[0.02] px-3 py-2.5 text-sm text-white focus:border-indigo-500/50 focus:outline-none appearance-none cursor-pointer"
style={{
backgroundImage: `url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e")`,
backgroundPosition: "right 0.5rem center",
backgroundRepeat: "no-repeat",
backgroundSize: "1.5em 1.5em",
paddingRight: "2.5rem",
}}
>
<option value="" className="bg-[#1a1a1a]">
Auto (default)
</option>
{availableModels.map((model) => (
<option key={model} value={model} className="bg-[#1a1a1a]">
{getModelDisplayName(model)}
</option>
))}
</select>
<p className="text-xs text-white/30 mt-1.5">
Auto uses the configured default model
</p>
</div>
<div className="flex gap-2">
<div className="flex gap-2 pt-1">
<button
onClick={() => {
setShowNewMissionDialog(false);
@@ -1410,7 +1460,7 @@ export default function ControlClient() {
it&apos;s busy
</p>
<p className="mt-1 text-xs text-white/30">
Tip: Paste files directly to upload to /root/context/
Tip: Paste files directly to upload to context folder
</p>
</>
)}

View File

@@ -1,13 +1,14 @@
"use client";
import { useEffect, useState, useRef, useMemo } from "react";
import { useEffect, useState, useRef, useMemo, useCallback } from "react";
import Link from "next/link";
import { toast } from "sonner";
import { cn } from "@/lib/utils";
import { listTasks, listRuns, listMissions, TaskState, Run, Mission } from "@/lib/api";
import { listMissions, getMissionTree, Mission } from "@/lib/api";
import { ShimmerTableRow } from "@/components/ui/shimmer";
import { CopyButton } from "@/components/ui/copy-button";
import { RelativeTime } from "@/components/ui/relative-time";
import { AgentTreeCanvas, type AgentNode } from "@/components/agent-tree";
import {
CheckCircle,
XCircle,
@@ -21,9 +22,11 @@ import {
ArrowUpDown,
ArrowUp,
ArrowDown,
Network,
X,
} from "lucide-react";
const statusIcons = {
const statusIcons: Record<string, typeof Clock> = {
pending: Clock,
running: Loader,
completed: CheckCircle,
@@ -35,7 +38,7 @@ const statusIcons = {
not_feasible: XCircle,
};
const statusConfig = {
const statusConfig: Record<string, { color: string; bg: string }> = {
pending: { color: "text-amber-400", bg: "bg-amber-500/10" },
running: { color: "text-indigo-400", bg: "bg-indigo-500/10" },
completed: { color: "text-emerald-400", bg: "bg-emerald-500/10" },
@@ -47,6 +50,10 @@ const statusConfig = {
not_feasible: { color: "text-rose-400", bg: "bg-rose-500/10" },
};
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null;
}
type SortField = 'date' | 'status' | 'messages';
type SortDirection = 'asc' | 'desc';
@@ -80,30 +87,47 @@ function SortButton({
);
}
// Convert backend tree node to frontend AgentNode
function convertTreeNode(node: Record<string, unknown>): AgentNode {
const children = (node["children"] as Record<string, unknown>[] | undefined) ?? [];
return {
id: String(node["id"] ?? ""),
type: String(node["node_type"] ?? "Node") as AgentNode["type"],
status: String(node["status"] ?? "pending") as AgentNode["status"],
name: String(node["name"] ?? ""),
description: String(node["description"] ?? ""),
model: node["selected_model"] != null ? String(node["selected_model"]) : undefined,
budgetAllocated: Number(node["budget_allocated"] ?? 0),
budgetSpent: Number(node["budget_spent"] ?? 0),
complexity: node["complexity"] != null ? Number(node["complexity"]) : undefined,
children: children.map((c) => convertTreeNode(c)),
};
}
export default function HistoryPage() {
const [tasks, setTasks] = useState<TaskState[]>([]);
const [runs, setRuns] = useState<Run[]>([]);
const [missions, setMissions] = useState<Mission[]>([]);
const [loading, setLoading] = useState(true);
const [filter, setFilter] = useState<string>("all");
const [search, setSearch] = useState("");
const [sortField, setSortField] = useState<SortField>('date');
const [sortDirection, setSortDirection] = useState<SortDirection>('desc');
const [sortField, setSortField] = useState<SortField>("date");
const [sortDirection, setSortDirection] = useState<SortDirection>("desc");
const fetchedRef = useRef(false);
// Tree preview state
const [previewMissionId, setPreviewMissionId] = useState<string | null>(null);
const [previewTree, setPreviewTree] = useState<AgentNode | null>(null);
const [loadingTree, setLoadingTree] = useState(false);
// Track the mission ID being fetched to prevent race conditions
const fetchingTreeMissionIdRef = useRef<string | null>(null);
useEffect(() => {
if (fetchedRef.current) return;
fetchedRef.current = true;
const fetchData = async () => {
try {
const [tasksData, runsData, missionsData] = await Promise.all([
listTasks().catch(() => []),
listRuns().catch(() => ({ runs: [] })),
listMissions().catch(() => []),
]);
setTasks(tasksData);
setRuns(runsData.runs || []);
const missionsData = await listMissions().catch(() => []);
setMissions(missionsData);
} catch (error) {
console.error("Failed to fetch data:", error);
@@ -116,31 +140,61 @@ export default function HistoryPage() {
fetchData();
}, []);
// Load tree for preview
const handlePreviewTree = useCallback(async (missionId: string) => {
if (previewMissionId === missionId) {
// Toggle off
setPreviewMissionId(null);
setPreviewTree(null);
fetchingTreeMissionIdRef.current = null;
return;
}
setPreviewMissionId(missionId);
setLoadingTree(true);
fetchingTreeMissionIdRef.current = missionId;
try {
const tree = await getMissionTree(missionId);
// Race condition guard: only update if this is still the mission we want
if (fetchingTreeMissionIdRef.current !== missionId) {
return; // Another mission was requested, discard this response
}
if (tree && isRecord(tree)) {
setPreviewTree(convertTreeNode(tree as Record<string, unknown>));
} else {
setPreviewTree(null);
toast.error("No tree data available for this mission");
}
} catch {
// Race condition guard: only update if this is still the mission we want
if (fetchingTreeMissionIdRef.current !== missionId) {
return;
}
setPreviewTree(null);
toast.error("Failed to load tree");
} finally {
// Only clear loading if this is still the current fetch
if (fetchingTreeMissionIdRef.current === missionId) {
setLoadingTree(false);
}
}
}, [previewMissionId]);
const handleSort = (field: SortField) => {
if (sortField === field) {
setSortDirection(sortDirection === 'asc' ? 'desc' : 'asc');
setSortDirection(sortDirection === "asc" ? "desc" : "asc");
} else {
setSortField(field);
setSortDirection('desc');
setSortDirection("desc");
}
};
const filteredTasks = tasks.filter((task) => {
if (filter !== "all" && task.status !== filter) return false;
if (search && !task.task.toLowerCase().includes(search.toLowerCase()))
return false;
return true;
});
const filteredRuns = runs.filter((run) => {
if (filter !== "all" && run.status !== filter) return false;
if (search && !run.input_text.toLowerCase().includes(search.toLowerCase()))
return false;
return true;
});
const filteredMissions = useMemo(() => {
let filtered = missions.filter((mission) => {
const filtered = missions.filter((mission) => {
if (filter !== "all" && mission.status !== filter) return false;
const title = mission.title || "";
if (search && !title.toLowerCase().includes(search.toLowerCase()))
@@ -152,29 +206,30 @@ export default function HistoryPage() {
return filtered.sort((a, b) => {
let comparison = 0;
switch (sortField) {
case 'date':
comparison = new Date(b.updated_at).getTime() - new Date(a.updated_at).getTime();
case "date":
comparison =
new Date(b.updated_at).getTime() - new Date(a.updated_at).getTime();
break;
case 'status':
case "status":
comparison = a.status.localeCompare(b.status);
break;
case 'messages':
case "messages":
comparison = b.history.length - a.history.length;
break;
}
return sortDirection === 'asc' ? -comparison : comparison;
return sortDirection === "asc" ? -comparison : comparison;
});
}, [missions, filter, search, sortField, sortDirection]);
const hasData = filteredTasks.length > 0 || filteredRuns.length > 0 || filteredMissions.length > 0;
const hasData = filteredMissions.length > 0;
return (
<div className="p-6">
{/* Header */}
<div className="mb-6">
<h1 className="text-xl font-semibold text-white">History</h1>
<h1 className="text-xl font-semibold text-white">Agents</h1>
<p className="mt-1 text-sm text-white/50">
View all past and current tasks
Mission history and agent tree visualization
</p>
</div>
@@ -184,7 +239,7 @@ export default function HistoryPage() {
<Search className="absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-white/30" />
<input
type="text"
placeholder="Search tasks..."
placeholder="Search missions..."
value={search}
onChange={(e) => setSearch(e.target.value)}
className="w-full rounded-lg border border-white/[0.06] bg-white/[0.02] py-2.5 pl-10 pr-4 text-sm text-white placeholder-white/30 focus:border-indigo-500/50 focus:outline-none transition-colors"
@@ -210,128 +265,63 @@ export default function HistoryPage() {
</div>
{/* Content */}
{loading ? (
<div className="space-y-6">
{/* Shimmer for missions table */}
<div>
<div className="h-4 w-24 bg-white/[0.04] rounded mb-3 animate-pulse" />
<div className="rounded-xl bg-white/[0.02] border border-white/[0.04] overflow-hidden">
<table className="w-full">
<thead>
<tr className="border-b border-white/[0.04]">
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Status</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Mission</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Messages</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Updated</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Actions</th>
</tr>
</thead>
<tbody className="divide-y divide-white/[0.04]">
<ShimmerTableRow columns={5} />
<ShimmerTableRow columns={5} />
<ShimmerTableRow columns={5} />
</tbody>
</table>
</div>
</div>
</div>
) : !hasData ? (
<div className="flex flex-col items-center py-16 text-center">
<div className="flex h-16 w-16 items-center justify-center rounded-2xl bg-white/[0.02] mb-4">
<MessageSquare className="h-8 w-8 text-white/30" />
</div>
<p className="text-white/80">No history yet</p>
<p className="mt-2 text-sm text-white/40">
Start a conversation in the{" "}
<Link
href="/control"
className="text-indigo-400 hover:text-indigo-300"
>
Control
</Link>{" "}
page
</p>
</div>
) : (
<div className="space-y-6">
{/* Archived Runs - shown first for visibility */}
{filteredRuns.length > 0 && (
<div>
<h2 className="mb-3 text-xs font-medium uppercase tracking-wider text-white/40">
Recent Runs ({filteredRuns.length})
</h2>
<div className="rounded-xl bg-white/[0.02] border border-white/[0.04] overflow-hidden">
<table className="w-full">
<thead>
<tr className="border-b border-white/[0.04]">
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Status
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Input
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Created
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Cost
</th>
</tr>
</thead>
<tbody className="divide-y divide-white/[0.04]">
{filteredRuns.map((run) => {
const status = run.status as keyof typeof statusIcons;
const Icon = statusIcons[status] || Clock;
const config =
statusConfig[status] || statusConfig.pending;
return (
<tr
key={run.id}
className="group hover:bg-white/[0.02] transition-colors"
>
<td className="px-4 py-3">
<span
className={cn(
"inline-flex items-center gap-1.5 rounded-md px-2 py-1 text-[10px] font-medium",
config.bg,
config.color
)}
>
<Icon className="h-3 w-3" />
{run.status}
</span>
</td>
<td className="px-4 py-3">
<div className="flex items-center gap-2">
<p className="max-w-md truncate text-sm text-white/80">
{run.input_text}
</p>
<CopyButton text={run.input_text} showOnHover label="Copied input" />
</div>
</td>
<td className="px-4 py-3">
<RelativeTime
date={run.created_at}
className="text-xs text-white/40"
/>
</td>
<td className="px-4 py-3">
<span className="text-sm text-emerald-400 tabular-nums">
${(run.total_cost_cents / 100).toFixed(2)}
</span>
</td>
</tr>
);
})}
</tbody>
</table>
<div>
{loading ? (
<div className="space-y-6">
{/* Shimmer for missions table */}
<div>
<div className="h-4 w-24 bg-white/[0.04] rounded mb-3 animate-pulse" />
<div className="rounded-xl bg-white/[0.02] border border-white/[0.04] overflow-hidden">
<table className="w-full">
<thead>
<tr className="border-b border-white/[0.04]">
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Status
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Mission
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Messages
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Updated
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Actions
</th>
</tr>
</thead>
<tbody className="divide-y divide-white/[0.04]">
<ShimmerTableRow columns={5} />
<ShimmerTableRow columns={5} />
<ShimmerTableRow columns={5} />
</tbody>
</table>
</div>
</div>
</div>
)}
{/* Missions */}
{filteredMissions.length > 0 && (
<div>
) : !hasData ? (
<div className="flex flex-col items-center py-16 text-center">
<div className="flex h-16 w-16 items-center justify-center rounded-2xl bg-white/[0.02] mb-4">
<MessageSquare className="h-8 w-8 text-white/30" />
</div>
<p className="text-white/80">No history yet</p>
<p className="mt-2 text-sm text-white/40">
Start a conversation in the{" "}
<Link
href="/control"
className="text-indigo-400 hover:text-indigo-300"
>
Control
</Link>{" "}
page
</p>
</div>
) : (
<div className="space-y-6">
{/* Missions */}
<div>
<h2 className="mb-3 text-xs font-medium uppercase tracking-wider text-white/40">
Missions ({filteredMissions.length})
</h2>
@@ -342,7 +332,7 @@ export default function HistoryPage() {
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
<span className="flex items-center">
Status
<SortButton field="status" currentField={sortField} direction={sortDirection} onClick={() => handleSort('status')} />
<SortButton field="status" currentField={sortField} direction={sortDirection} onClick={() => handleSort("status")} />
</span>
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
@@ -351,13 +341,13 @@ export default function HistoryPage() {
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
<span className="flex items-center">
Messages
<SortButton field="messages" currentField={sortField} direction={sortDirection} onClick={() => handleSort('messages')} />
<SortButton field="messages" currentField={sortField} direction={sortDirection} onClick={() => handleSort("messages")} />
</span>
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
<span className="flex items-center">
Updated
<SortButton field="date" currentField={sortField} direction={sortDirection} onClick={() => handleSort('date')} />
<SortButton field="date" currentField={sortField} direction={sortDirection} onClick={() => handleSort("date")} />
</span>
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
@@ -417,9 +407,21 @@ export default function HistoryPage() {
{mission.status === "active" ? "Continue" : "View"}{" "}
<ArrowRight className="h-3 w-3" />
</Link>
<CopyButton
text={mission.id}
showOnHover
<button
onClick={() => handlePreviewTree(mission.id)}
className={cn(
"inline-flex items-center gap-1 text-xs transition-colors",
previewMissionId === mission.id
? "text-emerald-400 hover:text-emerald-300"
: "text-white/40 hover:text-white/60"
)}
title="View agent tree"
>
<Network className="h-3 w-3" />
</button>
<CopyButton
text={mission.id}
showOnHover
label="Copied mission ID"
className="opacity-0 group-hover:opacity-100"
/>
@@ -432,104 +434,72 @@ export default function HistoryPage() {
</table>
</div>
</div>
)}
{/* Active Tasks */}
{filteredTasks.length > 0 && (
<div>
<h2 className="mb-3 text-xs font-medium uppercase tracking-wider text-white/40">
Active Tasks ({filteredTasks.length})
</h2>
<div className="rounded-xl bg-white/[0.02] border border-white/[0.04] overflow-hidden">
<table className="w-full">
<thead>
<tr className="border-b border-white/[0.04]">
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Status
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Task
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Model
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Iterations
</th>
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
Actions
</th>
</tr>
</thead>
<tbody className="divide-y divide-white/[0.04]">
{filteredTasks.map((task) => {
const Icon = statusIcons[task.status];
const config = statusConfig[task.status];
return (
<tr
key={task.id}
className="group hover:bg-white/[0.02] transition-colors"
>
<td className="px-4 py-3">
<span
className={cn(
"inline-flex items-center gap-1.5 rounded-md px-2 py-1 text-[10px] font-medium",
config.bg,
config.color
)}
>
<Icon
className={cn(
"h-3 w-3",
task.status === "running" && "animate-spin"
)}
/>
{task.status}
</span>
</td>
<td className="px-4 py-3">
<div className="flex items-center gap-2">
<p className="max-w-md truncate text-sm text-white/80">
{task.task}
</p>
<CopyButton text={task.task} showOnHover label="Copied task" />
</div>
</td>
<td className="px-4 py-3">
<span className="text-xs text-white/40 font-mono">
{task.model.split("/").pop()}
</span>
</td>
<td className="px-4 py-3">
<span className="text-sm text-white tabular-nums">
{task.iterations}
</span>
</td>
<td className="px-4 py-3">
<div className="flex items-center gap-2">
<Link
href={`/control?task=${task.id}`}
className="inline-flex items-center gap-1 text-xs text-indigo-400 hover:text-indigo-300 transition-colors"
>
View <ArrowRight className="h-3 w-3" />
</Link>
<CopyButton
text={task.id}
showOnHover
label="Copied task ID"
className="opacity-0 group-hover:opacity-100"
/>
</div>
</td>
</tr>
);
})}
</tbody>
</table>
</div>
</div>
)}
</div>
{/* Agent Tree Modal */}
{previewMissionId && (
<div
className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm"
onClick={() => {
setPreviewMissionId(null);
setPreviewTree(null);
fetchingTreeMissionIdRef.current = null;
}}
>
<div
className="relative w-[90vw] h-[85vh] max-w-6xl rounded-2xl bg-[#0a0a0a] border border-white/[0.08] shadow-2xl overflow-hidden flex flex-col"
onClick={(e) => e.stopPropagation()}
>
{/* Modal Header */}
<div className="flex items-center justify-between px-6 py-4 border-b border-white/[0.06]">
<div className="flex items-center gap-3">
<div className="flex h-10 w-10 items-center justify-center rounded-xl bg-emerald-500/10">
<Network className="h-5 w-5 text-emerald-400" />
</div>
<div>
<h2 className="text-lg font-semibold text-white">Agent Tree</h2>
<p className="text-xs text-white/40">
{missions.find((m) => m.id === previewMissionId)?.title?.slice(0, 50) || "Mission visualization"}
</p>
</div>
</div>
<button
onClick={() => {
setPreviewMissionId(null);
setPreviewTree(null);
fetchingTreeMissionIdRef.current = null;
}}
className="flex h-10 w-10 items-center justify-center rounded-xl text-white/40 hover:bg-white/[0.04] hover:text-white/70 transition-colors"
>
<X className="h-5 w-5" />
</button>
</div>
{/* Modal Content */}
<div className="flex-1 min-h-0">
{loadingTree ? (
<div className="flex flex-col items-center justify-center h-full">
<Loader className="h-8 w-8 animate-spin text-emerald-400 mb-3" />
<p className="text-sm text-white/60">Loading agent tree...</p>
</div>
) : previewTree ? (
<AgentTreeCanvas tree={previewTree} className="w-full h-full" />
) : (
<div className="flex flex-col items-center justify-center h-full text-center p-8">
<div className="flex h-20 w-20 items-center justify-center rounded-2xl bg-white/[0.02] mb-4">
<Network className="h-10 w-10 text-white/20" />
</div>
<p className="text-lg font-medium text-white/60">No tree data available</p>
<p className="text-sm text-white/30 mt-2 max-w-md">
Agent tree data is captured during mission execution.
This mission may have been completed before tree tracking was enabled.
</p>
</div>
)}
</div>
</div>
</div>
)}
</div>

View File

@@ -94,6 +94,8 @@ interface AgentTreeCanvasProps {
onSelectNode?: (node: AgentNode | null) => void;
selectedNodeId?: string | null;
className?: string;
/** Compact mode for embedded panels - hides minimap and details panel */
compact?: boolean;
}
/**
@@ -527,6 +529,7 @@ export function AgentTreeCanvas({
onSelectNode,
selectedNodeId,
className,
compact = false,
}: AgentTreeCanvasProps) {
const containerRef = useRef<HTMLDivElement>(null);
const [dimensions, setDimensions] = useState({ width: 800, height: 600 });
@@ -566,8 +569,8 @@ export function AgentTreeCanvas({
useEffect(() => {
if (layout.width > 0 && layout.height > 0 && dimensions.width > 0 && dimensions.height > 0) {
// Calculate zoom to fit the tree in view with some padding
const paddingX = 80;
const paddingY = 80;
const paddingX = compact ? 40 : 80;
const paddingY = compact ? 40 : 80;
const availableWidth = dimensions.width - paddingX;
const availableHeight = dimensions.height - paddingY;
@@ -575,8 +578,8 @@ export function AgentTreeCanvas({
const scaleY = availableHeight / layout.height;
// Use the smaller scale to fit both dimensions
// Cap between 0.4 (minimum readable) and 1 (don't zoom in past 100%)
const MIN_ZOOM = 0.4;
// Cap between 0.3/0.4 (minimum readable) and 1 (don't zoom in past 100%)
const MIN_ZOOM = compact ? 0.3 : 0.4;
const fitZoom = Math.max(MIN_ZOOM, Math.min(1, Math.min(scaleX, scaleY)));
// Calculate pan to center horizontally, start from top with padding
@@ -587,12 +590,12 @@ export function AgentTreeCanvas({
const scaledHeight = layout.height * fitZoom;
const centerY = scaledHeight < availableHeight
? (dimensions.height - scaledHeight) / 2
: 30; // Start near top if tree is too tall
: compact ? 20 : 30; // Start near top if tree is too tall
setZoom(fitZoom);
setPan({ x: centerX, y: centerY });
}
}, [layout.width, layout.height, dimensions.width, dimensions.height]);
}, [layout.width, layout.height, dimensions.width, dimensions.height, compact]);
// Pan handlers
const handleMouseDown = useCallback((e: React.MouseEvent) => {
@@ -690,34 +693,40 @@ export function AgentTreeCanvas({
</g>
</svg>
{/* Mini-map */}
<TreeMiniMap tree={tree} />
{/* Mini-map - hidden in compact mode */}
{!compact && <TreeMiniMap tree={tree} />}
{/* Zoom controls */}
<div className="absolute bottom-4 right-4 flex gap-2">
<div className={cn("absolute flex gap-1", compact ? "bottom-2 right-2" : "bottom-4 right-4 gap-2")}>
<button
onClick={() => setZoom(z => Math.min(2, z * 1.15))}
className="w-8 h-8 rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors flex items-center justify-center"
className={cn(
"rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors flex items-center justify-center",
compact ? "w-6 h-6 text-xs" : "w-8 h-8"
)}
>
+
</button>
<button
onClick={() => setZoom(z => Math.max(0.3, z / 1.15))}
className="w-8 h-8 rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors flex items-center justify-center"
className={cn(
"rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors flex items-center justify-center",
compact ? "w-6 h-6 text-xs" : "w-8 h-8"
)}
>
</button>
<button
onClick={() => {
// Fit to view with minimum zoom for readability
const paddingX = 80;
const paddingY = 80;
const paddingX = compact ? 40 : 80;
const paddingY = compact ? 40 : 80;
const availableWidth = dimensions.width - paddingX;
const availableHeight = dimensions.height - paddingY;
const scaleX = availableWidth / layout.width;
const scaleY = availableHeight / layout.height;
const MIN_ZOOM = 0.4;
const MIN_ZOOM = compact ? 0.3 : 0.4;
const fitZoom = Math.max(MIN_ZOOM, Math.min(1, Math.min(scaleX, scaleY)));
const scaledWidth = layout.width * fitZoom;
@@ -726,26 +735,31 @@ export function AgentTreeCanvas({
const scaledHeight = layout.height * fitZoom;
const centerY = scaledHeight < availableHeight
? (dimensions.height - scaledHeight) / 2
: 30;
: 20;
setZoom(fitZoom);
setPan({ x: centerX, y: centerY });
}}
className="px-2 h-8 rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors text-xs"
className={cn(
"rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors text-xs",
compact ? "px-1.5 h-6" : "px-2 h-8"
)}
>
Fit
</button>
</div>
{/* Node details panel */}
<AnimatePresence>
{selectedNode && (
<NodeDetailsPanel
node={selectedNode}
onClose={() => onSelectNode?.(null)}
/>
)}
</AnimatePresence>
{/* Node details panel - hidden in compact mode */}
{!compact && (
<AnimatePresence>
{selectedNode && (
<NodeDetailsPanel
node={selectedNode}
onClose={() => onSelectNode?.(null)}
/>
)}
</AnimatePresence>
)}
</div>
);
}

View File

@@ -10,7 +10,6 @@ import {
LayoutDashboard,
MessageSquare,
Network,
History,
Terminal,
Settings,
Plug,
@@ -21,11 +20,10 @@ import {
const navigation = [
{ name: 'Overview', href: '/', icon: LayoutDashboard },
{ name: 'Control', href: '/control', icon: MessageSquare },
{ name: 'Agents', href: '/agents', icon: Network },
{ name: 'Modules', href: '/modules', icon: Plug },
{ name: 'Mission', href: '/control', icon: MessageSquare },
{ name: 'Agents', href: '/history', icon: Network },
{ name: 'Console', href: '/console', icon: Terminal },
{ name: 'History', href: '/history', icon: History },
{ name: 'Modules', href: '/modules', icon: Plug },
{ name: 'Settings', href: '/settings', icon: Settings },
];

View File

@@ -121,3 +121,6 @@ export function ConfirmDialog({

View File

@@ -55,3 +55,6 @@ export function CopyButton({ text, className, label = 'Copied!', showOnHover = t

View File

@@ -51,3 +51,6 @@ export function RelativeTime({ date, className }: RelativeTimeProps) {

View File

@@ -87,3 +87,6 @@ export function ShimmerText({ lines = 3, className }: ShimmerProps & { lines?: n

View File

@@ -748,3 +748,61 @@ export async function uploadFile(
return res.json();
}
// ==================== Models ====================
export interface ModelsResponse {
models: string[];
count: number;
}
// List available models
export async function listModels(tier?: string): Promise<ModelsResponse> {
const params = tier ? `?tier=${encodeURIComponent(tier)}` : "";
const res = await apiFetch(`/api/models${params}`);
if (!res.ok) throw new Error("Failed to fetch models");
return res.json();
}
// Friendly display names for models
const MODEL_DISPLAY_NAMES: Record<string, string> = {
// OpenAI - simplified (newest first)
"openai/gpt-5.2-pro": "gpt-5.2-pro",
"openai/gpt-5.2": "gpt-5.2",
"openai/gpt-5.2-chat": "gpt-5.2",
"openai/gpt-4.1-mini": "gpt-4-mini",
"openai/gpt-4.1": "gpt-4",
"openai/o1": "o1",
"openai/o3-mini-high": "o3-mini",
// Anthropic - simplified
"anthropic/claude-sonnet-4.5": "4.5-sonnet",
"anthropic/claude-opus-4.5": "4.5-opus",
"anthropic/claude-haiku-4.5": "4.5-haiku",
// Google
"google/gemini-3-flash-preview": "gemini-3-flash",
"google/gemini-3-pro-image-preview": "gemini-3-pro",
// DeepSeek
"deepseek/deepseek-r1": "deepseek-r1",
"deepseek/deepseek-chat-v3-0324": "deepseek-v3",
// Qwen
"qwen/qwq-32b": "qwq-32b",
"qwen/qwen-2.5-72b-instruct": "qwen-72b",
"qwen/qwen3-next-80b-a3b-thinking": "qwen3-thinking",
// Mistral
"mistralai/mistral-small-24b-instruct-2501": "mistral-small",
"mistralai/mistral-medium-3.1": "mistral-medium",
"mistralai/mistral-large-2512": "mistral-large",
// Meta
"meta-llama/llama-3.1-405b": "llama-405b",
"meta-llama/llama-3.2-90b-vision-instruct": "llama-90b-vision",
"meta-llama/llama-3.3-70b-instruct:free": "llama-70b (free)",
};
// Get display name for a model
export function getModelDisplayName(modelId: string): string {
if (MODEL_DISPLAY_NAMES[modelId]) {
return MODEL_DISPLAY_NAMES[modelId];
}
// Fallback: strip provider prefix
return modelId.includes("/") ? modelId.split("/").pop()! : modelId;
}
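// Illustrative usage (not part of the original file): building picker
// options from the endpoint above. The "flagship" tier string is an
// assumption — valid tier values are defined by the backend.
async function loadModelOptions(): Promise<{ id: string; label: string }[]> {
  const { models } = await listModels("flagship"); // tier filter is optional
  // e.g. "anthropic/claude-opus-4.5" -> "4.5-opus"
  return models.map((id) => ({ id, label: getModelDisplayName(id) }));
}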

View File

@@ -56,3 +56,6 @@ export function getRuntimeTaskDefaults(): { model?: string; budget_cents?: numbe

View File

@@ -38,3 +38,6 @@ export function formatRelativeTime(date: Date): string {

View File

@@ -11,14 +11,35 @@ enum MissionStatus: String, Codable, CaseIterable {
case active
case completed
case failed
case interrupted
case blocked
case notFeasible = "not_feasible"
var statusType: StatusType {
switch self {
case .active: return .active
case .completed: return .completed
case .failed: return .failed
case .interrupted: return .interrupted
case .blocked: return .blocked
case .notFeasible: return .failed
}
}
var displayLabel: String {
switch self {
case .active: return "Active"
case .completed: return "Completed"
case .failed: return "Failed"
case .interrupted: return "Interrupted"
case .blocked: return "Blocked"
case .notFeasible: return "Not Feasible"
}
}
var canResume: Bool {
self == .interrupted || self == .blocked
}
}
struct MissionHistoryEntry: Codable, Identifiable {
@@ -35,9 +56,12 @@ struct Mission: Codable, Identifiable, Hashable {
let id: String
var status: MissionStatus
let title: String?
let modelOverride: String?
let history: [MissionHistoryEntry]
let createdAt: String
let updatedAt: String
let interruptedAt: String?
let resumable: Bool
func hash(into hasher: inout Hasher) {
hasher.combine(id)
@@ -48,9 +72,24 @@ struct Mission: Codable, Identifiable, Hashable {
}
enum CodingKeys: String, CodingKey {
case id, status, title, history
case id, status, title, history, resumable
case modelOverride = "model_override"
case createdAt = "created_at"
case updatedAt = "updated_at"
case interruptedAt = "interrupted_at"
}
init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self)
id = try container.decode(String.self, forKey: .id)
status = try container.decode(MissionStatus.self, forKey: .status)
title = try container.decodeIfPresent(String.self, forKey: .title)
modelOverride = try container.decodeIfPresent(String.self, forKey: .modelOverride)
history = try container.decode([MissionHistoryEntry].self, forKey: .history)
createdAt = try container.decode(String.self, forKey: .createdAt)
updatedAt = try container.decode(String.self, forKey: .updatedAt)
interruptedAt = try container.decodeIfPresent(String.self, forKey: .interruptedAt)
resumable = try container.decodeIfPresent(Bool.self, forKey: .resumable) ?? false
}
var displayTitle: String {
@@ -60,11 +99,20 @@ struct Mission: Codable, Identifiable, Hashable {
return "Untitled Mission"
}
var displayModel: String? {
guard let model = modelOverride else { return nil }
return model.split(separator: "/").last.map(String.init)
}
var updatedDate: Date? {
let formatter = ISO8601DateFormatter()
formatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds]
return formatter.date(from: updatedAt) ?? ISO8601DateFormatter().date(from: updatedAt)
}
var canResume: Bool {
resumable && status.canResume
}
}
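// Illustrative (not in the original file): `canResume` requires both the
// backend `resumable` flag and a resumable status —
//   status == .interrupted, resumable == true  -> canResume == true
//   status == .failed,      resumable == true  -> canResume == false
//   status == .blocked,     resumable == false -> canResume == false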
enum TaskStatus: String, Codable, CaseIterable {
@@ -101,6 +149,66 @@ struct TaskState: Codable, Identifiable {
}
}
// MARK: - Parallel Execution
struct RunningMissionInfo: Codable, Identifiable {
let missionId: String
let modelOverride: String?
let state: String
let queueLen: Int
let historyLen: Int
let secondsSinceActivity: Int
let expectedDeliverables: Int
var id: String { missionId }
enum CodingKeys: String, CodingKey {
case missionId = "mission_id"
case modelOverride = "model_override"
case state
case queueLen = "queue_len"
case historyLen = "history_len"
case secondsSinceActivity = "seconds_since_activity"
case expectedDeliverables = "expected_deliverables"
}
// Memberwise initializer for previews and testing
init(missionId: String, modelOverride: String?, state: String, queueLen: Int, historyLen: Int, secondsSinceActivity: Int, expectedDeliverables: Int) {
self.missionId = missionId
self.modelOverride = modelOverride
self.state = state
self.queueLen = queueLen
self.historyLen = historyLen
self.secondsSinceActivity = secondsSinceActivity
self.expectedDeliverables = expectedDeliverables
}
var isRunning: Bool {
state == "running" || state == "waiting_for_tool"
}
var isStalled: Bool {
isRunning && secondsSinceActivity > 60
}
var displayModel: String {
guard let model = modelOverride else { return "Default" }
return model.split(separator: "/").last.map(String.init) ?? model
}
}
struct ParallelConfig: Codable {
let maxParallelMissions: Int
let runningCount: Int
enum CodingKeys: String, CodingKey {
case maxParallelMissions = "max_parallel_missions"
case runningCount = "running_count"
}
}
// MARK: - Runs
struct Run: Codable, Identifiable {
let id: String
let createdAt: String

View File

@@ -98,6 +98,32 @@ final class APIService {
let _: EmptyResponse = try await post("/api/control/missions/\(id)/status", body: StatusRequest(status: status.rawValue))
}
func resumeMission(id: String) async throws -> Mission {
try await post("/api/control/missions/\(id)/resume", body: EmptyBody())
}
func cancelMission(id: String) async throws {
let _: EmptyResponse = try await post("/api/control/missions/\(id)/cancel", body: EmptyBody())
}
// MARK: - Parallel Missions
func getRunningMissions() async throws -> [RunningMissionInfo] {
try await get("/api/control/running")
}
func startMissionParallel(id: String, content: String, model: String? = nil) async throws {
struct ParallelRequest: Encodable {
let content: String
let model: String?
}
let _: EmptyResponse = try await post("/api/control/missions/\(id)/parallel", body: ParallelRequest(content: content, model: model))
}
func getParallelConfig() async throws -> ParallelConfig {
try await get("/api/control/parallel/config")
}
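// Illustrative usage (not in the original file), e.g. from a view model —
// start a parallel mission only when capacity allows, then refresh:
//
//     let config = try await api.getParallelConfig()
//     if config.runningCount < config.maxParallelMissions {
//         try await api.startMissionParallel(id: mission.id, content: prompt)
//         runningMissions = try await api.getRunningMissions()
//     }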
// MARK: - Control
func sendMessage(content: String) async throws -> (id: String, queued: Bool) {

View File

@@ -0,0 +1,269 @@
//
// RunningMissionsBar.swift
// OpenAgentDashboard
//
// Compact horizontal bar showing currently running missions
// Allows switching between parallel missions
//
import SwiftUI
struct RunningMissionsBar: View {
let runningMissions: [RunningMissionInfo]
let currentMission: Mission?
let viewingMissionId: String?
let onSelectMission: (String) -> Void
let onCancelMission: (String) -> Void
let onRefresh: () -> Void
var body: some View {
ScrollView(.horizontal, showsIndicators: false) {
HStack(spacing: 8) {
// Header with refresh button
headerView
// Current mission if not in running list
if let mission = currentMission,
!runningMissions.contains(where: { $0.missionId == mission.id }) {
currentMissionChip(mission)
}
// Running missions
ForEach(runningMissions) { mission in
runningMissionChip(mission)
}
}
.padding(.horizontal, 16)
.padding(.vertical, 8)
}
.background(.ultraThinMaterial)
}
// MARK: - Header
private var headerView: some View {
HStack(spacing: 6) {
Image(systemName: "square.stack.3d.up")
.font(.system(size: 12, weight: .medium))
.foregroundStyle(Theme.textTertiary)
Text("Running")
.font(.caption.weight(.medium))
.foregroundStyle(Theme.textTertiary)
Text("(\(runningMissions.count))")
.font(.caption)
.foregroundStyle(Theme.textMuted)
Button(action: onRefresh) {
Image(systemName: "arrow.clockwise")
.font(.system(size: 10, weight: .medium))
.foregroundStyle(Theme.textMuted)
}
.padding(4)
.contentShape(Rectangle())
}
}
// MARK: - Current Mission Chip
private func currentMissionChip(_ mission: Mission) -> some View {
let isViewing = viewingMissionId == mission.id
return Button {
onSelectMission(mission.id)
} label: {
HStack(spacing: 6) {
// Status dot
Circle()
.fill(Theme.success)
.frame(width: 6, height: 6)
// Model name
Text(mission.displayModel ?? "Default")
.font(.caption.weight(.medium))
.foregroundStyle(Theme.textPrimary)
.lineLimit(1)
// Mission ID
Text(String(mission.id.prefix(8)))
.font(.system(size: 9).monospaced())
.foregroundStyle(Theme.textMuted)
// Selection indicator
if isViewing {
Image(systemName: "checkmark")
.font(.system(size: 9, weight: .bold))
.foregroundStyle(Theme.accent)
}
}
.padding(.horizontal, 10)
.padding(.vertical, 6)
.background(isViewing ? Theme.accent.opacity(0.15) : Color.white.opacity(0.05))
.clipShape(RoundedRectangle(cornerRadius: 8, style: .continuous))
.overlay(
RoundedRectangle(cornerRadius: 8, style: .continuous)
.stroke(isViewing ? Theme.accent.opacity(0.3) : Theme.border, lineWidth: 1)
)
}
.buttonStyle(.plain)
}
// MARK: - Running Mission Chip
private func runningMissionChip(_ mission: RunningMissionInfo) -> some View {
let isViewing = viewingMissionId == mission.missionId
let isStalled = mission.isStalled
// Only show severely stalled state for running missions
let isSeverelyStalled = mission.isRunning && mission.secondsSinceActivity > 120
let borderColor: Color = {
if isViewing { return Theme.accent.opacity(0.3) }
if isSeverelyStalled { return Theme.error.opacity(0.3) }
if isStalled { return Theme.warning.opacity(0.3) }
return Theme.border
}()
let backgroundColor: Color = {
if isViewing { return Theme.accent.opacity(0.15) }
if isSeverelyStalled { return Theme.error.opacity(0.1) }
if isStalled { return Theme.warning.opacity(0.1) }
return Color.white.opacity(0.05)
}()
return HStack(spacing: 6) {
// Tap area for selection
Button {
onSelectMission(mission.missionId)
} label: {
HStack(spacing: 6) {
// Status dot with animation
Circle()
.fill(statusColor(for: mission))
.frame(width: 6, height: 6)
.overlay {
if mission.isRunning && !isStalled {
Circle()
.stroke(statusColor(for: mission).opacity(0.5), lineWidth: 1.5)
.frame(width: 10, height: 10)
.opacity(0.6)
}
}
// Model name
Text(mission.displayModel)
.font(.caption.weight(.medium))
.foregroundStyle(Theme.textPrimary)
.lineLimit(1)
// Mission ID
Text(String(mission.missionId.prefix(8)))
.font(.system(size: 9).monospaced())
.foregroundStyle(Theme.textMuted)
// Stalled indicator
if isStalled {
HStack(spacing: 2) {
Image(systemName: "exclamationmark.triangle.fill")
.font(.system(size: 8))
Text("\(mission.secondsSinceActivity)s")
.font(.system(size: 9).monospaced())
}
.foregroundStyle(isSeverelyStalled ? Theme.error : Theme.warning)
}
// Selection indicator
if isViewing {
Image(systemName: "checkmark")
.font(.system(size: 9, weight: .bold))
.foregroundStyle(Theme.accent)
}
}
}
.buttonStyle(.plain)
// Cancel button
Button {
onCancelMission(mission.missionId)
} label: {
Image(systemName: "xmark")
.font(.system(size: 9, weight: .medium))
.foregroundStyle(Theme.textMuted)
.frame(width: 18, height: 18)
.background(Color.white.opacity(0.05))
.clipShape(Circle())
}
.buttonStyle(.plain)
}
.padding(.leading, 10)
.padding(.trailing, 6)
.padding(.vertical, 6)
.background(backgroundColor)
.clipShape(RoundedRectangle(cornerRadius: 8, style: .continuous))
.overlay(
RoundedRectangle(cornerRadius: 8, style: .continuous)
.stroke(borderColor, lineWidth: 1)
)
}
// MARK: - Helpers
private func statusColor(for mission: RunningMissionInfo) -> Color {
// Only show stalled/severely-stalled states for running missions
if mission.isRunning && mission.secondsSinceActivity > 120 {
return Theme.error
} else if mission.isStalled {
return Theme.warning
} else if mission.isRunning {
return Theme.success
} else {
return Theme.warning
}
}
}
// MARK: - Preview
#Preview {
VStack(spacing: 0) {
RunningMissionsBar(
runningMissions: [
RunningMissionInfo(
missionId: "abc12345-6789-0000-0000-000000000001",
modelOverride: "deepseek/deepseek-v3.2",
state: "running",
queueLen: 0,
historyLen: 5,
secondsSinceActivity: 15,
expectedDeliverables: 0
),
RunningMissionInfo(
missionId: "def12345-6789-0000-0000-000000000002",
modelOverride: "qwen/qwen3-235b",
state: "running",
queueLen: 1,
historyLen: 3,
secondsSinceActivity: 75,
expectedDeliverables: 0
),
RunningMissionInfo(
missionId: "ghi12345-6789-0000-0000-000000000003",
modelOverride: nil,
state: "running",
queueLen: 0,
historyLen: 10,
secondsSinceActivity: 150,
expectedDeliverables: 0
)
],
currentMission: nil,
viewingMissionId: "abc12345-6789-0000-0000-000000000001",
onSelectMission: { _ in },
onCancelMission: { _ in },
onRefresh: {}
)
Spacer()
}
.background(Theme.backgroundPrimary)
}

View File

@@ -19,6 +19,8 @@ enum StatusType {
case connected
case disconnected
case connecting
case interrupted
case blocked
var color: Color {
switch self {
@@ -32,6 +34,8 @@ enum StatusType {
return Theme.error
case .cancelled, .disconnected:
return Theme.textTertiary
case .interrupted, .blocked:
return Theme.warning
}
}
@@ -52,6 +56,8 @@ enum StatusType {
case .connected: return "Connected"
case .disconnected: return "Disconnected"
case .connecting: return "Connecting"
case .interrupted: return "Interrupted"
case .blocked: return "Blocked"
}
}
@@ -66,6 +72,8 @@ enum StatusType {
case .idle: return "moon.fill"
case .connected: return "wifi"
case .disconnected: return "wifi.slash"
case .interrupted: return "pause.circle.fill"
case .blocked: return "exclamationmark.triangle.fill"
}
}

View File

@@ -21,6 +21,15 @@ struct ControlView: View {
@State private var isAtBottom = true
@State private var copiedMessageId: String?
// Parallel missions state
@State private var runningMissions: [RunningMissionInfo] = []
@State private var viewingMissionId: String?
@State private var showRunningMissions = false
@State private var pollingTask: Task<Void, Never>?
// Track pending fetch to prevent race conditions
@State private var fetchingMissionId: String?
@FocusState private var isInputFocused: Bool
private let api = APIService.shared
@@ -36,6 +45,11 @@ struct ControlView: View {
backgroundGlows
VStack(spacing: 0) {
// Running missions bar (when there are parallel missions)
if showRunningMissions && (!runningMissions.isEmpty || currentMission != nil) {
runningMissionsBar
}
// Messages
messagesView
@@ -76,6 +90,26 @@ struct ControlView: View {
}
}
ToolbarItem(placement: .topBarLeading) {
// Running missions toggle
Button {
withAnimation(.easeInOut(duration: 0.2)) {
showRunningMissions.toggle()
}
HapticService.selectionChanged()
} label: {
HStack(spacing: 4) {
Image(systemName: "square.stack.3d.up")
.font(.system(size: 14))
if !runningMissions.isEmpty {
Text("\(runningMissions.count)")
.font(.caption2.weight(.semibold))
}
}
.foregroundStyle(showRunningMissions ? Theme.accent : Theme.textSecondary)
}
}
ToolbarItem(placement: .topBarTrailing) {
Menu {
Button {
@@ -87,6 +121,15 @@ struct ControlView: View {
if let mission = currentMission {
Divider()
// Resume button for interrupted/blocked missions
if mission.canResume {
Button {
Task { await resumeMission() }
} label: {
Label("Resume Mission", systemImage: "play.circle")
}
}
Button {
Task { await setMissionStatus(.completed) }
} label: {
@@ -99,7 +142,7 @@ struct ControlView: View {
Label("Mark Failed", systemImage: "xmark.circle")
}
if mission.status != .active {
if mission.status != .active && !mission.canResume {
Button {
Task { await setMissionStatus(.active) }
} label: {
@@ -117,10 +160,22 @@ struct ControlView: View {
// Check if we're being opened with a specific mission from History
if let pendingId = nav.consumePendingMission() {
await loadMission(id: pendingId)
viewingMissionId = pendingId
} else {
await loadCurrentMission()
viewingMissionId = currentMission?.id
}
// Fetch initial running missions
await refreshRunningMissions()
// Auto-show bar if there are multiple running missions
if runningMissions.count > 1 {
showRunningMissions = true
}
startStreaming()
startPollingRunningMissions()
}
.onChange(of: nav.pendingMissionId) { _, newId in
// Handle navigation from History while Control is already visible
@@ -128,15 +183,42 @@ struct ControlView: View {
nav.pendingMissionId = nil
Task {
await loadMission(id: missionId)
viewingMissionId = missionId
}
}
}
.onChange(of: currentMission?.id) { _, newId in
// Sync viewingMissionId with currentMission when it changes
if viewingMissionId == nil, let id = newId {
viewingMissionId = id
}
}
.onDisappear {
streamTask?.cancel()
pollingTask?.cancel()
}
}
// MARK: - Running Missions Bar
private var runningMissionsBar: some View {
RunningMissionsBar(
runningMissions: runningMissions,
currentMission: currentMission,
viewingMissionId: viewingMissionId,
onSelectMission: { missionId in
Task { await switchToMission(id: missionId) }
},
onCancelMission: { missionId in
Task { await cancelMission(id: missionId) }
},
onRefresh: {
Task { await refreshRunningMissions() }
}
)
.transition(.move(edge: .top).combined(with: .opacity))
}
// MARK: - Background
private var backgroundGlows: some View {
@@ -448,6 +530,7 @@ struct ControlView: View {
do {
if let mission = try await api.getCurrentMission() {
currentMission = mission
viewingMissionId = mission.id
messages = mission.history.enumerated().map { index, entry in
ChatMessage(
id: "\(mission.id)-\(index)",
@@ -467,28 +550,40 @@ struct ControlView: View {
}
private func loadMission(id: String) async {
// Set target immediately for race condition tracking
fetchingMissionId = id
isLoading = true
defer { isLoading = false }
do {
let missions = try await api.listMissions()
if let mission = missions.first(where: { $0.id == id }) {
currentMission = mission
messages = mission.history.enumerated().map { index, entry in
ChatMessage(
id: "\(mission.id)-\(index)",
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
content: entry.content
)
}
HapticService.success()
// Scroll to bottom after loading
DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
shouldScrollToBottom = true
}
let mission = try await api.getMission(id: id)
// Race condition guard: only update if this is still the mission we want
guard fetchingMissionId == id else {
return // Another mission was requested, discard this response
}
currentMission = mission
viewingMissionId = mission.id
messages = mission.history.enumerated().map { index, entry in
ChatMessage(
id: "\(mission.id)-\(index)",
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
content: entry.content
)
}
isLoading = false
HapticService.success()
// Scroll to bottom after loading
DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
shouldScrollToBottom = true
}
} catch {
// Race condition guard
guard fetchingMissionId == id else { return }
isLoading = false
print("Failed to load mission: \(error)")
}
}
@@ -497,7 +592,19 @@ struct ControlView: View {
do {
let mission = try await api.createMission()
currentMission = mission
viewingMissionId = mission.id
messages = []
// Refresh running missions to show the new mission
await refreshRunningMissions()
// Show the bar when creating new missions
if !showRunningMissions && !runningMissions.isEmpty {
withAnimation(.easeInOut(duration: 0.2)) {
showRunningMissions = true
}
}
HapticService.success()
} catch {
print("Failed to create mission: \(error)")
@@ -518,6 +625,33 @@ struct ControlView: View {
}
}
private func resumeMission() async {
guard let mission = currentMission, mission.canResume else { return }
do {
let resumed = try await api.resumeMission(id: mission.id)
currentMission = resumed
viewingMissionId = resumed.id
// Reload messages to get the resume prompt
messages = resumed.history.enumerated().map { index, entry in
ChatMessage(
id: "\(resumed.id)-\(index)",
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
content: entry.content
)
}
// Refresh running missions
await refreshRunningMissions()
HapticService.success()
shouldScrollToBottom = true
} catch {
print("Failed to resume mission: \(error)")
HapticService.error()
}
}
private func sendMessage() {
let content = inputText.trimmingCharacters(in: .whitespacesAndNewlines)
guard !content.isEmpty else { return }
@@ -553,25 +687,119 @@ struct ControlView: View {
}
}
// MARK: - Parallel Missions
private func refreshRunningMissions() async {
do {
runningMissions = try await api.getRunningMissions()
} catch {
print("Failed to refresh running missions: \(error)")
}
}
private func startPollingRunningMissions() {
pollingTask = Task {
while !Task.isCancelled {
try? await Task.sleep(for: .seconds(3))
guard !Task.isCancelled else { break }
await refreshRunningMissions()
}
}
}
private func switchToMission(id: String) async {
guard id != viewingMissionId else { return }
// Set the target mission ID immediately for race condition tracking
viewingMissionId = id
fetchingMissionId = id
isLoading = true
do {
// Load the mission from API
let mission = try await api.getMission(id: id)
// Race condition guard: only update if this is still the mission we want
guard fetchingMissionId == id else {
return // Another mission was requested, discard this response
}
// Parallel missions just reload history; otherwise treat this as the main mission and update currentMission
if runningMissions.contains(where: { $0.missionId == id }) {
// This is a parallel mission - just load its history
messages = mission.history.enumerated().map { index, entry in
ChatMessage(
id: "\(mission.id)-\(index)",
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
content: entry.content
)
}
} else {
// This is the main mission - load it fully
currentMission = mission
messages = mission.history.enumerated().map { index, entry in
ChatMessage(
id: "\(mission.id)-\(index)",
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
content: entry.content
)
}
}
isLoading = false
HapticService.selectionChanged()
shouldScrollToBottom = true
} catch {
// Race condition guard: only show error if this is still the mission we want
guard fetchingMissionId == id else { return }
isLoading = false
print("Failed to switch mission: \(error)")
HapticService.error()
}
}
private func cancelMission(id: String) async {
do {
try await api.cancelMission(id: id)
// Refresh running missions
await refreshRunningMissions()
// If we were viewing this mission, switch to current
if viewingMissionId == id {
if let currentId = currentMission?.id {
await switchToMission(id: currentId)
}
}
HapticService.success()
} catch {
print("Failed to cancel mission: \(error)")
HapticService.error()
}
}
private func handleStreamEvent(type: String, data: [String: Any]) {
// Filter events by mission_id - only show events for the current mission
// Filter events by mission_id - only show events for the mission we're viewing
// This prevents cross-mission contamination when parallel missions are running
let eventMissionId = data["mission_id"] as? String
let currentMissionId = currentMission?.id
let viewingId = viewingMissionId
let currentId = currentMission?.id
// Only allow status events from any mission (for global state)
// All other events must match the current mission
// All other events must match the mission we're viewing
if type != "status" {
if let eventId = eventMissionId {
// Event has a mission_id - must match current mission
if eventId != currentMissionId {
// Event has a mission_id - must match viewing mission
if eventId != viewingId {
return // Skip events from other missions
}
} else if currentMissionId != nil {
} else if viewingId != nil && viewingId != currentId {
// Event has NO mission_id (from main session)
// This is fine if we're on the current/main mission
// But we can't verify, so allow it for now
// TODO: Backend should always include mission_id
// Skip if we're viewing a different (parallel) mission
return
}
}

View File

@@ -22,6 +22,9 @@ struct FilesView: View {
@State private var newFolderName = ""
@State private var isImporting = false
// Track pending path fetch to prevent race conditions
@State private var fetchingPath: String?
private let api = APIService.shared
private var sortedEntries: [FileEntry] {
@@ -360,16 +363,32 @@ struct FilesView: View {
// MARK: - Actions
private func loadDirectory() async {
let pathToLoad = currentPath
fetchingPath = pathToLoad
isLoading = true
errorMessage = nil
do {
entries = try await api.listDirectory(path: currentPath)
let result = try await api.listDirectory(path: pathToLoad)
// Race condition guard: only update if this is still the path we want
guard fetchingPath == pathToLoad else {
return // Navigation changed, discard this response
}
entries = result
} catch {
// Race condition guard
guard fetchingPath == pathToLoad else { return }
errorMessage = error.localizedDescription
}
isLoading = false
// Only clear loading if this is still the current fetch
if fetchingPath == pathToLoad {
isLoading = false
}
}
private func navigateTo(_ path: String) {

View File

@@ -22,15 +22,17 @@ struct HistoryView: View {
enum StatusFilter: String, CaseIterable {
case all = "All"
case active = "Active"
case interrupted = "Interrupted"
case completed = "Completed"
case failed = "Failed"
var missionStatus: MissionStatus? {
var missionStatuses: [MissionStatus]? {
switch self {
case .all: return nil
case .active: return .active
case .completed: return .completed
case .failed: return .failed
case .active: return [.active]
case .interrupted: return [.interrupted, .blocked]
case .completed: return [.completed]
case .failed: return [.failed, .notFeasible]
}
}
}
@@ -38,7 +40,7 @@ struct HistoryView: View {
private var filteredMissions: [Mission] {
missions.filter { mission in
// Filter by status
if let statusFilter = selectedFilter.missionStatus, mission.status != statusFilter {
if let statuses = selectedFilter.missionStatuses, !statuses.contains(mission.status) {
return false
}
@@ -257,11 +259,11 @@ private struct MissionRow: View {
var body: some View {
HStack(spacing: 14) {
// Icon
Image(systemName: "target")
Image(systemName: mission.canResume ? "play.circle" : "target")
.font(.title3)
.foregroundStyle(Theme.accent)
.foregroundStyle(mission.canResume ? Theme.warning : Theme.accent)
.frame(width: 40, height: 40)
.background(Theme.accent.opacity(0.15))
.background((mission.canResume ? Theme.warning : Theme.accent).opacity(0.15))
.clipShape(RoundedRectangle(cornerRadius: 10, style: .continuous))
// Content
@@ -274,9 +276,23 @@ private struct MissionRow: View {
HStack(spacing: 8) {
StatusBadge(status: mission.status.statusType, compact: true)
if mission.canResume {
Text("Resumable")
.font(.caption2.weight(.medium))
.foregroundStyle(Theme.warning)
}
Text("\(mission.history.count) messages")
.font(.caption)
.foregroundStyle(Theme.textTertiary)
if let model = mission.displayModel {
Text("")
.foregroundStyle(Theme.textMuted)
Text(model)
.font(.caption2.monospaced())
.foregroundStyle(Theme.textTertiary)
}
}
}
@@ -300,7 +316,7 @@ private struct MissionRow: View {
.clipShape(RoundedRectangle(cornerRadius: 14, style: .continuous))
.overlay(
RoundedRectangle(cornerRadius: 14, style: .continuous)
.stroke(Theme.border, lineWidth: 0.5)
.stroke(mission.canResume ? Theme.warning.opacity(0.3) : Theme.border, lineWidth: mission.canResume ? 1 : 0.5)
)
}
}

View File

@@ -5,10 +5,27 @@ Native iOS dashboard for Open Agent with **Liquid Glass** design language.
## Features
- **Control** - Chat interface with the AI agent, real-time streaming
- **History** - View past missions, tasks, and runs
- **History** - View past missions with filtering (active, interrupted, completed, failed)
- **Terminal** - SSH console via WebSocket
- **Files** - Remote file explorer with upload/download
### Mission Management
- Create new missions with optional model override
- Resume interrupted or blocked missions
- Mark missions as completed/failed
- View mission status (active, completed, failed, interrupted, blocked, not_feasible)
- Model override display per mission
### Parallel Missions
- View all running missions in a compact horizontal bar
- Switch between parallel missions with a single tap
- Real-time status indicators (running, stalled, severely stalled)
- Cancel running missions directly from the bar
- Automatic polling for running mission updates (every 3s)
- SSE event filtering by mission_id to prevent cross-contamination (sketched below)
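A simplified sketch of that filtering rule (mirroring `handleStreamEvent` in `ControlView`; the event payload shape is an assumption):

```swift
func shouldDisplay(type: String, eventMissionId: String?,
                   viewingId: String?, currentId: String?) -> Bool {
    if type == "status" { return true }  // status events are global
    if let eventId = eventMissionId {
        return eventId == viewingId      // must match the mission being viewed
    }
    // Event carries no mission_id (main session): trust it only when
    // viewing the current/main mission
    return viewingId == nil || viewingId == currentId
}
```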
## Design System
Built with "Quiet Luxury + Liquid Glass" aesthetic:
@@ -77,6 +94,12 @@ ios_dashboard/
│ │ ├── Terminal/ # SSH console
│ │ ├── Files/ # File explorer
│ │ └── Components/ # Reusable UI
│ │ ├── GlassButton.swift
│ │ ├── GlassCard.swift
│ │ ├── StatusBadge.swift
│ │ ├── LoadingView.swift
│ │ ├── RunningMissionsBar.swift # Parallel missions UI
│ │ └── ToolUI/ # Tool UI components
│ └── Assets.xcassets/
└── OpenAgentDashboard.xcodeproj/
```

File diff suppressed because it is too large

View File

@@ -76,6 +76,9 @@ MODEL_FAMILY_PATTERNS = [
(r"^anthropic/claude-(\d+\.?\d*)-haiku$", "claude-haiku", "fast"),
# OpenAI GPT
(r"^openai/gpt-5\.2-pro$", "gpt-5-pro", "flagship"),
(r"^openai/gpt-5\.2$", "gpt-5", "mid"),
(r"^openai/gpt-5\.2-chat$", "gpt-5", "mid"),
(r"^openai/gpt-4\.1$", "gpt-4", "mid"),
(r"^openai/gpt-4o$", "gpt-4", "mid"),
(r"^openai/gpt-4-turbo", "gpt-4", "mid"),
@@ -110,6 +113,8 @@ MODEL_FAMILY_PATTERNS = [
# Qwen
(r"^qwen/qwen-2\.5-72b", "qwen-72b", "mid"),
(r"^qwen/qwq-32b", "qwq", "mid"),
(r"^qwen/qwen3-next-80b.*thinking", "qwen3-thinking", "flagship"),
(r"^qwen/qwen3-235b.*instruct", "qwen3-instruct", "mid"),
]
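# Illustrative helper (not in the original file): resolve a model id to its
# (family, tier) using the patterns above. Entries are (regex, family, tier);
# the first match wins.
def classify_model(model_id: str):
    import re  # local import so the sketch is self-contained
    for pattern, family, tier in MODEL_FAMILY_PATTERNS:
        if re.match(pattern, model_id):
            return family, tier
    return None

# classify_model("openai/gpt-5.2") -> ("gpt-5", "mid")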
HEADERS = {

View File

@@ -1,409 +0,0 @@
//! Complexity estimation agent.
//!
//! Analyzes a task description and estimates:
//! - Complexity score (0-1)
//! - Whether to split into subtasks
//! - Estimated token count
//!
//! ## Learning Integration
//! When memory is available, the estimator queries similar past tasks
//! and adjusts predictions based on historical actual token usage.
use async_trait::async_trait;
use serde_json::json;
use crate::agents::{
Agent, AgentContext, AgentId, AgentResult, AgentType, Complexity, LeafAgent, LeafCapability,
};
use crate::llm::{ChatMessage, ChatOptions, Role};
use crate::memory::HistoricalContext;
use crate::task::Task;
/// Agent that estimates task complexity.
///
/// # Purpose
/// Given a task description, estimate how complex it is and whether
/// it should be split into subtasks.
///
/// # Algorithm
/// 1. Send task description to LLM with complexity evaluation prompt
/// 2. Parse LLM response for complexity score and reasoning
/// 3. Return structured Complexity object
pub struct ComplexityEstimator {
id: AgentId,
prompt_variant: ComplexityPromptVariant,
split_threshold: f64,
token_multiplier: f64,
}
/// Prompt variants for complexity estimation.
///
/// We keep this as an enum (not free-form strings) so we can:
/// - A/B test variants deterministically
/// - Store tuned choice as a stable symbol in config
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComplexityPromptVariant {
/// Short rubric-based prompt (fast).
RubricV1,
/// More explicit calibration prompt encouraging realistic token estimates.
CalibratedV2,
}
impl ComplexityEstimator {
/// Create a new complexity estimator.
pub fn new() -> Self {
Self {
id: AgentId::new(),
prompt_variant: ComplexityPromptVariant::CalibratedV2,
split_threshold: 0.6,
token_multiplier: 1.0,
}
}
/// Create a custom estimator (used by calibration harness).
pub fn with_params(
prompt_variant: ComplexityPromptVariant,
split_threshold: f64,
token_multiplier: f64,
) -> Self {
Self {
id: AgentId::new(),
prompt_variant,
split_threshold: split_threshold.clamp(0.0, 1.0),
token_multiplier: token_multiplier.max(0.1),
}
}
/// Prompt template for complexity estimation.
///
/// # Response Format
/// LLM should respond with JSON containing:
/// - score: float 0-1
/// - reasoning: string explanation
fn build_prompt(&self, task: &Task) -> String {
match self.prompt_variant {
ComplexityPromptVariant::RubricV1 => format!(
r#"You are a task complexity analyzer.
Task: {task}
Respond with ONLY a JSON object:
{{
"score": <float 0..1>,
"reasoning": <string>,
"estimated_tokens": <int>
}}
Rubric for score:
- 0.0-0.2: Trivial
- 0.2-0.4: Simple
- 0.4-0.6: Moderate
- 0.6-0.8: Complex
- 0.8-1.0: Very Complex"#,
task = task.description()
),
ComplexityPromptVariant::CalibratedV2 => format!(
r#"You are a task complexity analyzer. Your goal is to estimate:
1) a complexity score in [0, 1]
2) a realistic token budget estimate for completing the task end-to-end using an LLM with tools.
Task: {task}
Important: \"estimated_tokens\" should reflect TOTAL tokens (input + output) across multiple turns, including:
- planning / reasoning
- tool call arguments and tool outputs
- iterative fixes and retries
Respond with ONLY a JSON object:
{{
\"score\": <float 0..1>,
\"reasoning\": <string>,
\"estimated_tokens\": <int>
}}
Rubric for score:
- 0.0-0.2: Trivial (single tool call)
- 0.2-0.4: Simple (1-3 tool calls)
- 0.4-0.6: Moderate (3-8 tool calls)
- 0.6-0.8: Complex (multi-file, tests, iterations)
- 0.8-1.0: Very Complex (architecture, significant refactor)"#,
task = task.description()
),
}
}
/// Parse LLM response into Complexity struct.
///
/// # Postconditions
/// - Returns valid Complexity with score in [0, 1]
/// - Falls back to moderate complexity on parse error
fn parse_response(&self, response: &str) -> Complexity {
// Try to parse as JSON
if let Ok(json) = serde_json::from_str::<serde_json::Value>(response) {
let score = json["score"].as_f64().unwrap_or(0.5);
let reasoning = json["reasoning"].as_str().unwrap_or("No reasoning provided");
let estimated_tokens = json["estimated_tokens"].as_u64().unwrap_or(2000);
return Complexity::new(score, reasoning, estimated_tokens);
}
// Try to extract score from text
if let Some(score) = self.extract_score_from_text(response) {
return Complexity::new(score, response, 2000);
}
// Default to moderate complexity
Complexity::moderate("Could not parse complexity response")
}
/// Try to extract a score from free-form text.
fn extract_score_from_text(&self, text: &str) -> Option<f64> {
// Look for patterns like "0.5" or "score: 0.5" or "50%"
let text_lower = text.to_lowercase();
// Check for keywords
if text_lower.contains("trivial") || text_lower.contains("very simple") {
return Some(0.1);
}
if text_lower.contains("very complex") || text_lower.contains("extremely") {
return Some(0.9);
}
if text_lower.contains("complex") {
return Some(0.7);
}
if text_lower.contains("moderate") || text_lower.contains("medium") {
return Some(0.5);
}
if text_lower.contains("simple") || text_lower.contains("easy") {
return Some(0.3);
}
None
}
/// Query historical context for similar tasks.
///
/// Returns adjustment multipliers based on past actual vs predicted values.
async fn get_historical_adjustments(
&self,
task_description: &str,
ctx: &AgentContext,
) -> Option<HistoricalContext> {
let memory = ctx.memory.as_ref()?;
match memory.retriever.get_historical_context(task_description, 5).await {
Ok(context) => {
if let Some(ref hist) = context {
tracing::debug!(
"Historical context found: {} similar tasks, avg token ratio: {:.2}, success rate: {:.2}",
hist.similar_outcomes.len(),
hist.avg_token_multiplier,
hist.similar_success_rate
);
}
context
}
Err(e) => {
tracing::warn!("Failed to fetch historical context: {}", e);
None
}
}
}
/// Adjust token estimate based on historical data.
///
/// If similar past tasks consistently used more/fewer tokens than predicted,
/// we adjust our estimate accordingly.
fn apply_historical_adjustment(
&self,
base_tokens: u64,
historical: Option<&HistoricalContext>,
) -> u64 {
match historical {
Some(hist) if hist.similar_outcomes.len() >= 2 => {
// Apply the historical token multiplier (clamped to reasonable range)
let multiplier = hist.avg_token_multiplier.clamp(0.5, 3.0);
let adjusted = (base_tokens as f64 * multiplier).round() as u64;
tracing::debug!(
"Adjusted token estimate: {} -> {} (multiplier: {:.2})",
base_tokens, adjusted, multiplier
);
adjusted
}
_ => base_tokens,
}
}
}
impl Default for ComplexityEstimator {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Agent for ComplexityEstimator {
fn id(&self) -> &AgentId {
&self.id
}
fn agent_type(&self) -> AgentType {
AgentType::ComplexityEstimator
}
fn description(&self) -> &str {
"Estimates task complexity and recommends splitting strategy"
}
/// Estimate complexity of a task.
///
/// # Returns
/// AgentResult with Complexity data in the `data` field.
///
/// # Learning Integration
/// When memory is available, queries similar past tasks to adjust predictions
/// based on actual historical token usage.
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
// Query historical context for similar tasks (if memory available)
let historical = self.get_historical_adjustments(task.description(), ctx).await;
let prompt = self.build_prompt(task);
let messages = vec![
ChatMessage::new(Role::System, "You are a precise task complexity analyzer. Respond only with JSON."),
ChatMessage::new(Role::User, prompt),
];
// Use a fast, cheap model for complexity estimation
let model = "openai/gpt-4.1-mini";
let pricing = ctx.pricing.get_pricing(model).await;
let options = ChatOptions {
temperature: Some(0.0),
top_p: None,
max_tokens: Some(400),
};
match ctx
.llm
.chat_completion_with_options(model, &messages, None, options)
.await
{
Ok(response) => {
let content = response.content.unwrap_or_default();
let parsed = self.parse_response(&content);
// Apply calibrated adjustments (pure post-processing).
let base_tokens = ((parsed.estimated_tokens() as f64) * self.token_multiplier)
.round()
.max(1.0) as u64;
// Apply historical adjustment if we have relevant data
let adjusted_tokens = self.apply_historical_adjustment(base_tokens, historical.as_ref());
let should_split = parsed.score() > self.split_threshold;
let complexity = Complexity::new(parsed.score(), parsed.reasoning(), adjusted_tokens)
.with_split(should_split);
// Record analysis on the task
{
let a = task.analysis_mut();
a.complexity_score = Some(complexity.score());
a.complexity_reasoning = Some(complexity.reasoning().to_string());
a.should_split = Some(complexity.should_split());
a.estimated_total_tokens = Some(complexity.estimated_tokens());
}
// Compute cost (if usage + pricing available)
let cost_cents = match (&response.usage, &pricing) {
(Some(u), Some(p)) => p.calculate_cost_cents(u.prompt_tokens, u.completion_tokens),
_ => 1, // fallback tiny cost
};
// Build historical info for response data
let historical_info = historical.as_ref().map(|h| json!({
"similar_tasks_found": h.similar_outcomes.len(),
"avg_token_multiplier": h.avg_token_multiplier,
"avg_cost_multiplier": h.avg_cost_multiplier,
"similar_success_rate": h.similar_success_rate,
}));
AgentResult::success(
format!(
"Complexity: {:.2} - {}{}",
complexity.score(),
if complexity.should_split() { "Should split" } else { "Execute directly" },
if historical.is_some() { " (adjusted from history)" } else { "" }
),
cost_cents,
)
.with_model(model)
.with_data(json!({
"score": complexity.score(),
"reasoning": complexity.reasoning(),
"should_split": complexity.should_split(),
"estimated_tokens": complexity.estimated_tokens(),
"base_tokens_before_history": base_tokens,
"historical_adjustment": historical_info,
"usage": response.usage.as_ref().map(|u| json!({
"prompt_tokens": u.prompt_tokens,
"completion_tokens": u.completion_tokens,
"total_tokens": u.total_tokens
})),
}))
}
Err(e) => {
// On error, return moderate complexity as fallback
let fallback = Complexity::moderate(format!("LLM error, using fallback: {}", e));
AgentResult::success(
"Using fallback complexity estimate due to LLM error",
0,
)
.with_data(json!({
"score": fallback.score(),
"reasoning": fallback.reasoning(),
"should_split": fallback.should_split(),
"estimated_tokens": fallback.estimated_tokens(),
"fallback": true,
}))
}
}
}
}
impl LeafAgent for ComplexityEstimator {
fn capability(&self) -> LeafCapability {
LeafCapability::ComplexityEstimation
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_json_response() {
let estimator = ComplexityEstimator::new();
let json_response = r#"{"score": 0.7, "reasoning": "Complex task", "estimated_tokens": 3000, "should_split": true}"#;
let complexity = estimator.parse_response(json_response);
assert!((complexity.score() - 0.7).abs() < 0.01);
assert!(complexity.should_split());
}
#[test]
fn test_parse_text_response() {
let estimator = ComplexityEstimator::new();
let text_response = "This is a very complex task";
let complexity = estimator.parse_response(text_response);
assert!(complexity.score() > 0.6);
}
}

View File

@@ -19,7 +19,7 @@ use serde_json::json;
use std::path::Path;
use crate::agents::{
Agent, AgentContext, AgentId, AgentResult, AgentType, LeafAgent, LeafCapability,
Agent, AgentContext, AgentId, AgentResult, AgentType, LeafAgent, LeafCapability, TerminalReason,
};
use crate::api::control::{AgentEvent, ControlRunState};
use crate::budget::ExecutionSignals;
@@ -43,6 +43,8 @@ pub struct ExecutionLoopResult {
pub signals: ExecutionSignals,
/// Whether execution succeeded
pub success: bool,
/// Why execution terminated (if not successful completion)
pub terminal_reason: Option<TerminalReason>,
}
/// Agent that executes tasks using tools.
@@ -60,6 +62,33 @@ pub struct TaskExecutor {
id: AgentId,
}
/// Categorize a tool name into a broader approach category.
/// Used for tracking repeated failures of similar approaches.
fn categorize_tool(tool_name: &str) -> String {
match tool_name {
// Static analysis tools
name if name.contains("slither") || name.contains("mythril") ||
name.contains("solhint") || name.contains("echidna") => "static_analysis".to_string(),
// Code execution/compilation
"run_command" => "shell_command".to_string(),
name if name.contains("compile") || name.contains("build") => "compilation".to_string(),
// File operations
"read_file" | "write_file" | "list_directory" | "search_files" => "file_ops".to_string(),
// Network/API calls
name if name.contains("browser") || name.contains("http") ||
name.contains("fetch") || name.contains("curl") => "network".to_string(),
// Git operations
name if name.contains("git") || name.contains("clone") => "git".to_string(),
// Default: use the tool name itself
_ => tool_name.to_string(),
}
}
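// Illustrative (not in the original file): alternating between similar
// tools still accumulates in one failure bucket, so the pivot hint fires
// even when the exact tool name changes:
//   categorize_tool("slither_scan")    == "static_analysis"
//   categorize_tool("mythril_analyze") == "static_analysis"
//   categorize_tool("read_file")       == "file_ops"
//   categorize_tool("custom_tool")     == "custom_tool"  (fallback: name itself)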
impl TaskExecutor {
/// Create a new task executor.
pub fn new() -> Self {
@@ -558,7 +587,54 @@ Use `search_memory` when you encounter a problem you might have solved before or
4. **Explicit completion** — Use complete_mission tool when the goal is fully achieved
5. **Failure acknowledgment** — If you cannot complete, explain why and call complete_mission with failed status
6. **No silent exits** — Every execution should end with either a deliverable or an explanation
7. **Large files in chunks** — If writing files >2000 chars, verify content isn't truncated"#,
7. **Large files in chunks** — If writing files >2000 chars, verify content isn't truncated
## ⚠️ CRITICAL: Blocker Detection (STOP if these occur!)
**If you encounter ANY blocker, STOP IMMEDIATELY and report it. DO NOT produce placeholder content.**
### Type Mismatch Blockers
| Requested | But Target Is | Action |
|-----------|---------------|--------|
| Solidity/Smart Contract audit | C++/Rust/Go project | STOP → `complete_mission(blocked, "Target is C++/Rust/Go, not Solidity")` |
| Python analysis | Java/JavaScript project | STOP → `complete_mission(blocked, "Target is Java/JS, not Python")` |
| Web scraping | Desktop app | STOP → `complete_mission(blocked, "Target is desktop app, not website")` |
**How to detect project types:**
- **Solidity**: `.sol` files, `hardhat.config.js`, `truffle-config.js`, `foundry.toml`
- **C++ (Bitcoin forks)**: `configure.ac`, `Makefile.am`, `src/*.cpp`, `src/*.h`
- **Rust**: `Cargo.toml`, `src/*.rs`
- **Go**: `go.mod`, `*.go` files
### Access/Resource Blockers
| Blocker | Action |
|---------|--------|
| Can't clone/access repository | STOP → report exact error |
| Can't fetch contract bytecode | STOP → report RPC error and address |
| Required tool won't install | STOP → report installation error |
| Source code not available | TRY bytecode analysis first, then report if still blocked |
### Smart Contract Audit Specific
**When auditing contracts WITHOUT source code:**
1. FIRST try fetching bytecode: `cast code <address> --rpc-url <rpc>`
2. THEN decompile: Use `heimdall`, `panoramix`, `dedaub`
3. ONLY report "blocked" if bytecode analysis also fails
**Chain RPCs:**
- Ethereum: `https://eth.llamarpc.com`
- BSC: `https://bsc-dataseed.binance.org`
- Polygon: `https://polygon-rpc.com`
- Merlin: `https://rpc.merlinchain.io`
## 🚫 NEVER DO THESE
1. **NEVER create "example" or "illustrative" content** as substitute for real analysis
2. **NEVER analyze unrelated code** (e.g., library code instead of target contracts)
3. **NEVER produce generic filler** (e.g., "SQL injection" in a smart contract audit)
4. **NEVER frame placeholder content as real analysis**
5. **NEVER mark "completed" if you analyzed substitute targets**
If you cannot perform the requested analysis, use `complete_mission(blocked, reason)` and explain clearly what blocked you."#,
session_metadata = session_metadata,
memory_context = memory_context,
working_dir = working_dir,
@@ -622,6 +698,15 @@ Use `search_memory` when you encounter a problem you might have solved before or
let mut has_error_messages = false;
let mut iterations_completed = 0u32;
// Track consecutive empty/reasoning-only responses (P0 fix for agent stalls)
let mut empty_response_count: u32 = 0;
const EMPTY_RESPONSE_WARNING_THRESHOLD: u32 = 2;
const EMPTY_RESPONSE_FORCE_COMPLETE_THRESHOLD: u32 = 4;
// Track failed tool attempts by category (P3 fix for approach looping)
let mut failed_tool_attempts: std::collections::HashMap<String, u32> = std::collections::HashMap::new();
const TOOL_FAILURE_THRESHOLD: u32 = 3;
// Track uploaded images that need to be included in the response
// When upload_image succeeds, we store the (url, markdown) so we can warn
// the agent if they try to complete without including the images.
@@ -701,8 +786,25 @@ Use `search_memory` when you encounter a problem you might have solved before or
tracing::info!("Browser tools available: {:?}", browser_tools);
}
tracing::info!("Discovered {} built-in tools, {} MCP tools", builtin_count, mcp_tool_schemas.len());
tool_schemas.extend(mcp_tool_schemas);
// Filter out MCP tools that conflict with built-in tools (built-in takes precedence)
let builtin_names: std::collections::HashSet<_> = tool_schemas.iter().map(|t| t.function.name.as_str()).collect();
let mcp_count_before = mcp_tool_schemas.len();
let filtered_mcp: Vec<_> = mcp_tool_schemas
.into_iter()
.filter(|t| {
if builtin_names.contains(t.function.name.as_str()) {
tracing::debug!("Skipping MCP tool '{}' - conflicts with built-in tool", t.function.name);
false
} else {
true
}
})
.collect();
let mcp_skipped = mcp_count_before - filtered_mcp.len();
tracing::info!("Discovered {} built-in tools, {} MCP tools ({} skipped due to conflicts)",
builtin_count, filtered_mcp.len(), mcp_skipped);
tool_schemas.extend(filtered_mcp);
// Agent loop
for iteration in 0..ctx.max_iterations {
@@ -734,6 +836,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::Cancelled),
};
}
}
@@ -762,13 +865,19 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::BudgetExhausted),
};
}
// Call LLM
let response = match ctx.llm.chat_completion(model, &messages, Some(&tool_schemas)).await {
Ok(r) => r,
Err(e) => {
// Call LLM with timeout (P2 fix: detect hangs)
const LLM_TIMEOUT_SECS: u64 = 300; // 5 minutes max per LLM call
let llm_future = ctx.llm.chat_completion(model, &messages, Some(&tool_schemas));
let response = match tokio::time::timeout(
std::time::Duration::from_secs(LLM_TIMEOUT_SECS),
llm_future
).await {
Ok(Ok(r)) => r,
Ok(Err(e)) => {
has_error_messages = true;
let error_msg = format!("LLM error: {}", e);
let signals = ExecutionSignals {
@@ -792,6 +901,45 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::LlmError),
};
}
Err(_timeout) => {
// P2 FIX: LLM call timed out - return with partial results
has_error_messages = true;
tracing::error!(
"LLM call timed out after {} seconds at iteration {}",
LLM_TIMEOUT_SECS,
iterations_completed
);
let signals = ExecutionSignals {
iterations: iterations_completed,
max_iterations: ctx.max_iterations as u32,
successful_tool_calls,
failed_tool_calls,
files_modified,
repetitive_actions,
has_error_messages,
partial_progress: files_modified || successful_tool_calls > 0,
cost_spent_cents: total_cost_cents,
budget_total_cents: task.budget().total_cents(),
final_output: format!(
"LLM call timed out after {} seconds. Partial results may be in working directory.",
LLM_TIMEOUT_SECS
),
model_used: model.to_string(),
};
return ExecutionLoopResult {
output: format!(
"Agent stalled: LLM call timed out after {} seconds. Check working directory for partial results.",
LLM_TIMEOUT_SECS
),
cost_cents: total_cost_cents,
tool_log,
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::Stalled),
};
}
};
@@ -907,6 +1055,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::InfiniteLoop),
};
}
@@ -999,6 +1148,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::Cancelled),
};
}
}
@@ -1069,7 +1219,28 @@ Use `search_memory` when you encounter a problem you might have solved before or
Err(e) => {
failed_tool_calls += 1;
has_error_messages = true;
let s = format!("Error: {}", e);
// P3 FIX: Track failed approaches by tool category
let tool_category = categorize_tool(&tool_name);
let count = failed_tool_attempts.entry(tool_category.clone()).or_insert(0);
*count += 1;
let s = if *count >= TOOL_FAILURE_THRESHOLD {
tracing::warn!(
"Tool category '{}' has failed {} times - suggesting pivot",
tool_category,
*count
);
format!(
"Error: {}\n\n[SYSTEM NOTE: The '{}' approach has failed {} times. \
Consider: 1) Try a completely different tool/approach, \
2) Analyze what you DO have and produce partial results, \
3) Call complete_mission(blocked) if fundamentally stuck]",
e, tool_category, *count
)
} else {
format!("Error: {}", e)
};
(s.clone(), serde_json::Value::String(s))
}
}
@@ -1205,14 +1376,92 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: true,
terminal_reason: None,
};
}
}
// Reset empty response counter on successful tool execution
empty_response_count = 0;
continue;
}
}
// P0 FIX: Handle reasoning-only responses (no tool calls, no/empty content)
// This prevents the agent from stalling when the LLM returns only thinking
let has_reasoning = response.reasoning.as_ref().map(|r| !r.is_empty()).unwrap_or(false);
let has_content = response.content.as_ref().map(|c| !c.trim().is_empty()).unwrap_or(false);
let has_tool_calls = response.tool_calls.as_ref().map(|tc| !tc.is_empty()).unwrap_or(false);
if !has_tool_calls && !has_content {
empty_response_count += 1;
tracing::warn!(
"Empty/reasoning-only response #{} (has_reasoning: {}, iteration: {})",
empty_response_count,
has_reasoning,
iterations_completed
);
// Force completion if too many empty responses
if empty_response_count >= EMPTY_RESPONSE_FORCE_COMPLETE_THRESHOLD {
tracing::error!(
"Force completing: {} consecutive empty/reasoning-only responses",
empty_response_count
);
has_error_messages = true;
let signals = ExecutionSignals {
iterations: iterations_completed,
max_iterations: ctx.max_iterations as u32,
successful_tool_calls,
failed_tool_calls,
files_modified,
repetitive_actions,
has_error_messages,
partial_progress: files_modified || successful_tool_calls > 0,
cost_spent_cents: total_cost_cents,
budget_total_cents: task.budget().total_cents(),
final_output: format!(
"Agent stalled: {} consecutive responses without action. Partial results may be in working directory.",
empty_response_count
),
model_used: model.to_string(),
};
return ExecutionLoopResult {
output: format!(
"Agent stalled after {} responses without taking action. Check working directory for partial results.",
empty_response_count
),
cost_cents: total_cost_cents,
tool_log,
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::Stalled),
};
}
// Inject a prompt to get the model to take action
if empty_response_count >= EMPTY_RESPONSE_WARNING_THRESHOLD {
messages.push(ChatMessage::new(
Role::User,
format!(
"[SYSTEM WARNING] You've returned {} responses without taking any action (only thinking/reasoning).\n\n\
You MUST now do ONE of:\n\
1. Call a tool to continue working on the task\n\
2. Provide a complete final response summarizing your work\n\
3. Call complete_mission with status='completed' if done, or status='blocked' if stuck\n\n\
DO NOT respond with only thinking - take concrete action NOW.",
empty_response_count
)
));
}
continue; // Retry - let the model try again with the warning
}
// If we reach here with content, it's the final response
// (no need to reset empty_response_count since we're returning)
// No tool calls - final response
if let Some(content) = response.content.filter(|c| !c.trim().is_empty()) {
let signals = ExecutionSignals {
@@ -1236,6 +1485,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: true,
terminal_reason: None,
};
}
@@ -1262,6 +1512,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::LlmError),
};
}
@@ -1287,6 +1538,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
usage,
signals,
success: false,
terminal_reason: Some(TerminalReason::MaxIterations),
}
}
}
@@ -1350,6 +1602,11 @@ impl Agent for TaskExecutor {
AgentResult::failure(&result.output, result.cost_cents)
};
// Propagate terminal reason from execution loop
if let Some(reason) = result.terminal_reason {
agent_result = agent_result.with_terminal_reason(reason);
}
agent_result = agent_result
.with_model(model)
.with_data(json!({
@@ -1415,6 +1672,11 @@ impl TaskExecutor {
AgentResult::failure(&result.output, result.cost_cents)
};
// Propagate terminal reason from execution loop
if let Some(reason) = result.terminal_reason {
agent_result = agent_result.with_terminal_reason(reason);
}
agent_result = agent_result
.with_model(model)
.with_data(json!({

View File

@@ -1,18 +1,13 @@
//! Leaf agents - specialized agents that do actual work.
//!
//! # Active Leaf Agent
//! - `TaskExecutor`: Executes tasks using tools (main worker)
//!
//! # Removed Agents (superseded by SimpleAgent)
//! - `ComplexityEstimator`: Was unreliable (LLM-based estimation)
//! - `ModelSelector`: Was over-engineered (U-curve optimization)
//! - `Verifier`: Was ineffective (rubber-stamped everything)
mod executor;
pub use executor::{TaskExecutor, ExecutionLoopResult};

View File

@@ -1,766 +0,0 @@
//! Model selection agent with U-curve cost optimization.
//!
//! # U-Curve Optimization
//! The total expected cost follows a U-shaped curve:
//! - Cheap models: Low per-token cost, but may fail/retry, use more tokens
//! - Expensive models: High per-token cost, but succeed more often
//! - Optimal: Somewhere in the middle, minimizing total expected cost
//!
//! # Cost Model
//! Expected Cost = base_cost * (1 + failure_rate * retry_multiplier) * token_inefficiency
//!
//! # Benchmark Integration
//! When benchmark data is available, uses actual benchmark scores (from llm-stats.com)
//! for task-type-specific capability estimation instead of price-based heuristics.
//!
//! # Learning Integration
//! When memory is available, uses historical model statistics (actual success rates,
//! cost ratios) instead of pure heuristics.
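//!
//! # Worked Example (hypothetical prices and capabilities, complexity 0.7)
//! A sketch of how the U-curve emerges under the default parameters
//! (retry_multiplier = 1.5, inefficiency_scale = 0.5); the models and cent
//! values below are invented for illustration:
//! ```text
//! model      capability  base (cents)  expected = base * ineff * (1 + fail * 1.5)
//! cheap      0.3         5             5 * 1.35 * 1.735 ≈ 11.7
//! medium     0.6         6             6 * 1.20 * 1.420 ≈ 10.2   <- optimal
//! expensive  0.9         30            30 * 1.05 * 1.105 ≈ 34.8
//! ```
//! where fail = complexity * (1 - capability) and ineff = 1 + (1 - capability) * 0.5.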
use async_trait::async_trait;
use serde_json::json;
use std::collections::HashMap;
use crate::agents::{
Agent, AgentContext, AgentId, AgentResult, AgentType, LeafAgent, LeafCapability,
};
use crate::budget::{PricingInfo, TaskType};
use crate::memory::ModelStats;
use crate::task::Task;
/// Agent that selects the optimal model for a task.
///
/// # Algorithm
/// 1. Get task complexity and budget constraints
/// 2. Fetch available models and pricing
/// 3. For each model, calculate expected total cost
/// 4. Return model with minimum expected cost within budget
pub struct ModelSelector {
id: AgentId,
retry_multiplier: f64,
inefficiency_scale: f64,
max_failure_probability: f64,
}
/// Model recommendation from the selector.
#[derive(Debug, Clone)]
pub struct ModelRecommendation {
/// Recommended model ID
pub model_id: String,
/// Expected cost in cents
pub expected_cost_cents: u64,
/// Confidence in this recommendation (0-1)
pub confidence: f64,
/// Reasoning for the selection
pub reasoning: String,
/// Alternative models if primary fails
pub fallbacks: Vec<String>,
/// Whether historical data was used for this selection
pub used_historical_data: bool,
/// Whether benchmark data was used for capability estimation
pub used_benchmark_data: bool,
/// Inferred task type
pub task_type: Option<TaskType>,
}
impl ModelSelector {
/// Create a new model selector.
pub fn new() -> Self {
Self {
id: AgentId::new(),
retry_multiplier: 1.5,
inefficiency_scale: 0.5,
max_failure_probability: 0.9,
}
}
/// Create a selector with calibrated parameters.
pub fn with_params(retry_multiplier: f64, inefficiency_scale: f64, max_failure_probability: f64) -> Self {
Self {
id: AgentId::new(),
retry_multiplier: retry_multiplier.max(1.0),
inefficiency_scale: inefficiency_scale.max(0.0),
max_failure_probability: max_failure_probability.clamp(0.0, 0.99),
}
}
/// Calculate expected cost for a model given task complexity.
///
/// # Formula
/// ```text
/// expected_cost = base_cost * (1 + failure_prob * retry_cost) * inefficiency_factor
/// ```
///
/// # Parameters
/// - `pricing`: Model pricing info
/// - `complexity`: Task complexity (0-1)
/// - `estimated_tokens`: Estimated tokens needed
/// - `capability`: Model capability score (0-1), from benchmarks or price heuristic
///
/// # Returns
/// Expected cost in cents
///
/// # Pure Function
/// No side effects, deterministic output.
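///
/// # Worked Example
/// A quick numeric check with the default parameters, assuming
/// `complexity = 0.7` and `capability = 0.6`:
/// ```text
/// failure_prob = 0.7 * (1 - 0.6)        = 0.28
/// inefficiency = 1 + (1 - 0.6) * 0.5    = 1.2
/// expected     = adjusted_cost * (1 + 0.28 * 1.5)
///              = adjusted_cost * 1.42
/// ```
/// where `adjusted_cost` is the pricing cost of `estimated_tokens * 1.2` tokens.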
fn calculate_expected_cost_with_capability(
&self,
pricing: &PricingInfo,
complexity: f64,
estimated_tokens: u64,
capability: f64,
from_benchmarks: bool,
) -> ExpectedCost {
// Failure probability: higher complexity + lower capability = more failures
// Formula: P(fail) = complexity * (1 - capability)
let failure_prob = (complexity * (1.0 - capability)).clamp(0.0, self.max_failure_probability);
// Token inefficiency: weaker models need more tokens
// Formula: inefficiency = 1 + (1 - capability) * 0.5
let inefficiency = 1.0 + (1.0 - capability) * self.inefficiency_scale;
// Retry cost: if it fails, we pay again (possibly with a better model)
let retry_multiplier = self.retry_multiplier;
// Base cost for estimated tokens
let input_tokens = estimated_tokens / 2;
let output_tokens = estimated_tokens / 2;
let base_cost = pricing.calculate_cost_cents(input_tokens, output_tokens);
// Adjusted for inefficiency (weak models use more tokens)
let adjusted_tokens = ((estimated_tokens as f64) * inefficiency) as u64;
let adjusted_cost = pricing.calculate_cost_cents(adjusted_tokens / 2, adjusted_tokens / 2);
// Expected cost including retry probability
let expected_cost = (adjusted_cost as f64) * (1.0 + failure_prob * retry_multiplier);
ExpectedCost {
model_id: pricing.model_id.clone(),
base_cost_cents: base_cost,
expected_cost_cents: expected_cost.ceil() as u64,
failure_probability: failure_prob,
capability,
inefficiency,
from_benchmarks,
}
}
/// Calculate expected cost using price-based capability (fallback).
fn calculate_expected_cost(
&self,
pricing: &PricingInfo,
complexity: f64,
estimated_tokens: u64,
) -> ExpectedCost {
let avg_cost = pricing.average_cost_per_token();
let capability = self.estimate_capability_from_price(avg_cost);
self.calculate_expected_cost_with_capability(pricing, complexity, estimated_tokens, capability, false)
}
/// Estimate model capability from its cost (fallback heuristic).
///
/// # Heuristic
/// More expensive models are generally more capable.
/// Uses log scale to normalize across price ranges.
///
/// # Returns
/// Capability score 0-1
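///
/// # Example Values
/// Two illustrative points on the log-scale mapping below:
/// ```text
/// avg cost 1e-6/token: log10 = -6, normalized = 0.25 -> capability ≈ 0.46
/// avg cost 1e-4/token: log10 = -4, normalized = 0.75 -> capability ≈ 0.79
/// ```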
fn estimate_capability_from_price(&self, avg_cost_per_token: f64) -> f64 {
// Continuous log-scale mapping from price to capability:
//   ~1e-7 per token (very cheap) -> ~0.3
//   ~1e-5 per token (mid-range)  -> ~0.63
//   ~1e-3 per token (frontier)   -> ~0.95
if avg_cost_per_token < 0.0000001 {
return 0.3; // Free/very cheap
}
// Log scale normalization
let log_cost = avg_cost_per_token.log10();
// Map from ~-7 (cheap) to ~-3 (expensive) => 0.3 to 0.95
let normalized = ((log_cost + 7.0) / 4.0).clamp(0.0, 1.0);
0.3 + normalized * 0.65
}
/// Get model capability from benchmarks (preferred) or fall back to price heuristic.
///
/// # Benchmark-Based Capability
/// Uses actual benchmark scores from llm-stats.com when available.
/// This provides task-type-specific capability estimation.
async fn get_capability(
&self,
model_id: &str,
task_type: TaskType,
avg_cost_per_token: f64,
ctx: &AgentContext,
) -> (f64, bool) {
// Try to get benchmark-based capability
if let Some(benchmarks) = &ctx.benchmarks {
let registry = benchmarks.read().await;
if let Some(model) = registry.get(model_id) {
if model.has_benchmarks() {
let capability = model.capability(task_type);
tracing::info!(
"Using benchmark capability for {}: {:.3} (task_type: {:?})",
model_id, capability, task_type
);
return (capability, true); // (capability, from_benchmarks)
}
}
}
// Fall back to price-based heuristic
let capability = self.estimate_capability_from_price(avg_cost_per_token);
tracing::debug!(
"Using price-based capability for {}: {:.3} (avg_cost: {:.10})",
model_id, capability, avg_cost_per_token
);
(capability, false)
}
/// Select optimal model from available options.
///
/// # Algorithm
/// 1. Calculate expected cost for each model using benchmark capabilities when available
/// 2. If user requested a specific model, use it as minimum capability floor
/// 3. Filter models exceeding budget
/// 4. Select model with minimum expected cost
/// 5. Include fallbacks in case of failure
///
/// # Preconditions
/// - `models` is non-empty
/// - `budget_cents > 0`
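///
/// # Example Call (sketch; argument values are illustrative)
/// ```text
/// let rec = self.select_optimal(
///     &models,          // available pricing info
///     0.7,              // complexity
///     2_000,            // estimated tokens
///     500,              // budget in cents
///     TaskType::Code,   // inferred task type
///     None,             // no historical stats
///     None,             // no user-requested model
///     ctx,
/// ).await;
/// ```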
async fn select_optimal(
&self,
models: &[PricingInfo],
complexity: f64,
estimated_tokens: u64,
budget_cents: u64,
task_type: TaskType,
historical_stats: Option<&HashMap<String, ModelStats>>,
requested_model: Option<&str>,
ctx: &AgentContext,
) -> Option<ModelRecommendation> {
if models.is_empty() {
return None;
}
// Calculate expected cost for all models, using benchmark or historical stats when available
let mut costs: Vec<ExpectedCost> = Vec::with_capacity(models.len());
let mut any_from_benchmarks = false;
for m in models {
let cost = if let Some(stats) = historical_stats.and_then(|h| h.get(&m.model_id)) {
// Use historical data if available (highest priority)
self.calculate_expected_cost_with_history(m, complexity, estimated_tokens, stats)
} else {
// Use benchmark data for capability
let (capability, from_benchmarks) = self.get_capability(
&m.model_id,
task_type,
m.average_cost_per_token(),
ctx,
).await;
if from_benchmarks {
any_from_benchmarks = true;
}
self.calculate_expected_cost_with_capability(
m, complexity, estimated_tokens, capability, from_benchmarks
)
};
costs.push(cost);
}
// Sort by expected cost (ascending)
costs.sort_by_key(|c| c.expected_cost_cents);
// If user requested a specific model, use it as minimum capability floor
// Filter out models with lower capability than the requested one
let min_capability = if let Some(req_model) = requested_model {
// Find the requested model's capability
if let Some(req_cost) = costs.iter().find(|c| c.model_id == req_model) {
tracing::info!(
"Using requested model {} as capability floor: {:.3}",
req_model,
req_cost.capability
);
req_cost.capability
} else {
// Requested model not found - fall back to looking up its price
if let Some(req_pricing) = models.iter().find(|m| m.model_id == req_model) {
let cap = self.estimate_capability_from_price(req_pricing.average_cost_per_token());
tracing::info!(
"Requested model {} not in costs list, using price-based capability: {:.3}",
req_model,
cap
);
cap
} else {
// Model not found at all, use a reasonable floor (0.7 = mid-tier)
tracing::warn!(
"Requested model {} not found, using default capability floor 0.7",
req_model
);
0.7
}
}
} else {
0.0 // No minimum
};
// Filter to models meeting minimum capability
let filtered_costs: Vec<_> = if min_capability > 0.0 {
costs.iter()
.filter(|c| c.capability >= min_capability * 0.95) // Allow 5% tolerance
.cloned()
.collect()
} else {
costs.clone()
};
let costs_to_use = if filtered_costs.is_empty() {
tracing::warn!("No models meet minimum capability {:.2}, using all models", min_capability);
&costs
} else {
&filtered_costs
};
// Find cheapest model within budget
let within_budget: Vec<_> = costs_to_use
.iter()
.filter(|c| c.expected_cost_cents <= budget_cents)
.cloned()
.collect();
let selected = within_budget.first().cloned().or_else(|| costs_to_use.first().cloned())?;
// Get fallback models (next best options)
let fallbacks: Vec<String> = costs
.iter()
.filter(|c| c.model_id != selected.model_id)
.take(2)
.map(|c| c.model_id.clone())
.collect();
let used_history = historical_stats.and_then(|h| h.get(&selected.model_id)).is_some();
let recommendation = ModelRecommendation {
model_id: selected.model_id.clone(),
expected_cost_cents: selected.expected_cost_cents,
confidence: 1.0 - selected.failure_probability,
reasoning: format!(
"Selected {} for {:?} task with expected cost {} cents (capability: {:.2}, failure prob: {:.2}){}{}",
selected.model_id,
task_type,
selected.expected_cost_cents,
selected.capability,
selected.failure_probability,
if used_history { " [historical]" } else { "" },
if selected.from_benchmarks { " [benchmark]" } else { "" }
),
fallbacks,
used_historical_data: used_history,
used_benchmark_data: selected.from_benchmarks,
task_type: Some(task_type),
};
tracing::info!(
"Model selected: {} (task: {:?}, cost: {} cents, benchmark_data: {}, history: {})",
recommendation.model_id,
task_type,
recommendation.expected_cost_cents,
recommendation.used_benchmark_data,
recommendation.used_historical_data
);
Some(recommendation)
}
/// Calculate expected cost using actual historical statistics.
///
/// This uses real success rates and cost ratios from past executions
/// instead of heuristic estimates.
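///
/// # Worked Example (hypothetical stats)
/// With `success_rate = 0.8`, `avg_token_ratio = 1.3`, `avg_cost_ratio = 1.1`:
/// ```text
/// failure_prob = 1 - 0.8                               = 0.2
/// expected     = adjusted_cost * 1.1 * (1 + 0.2 * 1.5)
///              = adjusted_cost * 1.43
/// ```
/// where `adjusted_cost` already reflects the 1.3x historical token ratio.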
fn calculate_expected_cost_with_history(
&self,
pricing: &PricingInfo,
_complexity: f64,
estimated_tokens: u64,
stats: &ModelStats,
) -> ExpectedCost {
// Use actual failure rate from history (inverted success rate)
let failure_prob = (1.0 - stats.success_rate).clamp(0.0, self.max_failure_probability);
// Use actual token ratio from history for inefficiency
let inefficiency = stats.avg_token_ratio.clamp(0.5, 3.0);
// Base cost for estimated tokens
let input_tokens = estimated_tokens / 2;
let output_tokens = estimated_tokens / 2;
let base_cost = pricing.calculate_cost_cents(input_tokens, output_tokens);
// Adjust for actual inefficiency
let adjusted_tokens = ((estimated_tokens as f64) * inefficiency) as u64;
let adjusted_cost = pricing.calculate_cost_cents(adjusted_tokens / 2, adjusted_tokens / 2);
// Apply actual cost ratio (how much more/less than predicted)
let cost_with_ratio = (adjusted_cost as f64) * stats.avg_cost_ratio.clamp(0.5, 3.0);
// Expected cost including retry probability
let expected_cost = cost_with_ratio * (1.0 + failure_prob * self.retry_multiplier);
// Capability estimated from success rate rather than price
let capability = stats.success_rate.clamp(0.3, 0.95);
ExpectedCost {
model_id: pricing.model_id.clone(),
base_cost_cents: base_cost,
expected_cost_cents: expected_cost.ceil() as u64,
failure_probability: failure_prob,
capability,
inefficiency,
from_benchmarks: false, // Historical data is not benchmark data
}
}
/// Query historical model stats from memory.
async fn get_historical_model_stats(
&self,
complexity: f64,
ctx: &AgentContext,
) -> Option<HashMap<String, ModelStats>> {
let memory = ctx.memory.as_ref()?;
// Query stats for models at similar complexity levels (+/- 0.2)
match memory.retriever.get_model_stats(complexity, 0.2).await {
Ok(stats) if !stats.is_empty() => {
tracing::debug!(
"Found historical stats for {} models at complexity ~{:.2}",
stats.len(),
complexity
);
// Convert to HashMap for easy lookup
Some(stats.into_iter()
.map(|s| (s.model_id.clone(), s))
.collect())
}
Ok(_) => {
tracing::debug!("No historical stats found for complexity ~{:.2}", complexity);
None
}
Err(e) => {
tracing::warn!("Failed to fetch model stats: {}", e);
None
}
}
}
}
/// Intermediate calculation result for a model.
#[derive(Debug, Clone)]
struct ExpectedCost {
model_id: String,
#[allow(dead_code)]
base_cost_cents: u64,
expected_cost_cents: u64,
failure_probability: f64,
capability: f64,
#[allow(dead_code)]
inefficiency: f64,
/// Whether capability was derived from benchmark data
from_benchmarks: bool,
}
impl Default for ModelSelector {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Agent for ModelSelector {
fn id(&self) -> &AgentId {
&self.id
}
fn agent_type(&self) -> AgentType {
AgentType::ModelSelector
}
fn description(&self) -> &str {
"Selects optimal model for task based on complexity and budget (U-curve optimization)"
}
/// Select the optimal model for a task.
///
/// # Expected Input
/// Task should have complexity data in its context (from ComplexityEstimator).
///
/// # Returns
/// AgentResult with ModelRecommendation in the `data` field.
///
/// # Benchmark Integration
/// When benchmark data is available, uses actual benchmark scores for
/// task-type-specific capability estimation.
///
/// # Learning Integration
/// When memory is available, queries historical model statistics and uses
/// actual success rates/cost ratios instead of heuristics.
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
// Get complexity + estimated tokens from task analysis (populated by ComplexityEstimator).
let complexity = task
.analysis()
.complexity_score
.unwrap_or(0.5)
.clamp(0.0, 1.0);
let estimated_tokens = task.analysis().estimated_total_tokens.unwrap_or(2000_u64);
// Infer task type from description for benchmark-based selection
let task_type = TaskType::infer_from_description(task.description());
// Get available budget
let budget_cents = task.budget().remaining_cents();
// Query historical model stats (if memory available)
let historical_stats = self.get_historical_model_stats(complexity, ctx).await;
// Fetch pricing for tool-supporting models only
let models = ctx.pricing.models_by_cost_filtered(true).await;
if models.is_empty() {
// Fall back to configured default model (after resolving to latest)
let default_model = if let Some(resolver) = &ctx.resolver {
let resolver = resolver.read().await;
let resolved = resolver.resolve(&ctx.config.default_model);
if resolved.upgraded {
tracing::info!(
"Default model auto-upgraded: {} → {}",
resolved.original, resolved.resolved
);
}
resolved.resolved
} else {
ctx.config.default_model.clone()
};
// Record on task analysis
{
let a = task.analysis_mut();
a.selected_model = Some(default_model.clone());
}
return AgentResult::success(
"Using configured default model (no other models available)",
0,
)
.with_data(json!({
"model_id": default_model,
"expected_cost_cents": 50,
"confidence": 0.8,
"reasoning": "Fallback to configured default model",
"fallbacks": [],
"used_historical_data": false,
"used_benchmark_data": false,
"task_type": format!("{:?}", task_type),
}));
}
// Get user-requested model - if specified, resolve to latest version and use it
let requested_model = task.analysis().requested_model.clone();
// Auto-upgrade outdated model names using the resolver
let (resolved_model, was_upgraded) = if let Some(ref req_model) = requested_model {
if let Some(resolver) = &ctx.resolver {
let resolver = resolver.read().await;
let resolved = resolver.resolve(req_model);
if resolved.upgraded {
tracing::info!(
"Model auto-upgraded: {} → {} ({})",
resolved.original,
resolved.resolved,
resolved.reason.as_deref().unwrap_or("family upgrade")
);
}
(Some(resolved.resolved), resolved.upgraded)
} else {
(Some(req_model.clone()), false)
}
} else {
(None, false)
};
// If user explicitly requested a model (possibly upgraded), use it directly
// This bypasses the allowlist check - user knows what they want
if let Some(ref req_model) = resolved_model {
let in_allowlist = models.iter().any(|m| &m.model_id == req_model);
let upgrade_note = if was_upgraded {
format!(" (auto-upgraded from {})", requested_model.as_deref().unwrap_or("unknown"))
} else {
String::new()
};
if !in_allowlist {
tracing::warn!(
"Requested model {} not in allowlist, but using it anyway (user override){}",
req_model,
upgrade_note
);
}
tracing::info!(
"Using requested model directly: {}{}",
req_model,
upgrade_note
);
// Record selection in analysis
{
let a = task.analysis_mut();
a.selected_model = Some(req_model.clone());
a.estimated_cost_cents = Some(50); // Default estimate
}
return AgentResult::success(
&format!("Using requested model: {}{}", req_model, upgrade_note),
1,
)
.with_data(json!({
"model_id": req_model,
"expected_cost_cents": 50,
"confidence": 1.0,
"reasoning": format!("User requested model: {}{}", req_model, upgrade_note),
"fallbacks": [],
"used_historical_data": false,
"used_benchmark_data": false,
"was_upgraded": was_upgraded,
"original_model": requested_model,
"task_type": format!("{:?}", task_type),
"in_allowlist": in_allowlist,
}));
}
match self.select_optimal(
&models,
complexity,
estimated_tokens,
budget_cents,
task_type,
historical_stats.as_ref(),
resolved_model.as_deref(),
ctx,
).await {
Some(rec) => {
// Record selection in analysis
{
let a = task.analysis_mut();
a.selected_model = Some(rec.model_id.clone());
a.estimated_cost_cents = Some(rec.expected_cost_cents);
}
AgentResult::success(
&rec.reasoning,
1, // Minimal cost for selection itself
)
.with_data(json!({
"model_id": rec.model_id,
"expected_cost_cents": rec.expected_cost_cents,
"confidence": rec.confidence,
"reasoning": rec.reasoning,
"fallbacks": rec.fallbacks,
"used_historical_data": rec.used_historical_data,
"used_benchmark_data": rec.used_benchmark_data,
"task_type": format!("{:?}", task_type),
"historical_stats_available": historical_stats.as_ref().map(|h| h.len()),
"inputs": {
"complexity": complexity,
"estimated_tokens": estimated_tokens,
"budget_cents": budget_cents
}
}))
}
None => {
AgentResult::failure(
"No suitable model found within budget",
0,
)
}
}
}
}
impl LeafAgent for ModelSelector {
fn capability(&self) -> LeafCapability {
LeafCapability::ModelSelection
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_pricing(id: &str, prompt: f64, completion: f64) -> PricingInfo {
PricingInfo {
model_id: id.to_string(),
prompt_cost_per_million: prompt,
completion_cost_per_million: completion,
context_length: 100000,
max_output_tokens: None,
supports_tools: true,
}
}
#[test]
fn test_expected_cost_u_curve() {
let selector = ModelSelector::new();
let cheap = make_pricing("cheap", 0.1, 0.2);
let medium = make_pricing("medium", 1.0, 2.0);
let expensive = make_pricing("expensive", 10.0, 20.0);
let complexity = 0.7;
let tokens = 2000;
let cheap_cost = selector.calculate_expected_cost(&cheap, complexity, tokens);
let medium_cost = selector.calculate_expected_cost(&medium, complexity, tokens);
let expensive_cost = selector.calculate_expected_cost(&expensive, complexity, tokens);
// U-curve intuition: the cheap model fails often, the expensive model has a
// high base cost, and the medium model balances the two. The exact optimum
// depends on the pricing spread, so we only assert the failure-rate ordering.
println!("Cheap: {} (fail: {})", cheap_cost.expected_cost_cents, cheap_cost.failure_probability);
println!("Medium: {} (fail: {})", medium_cost.expected_cost_cents, medium_cost.failure_probability);
println!("Expensive: {} (fail: {})", expensive_cost.expected_cost_cents, expensive_cost.failure_probability);
// Basic sanity check: cheap model should have higher failure rate
assert!(cheap_cost.failure_probability > medium_cost.failure_probability);
}
#[test]
fn test_task_type_inference() {
assert_eq!(
TaskType::infer_from_description("Implement a function to sort arrays"),
TaskType::Code
);
assert_eq!(
TaskType::infer_from_description("Calculate the integral of x^2"),
TaskType::Math
);
assert_eq!(
TaskType::infer_from_description("Explain quantum mechanics"),
TaskType::Reasoning
);
}
}

View File

@@ -1,361 +0,0 @@
//! Verification agent - validates task completion.
//!
//! # Verification Strategy (Hybrid)
//! 1. Try programmatic verification first (fast, deterministic)
//! 2. Fall back to LLM verification if needed
//!
//! # Programmatic Checks
//! - File exists
//! - Command succeeds
//! - Output matches pattern
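//!
//! # Example Criteria (sketch - exact field and wrapper types live in `crate::task`)
//! ```text
//! VerificationCriteria::Hybrid {
//!     programmatic: ProgrammaticCheck::FileExists { path: "report.md".into() },
//!     llm_fallback: "A markdown report summarizing the findings exists".into(),
//! }
//! ```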
use async_trait::async_trait;
use std::path::Path;
use std::process::Stdio;
use tokio::process::Command;
use crate::agents::{
Agent, AgentContext, AgentId, AgentResult, AgentType, LeafAgent, LeafCapability,
};
use crate::llm::{ChatMessage, Role};
use crate::task::{ProgrammaticCheck, Task, VerificationCriteria, VerificationMethod, VerificationResult};
/// Agent that verifies task completion.
///
/// # Hybrid Verification
/// - Programmatic: Fast, deterministic, no cost
/// - LLM: Flexible, for subjective criteria
pub struct Verifier {
id: AgentId,
}
impl Verifier {
/// Create a new verifier.
pub fn new() -> Self {
Self { id: AgentId::new() }
}
/// Execute a programmatic check.
///
/// # Returns
/// `Ok(true)` if check passes, `Ok(false)` if fails, `Err` on error.
///
/// # Note
/// Paths in checks can be absolute or relative to working_dir.
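///
/// # Example (sketch)
/// A composite check that passes only if the manifest exists AND the build
/// check succeeds:
/// ```text
/// ProgrammaticCheck::All(vec![
///     ProgrammaticCheck::FileExists { path: "Cargo.toml".into() },
///     ProgrammaticCheck::CommandSucceeds { command: "cargo check".into() },
/// ])
/// ```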
async fn run_programmatic_check(
&self,
check: &ProgrammaticCheck,
working_dir: &Path,
) -> Result<bool, String> {
match check {
ProgrammaticCheck::FileExists { path } => {
let full_path = Self::resolve_path(path, working_dir);
Ok(full_path.exists())
}
ProgrammaticCheck::FileContains { path, content } => {
let full_path = Self::resolve_path(path, working_dir);
match tokio::fs::read_to_string(&full_path).await {
Ok(file_content) => Ok(file_content.contains(content)),
Err(_) => Ok(false),
}
}
ProgrammaticCheck::CommandSucceeds { command } => {
let output = Command::new("sh")
.arg("-c")
.arg(command)
.current_dir(working_dir)
.stdout(Stdio::null())
.stderr(Stdio::null())
.status()
.await
.map_err(|e| e.to_string())?;
Ok(output.success())
}
ProgrammaticCheck::CommandOutputMatches { command, pattern } => {
let output = Command::new("sh")
.arg("-c")
.arg(command)
.current_dir(working_dir)
.output()
.await
.map_err(|e| e.to_string())?;
let stdout = String::from_utf8_lossy(&output.stdout);
let regex = regex::Regex::new(pattern).map_err(|e| e.to_string())?;
Ok(regex.is_match(&stdout))
}
ProgrammaticCheck::DirectoryExists { path } => {
let full_path = Self::resolve_path(path, working_dir);
Ok(full_path.is_dir())
}
ProgrammaticCheck::FileMatchesRegex { path, pattern } => {
let full_path = Self::resolve_path(path, working_dir);
match tokio::fs::read_to_string(&full_path).await {
Ok(content) => {
let regex = regex::Regex::new(pattern).map_err(|e| e.to_string())?;
Ok(regex.is_match(&content))
}
Err(_) => Ok(false),
}
}
ProgrammaticCheck::All(checks) => {
for c in checks {
if !Box::pin(self.run_programmatic_check(c, working_dir)).await? {
return Ok(false);
}
}
Ok(true)
}
ProgrammaticCheck::Any(checks) => {
for c in checks {
if Box::pin(self.run_programmatic_check(c, working_dir)).await? {
return Ok(true);
}
}
Ok(false)
}
}
}
/// Resolve a path - if absolute, use as-is; if relative, join with working_dir.
fn resolve_path(path_str: &str, working_dir: &Path) -> std::path::PathBuf {
let path = Path::new(path_str);
if path.is_absolute() {
path.to_path_buf()
} else {
working_dir.join(path)
}
}
/// Verify using LLM.
///
/// # Parameters
/// - `task`: The task that was executed
/// - `success_criteria`: What success looks like
/// - `task_output`: The actual output produced by the executor
/// - `ctx`: Agent context
///
/// # Returns
/// VerificationResult with LLM's assessment
async fn verify_with_llm(
&self,
task: &Task,
success_criteria: &str,
task_output: Option<&str>,
ctx: &AgentContext,
) -> VerificationResult {
let output_section = task_output
.map(|o| format!("\n\nActual Output/Result:\n{}\n", o.chars().take(3000).collect::<String>()))
.unwrap_or_default();
let prompt = format!(
r#"You are verifying if a task was completed correctly.
Task: {}
Success Criteria: {}
{output_section}
Based on what was actually accomplished (shown in the output above), respond with a JSON object:
{{
"passed": true/false,
"reasoning": "explanation of why the task passed or failed based on the actual output"
}}
Be lenient - if the core goal was achieved even if the format isn't perfect, pass it.
Respond ONLY with the JSON object."#,
task.description(),
success_criteria
);
let messages = vec![
ChatMessage::new(Role::System, "You are a precise task verifier. Respond only with JSON."),
ChatMessage::new(Role::User, prompt),
];
let model = "openai/gpt-4.1-mini";
match ctx.llm.chat_completion(model, &messages, None).await {
Ok(response) => {
let content = response.content.unwrap_or_default();
self.parse_llm_verification(&content, model)
}
Err(e) => {
VerificationResult::fail(
format!("LLM verification failed: {}", e),
VerificationMethod::Llm { model: model.to_string() },
0,
)
}
}
}
/// Parse LLM verification response.
fn parse_llm_verification(&self, response: &str, model: &str) -> VerificationResult {
if let Ok(json) = serde_json::from_str::<serde_json::Value>(response) {
let passed = json["passed"].as_bool().unwrap_or(false);
let reasoning = json["reasoning"]
.as_str()
.unwrap_or("No reasoning provided")
.to_string();
if passed {
VerificationResult::pass(
reasoning,
VerificationMethod::Llm { model: model.to_string() },
1, // Minimal cost
)
} else {
VerificationResult::fail(
reasoning,
VerificationMethod::Llm { model: model.to_string() },
1,
)
}
} else {
// Try to infer from plain text. Note this match is permissive ("did not
// pass" still contains "pass"), which is one reason the verifier tended
// to rubber-stamp results.
let passed = response.to_lowercase().contains("pass")
|| response.to_lowercase().contains("success")
|| response.to_lowercase().contains("completed");
if passed {
VerificationResult::pass(
response.to_string(),
VerificationMethod::Llm { model: model.to_string() },
1,
)
} else {
VerificationResult::fail(
response.to_string(),
VerificationMethod::Llm { model: model.to_string() },
1,
)
}
}
}
/// Run verification according to criteria.
async fn verify(
&self,
task: &Task,
ctx: &AgentContext,
) -> VerificationResult {
match task.verification() {
VerificationCriteria::None => {
VerificationResult::pass(
"No verification required",
VerificationMethod::None,
0,
)
}
VerificationCriteria::Programmatic(check) => {
match self.run_programmatic_check(check, &ctx.working_dir).await {
Ok(true) => VerificationResult::pass(
"Programmatic check passed",
VerificationMethod::Programmatic,
0,
),
Ok(false) => VerificationResult::fail(
"Programmatic check failed",
VerificationMethod::Programmatic,
0,
),
Err(e) => VerificationResult::fail(
format!("Programmatic check error: {}", e),
VerificationMethod::Programmatic,
0,
),
}
}
VerificationCriteria::LlmBased { success_criteria } => {
// Get last output from task analysis if available
let last_output = task.last_output();
self.verify_with_llm(task, success_criteria, last_output, ctx).await
}
VerificationCriteria::Hybrid { programmatic, llm_fallback } => {
// Try programmatic first
match self.run_programmatic_check(programmatic, &ctx.working_dir).await {
Ok(true) => VerificationResult::pass(
"Programmatic check passed",
VerificationMethod::Programmatic,
0,
),
Ok(false) => {
// Fall back to LLM
let last_output = task.last_output();
self.verify_with_llm(task, llm_fallback, last_output, ctx).await
}
Err(_) => {
// Error in programmatic, fall back to LLM
let last_output = task.last_output();
self.verify_with_llm(task, llm_fallback, last_output, ctx).await
}
}
}
}
}
}
impl Default for Verifier {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Agent for Verifier {
fn id(&self) -> &AgentId {
&self.id
}
fn agent_type(&self) -> AgentType {
AgentType::Verifier
}
fn description(&self) -> &str {
"Verifies task completion using programmatic checks and LLM fallback"
}
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
let result = self.verify(task, ctx).await;
if result.passed() {
AgentResult::success(
result.reasoning(),
result.cost_cents(),
)
.with_data(serde_json::json!({
"passed": true,
"method": format!("{:?}", result.method()),
"reasoning": result.reasoning(),
}))
} else {
AgentResult::failure(
result.reasoning(),
result.cost_cents(),
)
.with_data(serde_json::json!({
"passed": false,
"method": format!("{:?}", result.method()),
"reasoning": result.reasoning(),
}))
}
}
}
impl LeafAgent for Verifier {
fn capability(&self) -> LeafCapability {
LeafCapability::Verification
}
}

View File

@@ -1,29 +1,33 @@
//! Agents module - task execution system.
//!
//! # Agent Types
//! - **SimpleAgent**: Unified agent that directly executes tasks
//! - **TaskExecutor**: Core execution loop with tools (used by SimpleAgent)
//!
//! # Legacy Types (deprecated, will be removed)
//! - **RootAgent**: Complex orchestrator (replaced by SimpleAgent)
//! - **NodeAgent**: Recursive splitter (removed - lost context)
//! - **ComplexityEstimator**: LLM-based estimation (unreliable)
//! - **ModelSelector**: U-curve optimization (over-engineered)
//! - **Verifier**: LLM self-verification (rubber-stamped everything)
//!
//! # Design Principles
//! - Direct execution without orchestration overhead
//! - User controls task granularity (no auto-splitting)
//! - Blocker detection via system prompt rules
//! - Mission completion via complete_mission tool
mod types;
mod context;
mod tree;
pub mod tuning;
pub mod orchestrator; // TODO: Remove after migration
pub mod leaf;
mod simple;
pub use simple::SimpleAgent;
pub use types::{AgentId, AgentType, AgentResult, AgentError, Complexity, TerminalReason};
pub use context::AgentContext;
pub use tree::{AgentTree, AgentRef};
pub use tuning::TuningParams;

View File

@@ -1,8 +1,4 @@
//! Orchestrator agents (legacy - removed).
//!
//! The orchestrator agents (RootAgent, NodeAgent) have been superseded by SimpleAgent.
//! This module is kept empty for now; will be removed in a future cleanup.

View File

@@ -1,889 +0,0 @@
//! Node agent - intermediate orchestrator in the agent tree.
//!
//! Node agents are like mini-root agents that can:
//! - Receive delegated tasks from parent
//! - Estimate complexity and split complex subtasks further (recursive)
//! - Delegate to their own children
//! - Aggregate results for parent
use std::sync::Arc;
use async_trait::async_trait;
use serde_json::json;
use crate::agents::{
leaf::{ComplexityEstimator, ModelSelector, TaskExecutor, Verifier},
Agent, AgentContext, AgentId, AgentRef, AgentResult, AgentType, Complexity, OrchestratorAgent,
};
use crate::budget::Budget;
use crate::llm::{ChatMessage, Role};
use crate::task::{Subtask, SubtaskPlan, Task, VerificationCriteria};
/// Node agent - intermediate orchestrator.
///
/// # Purpose
/// Handles subtasks that may still be complex enough
/// to warrant further splitting. Now with full recursive
/// splitting capabilities like RootAgent.
///
/// # Recursive Splitting
/// NodeAgent can estimate complexity of its subtasks and
/// recursively split them if they're still too complex,
/// respecting the `max_split_depth` in context.
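///
/// # Example (sketch; assumes a prepared `task` and `ctx`)
/// ```text
/// let node = NodeAgent::new("research");
/// let result = node.execute(&mut task, &ctx).await;
/// ```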
pub struct NodeAgent {
id: AgentId,
/// Name for identification in logs
name: String,
// Child agents - full pipeline for recursive splitting
complexity_estimator: Arc<ComplexityEstimator>,
model_selector: Arc<ModelSelector>,
task_executor: Arc<TaskExecutor>,
verifier: Arc<Verifier>,
// Child node agents (for further splitting)
child_nodes: Vec<Arc<NodeAgent>>,
}
impl NodeAgent {
/// Create a new node agent with full recursive capabilities.
pub fn new(name: impl Into<String>) -> Self {
Self {
id: AgentId::new(),
name: name.into(),
complexity_estimator: Arc::new(ComplexityEstimator::new()),
model_selector: Arc::new(ModelSelector::new()),
task_executor: Arc::new(TaskExecutor::new()),
verifier: Arc::new(Verifier::new()),
child_nodes: Vec::new(),
}
}
/// Create a node with custom executor.
pub fn with_executor(mut self, executor: Arc<TaskExecutor>) -> Self {
self.task_executor = executor;
self
}
/// Add a child node for hierarchical delegation.
pub fn add_child_node(&mut self, child: Arc<NodeAgent>) {
self.child_nodes.push(child);
}
/// Get the node's name.
pub fn name(&self) -> &str {
&self.name
}
/// Estimate complexity of a task.
async fn estimate_complexity(&self, task: &mut Task, ctx: &AgentContext) -> Complexity {
let result = self.complexity_estimator.execute(task, ctx).await;
if let Some(data) = result.data {
let score = data["score"].as_f64().unwrap_or(0.5);
let reasoning = data["reasoning"].as_str().unwrap_or("").to_string();
let estimated_tokens = data["estimated_tokens"].as_u64().unwrap_or(2000);
let should_split = data["should_split"].as_bool().unwrap_or(false);
Complexity::new(score, reasoning, estimated_tokens).with_split(should_split)
} else {
Complexity::moderate("Could not estimate complexity")
}
}
/// Split a complex task into subtasks.
async fn split_task(
&self,
task: &Task,
ctx: &AgentContext,
) -> Result<SubtaskPlan, AgentResult> {
let prompt = format!(
r#"You are a task planner. Break down this task into smaller, manageable subtasks.
Task: {}
Respond with a JSON object:
{{
"subtasks": [
{{
"description": "What to do",
"verification": "How to verify it's done",
"weight": 1.0,
"dependencies": []
}}
],
"reasoning": "Why this breakdown makes sense"
}}
Guidelines:
- Each subtask should be independently executable once its dependencies are complete
- The "dependencies" array contains indices (0-based) of subtasks that MUST complete before this one can start
- For example, if subtask 2 needs subtask 0's output, set "dependencies": [0]
- Include verification for each subtask
- Weight indicates relative effort (higher = more work)
- Keep subtasks focused and specific
- Aim for 2-4 subtasks typically
- IMPORTANT: If subtasks have a logical order (e.g., download before analyze), specify dependencies!
PREFER COMMAND-LINE APPROACHES:
- For downloading files: use curl/wget, NOT browser automation
- For Chrome extensions: download CRX directly via URL pattern, then unzip
- For file analysis: use grep/find/ripgrep, NOT GUI tools
- For web APIs: use curl/fetch_url, NOT browser clicks
- Desktop automation is a LAST RESORT only when no CLI option exists
Respond ONLY with the JSON object."#,
task.description()
);
let messages = vec![
ChatMessage::new(
Role::System,
"You are a precise task planner. Respond only with JSON.",
),
ChatMessage::new(Role::User, prompt),
];
let response = ctx
.llm
.chat_completion("openai/gpt-4.1-mini", &messages, None)
.await
.map_err(|e| AgentResult::failure(format!("LLM error: {}", e), 1))?;
let content = response.content.unwrap_or_default();
self.parse_subtask_plan(&content, task.id())
}
/// Extract JSON from LLM response (handles markdown code blocks).
fn extract_json(response: &str) -> String {
let trimmed = response.trim();
// Check for markdown code block
if trimmed.starts_with("```") {
// Find the end of the opening fence
if let Some(start_idx) = trimmed.find('\n') {
let after_fence = &trimmed[start_idx + 1..];
// Find the closing fence
if let Some(end_idx) = after_fence.rfind("```") {
return after_fence[..end_idx].trim().to_string();
}
}
}
// Try to find JSON object in the response
if let Some(start) = trimmed.find('{') {
if let Some(end) = trimmed.rfind('}') {
if end > start {
return trimmed[start..=end].to_string();
}
}
}
// Return as-is if no extraction needed
trimmed.to_string()
}
/// Parse LLM response into SubtaskPlan.
fn parse_subtask_plan(
&self,
response: &str,
parent_id: crate::task::TaskId,
) -> Result<SubtaskPlan, AgentResult> {
let extracted = Self::extract_json(response);
let json: serde_json::Value = serde_json::from_str(&extracted).map_err(|e| {
AgentResult::failure(
format!(
"Failed to parse subtasks: {} (raw: {}...)",
e,
response.chars().take(100).collect::<String>()
),
0,
)
})?;
let reasoning = json["reasoning"]
.as_str()
.unwrap_or("No reasoning provided")
.to_string();
let subtasks: Vec<Subtask> = json["subtasks"]
.as_array()
.map(|arr| {
arr.iter()
.map(|s| {
let desc = s["description"].as_str().unwrap_or("").to_string();
let verification = s["verification"].as_str().unwrap_or("");
let weight = s["weight"].as_f64().unwrap_or(1.0);
// Parse dependencies array
let dependencies: Vec<usize> = s["dependencies"]
.as_array()
.map(|deps| {
deps.iter()
.filter_map(|d| d.as_u64().map(|n| n as usize))
.collect()
})
.unwrap_or_default();
Subtask::new(desc, VerificationCriteria::llm_based(verification), weight)
.with_dependencies(dependencies)
})
.collect()
})
.unwrap_or_default();
if subtasks.is_empty() {
return Err(AgentResult::failure("No subtasks generated", 1));
}
SubtaskPlan::new(parent_id, subtasks, reasoning)
.map_err(|e| AgentResult::failure(format!("Invalid subtask plan: {}", e), 0))
}
/// Execute subtasks recursively, potentially splitting further.
async fn execute_subtasks(
&self,
subtask_plan: SubtaskPlan,
parent_budget: &Budget,
ctx: &AgentContext,
requested_model: Option<&str>,
) -> AgentResult {
// Convert plan to tasks
let mut tasks = match subtask_plan.into_tasks(parent_budget) {
Ok(t) => t,
Err(e) => return AgentResult::failure(format!("Failed to create subtasks: {}", e), 0),
};
// Propagate requested_model to all subtasks
if let Some(model) = requested_model {
for task in &mut tasks {
task.analysis_mut().requested_model = Some(model.to_string());
}
}
let mut results = Vec::new();
let mut total_cost = 0u64;
// Create a child context with reduced split depth
let child_ctx = ctx.child_context();
// Execute each subtask recursively
for task in &mut tasks {
tracing::info!(
"NodeAgent '{}' processing subtask: {}",
self.name,
task.description().chars().take(80).collect::<String>()
);
// Create a child NodeAgent for this subtask (recursive)
let child_node = NodeAgent::new(format!("{}-sub", self.name));
// Execute through the child node (which may split further)
let result = child_node.execute(task, &child_ctx).await;
total_cost += result.cost_cents;
results.push(result);
}
// Aggregate results
let successes = results.iter().filter(|r| r.success).count();
let total = results.len();
// Concatenate successful outputs for meaningful aggregation
let combined_output = Self::concatenate_outputs(&results);
if successes == total {
AgentResult::success(combined_output, total_cost).with_data(json!({
"subtasks_total": total,
"subtasks_succeeded": successes,
"results": results.iter().map(|r| &r.output).collect::<Vec<_>>(),
}))
} else {
AgentResult::failure(
format!(
"{}/{} subtasks succeeded\n\n{}",
successes, total, combined_output
),
total_cost,
)
.with_data(json!({
"subtasks_total": total,
"subtasks_succeeded": successes,
"results": results.iter().map(|r| json!({
"success": r.success,
"output": &r.output,
})).collect::<Vec<_>>(),
}))
}
}
/// Concatenate subtask outputs into a single string.
/// Used for intermediate aggregation (RootAgent handles final synthesis).
fn concatenate_outputs(results: &[AgentResult]) -> String {
let outputs: Vec<String> = results
.iter()
.enumerate()
.filter(|(_, r)| r.success && !r.output.is_empty())
.map(|(i, r)| {
if results.len() == 1 {
r.output.clone()
} else {
format!("### Part {}\n{}", i + 1, r.output)
}
})
.collect();
if outputs.is_empty() {
"No output generated.".to_string()
} else if outputs.len() == 1 {
outputs.into_iter().next().unwrap()
} else {
outputs.join("\n\n")
}
}
/// Execute with tree updates for visualization.
/// This method updates the parent's tree structure as this node executes.
pub async fn execute_with_tree(
&self,
task: &mut Task,
ctx: &AgentContext,
node_id: &str,
root_tree: &mut crate::api::control::AgentTreeNode,
emit_ctx: &AgentContext,
) -> AgentResult {
use crate::api::control::AgentTreeNode;
let mut total_cost = 0u64;
tracing::info!(
"NodeAgent '{}' executing task (depth remaining: {}): {}",
self.name,
ctx.max_split_depth,
task.description().chars().take(80).collect::<String>()
);
// Step 1: Estimate complexity
ctx.emit_phase(
"estimating_complexity",
Some("Analyzing subtask..."),
Some(&self.name),
);
let complexity = self.estimate_complexity(task, ctx).await;
total_cost += 1;
// Update node with complexity
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
node.complexity = Some(complexity.score());
}
emit_ctx.emit_tree(root_tree.clone());
tracing::info!(
"NodeAgent '{}' complexity: {:.2} (should_split: {}, can_split: {})",
self.name,
complexity.score(),
complexity.should_split(),
ctx.can_split()
);
// Step 2: Decide execution strategy
if complexity.should_split() && ctx.can_split() {
ctx.emit_phase(
"splitting_task",
Some("Decomposing subtask..."),
Some(&self.name),
);
tracing::info!("NodeAgent '{}' splitting task into sub-subtasks", self.name);
match self.split_task(task, ctx).await {
Ok(plan) => {
total_cost += 2;
// Add child nodes to this node in the tree
if let Some(parent_node) =
root_tree.children.iter_mut().find(|n| n.id == node_id)
{
for (i, subtask) in plan.subtasks().iter().enumerate() {
let child_node = AgentTreeNode::new(
&format!("{}-sub-{}", node_id, i + 1),
"Node",
&format!("Sub-subtask {}", i + 1),
&subtask.description.chars().take(40).collect::<String>(),
)
.with_status("pending");
parent_node.children.push(child_node);
}
}
emit_ctx.emit_tree(root_tree.clone());
let subtask_count = plan.subtasks().len();
tracing::info!(
"NodeAgent '{}' created {} sub-subtasks",
self.name,
subtask_count
);
// Execute subtasks recursively with tree updates
let child_ctx = ctx.child_context();
let requested_model = task.analysis().requested_model.as_deref();
let result = self
.execute_subtasks_with_tree(
plan,
task.budget(),
&child_ctx,
node_id,
root_tree,
emit_ctx,
requested_model,
)
.await;
return AgentResult {
success: result.success,
output: result.output,
cost_cents: total_cost + result.cost_cents,
model_used: result.model_used,
data: result.data,
};
}
Err(e) => {
tracing::warn!(
"NodeAgent '{}' couldn't split, executing directly: {}",
self.name,
e.output
);
}
}
}
// Simple task: add child nodes for executor and verifier
if let Some(parent_node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
parent_node.children.push(
AgentTreeNode::new(
&format!("{}-executor", node_id),
"TaskExecutor",
"Task Executor",
"Execute subtask",
)
.with_status("running"),
);
parent_node.children.push(
AgentTreeNode::new(
&format!("{}-verifier", node_id),
"Verifier",
"Verifier",
"Verify result",
)
.with_status("pending"),
);
}
emit_ctx.emit_tree(root_tree.clone());
// Select model
ctx.emit_phase(
"selecting_model",
Some("Choosing model..."),
Some(&self.name),
);
let sel_result = self.model_selector.execute(task, ctx).await;
total_cost += sel_result.cost_cents;
// Execute
ctx.emit_phase("executing", Some("Running subtask..."), Some(&self.name));
let result = self.task_executor.execute(task, ctx).await;
total_cost += result.cost_cents;
// Update executor status
if let Some(parent_node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
if let Some(exec_node) = parent_node
.children
.iter_mut()
.find(|n| n.id == format!("{}-executor", node_id))
{
exec_node.status = if result.success {
"completed".to_string()
} else {
"failed".to_string()
};
exec_node.budget_spent = result.cost_cents;
}
}
emit_ctx.emit_tree(root_tree.clone());
// Store the executor output for verification
task.set_last_output(result.output.clone());
if !result.success {
return AgentResult::failure(result.output, total_cost).with_data(json!({
"node_name": self.name,
"complexity": complexity.score(),
"was_split": false,
"execution": result.data,
}));
}
// Verify
if let Some(parent_node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
if let Some(ver_node) = parent_node
.children
.iter_mut()
.find(|n| n.id == format!("{}-verifier", node_id))
{
ver_node.status = "running".to_string();
}
}
emit_ctx.emit_tree(root_tree.clone());
ctx.emit_phase("verifying", Some("Checking results..."), Some(&self.name));
let verification = self.verifier.execute(task, ctx).await;
total_cost += verification.cost_cents;
// Update verifier status
if let Some(parent_node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
if let Some(ver_node) = parent_node
.children
.iter_mut()
.find(|n| n.id == format!("{}-verifier", node_id))
{
ver_node.status = if verification.success {
"completed".to_string()
} else {
"failed".to_string()
};
ver_node.budget_spent = verification.cost_cents;
}
}
emit_ctx.emit_tree(root_tree.clone());
if verification.success {
AgentResult::success(result.output, total_cost)
.with_model(result.model_used.unwrap_or_default())
.with_data(json!({
"node_name": self.name,
"complexity": complexity.score(),
"was_split": false,
"execution": result.data,
"verification": verification.data,
}))
} else {
AgentResult::failure(
format!(
"Task completed but verification failed: {}",
verification.output
),
total_cost,
)
.with_data(json!({
"node_name": self.name,
"complexity": complexity.score(),
"was_split": false,
"execution": result.data,
"verification": verification.data,
}))
}
}
/// Execute subtasks with tree updates for visualization.
async fn execute_subtasks_with_tree(
&self,
subtask_plan: SubtaskPlan,
parent_budget: &Budget,
ctx: &AgentContext,
parent_node_id: &str,
root_tree: &mut crate::api::control::AgentTreeNode,
emit_ctx: &AgentContext,
requested_model: Option<&str>,
) -> AgentResult {
let mut tasks = match subtask_plan.into_tasks(parent_budget) {
Ok(t) => t,
Err(e) => return AgentResult::failure(format!("Failed to create subtasks: {}", e), 0),
};
// Propagate requested_model to all subtasks
if let Some(model) = requested_model {
for task in &mut tasks {
task.analysis_mut().requested_model = Some(model.to_string());
}
}
let mut results = Vec::new();
let mut total_cost = 0u64;
let child_ctx = ctx.child_context();
for (i, task) in tasks.iter_mut().enumerate() {
let subtask_id = format!("{}-sub-{}", parent_node_id, i + 1);
// Update subtask status to running
if let Some(parent_node) = root_tree
.children
.iter_mut()
.find(|n| n.id == parent_node_id)
{
if let Some(child_node) =
parent_node.children.iter_mut().find(|n| n.id == subtask_id)
{
child_node.status = "running".to_string();
}
}
emit_ctx.emit_tree(root_tree.clone());
tracing::info!(
"NodeAgent '{}' processing sub-subtask: {}",
self.name,
task.description().chars().take(80).collect::<String>()
);
// Create and execute a child NodeAgent
let child_node_agent = NodeAgent::new(subtask_id.clone());
let result = child_node_agent.execute(task, &child_ctx).await;
total_cost += result.cost_cents;
// Update subtask status
if let Some(parent_node) = root_tree
.children
.iter_mut()
.find(|n| n.id == parent_node_id)
{
if let Some(child_node) =
parent_node.children.iter_mut().find(|n| n.id == subtask_id)
{
child_node.status = if result.success {
"completed".to_string()
} else {
"failed".to_string()
};
child_node.budget_spent = result.cost_cents;
}
}
emit_ctx.emit_tree(root_tree.clone());
results.push(result);
}
let successes = results.iter().filter(|r| r.success).count();
let total = results.len();
// Concatenate successful outputs for meaningful aggregation
let combined_output = Self::concatenate_outputs(&results);
if successes == total {
AgentResult::success(combined_output, total_cost).with_data(json!({
"subtasks_total": total,
"subtasks_succeeded": successes,
}))
} else {
AgentResult::failure(
format!(
"{}/{} sub-subtasks succeeded\n\n{}",
successes, total, combined_output
),
total_cost,
)
.with_data(json!({
"subtasks_total": total,
"subtasks_succeeded": successes,
}))
}
}
}
impl Default for NodeAgent {
fn default() -> Self {
Self::new("node")
}
}
#[async_trait]
impl Agent for NodeAgent {
fn id(&self) -> &AgentId {
&self.id
}
fn agent_type(&self) -> AgentType {
AgentType::Node
}
fn description(&self) -> &str {
"Intermediate orchestrator with recursive splitting capabilities"
}
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
let mut total_cost = 0u64;
tracing::info!(
"NodeAgent '{}' executing task (depth remaining: {}): {}",
self.name,
ctx.max_split_depth,
task.description().chars().take(80).collect::<String>()
);
// Step 1: Estimate complexity
ctx.emit_phase(
"estimating_complexity",
Some("Analyzing subtask..."),
Some(&self.name),
);
let complexity = self.estimate_complexity(task, ctx).await;
total_cost += 1;
tracing::info!(
"NodeAgent '{}' complexity: {:.2} (should_split: {}, can_split: {})",
self.name,
complexity.score(),
complexity.should_split(),
ctx.can_split()
);
// Step 2: Decide execution strategy
if complexity.should_split() && ctx.can_split() {
// Complex subtask: split further recursively
ctx.emit_phase(
"splitting_task",
Some("Decomposing subtask..."),
Some(&self.name),
);
tracing::info!("NodeAgent '{}' splitting task into sub-subtasks", self.name);
match self.split_task(task, ctx).await {
Ok(plan) => {
total_cost += 2; // Splitting cost
let subtask_count = plan.subtasks().len();
tracing::info!(
"NodeAgent '{}' created {} sub-subtasks",
self.name,
subtask_count
);
// Execute subtasks recursively
let requested_model = task.analysis().requested_model.as_deref();
let result = self
.execute_subtasks(plan, task.budget(), ctx, requested_model)
.await;
return AgentResult {
success: result.success,
output: result.output,
cost_cents: total_cost + result.cost_cents,
model_used: result.model_used,
data: result.data,
};
}
Err(e) => {
tracing::warn!(
"NodeAgent '{}' couldn't split, executing directly: {}",
self.name,
e.output
);
}
}
}
// Simple task or failed to split: execute directly
// Select model
ctx.emit_phase(
"selecting_model",
Some("Choosing model..."),
Some(&self.name),
);
let sel_result = self.model_selector.execute(task, ctx).await;
total_cost += sel_result.cost_cents;
// Execute
ctx.emit_phase("executing", Some("Running subtask..."), Some(&self.name));
let result = self.task_executor.execute(task, ctx).await;
total_cost += result.cost_cents;
// Store the executor output for verification
task.set_last_output(result.output.clone());
if !result.success {
return AgentResult::failure(result.output, total_cost).with_data(json!({
"node_name": self.name,
"complexity": complexity.score(),
"was_split": false,
"execution": result.data,
}));
}
// Verify
ctx.emit_phase("verifying", Some("Checking results..."), Some(&self.name));
let verification = self.verifier.execute(task, ctx).await;
total_cost += verification.cost_cents;
if verification.success {
AgentResult::success(result.output, total_cost)
.with_model(result.model_used.unwrap_or_default())
.with_data(json!({
"node_name": self.name,
"complexity": complexity.score(),
"was_split": false,
"execution": result.data,
"verification": verification.data,
}))
} else {
AgentResult::failure(
format!(
"Task completed but verification failed: {}",
verification.output
),
total_cost,
)
.with_data(json!({
"node_name": self.name,
"complexity": complexity.score(),
"was_split": false,
"execution": result.data,
"verification": verification.data,
}))
}
}
}
#[async_trait]
impl OrchestratorAgent for NodeAgent {
fn children(&self) -> Vec<AgentRef> {
let mut children: Vec<AgentRef> = vec![
Arc::clone(&self.complexity_estimator) as AgentRef,
Arc::clone(&self.model_selector) as AgentRef,
Arc::clone(&self.task_executor) as AgentRef,
Arc::clone(&self.verifier) as AgentRef,
];
for node in &self.child_nodes {
children.push(Arc::clone(node) as AgentRef);
}
children
}
fn find_child(&self, agent_type: AgentType) -> Option<AgentRef> {
match agent_type {
AgentType::ComplexityEstimator => {
Some(Arc::clone(&self.complexity_estimator) as AgentRef)
}
AgentType::ModelSelector => Some(Arc::clone(&self.model_selector) as AgentRef),
AgentType::TaskExecutor => Some(Arc::clone(&self.task_executor) as AgentRef),
AgentType::Verifier => Some(Arc::clone(&self.verifier) as AgentRef),
AgentType::Node => self.child_nodes.first().map(|n| Arc::clone(n) as AgentRef),
_ => None,
}
}
async fn delegate(&self, task: &mut Task, child: AgentRef, ctx: &AgentContext) -> AgentResult {
child.execute(task, ctx).await
}
async fn delegate_all(&self, tasks: &mut [Task], ctx: &AgentContext) -> Vec<AgentResult> {
let mut results = Vec::with_capacity(tasks.len());
for task in tasks {
// Use recursive execution for each task
let result = self.execute(task, ctx).await;
results.push(result);
}
results
}
}

View File

@@ -1,903 +0,0 @@
//! Root agent - top-level orchestrator of the agent tree.
//!
//! # Responsibilities
//! 1. Receive tasks from the API
//! 2. Estimate complexity
//! 3. Decide: execute directly or split into subtasks
//! 4. Delegate to appropriate children
//! 5. Aggregate results
use std::sync::Arc;
use async_trait::async_trait;
use serde_json::json;
use crate::agents::{
Agent, AgentContext, AgentId, AgentRef, AgentResult, AgentType, Complexity,
OrchestratorAgent,
leaf::{ComplexityEstimator, ModelSelector, TaskExecutor, Verifier},
};
use crate::agents::tuning::TuningParams;
use crate::budget::Budget;
use crate::task::{Task, Subtask, SubtaskPlan, VerificationCriteria};
/// Root agent - the top of the agent tree.
///
/// # Task Processing Flow
/// ```text
/// 1. Estimate complexity (ComplexityEstimator)
/// 2. If simple: execute directly (TaskExecutor)
/// 3. If complex:
/// a. Split into subtasks (LLM-based)
/// b. Select model for each subtask (ModelSelector)
/// c. Execute subtasks (TaskExecutor)
/// d. Verify results (Verifier)
/// 4. Return aggregated result
/// ```
pub struct RootAgent {
id: AgentId,
// Child agents
complexity_estimator: Arc<ComplexityEstimator>,
model_selector: Arc<ModelSelector>,
task_executor: Arc<TaskExecutor>,
verifier: Arc<Verifier>,
}
impl RootAgent {
/// Create a new root agent with default children.
pub fn new() -> Self {
Self::new_with_tuning(&TuningParams::default())
}
/// Create a new root agent using empirically tuned parameters.
pub fn new_with_tuning(tuning: &TuningParams) -> Self {
Self {
id: AgentId::new(),
complexity_estimator: Arc::new(ComplexityEstimator::with_params(
tuning.complexity.prompt_variant,
tuning.complexity.split_threshold,
tuning.complexity.token_multiplier,
)),
model_selector: Arc::new(ModelSelector::with_params(
tuning.model_selector.retry_multiplier,
tuning.model_selector.inefficiency_scale,
tuning.model_selector.max_failure_probability,
)),
task_executor: Arc::new(TaskExecutor::new()),
verifier: Arc::new(Verifier::new()),
}
}
/// Estimate complexity of a task.
async fn estimate_complexity(&self, task: &mut Task, ctx: &AgentContext) -> Complexity {
let result = self.complexity_estimator.execute(task, ctx).await;
if let Some(data) = result.data {
let score = data["score"].as_f64().unwrap_or(0.5);
let reasoning = data["reasoning"].as_str().unwrap_or("").to_string();
let estimated_tokens = data["estimated_tokens"].as_u64().unwrap_or(2000);
let should_split = data["should_split"].as_bool().unwrap_or(false);
Complexity::new(score, reasoning, estimated_tokens)
.with_split(should_split)
} else {
Complexity::moderate("Could not estimate complexity")
}
}
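    // Illustrative shape of the estimator's `data` payload consumed above
    // (field values hypothetical):
    //   {"score": 0.72, "reasoning": "multi-file change", "estimated_tokens": 4000, "should_split": true}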
/// Split a complex task into subtasks.
///
/// Uses LLM to analyze the task and propose subtasks.
async fn split_task(&self, task: &Task, ctx: &AgentContext) -> Result<SubtaskPlan, AgentResult> {
let prompt = format!(
r#"You are a task planner. Break down this task into smaller, manageable subtasks.
Task: {}
Respond with a JSON object:
{{
"subtasks": [
{{
"description": "What to do",
"verification": "How to verify it's done",
"weight": 1.0,
"dependencies": []
}}
],
"reasoning": "Why this breakdown makes sense"
}}
Guidelines:
- Each subtask should be independently executable once its dependencies are complete
- The "dependencies" array contains indices (0-based) of subtasks that MUST complete before this one can start
- For example, if subtask 2 needs subtask 0's output, set "dependencies": [0]
- Include verification for each subtask
- Weight indicates relative effort (higher = more work)
- Keep subtasks focused and specific
- IMPORTANT: If subtasks have a logical order (e.g., download before analyze), specify dependencies!
PREFER COMMAND-LINE APPROACHES:
- For downloading files: use curl/wget, NOT browser automation
- For Chrome extensions: download CRX directly via URL pattern, then unzip
- For file analysis: use grep/find/ripgrep, NOT GUI tools
- For web APIs: use curl/fetch_url, NOT browser clicks
- Desktop automation is a LAST RESORT only when no CLI option exists
Respond ONLY with the JSON object."#,
task.description()
);
let messages = vec![
crate::llm::ChatMessage::new(crate::llm::Role::System, "You are a precise task planner. Respond only with JSON."),
crate::llm::ChatMessage::new(crate::llm::Role::User, prompt),
];
let response = ctx.llm
.chat_completion("openai/gpt-4.1-mini", &messages, None)
.await
.map_err(|e| AgentResult::failure(format!("LLM error: {}", e), 1))?;
let content = response.content.unwrap_or_default();
self.parse_subtask_plan(&content, task.id())
}
/// Synthesize a final output from subtask results.
///
/// # Purpose
/// When a task is split into subtasks, this method produces a coherent final
/// response by asking the LLM to synthesize all subtask outputs into a single
/// answer that addresses the original request.
///
/// # Fallback
/// If LLM synthesis fails, falls back to concatenating subtask outputs.
async fn synthesize_final_output(
&self,
original_task: &str,
results: &[AgentResult],
ctx: &AgentContext,
) -> String {
// Collect successful outputs
let subtask_outputs: Vec<String> = results
.iter()
.enumerate()
.filter(|(_, r)| r.success)
.map(|(i, r)| format!("## Subtask {} Output\n{}", i + 1, r.output))
.collect();
if subtask_outputs.is_empty() {
return "All subtasks failed - no output to synthesize.".to_string();
}
// If only one subtask, just return its output directly
if subtask_outputs.len() == 1 {
return results
.iter()
.find(|r| r.success)
.map(|r| r.output.clone())
.unwrap_or_default();
}
let combined_outputs = subtask_outputs.join("\n\n---\n\n");
let prompt = format!(
r#"You have completed a multi-step task. Below are the outputs from each step.
## Original Request
{original_task}
## Subtask Outputs
{combined_outputs}
## Your Task
Synthesize these outputs into a single, coherent response that directly answers the original request.
Guidelines:
- Combine findings into a unified narrative or report
- Remove redundancy between subtask outputs
- Maintain the format the user requested (e.g., if they asked for a markdown report, provide one)
- If subtasks produced code or files, list them clearly
- Be comprehensive but concise
- Do NOT mention "subtasks" or the internal execution structure - respond as if you did the work yourself"#
);
let messages = vec![
crate::llm::ChatMessage::new(
crate::llm::Role::System,
"You are a helpful assistant that synthesizes work outputs into coherent responses.",
),
crate::llm::ChatMessage::new(crate::llm::Role::User, prompt),
];
// Use a fast model for synthesis to minimize cost
match ctx
.llm
.chat_completion("openai/gpt-4.1-mini", &messages, None)
.await
{
Ok(response) => response.content.unwrap_or_else(|| {
// Fallback: concatenate outputs if synthesis returned empty
self.fallback_concatenate_outputs(results)
}),
Err(e) => {
tracing::warn!("Synthesis LLM call failed, using fallback: {}", e);
self.fallback_concatenate_outputs(results)
}
}
}
/// Fallback method: concatenate subtask outputs with headers.
fn fallback_concatenate_outputs(&self, results: &[AgentResult]) -> String {
let outputs: Vec<String> = results
.iter()
.enumerate()
.filter(|(_, r)| r.success && !r.output.is_empty())
.map(|(i, r)| format!("## Part {}\n\n{}", i + 1, r.output))
.collect();
if outputs.is_empty() {
"Task completed but no output was generated.".to_string()
} else {
outputs.join("\n\n---\n\n")
}
}
/// Extract JSON from LLM response (handles markdown code blocks).
fn extract_json(response: &str) -> String {
let trimmed = response.trim();
// Check for markdown code block
if trimmed.starts_with("```") {
// Find the end of the opening fence
if let Some(start_idx) = trimmed.find('\n') {
let after_fence = &trimmed[start_idx + 1..];
// Find the closing fence
if let Some(end_idx) = after_fence.rfind("```") {
return after_fence[..end_idx].trim().to_string();
}
}
}
// Try to find JSON object in the response
if let Some(start) = trimmed.find('{') {
if let Some(end) = trimmed.rfind('}') {
if end > start {
return trimmed[start..=end].to_string();
}
}
}
// Return as-is if no extraction needed
trimmed.to_string()
}
/// Parse LLM response into SubtaskPlan.
fn parse_subtask_plan(
&self,
response: &str,
parent_id: crate::task::TaskId,
) -> Result<SubtaskPlan, AgentResult> {
let extracted = Self::extract_json(response);
let json: serde_json::Value = serde_json::from_str(&extracted)
.map_err(|e| AgentResult::failure(format!("Failed to parse subtasks: {} (raw: {}...)", e, response.chars().take(100).collect::<String>()), 0))?;
let reasoning = json["reasoning"]
.as_str()
.unwrap_or("No reasoning provided")
.to_string();
let subtasks: Vec<Subtask> = json["subtasks"]
.as_array()
.map(|arr| {
arr.iter()
.map(|s| {
let desc = s["description"].as_str().unwrap_or("").to_string();
let verification = s["verification"].as_str().unwrap_or("");
let weight = s["weight"].as_f64().unwrap_or(1.0);
// Parse dependencies array
let dependencies: Vec<usize> = s["dependencies"]
.as_array()
.map(|deps| {
deps.iter()
.filter_map(|d| d.as_u64().map(|n| n as usize))
.collect()
})
.unwrap_or_default();
Subtask::new(
desc,
VerificationCriteria::llm_based(verification),
weight,
).with_dependencies(dependencies)
})
.collect()
})
.unwrap_or_default();
if subtasks.is_empty() {
return Err(AgentResult::failure("No subtasks generated", 1));
}
SubtaskPlan::new(parent_id, subtasks, reasoning)
.map_err(|e| AgentResult::failure(format!("Invalid subtask plan: {}", e), 0))
}
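
    /// A hedged illustration of a planner payload that `parse_subtask_plan`
    /// accepts; descriptions and weights are hypothetical.
    #[allow(dead_code)]
    fn example_plan_json() -> &'static str {
        r#"{
  "reasoning": "Download before analysis, so subtask 1 depends on subtask 0",
  "subtasks": [
    {"description": "Download dataset.csv with curl", "verification": "dataset.csv exists", "weight": 1.0, "dependencies": []},
    {"description": "Summarize dataset.csv", "verification": "summary reports the row count", "weight": 2.0, "dependencies": [0]}
  ]
}"#
    }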
/// Execute subtasks using NodeAgents for recursive processing.
///
/// Each subtask is handled by a NodeAgent which can:
/// - Estimate complexity of the subtask
/// - Recursively split if the subtask is still too complex
/// - Execute directly if simple enough
async fn execute_subtasks(
&self,
subtask_plan: SubtaskPlan,
parent_budget: &Budget,
ctx: &AgentContext,
) -> AgentResult {
use super::NodeAgent;
// Convert plan to tasks
let mut tasks = match subtask_plan.into_tasks(parent_budget) {
Ok(t) => t,
Err(e) => return AgentResult::failure(format!("Failed to create subtasks: {}", e), 0),
};
let mut results = Vec::new();
let mut total_cost = 0u64;
// Create a child context with reduced split depth for subtasks
let child_ctx = ctx.child_context();
let total_subtasks = tasks.len();
tracing::info!(
"RootAgent executing {} subtasks (child depth: {})",
total_subtasks,
child_ctx.max_split_depth
);
// Execute each subtask through a NodeAgent (which can recursively split)
for (i, task) in tasks.iter_mut().enumerate() {
tracing::info!(
"RootAgent delegating subtask {}/{}: {}",
i + 1,
total_subtasks,
task.description().chars().take(80).collect::<String>()
);
// Create a NodeAgent for this subtask
let node_agent = NodeAgent::new(format!("subtask-{}", i + 1));
// Execute through the NodeAgent (which may split further if complex)
let result = node_agent.execute(task, &child_ctx).await;
total_cost += result.cost_cents;
tracing::info!(
"Subtask {}/{} {}: {}",
i + 1,
total_subtasks,
if result.success { "succeeded" } else { "failed" },
result.output.chars().take(100).collect::<String>()
);
results.push(result);
}
// Aggregate results
let successes = results.iter().filter(|r| r.success).count();
let total = results.len();
// Concatenate outputs (fallback aggregation for non-tree path)
let combined_output = self.fallback_concatenate_outputs(&results);
if successes == total {
AgentResult::success(combined_output, total_cost)
.with_data(json!({
"subtasks_total": total,
"subtasks_succeeded": successes,
"recursive_execution": true,
"results": results.iter().map(|r| json!({
"success": r.success,
"output": &r.output,
"data": &r.data,
})).collect::<Vec<_>>(),
}))
} else {
AgentResult::failure(
format!("{}/{} subtasks succeeded\n\n{}", successes, total, combined_output),
total_cost,
)
.with_data(json!({
"subtasks_total": total,
"subtasks_succeeded": successes,
"recursive_execution": true,
"results": results.iter().map(|r| json!({
"success": r.success,
"output": &r.output,
"data": &r.data,
})).collect::<Vec<_>>(),
}))
}
}
/// Execute subtasks with tree updates for visualization.
/// Uses wave-based parallel execution for independent tasks.
///
/// # Parameters
/// - `original_task_description`: The user's original request, used for synthesizing the final output
async fn execute_subtasks_with_tree(
&self,
subtask_plan: SubtaskPlan,
parent_budget: &Budget,
child_ctx: &AgentContext,
root_tree: &mut crate::api::control::AgentTreeNode,
ctx: &AgentContext,
requested_model: Option<&str>,
original_task_description: &str,
) -> AgentResult {
use super::NodeAgent;
use std::sync::Arc;
use tokio::sync::Mutex;
// Get execution waves for parallel processing
let waves = match subtask_plan.execution_waves() {
Ok(w) => w,
Err(e) => return AgentResult::failure(format!("Invalid subtask dependencies: {}", e), 0),
};
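        // Illustrative wave shape (hypothetical): dependencies [[], [], [0, 1]]
        // produce waves [[0, 1], [2]] - subtasks 0 and 1 run in parallel, then 2.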
let mut tasks = match subtask_plan.into_tasks(parent_budget) {
Ok(t) => t,
Err(e) => return AgentResult::failure(format!("Failed to create subtasks: {}", e), 0),
};
// Propagate requested_model to all subtasks
if let Some(model) = requested_model {
for task in &mut tasks {
task.analysis_mut().requested_model = Some(model.to_string());
}
}
let total_subtasks = tasks.len();
let num_waves = waves.len();
tracing::info!(
"RootAgent executing {} subtasks in {} wave(s) (child depth: {})",
total_subtasks,
num_waves,
child_ctx.max_split_depth
);
// Wrap tree in Arc<Mutex> for thread-safe parallel updates
let tree = Arc::new(Mutex::new(root_tree.clone()));
let mut all_results = Vec::new();
let mut total_cost = 0u64;
// Execute each wave in parallel
for (wave_idx, wave) in waves.iter().enumerate() {
let is_parallel = wave.len() > 1;
tracing::info!(
"RootAgent wave {}/{}: {} task(s) {}",
wave_idx + 1,
num_waves,
wave.len(),
if is_parallel { "(parallel)" } else { "(sequential)" }
);
// Mark all tasks in this wave as running
{
let mut tree_guard = tree.lock().await;
for &idx in wave {
let subtask_id = format!("subtask-{}", idx + 1);
if let Some(node) = tree_guard.children.iter_mut().find(|n| n.id == subtask_id) {
node.status = "running".to_string();
}
}
ctx.emit_tree(tree_guard.clone());
}
// Execute tasks in this wave in parallel
let wave_futures: Vec<_> = wave.iter().map(|&idx| {
let subtask_id = format!("subtask-{}", idx + 1);
let task = tasks[idx].clone();
let child_ctx = child_ctx.clone();
let tree = Arc::clone(&tree);
let ctx = ctx.clone();
async move {
let mut task = task;
let node_agent = NodeAgent::new(subtask_id.clone());
tracing::info!(
"RootAgent delegating subtask {}: {}",
subtask_id,
task.description().chars().take(80).collect::<String>()
);
// Execute (without tree updates for parallel - update after)
let result = node_agent.execute(&mut task, &child_ctx).await;
// Update tree with result
{
let mut tree_guard = tree.lock().await;
if let Some(node) = tree_guard.children.iter_mut().find(|n| n.id == subtask_id) {
node.status = if result.success { "completed".to_string() } else { "failed".to_string() };
node.budget_spent = result.cost_cents;
}
ctx.emit_tree(tree_guard.clone());
}
tracing::info!(
"Subtask {} {}: {}",
subtask_id,
if result.success { "succeeded" } else { "failed" },
result.output.chars().take(100).collect::<String>()
);
(idx, result)
}
}).collect();
// Wait for all tasks in wave to complete
let wave_results = futures::future::join_all(wave_futures).await;
for (idx, result) in wave_results {
total_cost += result.cost_cents;
// Store result at correct index
while all_results.len() <= idx {
all_results.push(None);
}
all_results[idx] = Some(result);
}
// Emit progress update after each wave
let completed = all_results.iter().filter(|r| r.is_some()).count();
let current_subtask = if wave_idx + 1 < num_waves {
// Next wave's first task description
waves.get(wave_idx + 1).and_then(|w| w.first()).and_then(|&idx| {
tasks.get(idx).map(|t| t.description().chars().take(50).collect::<String>())
})
} else {
None
};
ctx.emit_progress(total_subtasks, completed, current_subtask, 1);
}
// Collect results in order
let results: Vec<AgentResult> = all_results.into_iter().flatten().collect();
// Update the original tree from our Arc<Mutex> version
*root_tree = tree.lock().await.clone();
// Update verifier to running (repurposed as "synthesizer" for complex tasks)
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "verifier") {
node.status = "running".to_string();
node.description = "Synthesizing final output...".to_string();
}
ctx.emit_tree(root_tree.clone());
// Aggregate results
let successes = results.iter().filter(|r| r.success).count();
let total = results.len();
// Synthesize final output from all subtask results
let synthesized_output = if successes > 0 {
self.synthesize_final_output(original_task_description, &results, ctx).await
} else {
format!("{}/{} subtasks succeeded ({} waves)", successes, total, num_waves)
};
// Update verifier/synthesizer to completed
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "verifier") {
node.status = if successes == total { "completed".to_string() } else { "failed".to_string() };
node.budget_spent = 5;
}
ctx.emit_tree(root_tree.clone());
if successes == total {
AgentResult::success(synthesized_output, total_cost)
.with_data(json!({
"subtasks_total": total,
"subtasks_succeeded": successes,
"recursive_execution": true,
"parallel_waves": num_waves,
"results": results.iter().map(|r| json!({
"success": r.success,
"output": &r.output,
"data": &r.data,
})).collect::<Vec<_>>(),
}))
} else {
AgentResult::failure(
format!("{}/{} subtasks succeeded ({} waves)\n\n{}", successes, total, num_waves, synthesized_output),
total_cost,
)
.with_data(json!({
"subtasks_total": total,
"subtasks_succeeded": successes,
"recursive_execution": true,
"parallel_waves": num_waves,
"results": results.iter().map(|r| json!({
"success": r.success,
"output": &r.output,
"data": &r.data,
})).collect::<Vec<_>>(),
}))
}
}
}
impl Default for RootAgent {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Agent for RootAgent {
fn id(&self) -> &AgentId {
&self.id
}
fn agent_type(&self) -> AgentType {
AgentType::Root
}
fn description(&self) -> &str {
"Root orchestrator: estimates complexity, splits tasks, delegates execution"
}
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
use crate::api::control::AgentTreeNode;
let mut total_cost = 0u64;
let task_desc = task.description().chars().take(60).collect::<String>();
let budget_cents = task.budget().total_cents();
// Build initial tree structure
let mut root_tree = AgentTreeNode::new("root", "Root", "Root Agent", &task_desc)
.with_budget(budget_cents, 0)
.with_status("running");
// Add child agent nodes
root_tree.add_child(
AgentTreeNode::new("complexity", "ComplexityEstimator", "Complexity Estimator", "Analyzing task difficulty")
.with_budget(10, 0)
.with_status("running")
);
ctx.emit_tree(root_tree.clone());
// Step 1: Estimate complexity
ctx.emit_phase("estimating_complexity", Some("Analyzing task difficulty..."), Some("RootAgent"));
let complexity = self.estimate_complexity(task, ctx).await;
total_cost += 1;
// Update complexity node
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "complexity") {
node.status = "completed".to_string();
node.complexity = Some(complexity.score());
node.budget_spent = 5;
}
ctx.emit_tree(root_tree.clone());
tracing::info!(
"Task complexity: {:.2} (should_split: {})",
complexity.score(),
complexity.should_split()
);
// Step 2: Decide execution strategy
if complexity.should_split() && ctx.can_split() {
ctx.emit_phase("splitting_task", Some("Decomposing into subtasks..."), Some("RootAgent"));
match self.split_task(task, ctx).await {
Ok(plan) => {
total_cost += 2;
// Add subtask nodes to tree
for (i, subtask) in plan.subtasks().iter().enumerate() {
let subtask_node = AgentTreeNode::new(
&format!("subtask-{}", i + 1),
"Node",
&format!("Subtask {}", i + 1),
&subtask.description.chars().take(50).collect::<String>(),
)
.with_budget(budget_cents / plan.subtasks().len() as u64, 0)
.with_status("pending");
root_tree.add_child(subtask_node);
}
// Add verifier node
root_tree.add_child(
AgentTreeNode::new("verifier", "Verifier", "Verifier", "Verify task completion")
.with_budget(80, 0)
.with_status("pending")
);
ctx.emit_tree(root_tree.clone());
// Execute subtasks with tree updates
let child_ctx = ctx.child_context();
let requested_model = task.analysis().requested_model.as_deref();
let original_task_desc = task.description();
let result = self.execute_subtasks_with_tree(plan, task.budget(), &child_ctx, &mut root_tree, ctx, requested_model, original_task_desc).await;
// Update root status
root_tree.status = if result.success { "completed".to_string() } else { "failed".to_string() };
root_tree.budget_spent = total_cost + result.cost_cents;
ctx.emit_tree(root_tree);
return AgentResult {
success: result.success,
output: result.output,
cost_cents: total_cost + result.cost_cents,
model_used: result.model_used,
data: result.data,
};
}
Err(e) => {
tracing::warn!("Couldn't split task, executing directly: {}", e.output);
}
}
}
// Simple task: add remaining nodes
// Check if there's a model override - show it immediately on the node
let requested_model = task.analysis().requested_model.clone();
let model_selector_node = if let Some(ref model) = requested_model {
AgentTreeNode::new("model-selector", "ModelSelector", "Model Selector", &format!("Using: {}", model))
.with_budget(10, 0)
.with_status("running")
.with_model(model)
} else {
AgentTreeNode::new("model-selector", "ModelSelector", "Model Selector", "Selecting optimal model")
.with_budget(10, 0)
.with_status("running")
};
root_tree.add_child(model_selector_node);
ctx.emit_tree(root_tree.clone());
ctx.emit_phase("selecting_model", Some("Choosing optimal model..."), Some("RootAgent"));
let has_benchmarks = if let Some(b) = &ctx.benchmarks {
let registry = b.read().await;
registry.benchmark_count() > 0
} else {
false
};
let selected_model = if has_benchmarks {
let sel_result = self.model_selector.execute(task, ctx).await;
total_cost += sel_result.cost_cents;
// Model already resolved by ModelSelector
task.analysis().selected_model.clone().unwrap_or_else(|| ctx.config.default_model.clone())
} else {
// No benchmarks - resolve default model to latest version
let default_model = if let Some(resolver) = &ctx.resolver {
let resolver = resolver.read().await;
let resolved = resolver.resolve(&ctx.config.default_model);
if resolved.upgraded {
tracing::info!(
"RootAgent: default model auto-upgraded: {} → {}",
resolved.original, resolved.resolved
);
}
resolved.resolved
} else {
ctx.config.default_model.clone()
};
let a = task.analysis_mut();
a.selected_model = Some(default_model.clone());
default_model
};
// Update model selector node with final selected model
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "model-selector") {
node.status = "completed".to_string();
node.selected_model = Some(selected_model.clone());
node.description = format!("Using: {}", selected_model);
node.budget_spent = 3;
}
// Add executor and verifier nodes
root_tree.add_child(
AgentTreeNode::new("executor", "TaskExecutor", "Task Executor", "Executing task")
.with_budget(budget_cents.saturating_sub(100), 0)
.with_status("running")
);
root_tree.add_child(
AgentTreeNode::new("verifier", "Verifier", "Verifier", "Verify task completion")
.with_budget(80, 0)
.with_status("pending")
);
ctx.emit_tree(root_tree.clone());
ctx.emit_phase("executing", Some("Running task..."), Some("RootAgent"));
let result = self.task_executor.execute(task, ctx).await;
// Update executor node
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "executor") {
node.status = if result.success { "completed".to_string() } else { "failed".to_string() };
node.budget_spent = result.cost_cents;
}
ctx.emit_tree(root_tree.clone());
// Store the executor output for verification
task.set_last_output(result.output.clone());
// Step 3: Verify
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "verifier") {
node.status = "running".to_string();
}
ctx.emit_tree(root_tree.clone());
ctx.emit_phase("verifying", Some("Checking results..."), Some("RootAgent"));
let verification = self.verifier.execute(task, ctx).await;
total_cost += verification.cost_cents;
// Update verifier node
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "verifier") {
node.status = if verification.success { "completed".to_string() } else { "failed".to_string() };
node.budget_spent = verification.cost_cents;
}
// Update root status
root_tree.status = if result.success && verification.success { "completed".to_string() } else { "failed".to_string() };
root_tree.budget_spent = total_cost + result.cost_cents;
ctx.emit_tree(root_tree);
AgentResult {
success: result.success && verification.success,
output: if verification.success {
result.output
} else {
format!("{}\n\nVerification failed: {}", result.output, verification.output)
},
cost_cents: total_cost + result.cost_cents,
model_used: result.model_used,
data: json!({
"complexity": complexity.score(),
"was_split": false,
"verification": verification.data,
"execution": result.data,
}).into(),
}
}
}
#[async_trait]
impl OrchestratorAgent for RootAgent {
fn children(&self) -> Vec<AgentRef> {
vec![
Arc::clone(&self.complexity_estimator) as AgentRef,
Arc::clone(&self.model_selector) as AgentRef,
Arc::clone(&self.task_executor) as AgentRef,
Arc::clone(&self.verifier) as AgentRef,
]
}
fn find_child(&self, agent_type: AgentType) -> Option<AgentRef> {
match agent_type {
AgentType::ComplexityEstimator => Some(Arc::clone(&self.complexity_estimator) as AgentRef),
AgentType::ModelSelector => Some(Arc::clone(&self.model_selector) as AgentRef),
AgentType::TaskExecutor => Some(Arc::clone(&self.task_executor) as AgentRef),
AgentType::Verifier => Some(Arc::clone(&self.verifier) as AgentRef),
_ => None,
}
}
async fn delegate(&self, task: &mut Task, child: AgentRef, ctx: &AgentContext) -> AgentResult {
child.execute(task, ctx).await
}
async fn delegate_all(&self, tasks: &mut [Task], ctx: &AgentContext) -> Vec<AgentResult> {
let mut results = Vec::with_capacity(tasks.len());
for task in tasks {
let result = self.task_executor.execute(task, ctx).await;
results.push(result);
}
results
}
}
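
// A small sanity sketch for `extract_json` above, assuming it remains a
// private associated fn in this module:
#[cfg(test)]
mod extract_json_tests {
    use super::RootAgent;

    // Markdown fences around the JSON are stripped.
    #[test]
    fn strips_markdown_fences() {
        let raw = "```json\n{\"subtasks\": []}\n```";
        assert_eq!(RootAgent::extract_json(raw), "{\"subtasks\": []}");
    }

    // A JSON object embedded in prose is located via the first '{' and last '}'.
    #[test]
    fn finds_embedded_object() {
        let raw = "Here you go: {\"reasoning\": \"ok\"} thanks";
        assert_eq!(RootAgent::extract_json(raw), "{\"reasoning\": \"ok\"}");
    }
}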

177
src/agents/simple.rs Normal file
View File

@@ -0,0 +1,177 @@
//! Simple agent - streamlined single-agent executor.
//!
//! Replaces the complex RootAgent → NodeAgent → ComplexityEstimator → ModelSelector → TaskExecutor → Verifier
//! hierarchy with a single agent that directly executes tasks.
//!
//! # Why SimpleAgent?
//! The multi-agent hierarchy added overhead without reliable benefits:
//! - ComplexityEstimator: LLM-based estimation was unreliable
//! - ModelSelector: U-curve optimization rarely matched simple "use default" strategy
//! - NodeAgent: Recursive splitting lost context and produced worse results
//! - Verifier: Rubber-stamped everything (LLMs are bad at self-verification)
//!
//! # Design
//! - Direct model selection: mission override > config default
//! - No automatic task splitting (user controls granularity)
//! - Built-in blocker detection via system prompt
//! - Mission completion via complete_mission tool
use std::sync::Arc;
use async_trait::async_trait;
use serde_json::json;
use crate::agents::{
Agent, AgentContext, AgentId, AgentResult, AgentType,
leaf::TaskExecutor,
};
use crate::api::control::AgentTreeNode;
use crate::task::Task;
/// Simple agent - unified executor without orchestration overhead.
///
/// # Execution Flow
/// 1. Resolve model (mission override or config default)
/// 2. Build tree for visualization
/// 3. Execute task via TaskExecutor
/// 4. Return result (no verification layer)
pub struct SimpleAgent {
id: AgentId,
task_executor: Arc<TaskExecutor>,
}
impl SimpleAgent {
/// Create a new simple agent.
pub fn new() -> Self {
Self {
id: AgentId::new(),
task_executor: Arc::new(TaskExecutor::new()),
}
}
/// Resolve the model to use for execution.
///
/// Priority:
/// 1. Task's requested model (from mission override)
/// 2. Config default model (auto-upgraded via resolver)
async fn resolve_model(&self, task: &Task, ctx: &AgentContext) -> String {
// Check for explicit model request (from mission override)
if let Some(requested) = &task.analysis().requested_model {
// Resolve to latest version if using resolver
if let Some(resolver) = &ctx.resolver {
let resolver = resolver.read().await;
let resolved = resolver.resolve(requested);
if resolved.upgraded {
tracing::info!(
"SimpleAgent: requested model auto-upgraded: {} → {}",
resolved.original, resolved.resolved
);
}
return resolved.resolved;
}
return requested.clone();
}
// Fall back to config default, resolved to latest version
if let Some(resolver) = &ctx.resolver {
let resolver = resolver.read().await;
let resolved = resolver.resolve(&ctx.config.default_model);
if resolved.upgraded {
tracing::info!(
"SimpleAgent: default model auto-upgraded: {} → {}",
resolved.original, resolved.resolved
);
}
resolved.resolved
} else {
ctx.config.default_model.clone()
}
}
/// Build a simple agent tree for visualization.
fn build_tree(&self, task_desc: &str, budget_cents: u64, model: &str) -> AgentTreeNode {
let mut root = AgentTreeNode::new("root", "Simple", "Simple Agent", task_desc)
.with_budget(budget_cents, 0)
.with_status("running");
// Add executor node
root.add_child(
AgentTreeNode::new("executor", "TaskExecutor", "Task Executor", "Executing task")
.with_budget(budget_cents, 0)
.with_status("running")
.with_model(model)
);
root
}
}
impl Default for SimpleAgent {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Agent for SimpleAgent {
fn id(&self) -> &AgentId {
&self.id
}
fn agent_type(&self) -> AgentType {
AgentType::Root // Presents as Root for compatibility with tree visualization
}
fn description(&self) -> &str {
"Simple agent: direct task execution without orchestration overhead"
}
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
let task_desc = task.description().chars().take(60).collect::<String>();
let budget_cents = task.budget().total_cents();
// Step 1: Resolve model
let model = self.resolve_model(task, ctx).await;
// Update task analysis with selected model
task.analysis_mut().selected_model = Some(model.clone());
tracing::info!(
"SimpleAgent executing task with model '{}': {}...",
model,
task_desc
);
// Step 2: Build and emit tree
let mut tree = self.build_tree(&task_desc, budget_cents, &model);
ctx.emit_tree(tree.clone());
// Step 3: Emit phase (for frontend progress indicator)
ctx.emit_phase("executing", Some("Running task..."), Some("SimpleAgent"));
// Step 4: Execute via TaskExecutor
let result = self.task_executor.execute(task, ctx).await;
// Step 5: Update tree with result
if let Some(node) = tree.children.iter_mut().find(|n| n.id == "executor") {
node.status = if result.success { "completed".to_string() } else { "failed".to_string() };
node.budget_spent = result.cost_cents;
}
tree.status = if result.success { "completed".to_string() } else { "failed".to_string() };
tree.budget_spent = result.cost_cents;
ctx.emit_tree(tree);
// Step 6: Return result with metadata
AgentResult {
success: result.success,
output: result.output,
cost_cents: result.cost_cents,
model_used: result.model_used.or(Some(model)),
data: Some(json!({
"agent": "SimpleAgent",
"execution": result.data,
})),
terminal_reason: result.terminal_reason,
}
}
}
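
// A minimal usage sketch, assuming an `AgentContext` and a funded `Task` are
// constructed elsewhere in the crate; `run_once` is a hypothetical helper.
#[allow(dead_code)]
async fn run_once(ctx: &AgentContext, task: &mut Task) -> AgentResult {
    let agent = SimpleAgent::new();
    agent.execute(task, ctx).await
}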

View File

@@ -1,49 +1,24 @@
//! Empirical tuning parameters for agent heuristics.
//! Tuning parameters (legacy).
//!
//! This module exists to support **trial-and-error calibration**:
//! we run tasks, compare predicted vs actual usage/cost, and update parameters.
//!
//! The core agent logic should remain correct even if tuning values are absent
//! (defaults apply).
//! This module is kept for backwards compatibility but is largely unused
//! since SimpleAgent doesn't require tuning.
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use crate::agents::leaf::ComplexityPromptVariant;
/// Top-level tuning parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Top-level tuning parameters (legacy).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TuningParams {
pub complexity: ComplexityTuning,
pub model_selector: ModelSelectorTuning,
}
impl Default for TuningParams {
fn default() -> Self {
Self {
complexity: ComplexityTuning::default(),
model_selector: ModelSelectorTuning::default(),
}
}
// Empty - SimpleAgent doesn't use tuning
}
impl TuningParams {
/// Load tuning parameters from the working directory, if present.
///
/// # Path
/// `{working_dir}/.open_agent/tuning.json`
pub async fn load_from_working_dir(working_dir: &Path) -> Self {
let path = working_dir.join(".open_agent").join("tuning.json");
match tokio::fs::read_to_string(&path).await {
Ok(s) => serde_json::from_str::<TuningParams>(&s).unwrap_or_default(),
Err(_) => TuningParams::default(),
}
pub async fn load_from_working_dir(_working_dir: &Path) -> Self {
Self::default()
}
/// Save tuning parameters to the working directory.
///
/// # Postcondition
/// If successful, subsequent `load_from_working_dir` returns an equivalent value.
pub async fn save_to_working_dir(&self, working_dir: &Path) -> anyhow::Result<PathBuf> {
let dir = working_dir.join(".open_agent");
tokio::fs::create_dir_all(&dir).await?;
@@ -53,44 +28,3 @@ impl TuningParams {
Ok(path)
}
}
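
// A round-trip sketch of the save/load postcondition documented above,
// assuming a writable working directory:
#[allow(dead_code)]
async fn tuning_round_trip(working_dir: &Path) -> anyhow::Result<()> {
    let params = TuningParams::default();
    params.save_to_working_dir(working_dir).await?;
    // The legacy loader now ignores the directory and returns defaults,
    // so the round-trip is trivially equivalent.
    let _loaded = TuningParams::load_from_working_dir(working_dir).await;
    Ok(())
}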
/// Tuning parameters for ComplexityEstimator.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityTuning {
pub prompt_variant: ComplexityPromptVariant,
pub split_threshold: f64,
pub token_multiplier: f64,
}
impl Default for ComplexityTuning {
fn default() -> Self {
Self {
prompt_variant: ComplexityPromptVariant::CalibratedV2,
split_threshold: 0.60,
token_multiplier: 1.00,
}
}
}
/// Tuning parameters for ModelSelector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelSelectorTuning {
/// Retry multiplier cost penalty for failures.
pub retry_multiplier: f64,
/// Token inefficiency scaling for weaker models.
pub inefficiency_scale: f64,
/// Cap for failure probability.
pub max_failure_probability: f64,
}
impl Default for ModelSelectorTuning {
fn default() -> Self {
Self {
retry_multiplier: 1.5,
inefficiency_scale: 0.5,
max_failure_probability: 0.9,
}
}
}

View File

@@ -60,6 +60,25 @@ impl AgentType {
}
}
/// Reason why agent execution terminated (for non-successful completions).
///
/// Used to determine whether auto-complete should trigger, avoiding substring matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TerminalReason {
/// Agent hit the maximum iteration limit
MaxIterations,
/// Agent was cancelled by user
Cancelled,
/// Budget was exhausted
BudgetExhausted,
/// Agent stalled (no progress, timeouts)
Stalled,
/// Agent got stuck in an infinite loop
InfiniteLoop,
/// LLM API error
LlmError,
}
/// Result of an agent executing a task.
///
/// # Invariants
@@ -81,6 +100,11 @@ pub struct AgentResult {
/// Detailed result data (type-specific)
pub data: Option<serde_json::Value>,
/// If execution ended due to a terminal condition (not normal completion),
/// this indicates why. Used by auto-complete logic to avoid substring matching.
#[serde(skip_serializing_if = "Option::is_none")]
pub terminal_reason: Option<TerminalReason>,
}
impl AgentResult {
@@ -92,6 +116,7 @@ impl AgentResult {
cost_cents,
model_used: None,
data: None,
terminal_reason: None,
}
}
@@ -103,6 +128,7 @@ impl AgentResult {
cost_cents,
model_used: None,
data: None,
terminal_reason: None,
}
}
@@ -117,6 +143,12 @@ impl AgentResult {
self.data = Some(data);
self
}
/// Set the terminal reason (why execution ended abnormally).
pub fn with_terminal_reason(mut self, reason: TerminalReason) -> Self {
self.terminal_reason = Some(reason);
self
}
}
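
// A minimal sketch of tagging an abnormal completion with the builder above
// (the output text and cost are hypothetical):
#[allow(dead_code)]
fn max_iterations_failure() -> AgentResult {
    AgentResult::failure("Stopped at the iteration cap", 12)
        .with_terminal_reason(TerminalReason::MaxIterations)
}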
/// Complexity estimation for a task.

View File

@@ -1740,7 +1740,7 @@ async fn control_actor_loop(
.map(|e| (e.role.clone(), e.content.clone()))
.collect();
*current_mission.write().await = Some(mission_id);
// Update mission status back to active
if let Some(mem) = &memory {
let _ = mem.supabase.update_mission_status(mission_id, "active").await;
@@ -1947,6 +1947,56 @@ async fn control_actor_loop(
// Persist to mission
persist_mission_history(&memory, &current_mission, &history).await;
// P1 FIX: Auto-complete mission if agent execution ended in a terminal state
// without an explicit complete_mission call.
// This prevents missions from staying "active" forever after max iterations, stalls, etc.
//
// We use terminal_reason (structured enum) instead of substring matching to avoid
// false positives when agent output legitimately contains words like "infinite loop".
// We also check the current mission status from DB to handle:
// - Explicit complete_mission calls (which update DB status)
// - Parallel missions (each has its own DB status)
if agent_result.terminal_reason.is_some() {
if let Some(mem) = &memory {
if let Some(mission_id) = current_mission.read().await.clone() {
// Check current mission status from DB - only auto-complete if still "active"
let current_status = mem.supabase.get_mission(mission_id).await
.ok()
.flatten()
.map(|m| m.status);
if current_status.as_deref() == Some("active") {
let status = if agent_result.success { "completed" } else { "failed" };
tracing::info!(
"Auto-completing mission {} with status '{}' (terminal_reason: {:?})",
mission_id, status, agent_result.terminal_reason
);
if let Err(e) = mem.supabase.update_mission_status(mission_id, status).await {
tracing::warn!("Failed to auto-complete mission: {}", e);
} else {
// Emit status change event
let new_status = if agent_result.success {
MissionStatus::Completed
} else {
MissionStatus::Failed
};
let _ = events_tx.send(AgentEvent::MissionStatusChanged {
mission_id,
status: new_status,
summary: Some(format!("Auto-completed: {}",
agent_result.output.chars().take(100).collect::<String>())),
});
}
} else {
tracing::debug!(
"Skipping auto-complete: mission {} already has status {:?}",
mission_id, current_status
);
}
}
}
}
let _ = events_tx.send(AgentEvent::AssistantMessage {
id: Uuid::new_v4(),
content: agent_result.output.clone(),

View File

@@ -220,6 +220,16 @@ pub async fn upload(
format!("{}/{}", q.path, file_name)
};
// Ensure the target directory exists (mkdir -p is idempotent)
let target_dir = if q.path.ends_with('/') {
q.path.trim_end_matches('/').to_string()
} else {
q.path.clone()
};
ssh_exec(&cfg, key_file.path(), "mkdir", &["-p".into(), target_dir])
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to create directory: {}", e)))?;
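    // Illustrative (hypothetical) values: q.path = "uploads/img/" gives
    // target_dir = "uploads/img"; with file_name = "photo.png", the batch
    // below becomes: put -p "/tmp/<tmpfile>" "uploads/img/photo.png"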
let batch = format!("put -p \"{}\" \"{}\"\n", tmp.to_string_lossy(), remote_path);
sftp_batch(&cfg, key_file.path(), &batch)
.await

View File

@@ -21,8 +21,8 @@ use tower_http::cors::CorsLayer;
use tower_http::trace::TraceLayer;
use uuid::Uuid;
use crate::agents::orchestrator::RootAgent;
use crate::agents::{AgentContext, AgentRef, TuningParams};
use crate::agents::SimpleAgent;
use crate::agents::{AgentContext, AgentRef};
use crate::budget::ModelPricing;
use crate::config::Config;
use crate::llm::OpenRouterClient;
@@ -41,7 +41,7 @@ use super::types::*;
pub struct AppState {
pub config: Config,
pub tasks: RwLock<HashMap<Uuid, TaskState>>,
/// The hierarchical root agent
/// The agent used for task execution
pub root_agent: AgentRef,
/// Memory system (optional)
pub memory: Option<MemorySystem>,
@@ -57,11 +57,8 @@ pub struct AppState {
/// Start the HTTP server.
pub async fn serve(config: Config) -> anyhow::Result<()> {
// Load empirically tuned parameters (if present in working directory)
let tuning = TuningParams::load_from_working_dir(&config.working_dir).await;
// Create the root agent (hierarchical)
let root_agent: AgentRef = Arc::new(RootAgent::new_with_tuning(&tuning));
// Create the simple agent (replaces complex RootAgent hierarchy)
let root_agent: AgentRef = Arc::new(SimpleAgent::new());
// Initialize memory system (optional - needs Supabase config)
let memory = memory::init_memory(&config.memory, &config.api_key).await;

View File

@@ -1,244 +0,0 @@
//! Calibration harness for Open Agent estimators.
//!
//! This binary runs trial tasks in a temporary directory and measures:
//! - ComplexityEstimator: predicted tokens vs actual tokens used by TaskExecutor
//! - Split decision quality (against a small labeled set)
//!
//! The goal is *empirical tuning* by trial-and-error, while keeping the core
//! agent code maintainable and (eventually) provable.
//!
//! ## Usage
//!
//! ```bash
//! export OPENROUTER_API_KEY="..."
//! cargo run --release --bin calibrate -- --working-dir /tmp/open_agent_calibration --model openai/gpt-4.1-mini
//! ```
//!
//! Notes:
//! - This will create and delete files under the given directory.
//! - Costs real money. Keep the task set small.
use std::path::{Path, PathBuf};
use std::sync::Arc;
use open_agent::agents::leaf::{ComplexityEstimator, ComplexityPromptVariant, TaskExecutor};
use open_agent::agents::{Agent, AgentContext};
use open_agent::budget::ModelPricing;
use open_agent::config::Config;
use open_agent::llm::OpenRouterClient;
use open_agent::task::{Task, VerificationCriteria};
use open_agent::tools::ToolRegistry;
use open_agent::agents::tuning::{TuningParams, ComplexityTuning};
#[derive(Debug, Clone)]
struct CalibTask {
name: &'static str,
prompt: &'static str,
expected_should_split: bool,
}
fn parse_args() -> (PathBuf, String, bool) {
let mut working_dir = None::<PathBuf>;
let mut model = None::<String>;
let mut write_tuning = false;
let mut args = std::env::args().skip(1);
while let Some(a) = args.next() {
match a.as_str() {
"--working-dir" | "--workspace" => working_dir = args.next().map(PathBuf::from),
"--model" => model = args.next(),
"--write-tuning" => write_tuning = true,
_ => {}
}
}
let working_dir = working_dir.unwrap_or_else(|| PathBuf::from("./.open_agent_calibration"));
let model = model.unwrap_or_else(|| "openai/gpt-4.1-mini".to_string());
(working_dir, model, write_tuning)
}
fn task_set() -> Vec<CalibTask> {
vec![
CalibTask {
name: "hello_world",
prompt: "Create a Python script called hello.py that prints 'Hello World'.",
expected_should_split: false,
},
CalibTask {
name: "calculator",
prompt: "Create a Python script called calculator.py with add/subtract/multiply/divide functions and a small CLI menu.",
expected_should_split: false,
},
CalibTask {
name: "mini_project",
prompt: "Create a tiny Python project with: (1) src/app.py that reads a name from argv and prints a greeting, (2) tests/test_app.py using pytest, (3) a pyproject.toml. Ensure 'python -m pytest' passes.",
expected_should_split: true,
},
]
}
#[derive(Debug, Clone)]
struct Score {
mean_token_rel_error: f64,
split_accuracy: f64,
}
impl Score {
fn objective(&self) -> f64 {
// Lower is better. Penalize wrong split decisions.
self.mean_token_rel_error + (1.0 - self.split_accuracy) * 0.50
}
}
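
// A quick worked example of the objective above (a sketch, assuming this sits
// alongside `Score` in this binary): rel. token error 0.30 with split accuracy
// 0.67 scores 0.30 + 0.33 * 0.5 = 0.465 (lower is better).
#[cfg(test)]
mod score_tests {
    use super::Score;

    #[test]
    fn objective_combines_error_and_split_penalty() {
        let s = Score { mean_token_rel_error: 0.30, split_accuracy: 0.67 };
        assert!((s.objective() - 0.465).abs() < 1e-9);
    }
}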
async fn ensure_clean_dir(dir: &Path) -> anyhow::Result<()> {
if dir.exists() {
tokio::fs::remove_dir_all(dir).await?;
}
tokio::fs::create_dir_all(dir).await?;
Ok(())
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let (working_dir_root, exec_model, write_tuning) = parse_args();
let api_key = std::env::var("OPENROUTER_API_KEY")
.map_err(|_| anyhow::anyhow!("OPENROUTER_API_KEY must be set for calibration"))?;
let tasks = task_set();
// Grid to try.
let variants = [
ComplexityPromptVariant::RubricV1,
ComplexityPromptVariant::CalibratedV2,
];
let split_thresholds = [0.55, 0.60, 0.65];
let token_multipliers = [0.9, 1.0, 1.1, 1.2, 1.3];
let llm: Arc<dyn open_agent::llm::LlmClient> = Arc::new(OpenRouterClient::new(api_key));
let pricing = Arc::new(ModelPricing::new());
let mut best = None::<(ComplexityPromptVariant, f64, f64, Score)>;
for &variant in &variants {
for &split_threshold in &split_thresholds {
for &token_mult in &token_multipliers {
let mut rel_errors = Vec::new();
let mut correct_split = 0usize;
for t in &tasks {
let wd = working_dir_root.join(format!(
"{}_st{}_tm{}",
t.name,
(split_threshold * 100.0) as u64,
(token_mult * 100.0) as u64
));
ensure_clean_dir(&wd).await?;
// Minimal config for context.
let cfg = Config::new("<redacted>".to_string(), exec_model.clone(), wd.clone());
let ctx = AgentContext::new(
cfg,
Arc::clone(&llm),
ToolRegistry::new(),
Arc::clone(&pricing),
wd.clone(),
);
// Build task with generous budget.
let budget = open_agent::budget::Budget::new(10_000); // $100 in cents
let mut task = Task::new(
t.prompt.to_string(),
VerificationCriteria::None,
budget,
)?;
// Run estimator (with candidate params).
let estimator = ComplexityEstimator::with_params(variant, split_threshold, token_mult);
let _ = estimator.execute(&mut task, &ctx).await;
let predicted_tokens = task.analysis().estimated_total_tokens.unwrap_or(2000);
let predicted_split = task.analysis().should_split.unwrap_or(false);
if predicted_split == t.expected_should_split {
correct_split += 1;
}
// Force execution model for comparability.
task.analysis_mut().selected_model = Some(exec_model.clone());
let executor = TaskExecutor::new();
let _exec_res = executor.execute(&mut task, &ctx).await;
let actual_tokens = task
.analysis()
.actual_usage
.as_ref()
.map(|u| u.total_tokens)
.unwrap_or(predicted_tokens);
let denom = (actual_tokens as f64).max(1.0);
let rel = ((predicted_tokens as f64) - (actual_tokens as f64)).abs() / denom;
rel_errors.push(rel);
}
let mean_token_rel_error = if rel_errors.is_empty() {
1.0
} else {
rel_errors.iter().sum::<f64>() / (rel_errors.len() as f64)
};
let split_accuracy = (correct_split as f64) / (tasks.len() as f64);
let score = Score {
mean_token_rel_error,
split_accuracy,
};
let candidate = (variant, split_threshold, token_mult, score.clone());
let better = best
.as_ref()
.map(|(_, _, _, s)| score.objective() < s.objective())
.unwrap_or(true);
if better {
best = Some(candidate);
eprintln!(
"New best: variant={:?} split={:.2} mult={:.2} token_err={:.3} split_acc={:.2} obj={:.3}",
variant,
split_threshold,
token_mult,
score.mean_token_rel_error,
score.split_accuracy,
score.objective()
);
}
}
}
}
if let Some((variant, split_threshold, token_mult, score)) = best {
println!("=== Recommended ComplexityEstimator Settings ===");
println!("prompt_variant: {:?}", variant);
println!("split_threshold: {:.2}", split_threshold);
println!("token_multiplier: {:.2}", token_mult);
println!("mean_token_rel_error: {:.3}", score.mean_token_rel_error);
println!("split_accuracy: {:.2}", score.split_accuracy);
if write_tuning {
let mut tuning = TuningParams::default();
tuning.complexity = ComplexityTuning {
prompt_variant: variant,
split_threshold,
token_multiplier: token_mult,
};
let path = tuning.save_to_working_dir(&working_dir_root).await?;
println!("Wrote tuning file to {}", path.to_string_lossy());
}
} else {
println!("No calibration result produced.");
}
Ok(())
}

View File

@@ -238,7 +238,10 @@ impl ModelPricing {
// Flagship tier
"openai/o1",
"openai/o1-preview",
"openai/gpt-5.2-pro",
// Mid tier
"openai/gpt-5.2",
"openai/gpt-5.2-chat",
"openai/gpt-4.1",
"openai/gpt-4o",
"openai/gpt-4-turbo",

View File

@@ -4,33 +4,33 @@
//!
//! This library provides:
//! - An HTTP API for task submission and monitoring
//! - A hierarchical agent tree for complex task handling
//! - A simple agent architecture for direct task execution
//! - Tool-based execution for autonomous code editing
//! - Integration with OpenRouter for LLM access
//!
//! ## Architecture (v2: Hierarchical Agent Tree)
//! ## Architecture (v3: SimpleAgent)
//!
//! ```text
//! ┌─────────────┐
//! │  RootAgent  │
//! └──────┬──────┘
//!        ├──────────────────┬─────────────────┐
//!        ▼                  ▼                 ▼
//! ┌───────────────┐  ┌─────────────┐  ┌─────────────┐
//! │  Complexity   │  │    Model    │  │    Task     │
//! │  Estimator    │  │  Selector   │  │  Executor   │
//! └───────────────┘  └─────────────┘  └─────────────┘
//! ┌──────────────────────────────────┐
//! │           SimpleAgent            │
//! │ (direct execution, no overhead)  │
//! └──────────────────────────────────┘
//!                 │
//!                 ▼
//!        ┌─────────────────┐
//!        │  TaskExecutor   │
//!        │   (tool loop)   │
//!        └─────────────────┘
//! ```
//!
//! ## Task Flow
//! 1. Receive task via API
//! 2. Estimate complexity (should we split?)
//! 3. Select optimal model (U-curve cost optimization)
//! 4. Execute (directly or via subtasks)
//! 5. Verify completion (programmatic + LLM hybrid)
//! 2. Resolve model (user override or config default)
//! 3. Execute via TaskExecutor (tool loop)
//! 4. Return result (mission completion via complete_mission tool)
//!
//! ## Modules
//! - `agents`: Hierarchical agent tree (Root, Node, Leaf agents)
//! - `agents`: SimpleAgent and TaskExecutor
//! - `task`: Task, subtask, and verification types
//! - `budget`: Cost tracking and model pricing

View File

@@ -129,3 +129,6 @@ struct EmbeddingUsage {

View File

@@ -58,7 +58,27 @@ impl Tool for ReadFile {
return Err(anyhow::anyhow!("File not found: {} (resolved to: {})", path, resolution.resolved.display()));
}
let content = tokio::fs::read_to_string(&resolution.resolved).await?;
// Try to read as UTF-8 text, detect binary files
let bytes = tokio::fs::read(&resolution.resolved).await?;
let content = match String::from_utf8(bytes) {
Ok(text) => text,
Err(_) => {
// Binary file detected - don't try to display content
let ext = resolution.resolved.extension()
.map(|e| e.to_string_lossy().to_lowercase())
.unwrap_or_default();
return Ok(format!(
"Binary file detected: {} ({} bytes)\n\n\
Cannot display binary content directly. For this file type:\n\
- .jar/.zip: Use `run_command` with `unzip -l` to list contents, or `jar tf` for JAR files\n\
- .class: Use `run_command` with a Java decompiler like `javap -c` or `cfr`\n\
- Images: cannot be shown as text; use an image-processing tool\n\
- Executables: Use `file` command to identify, `strings` to extract text",
resolution.resolved.display(),
resolution.resolved.metadata().map(|m| m.len()).unwrap_or(0)
));
}
};
// Handle optional line range
let start_line = args["start_line"].as_u64().map(|n| n as usize);

View File

@@ -632,3 +632,6 @@ Note: GitHub code search requires authentication. Set GH_TOKEN env var."
Ok(format!("## Search results for '{}'\n\n{}", query, stdout))
}
}

View File

@@ -15,6 +15,34 @@ use tokio::process::Command;
use super::{resolve_path_simple as resolve_path, Tool};
/// Sanitize command output to be safe for LLM consumption.
/// Removes binary garbage while preserving valid text.
fn sanitize_output(bytes: &[u8]) -> String {
// Check if output appears to be mostly binary
let non_printable_count = bytes.iter()
.filter(|&&b| b < 0x20 && b != b'\n' && b != b'\r' && b != b'\t')
.count();
// If more than 10% is non-printable (excluding newlines/tabs), it's likely binary
if bytes.len() > 100 && non_printable_count > bytes.len() / 10 {
return format!(
"[Binary output detected - {} bytes, {}% non-printable. \
Use appropriate tools to process binary data.]",
bytes.len(),
non_printable_count * 100 / bytes.len()
);
}
// Convert to string, replacing invalid UTF-8
let text = String::from_utf8_lossy(bytes);
// Remove null bytes and other problematic control characters
// Keep: newlines, tabs, carriage returns
text.chars()
.filter(|&c| c == '\n' || c == '\r' || c == '\t' || (c >= ' ' && c != '\u{FFFD}'))
.collect()
}
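
// A minimal sanity check for the 10% heuristic above (a sketch, assuming it
// lives in the same module as `sanitize_output`):
#[cfg(test)]
mod sanitize_tests {
    use super::sanitize_output;

    // Plain text with newlines and tabs passes through unchanged.
    #[test]
    fn text_passes_through() {
        assert_eq!(sanitize_output(b"hello\tworld\n"), "hello\tworld\n");
    }

    // A buffer dominated by NUL bytes trips the binary summary path.
    #[test]
    fn binary_is_summarized() {
        let bytes = vec![0u8; 200];
        assert!(sanitize_output(&bytes).starts_with("[Binary output detected"));
    }
}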
/// Dangerous command patterns that should be blocked.
/// These patterns cause infinite loops or could damage the system.
const DANGEROUS_PATTERNS: &[(&str, &str)] = &[
@@ -146,8 +174,8 @@ impl Tool for RunCommand {
}
};
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
let stdout = sanitize_output(&output.stdout);
let stderr = sanitize_output(&output.stderr);
let exit_code = output.status.code().unwrap_or(-1);
tracing::debug!("Command completed: exit={}, stdout_len={}, stderr_len={}",
@@ -157,6 +185,11 @@ impl Tool for RunCommand {
result.push_str(&format!("Exit code: {}\n", exit_code));
// Add hint when non-zero exit but output exists (common with tools that warn but succeed)
if exit_code != 0 && !stdout.is_empty() {
result.push_str("Note: Non-zero exit code but output was produced. The command may have succeeded with warnings - verify output files exist.\n");
}
if !stdout.is_empty() {
result.push_str("\n--- stdout ---\n");
result.push_str(&stdout);